4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
import itertools
import logging
import operator
import time

from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import compat
46 from ganeti import masterd
47 from ganeti import netutils
48 from ganeti import query
49 from ganeti import qlang
50 from ganeti import opcodes
52 from ganeti import rpc
53 from ganeti import pathutils
54 from ganeti import network
55 from ganeti.masterd import iallocator
from ganeti.cmdlib.base import ResultWithJobs, LogicalUnit, NoHooksLU, \
  _QueryBase
59 from ganeti.cmdlib.common import _ExpandInstanceName, _ExpandItemName, \
60 _ExpandNodeName, _ShareAll, _CheckNodeGroupInstances, _GetWantedNodes, \
61 _GetWantedInstances, _RunPostHook, _RedistributeAncillaryFiles, \
62 _MergeAndVerifyHvState, _MergeAndVerifyDiskState, _GetUpdatedIPolicy, \
63 _ComputeNewInstanceViolations, _GetUpdatedParams, _CheckOSParams, \
64 _CheckHVParams, _AdjustCandidatePool, _CheckNodePVs, \
65 _ComputeIPolicyInstanceViolation, _AnnotateDiskParams, _SupportsOob, \
66 _ComputeIPolicySpecViolation, _GetDefaultIAllocator, \
67 _CheckInstancesNodeGroups, _LoadNodeEvacResult, _MapInstanceDisksToNodes, \
68 _CheckInstanceNodeGroups
70 from ganeti.cmdlib.cluster import LUClusterActivateMasterIp, \
71 LUClusterDeactivateMasterIp, LUClusterConfigQuery, LUClusterDestroy, \
72 LUClusterPostInit, _ClusterQuery, LUClusterQuery, LUClusterRedistConf, \
73 LUClusterRename, LUClusterRepairDiskSizes, LUClusterSetParams, \
74 LUClusterVerify, LUClusterVerifyConfig, LUClusterVerifyGroup, \
76 from ganeti.cmdlib.group import LUGroupAdd, LUGroupAssignNodes, \
77 _GroupQuery, LUGroupQuery, LUGroupSetParams, LUGroupRemove, \
78 LUGroupRename, LUGroupEvacuate, LUGroupVerifyDisks
79 from ganeti.cmdlib.tags import LUTagsGet, LUTagsSearch, LUTagsSet, LUTagsDel
80 from ganeti.cmdlib.network import LUNetworkAdd, LUNetworkRemove, \
81 LUNetworkSetParams, _NetworkQuery, LUNetworkQuery, LUNetworkConnect, \
83 from ganeti.cmdlib.test import LUTestDelay, LUTestJqueue, LUTestAllocator
85 import ganeti.masterd.instance # pylint: disable=W0611
89 INSTANCE_DOWN = [constants.ADMINST_DOWN]
90 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
91 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
93 #: Instance status in which an instance can be marked as offline/online
94 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
99 def _IsExclusiveStorageEnabledNode(cfg, node):
100 """Whether exclusive_storage is in effect for the given node.
102 @type cfg: L{config.ConfigWriter}
103 @param cfg: The cluster configuration
104 @type node: L{objects.Node}
105 @param node: The node
107 @return: The effective value of exclusive_storage
110 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
113 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
114 """Whether exclusive_storage is in effect for the given node.
116 @type cfg: L{config.ConfigWriter}
117 @param cfg: The cluster configuration
118 @type nodename: string
  @param nodename: The node name
121 @return: The effective value of exclusive_storage
122 @raise errors.OpPrereqError: if no node exists with the given name
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
                               errors.ECODE_NOENT)
  return _IsExclusiveStorageEnabledNode(cfg, ni)
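# Illustrative sketch (not part of the original module): a LU's CheckPrereq
# could use the helpers above to adapt its behaviour to exclusive storage,
# e.g.:
#
#   if _IsExclusiveStorageEnabledNodeName(self.cfg, self.op.node_name):
#     self.LogInfo("Node %s uses exclusive storage", self.op.node_name)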
132 def _CopyLockList(names):
133 """Makes a copy of a list of lock names.
135 Handles L{locking.ALL_SET} correctly.
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]
144 def _ReleaseLocks(lu, level, names=None, keep=None):
145 """Releases locks owned by an LU.
147 @type lu: L{LogicalUnit}
148 @param level: Lock level
149 @type names: list or None
150 @param names: Names of locks to release
151 @type keep: list or None
152 @param keep: Names of locks to retain
155 assert not (keep is not None and names is not None), \
156 "Only one of the 'names' and the 'keep' parameters can be given"
  if names is not None:
    should_release = names.__contains__
  elif keep is not None:
    should_release = lambda name: name not in keep
  else:
    should_release = None
165 owned = lu.owned_locks(level)
167 # Not owning any lock at this level, do nothing
174 # Determine which locks to release
176 if should_release(name):
181 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
183 # Release just some locks
184 lu.glm.release(level, names=release)
186 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
189 lu.glm.release(level)
191 assert not lu.glm.is_owned(level), "No locks should be owned"
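# Illustrative sketch (not part of the original module): callers either name
# the locks to drop or the locks to keep, never both; with no list at all,
# every lock at the level is released.
#
#   # hypothetical LU keeping only the locks of the instance's nodes
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.instance.all_nodes)
#   # drop everything acquired at this level
#   _ReleaseLocks(self, locking.LEVEL_NODE)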
194 def _CheckOutputFields(static, dynamic, selected):
195 """Checks whether all selected fields are valid.
197 @type static: L{utils.FieldSet}
198 @param static: static fields set
199 @type dynamic: L{utils.FieldSet}
200 @param dynamic: dynamic fields set
207 delta = f.NonMatching(selected)
209 raise errors.OpPrereqError("Unknown output fields selected: %s"
210 % ",".join(delta), errors.ECODE_INVAL)
213 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
214 """Make sure that none of the given paramters is global.
216 If a global parameter is found, an L{errors.OpPrereqError} exception is
217 raised. This is used to avoid setting global parameters for individual nodes.
219 @type params: dictionary
220 @param params: Parameters to check
221 @type glob_pars: dictionary
222 @param glob_pars: Forbidden parameters
224 @param kind: Kind of parameters (e.g. "node")
225 @type bad_levels: string
226 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
228 @type good_levels: strings
229 @param good_levels: Level(s) at which the parameters are allowed (e.g.
233 used_globals = glob_pars.intersection(params)
235 msg = ("The following %s parameters are global and cannot"
236 " be customized at %s level, please modify them at"
238 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
239 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
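# Illustrative sketch (not part of the original module): rejecting per-node
# parameters that may only be set cluster-wide; the exact set of forbidden
# parameters (here assumed to be constants.NDC_GLOBALS) is defined elsewhere.
#
#   _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS,
#                         "node", "node", "cluster or group")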
242 def _CheckNodeOnline(lu, node, msg=None):
243 """Ensure that a given node is online.
245 @param lu: the LU on behalf of which we make the check
246 @param node: the node to check
247 @param msg: if passed, should be a message to replace the default one
248 @raise errors.OpPrereqError: if the node is offline
252 msg = "Can't use offline node"
253 if lu.cfg.GetNodeInfo(node).offline:
254 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
257 def _CheckNodeNotDrained(lu, node):
258 """Ensure that a given node is not drained.
260 @param lu: the LU on behalf of which we make the check
261 @param node: the node to check
262 @raise errors.OpPrereqError: if the node is drained
265 if lu.cfg.GetNodeInfo(node).drained:
266 raise errors.OpPrereqError("Can't use drained node %s" % node,
270 def _CheckNodeVmCapable(lu, node):
271 """Ensure that a given node is vm capable.
273 @param lu: the LU on behalf of which we make the check
274 @param node: the node to check
275 @raise errors.OpPrereqError: if the node is not vm capable
278 if not lu.cfg.GetNodeInfo(node).vm_capable:
279 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
283 def _CheckNodeHasOS(lu, node, os_name, force_variant):
284 """Ensure that a node supports a given OS.
286 @param lu: the LU on behalf of which we make the check
287 @param node: the node to check
288 @param os_name: the OS to query about
289 @param force_variant: whether to ignore variant errors
290 @raise errors.OpPrereqError: if the node is not supporting the OS
293 result = lu.rpc.call_os_get(node, os_name)
294 result.Raise("OS '%s' not in supported OS list for node %s" %
296 prereq=True, ecode=errors.ECODE_INVAL)
297 if not force_variant:
298 _CheckOSVariant(result.payload, os_name)
301 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
302 """Ensure that a node has the given secondary ip.
304 @type lu: L{LogicalUnit}
305 @param lu: the LU on behalf of which we make the check
307 @param node: the node to check
308 @type secondary_ip: string
309 @param secondary_ip: the ip to check
310 @type prereq: boolean
311 @param prereq: whether to throw a prerequisite or an execute error
312 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
313 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
316 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
317 result.Raise("Failure checking secondary ip on node %s" % node,
318 prereq=prereq, ecode=errors.ECODE_ENVIRON)
319 if not result.payload:
320 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
321 " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
328 def _GetClusterDomainSecret():
329 """Reads the cluster domain secret.
332 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
336 def _CheckInstanceState(lu, instance, req_states, msg=None):
337 """Ensure that an instance is in one of the required states.
339 @param lu: the LU on behalf of which we make the check
340 @param instance: the instance to check
341 @param msg: if passed, should be a message to replace the default one
342 @raise errors.OpPrereqError: if the instance is not in the required state
346 msg = ("can't use instance from outside %s states" %
347 utils.CommaJoin(req_states))
348 if instance.admin_state not in req_states:
349 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
350 (instance.name, instance.admin_state, msg),
353 if constants.ADMINST_UP not in req_states:
354 pnode = instance.primary_node
355 if not lu.cfg.GetNodeInfo(pnode).offline:
356 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
357 ins_l.Raise("Can't contact node %s for instance information" % pnode,
358 prereq=True, ecode=errors.ECODE_ENVIRON)
359 if instance.name in ins_l.payload:
360 raise errors.OpPrereqError("Instance %s is running, %s" %
361 (instance.name, msg), errors.ECODE_STATE)
363 lu.LogWarning("Primary node offline, ignoring check that instance"
367 def _ComputeIPolicyInstanceSpecViolation(
368 ipolicy, instance_spec, disk_template,
369 _compute_fn=_ComputeIPolicySpecViolation):
370 """Compute if instance specs meets the specs of ipolicy.
373 @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
375 @param instance_spec: The instance spec to verify
376 @type disk_template: string
377 @param disk_template: the disk template of the instance
378 @param _compute_fn: The function to verify ipolicy (unittest only)
379 @see: L{_ComputeIPolicySpecViolation}
382 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
383 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
384 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
385 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
386 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
387 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
389 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
390 disk_sizes, spindle_use, disk_template)
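# Illustrative sketch (not part of the original module): instance_spec is a
# partial ISPEC_* dictionary; keys that are left out fall back to the defaults
# used in the function above.
#
#   spec = {
#     constants.ISPEC_MEM_SIZE: 512,
#     constants.ISPEC_CPU_COUNT: 1,
#     constants.ISPEC_DISK_COUNT: 1,
#     constants.ISPEC_DISK_SIZE: [1024],
#     constants.ISPEC_NIC_COUNT: 1,
#   }
#   violations = _ComputeIPolicyInstanceSpecViolation(ipolicy, spec,
#                                                     constants.DT_PLAIN)
#   # an empty result means the spec fits the policy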
393 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
395 _compute_fn=_ComputeIPolicyInstanceViolation):
396 """Compute if instance meets the specs of the new target group.
398 @param ipolicy: The ipolicy to verify
399 @param instance: The instance object to verify
400 @param current_group: The current group of the instance
401 @param target_group: The new group of the instance
402 @type cfg: L{config.ConfigWriter}
403 @param cfg: Cluster configuration
404 @param _compute_fn: The function to verify ipolicy (unittest only)
405 @see: L{_ComputeIPolicySpecViolation}
408 if current_group == target_group:
411 return _compute_fn(ipolicy, instance, cfg)
414 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
415 _compute_fn=_ComputeIPolicyNodeViolation):
416 """Checks that the target node is correct in terms of instance policy.
418 @param ipolicy: The ipolicy to verify
419 @param instance: The instance object to verify
420 @param node: The new node to relocate
421 @type cfg: L{config.ConfigWriter}
422 @param cfg: Cluster configuration
423 @param ignore: Ignore violations of the ipolicy
424 @param _compute_fn: The function to verify ipolicy (unittest only)
425 @see: L{_ComputeIPolicySpecViolation}
428 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
429 res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)
432 msg = ("Instance does not meet target node group's (%s) instance"
433 " policy: %s") % (node.group, utils.CommaJoin(res))
437 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
440 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
441 minmem, maxmem, vcpus, nics, disk_template, disks,
442 bep, hvp, hypervisor_name, tags):
443 """Builds instance related env variables for hooks
445 This builds the hook environment from individual variables.
448 @param name: the name of the instance
449 @type primary_node: string
450 @param primary_node: the name of the instance's primary node
451 @type secondary_nodes: list
452 @param secondary_nodes: list of secondary nodes as strings
453 @type os_type: string
454 @param os_type: the name of the instance's OS
456 @param status: the desired status of the instance
458 @param minmem: the minimum memory size of the instance
460 @param maxmem: the maximum memory size of the instance
462 @param vcpus: the count of VCPUs the instance has
464 @param nics: list of tuples (name, uuid, ip, mac, mode, link, net, netinfo)
465 representing the NICs the instance has
466 @type disk_template: string
467 @param disk_template: the disk template of the instance
469 @param disks: list of tuples (name, uuid, size, mode)
471 @param bep: the backend parameters for the instance
473 @param hvp: the hypervisor parameters for the instance
474 @type hypervisor_name: string
475 @param hypervisor_name: the hypervisor for the instance
477 @param tags: list of instance tags as strings
479 @return: the hook environment for this instance
484 "INSTANCE_NAME": name,
485 "INSTANCE_PRIMARY": primary_node,
486 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
487 "INSTANCE_OS_TYPE": os_type,
488 "INSTANCE_STATUS": status,
489 "INSTANCE_MINMEM": minmem,
490 "INSTANCE_MAXMEM": maxmem,
491 # TODO(2.9) remove deprecated "memory" value
492 "INSTANCE_MEMORY": maxmem,
493 "INSTANCE_VCPUS": vcpus,
494 "INSTANCE_DISK_TEMPLATE": disk_template,
495 "INSTANCE_HYPERVISOR": hypervisor_name,
498 nic_count = len(nics)
499 for idx, (name, _, ip, mac, mode, link, net, netinfo) in enumerate(nics):
502 env["INSTANCE_NIC%d_NAME" % idx] = name
503 env["INSTANCE_NIC%d_IP" % idx] = ip
504 env["INSTANCE_NIC%d_MAC" % idx] = mac
505 env["INSTANCE_NIC%d_MODE" % idx] = mode
506 env["INSTANCE_NIC%d_LINK" % idx] = link
508 nobj = objects.Network.FromDict(netinfo)
509 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
511 # FIXME: broken network reference: the instance NIC specifies a
512 # network, but the relevant network entry was not in the config. This
513 # should be made impossible.
514 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
515 if mode == constants.NIC_MODE_BRIDGED:
516 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
520 env["INSTANCE_NIC_COUNT"] = nic_count
523 disk_count = len(disks)
524 for idx, (name, size, mode) in enumerate(disks):
525 env["INSTANCE_DISK%d_NAME" % idx] = name
526 env["INSTANCE_DISK%d_SIZE" % idx] = size
527 env["INSTANCE_DISK%d_MODE" % idx] = mode
531 env["INSTANCE_DISK_COUNT"] = disk_count
536 env["INSTANCE_TAGS"] = " ".join(tags)
538 for source, kind in [(bep, "BE"), (hvp, "HV")]:
539 for key, value in source.items():
540 env["INSTANCE_%s_%s" % (kind, key)] = value
545 def _NICToTuple(lu, nic):
546 """Build a tupple of nic information.
548 @type lu: L{LogicalUnit}
549 @param lu: the logical unit on whose behalf we execute
550 @type nic: L{objects.NIC}
551 @param nic: nic to convert to hooks tuple
554 cluster = lu.cfg.GetClusterInfo()
555 filled_params = cluster.SimpleFillNIC(nic.nicparams)
556 mode = filled_params[constants.NIC_MODE]
557 link = filled_params[constants.NIC_LINK]
560 nobj = lu.cfg.GetNetwork(nic.network)
561 netinfo = objects.Network.ToDict(nobj)
562 return (nic.name, nic.uuid, nic.ip, nic.mac, mode, link, nic.network, netinfo)
565 def _NICListToTuple(lu, nics):
566 """Build a list of nic information tuples.
568 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
569 value in LUInstanceQueryData.
571 @type lu: L{LogicalUnit}
572 @param lu: the logical unit on whose behalf we execute
573 @type nics: list of L{objects.NIC}
574 @param nics: list of nics to convert to hooks tuples
579 hooks_nics.append(_NICToTuple(lu, nic))
583 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
584 """Builds instance related env variables for hooks from an object.
586 @type lu: L{LogicalUnit}
587 @param lu: the logical unit on whose behalf we execute
588 @type instance: L{objects.Instance}
589 @param instance: the instance for which we should build the
592 @param override: dictionary with key/values that will override
595 @return: the hook environment dictionary
598 cluster = lu.cfg.GetClusterInfo()
599 bep = cluster.FillBE(instance)
600 hvp = cluster.FillHV(instance)
602 "name": instance.name,
603 "primary_node": instance.primary_node,
604 "secondary_nodes": instance.secondary_nodes,
605 "os_type": instance.os,
606 "status": instance.admin_state,
607 "maxmem": bep[constants.BE_MAXMEM],
608 "minmem": bep[constants.BE_MINMEM],
609 "vcpus": bep[constants.BE_VCPUS],
610 "nics": _NICListToTuple(lu, instance.nics),
611 "disk_template": instance.disk_template,
612 "disks": [(disk.name, disk.size, disk.mode)
613 for disk in instance.disks],
616 "hypervisor_name": instance.hypervisor,
617 "tags": instance.tags,
620 args.update(override)
621 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
624 def _DecideSelfPromotion(lu, exceptions=None):
625 """Decide whether I should promote myself as a master candidate.
628 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
629 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
630 # the new node will increase mc_max with one, so:
631 mc_should = min(mc_should + 1, cp_size)
632 return mc_now < mc_should
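# Illustrative example (not part of the original module): with
# candidate_pool_size = 10, mc_now = 3 and mc_should = 3, adding this node
# yields mc_should = min(3 + 1, 10) = 4 > mc_now, so the node promotes itself
# to master candidate.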
635 def _CheckNicsBridgesExist(lu, target_nics, target_node):
636 """Check that the brigdes needed by a list of nics exist.
639 cluster = lu.cfg.GetClusterInfo()
640 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
641 brlist = [params[constants.NIC_LINK] for params in paramslist
642 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
644 result = lu.rpc.call_bridges_exist(target_node, brlist)
645 result.Raise("Error checking bridges on destination node '%s'" %
646 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
649 def _CheckInstanceBridgesExist(lu, instance, node=None):
650 """Check that the brigdes needed by an instance exist.
654 node = instance.primary_node
655 _CheckNicsBridgesExist(lu, instance.nics, node)
658 def _CheckOSVariant(os_obj, name):
659 """Check whether an OS name conforms to the os variants specification.
661 @type os_obj: L{objects.OS}
662 @param os_obj: OS object to check
664 @param name: OS name passed by the user, to check for validity
667 variant = objects.OS.GetVariant(name)
668 if not os_obj.supported_variants:
670 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
671 " passed)" % (os_obj.name, variant),
675 raise errors.OpPrereqError("OS name must include a variant",
678 if variant not in os_obj.supported_variants:
679 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
682 def _GetNodeInstancesInner(cfg, fn):
683 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
686 def _GetNodeInstances(cfg, node_name):
687 """Returns a list of all primary and secondary instances on a node.
691 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
694 def _GetNodePrimaryInstances(cfg, node_name):
695 """Returns primary instances on a node.
698 return _GetNodeInstancesInner(cfg,
699 lambda inst: node_name == inst.primary_node)
702 def _GetNodeSecondaryInstances(cfg, node_name):
703 """Returns secondary instances on a node.
706 return _GetNodeInstancesInner(cfg,
707 lambda inst: node_name in inst.secondary_nodes)
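# Illustrative sketch (not part of the original module): these helpers scan the
# full instance dictionary, e.g. to refuse removing a node that still hosts
# instances.
#
#   if _GetNodeInstances(self.cfg, self.op.node_name):
#     raise errors.OpPrereqError("Node still hosts instances",
#                                errors.ECODE_STATE)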
710 def _GetStorageTypeArgs(cfg, storage_type):
711 """Returns the arguments for a storage type.
714 # Special case for file storage
715 if storage_type == constants.ST_FILE:
716 # storage.FileStorage wants a list of storage directories
717 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
722 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
725 for dev in instance.disks:
726 cfg.SetDiskID(dev, node_name)
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
730 result.Raise("Failed to get disk status from node %s" % node_name,
731 prereq=prereq, ecode=errors.ECODE_ENVIRON)
733 for idx, bdev_status in enumerate(result.payload):
734 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
740 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
741 """Check the sanity of iallocator and node arguments and use the
742 cluster-wide iallocator if appropriate.
744 Check that at most one of (iallocator, node) is specified. If none is
745 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
746 then the LU's opcode's iallocator slot is filled with the cluster-wide
749 @type iallocator_slot: string
750 @param iallocator_slot: the name of the opcode iallocator slot
751 @type node_slot: string
752 @param node_slot: the name of the opcode target node slot
755 node = getattr(lu.op, node_slot, None)
756 ialloc = getattr(lu.op, iallocator_slot, None)
760 if node is not None and ialloc is not None:
761 raise errors.OpPrereqError("Do not specify both, iallocator and node",
763 elif ((node is None and ialloc is None) or
764 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
765 default_iallocator = lu.cfg.GetDefaultIAllocator()
766 if default_iallocator:
767 setattr(lu.op, iallocator_slot, default_iallocator)
769 raise errors.OpPrereqError("No iallocator or node given and no"
770 " cluster-wide default iallocator found;"
771 " please specify either an iallocator or a"
772 " node, or set a cluster-wide default"
773 " iallocator", errors.ECODE_INVAL)
776 def _CheckHostnameSane(lu, name):
777 """Ensures that a given hostname resolves to a 'sane' name.
779 The given name is required to be a prefix of the resolved hostname,
780 to prevent accidental mismatches.
782 @param lu: the logical unit on behalf of which we're checking
783 @param name: the name we should resolve and check
784 @return: the resolved hostname object
787 hostname = netutils.GetHostname(name=name)
788 if hostname.name != name:
789 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
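# Illustrative example (not part of the original module): if "web1" resolves to
# "web1.example.com", the name matches as a prefix and the resolved hostname
# object is returned; if it resolves to "mail.example.com" instead, an
# OpPrereqError is raised.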
797 def _WaitForSync(lu, instance, disks=None, oneshot=False):
798 """Sleep and poll for an instance's disk to sync.
801 if not instance.disks or disks is not None and not disks:
804 disks = _ExpandCheckDisks(instance, disks)
807 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
809 node = instance.primary_node
812 lu.cfg.SetDiskID(dev, node)
814 # TODO: Convert to utils.Retry
817 degr_retries = 10 # in seconds, as we sleep 1 second each time
821 cumul_degraded = False
822 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
823 msg = rstats.fail_msg
825 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
828 raise errors.RemoteError("Can't contact node %s for mirror data,"
832 rstats = rstats.payload
834 for i, mstat in enumerate(rstats):
836 lu.LogWarning("Can't compute data for node %s/%s",
837 node, disks[i].iv_name)
840 cumul_degraded = (cumul_degraded or
841 (mstat.is_degraded and mstat.sync_percent is None))
842 if mstat.sync_percent is not None:
844 if mstat.estimated_time is not None:
845 rem_time = ("%s remaining (estimated)" %
846 utils.FormatSeconds(mstat.estimated_time))
847 max_time = mstat.estimated_time
849 rem_time = "no time estimate"
850 lu.LogInfo("- device %s: %5.2f%% done, %s",
851 disks[i].iv_name, mstat.sync_percent, rem_time)
853 # if we're done but degraded, let's do a few small retries, to
854 # make sure we see a stable and not transient situation; therefore
855 # we force restart of the loop
856 if (done or oneshot) and cumul_degraded and degr_retries > 0:
857 logging.info("Degraded disks found, %d retries left", degr_retries)
865 time.sleep(min(60, max_time))
868 lu.LogInfo("Instance %s's disks are in sync", instance.name)
870 return not cumul_degraded
873 def _BlockdevFind(lu, node, dev, instance):
874 """Wrapper around call_blockdev_find to annotate diskparams.
876 @param lu: A reference to the lu object
877 @param node: The node to call out
878 @param dev: The device to find
879 @param instance: The instance object the device belongs to
880 @returns The result of the rpc call
883 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
884 return lu.rpc.call_blockdev_find(node, disk)
887 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
888 """Wrapper around L{_CheckDiskConsistencyInner}.
891 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
892 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
896 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
898 """Check that mirrors are not degraded.
900 @attention: The device has to be annotated already.
902 The ldisk parameter, if True, will change the test from the
903 is_degraded attribute (which represents overall non-ok status for
904 the device(s)) to the ldisk (representing the local storage status).
907 lu.cfg.SetDiskID(dev, node)
911 if on_primary or dev.AssembleOnSecondary():
912 rstats = lu.rpc.call_blockdev_find(node, dev)
913 msg = rstats.fail_msg
915 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
917 elif not rstats.payload:
918 lu.LogWarning("Can't find disk on node %s", node)
922 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
924 result = result and not rstats.payload.is_degraded
927 for child in dev.children:
928 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
934 class LUOobCommand(NoHooksLU):
935 """Logical unit for OOB handling.
939 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
941 def ExpandNames(self):
942 """Gather locks we need.
945 if self.op.node_names:
946 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
947 lock_names = self.op.node_names
949 lock_names = locking.ALL_SET
951 self.needed_locks = {
952 locking.LEVEL_NODE: lock_names,
955 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
957 if not self.op.node_names:
958 # Acquire node allocation lock only if all nodes are affected
959 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
961 def CheckPrereq(self):
962 """Check prerequisites.
965 - the node exists in the configuration
968 Any errors are signaled by raising errors.OpPrereqError.
972 self.master_node = self.cfg.GetMasterNode()
974 assert self.op.power_delay >= 0.0
976 if self.op.node_names:
977 if (self.op.command in self._SKIP_MASTER and
978 self.master_node in self.op.node_names):
979 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
980 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
982 if master_oob_handler:
983 additional_text = ("run '%s %s %s' if you want to operate on the"
984 " master regardless") % (master_oob_handler,
988 additional_text = "it does not support out-of-band operations"
990 raise errors.OpPrereqError(("Operating on the master node %s is not"
991 " allowed for %s; %s") %
992 (self.master_node, self.op.command,
993 additional_text), errors.ECODE_INVAL)
995 self.op.node_names = self.cfg.GetNodeList()
996 if self.op.command in self._SKIP_MASTER:
997 self.op.node_names.remove(self.master_node)
999 if self.op.command in self._SKIP_MASTER:
1000 assert self.master_node not in self.op.node_names
1002 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
1004 raise errors.OpPrereqError("Node %s not found" % node_name,
1007 self.nodes.append(node)
1009 if (not self.op.ignore_status and
1010 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
1011 raise errors.OpPrereqError(("Cannot power off node %s because it is"
1012 " not marked offline") % node_name,
1015 def Exec(self, feedback_fn):
1016 """Execute OOB and return result if we expect any.
1019 master_node = self.master_node
1022 for idx, node in enumerate(utils.NiceSort(self.nodes,
1023 key=lambda node: node.name)):
1024 node_entry = [(constants.RS_NORMAL, node.name)]
1025 ret.append(node_entry)
1027 oob_program = _SupportsOob(self.cfg, node)
1030 node_entry.append((constants.RS_UNAVAIL, None))
1033 logging.info("Executing out-of-band command '%s' using '%s' on %s",
1034 self.op.command, oob_program, node.name)
1035 result = self.rpc.call_run_oob(master_node, oob_program,
1036 self.op.command, node.name,
1040 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
1041 node.name, result.fail_msg)
1042 node_entry.append((constants.RS_NODATA, None))
1045 self._CheckPayload(result)
1046 except errors.OpExecError, err:
1047 self.LogWarning("Payload returned by node '%s' is not valid: %s",
1049 node_entry.append((constants.RS_NODATA, None))
1051 if self.op.command == constants.OOB_HEALTH:
1052 # For health we should log important events
1053 for item, status in result.payload:
1054 if status in [constants.OOB_STATUS_WARNING,
1055 constants.OOB_STATUS_CRITICAL]:
1056 self.LogWarning("Item '%s' on node '%s' has status '%s'",
1057 item, node.name, status)
      if self.op.command == constants.OOB_POWER_ON:
        node.powered = True
1061 elif self.op.command == constants.OOB_POWER_OFF:
1062 node.powered = False
1063 elif self.op.command == constants.OOB_POWER_STATUS:
1064 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
1065 if powered != node.powered:
1066 logging.warning(("Recorded power state (%s) of node '%s' does not"
1067 " match actual power state (%s)"), node.powered,
1070 # For configuration changing commands we should update the node
1071 if self.op.command in (constants.OOB_POWER_ON,
1072 constants.OOB_POWER_OFF):
1073 self.cfg.Update(node, feedback_fn)
1075 node_entry.append((constants.RS_NORMAL, result.payload))
1077 if (self.op.command == constants.OOB_POWER_ON and
1078 idx < len(self.nodes) - 1):
1079 time.sleep(self.op.power_delay)
1083 def _CheckPayload(self, result):
1084 """Checks if the payload is valid.
1086 @param result: RPC result
1087 @raises errors.OpExecError: If payload is not valid
1091 if self.op.command == constants.OOB_HEALTH:
1092 if not isinstance(result.payload, list):
1093 errs.append("command 'health' is expected to return a list but got %s" %
1094 type(result.payload))
1096 for item, status in result.payload:
1097 if status not in constants.OOB_STATUSES:
1098 errs.append("health item '%s' has invalid status '%s'" %
1101 if self.op.command == constants.OOB_POWER_STATUS:
1102 if not isinstance(result.payload, dict):
1103 errs.append("power-status is expected to return a dict but got %s" %
1104 type(result.payload))
1106 if self.op.command in [
1107 constants.OOB_POWER_ON,
1108 constants.OOB_POWER_OFF,
1109 constants.OOB_POWER_CYCLE,
1111 if result.payload is not None:
1112 errs.append("%s is expected to not return payload but got '%s'" %
1113 (self.op.command, result.payload))
1116 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
1117 utils.CommaJoin(errs))
1120 class _OsQuery(_QueryBase):
1121 FIELDS = query.OS_FIELDS
1123 def ExpandNames(self, lu):
1124 # Lock all nodes in shared mode
1125 # Temporary removal of locks, should be reverted later
1126 # TODO: reintroduce locks when they are lighter-weight
1127 lu.needed_locks = {}
1128 #self.share_locks[locking.LEVEL_NODE] = 1
1129 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1131 # The following variables interact with _QueryBase._GetNames
1133 self.wanted = self.names
1135 self.wanted = locking.ALL_SET
1137 self.do_locking = self.use_locking
1139 def DeclareLocks(self, lu, level):
1143 def _DiagnoseByOS(rlist):
1144 """Remaps a per-node return list into an a per-os per-node dictionary
1146 @param rlist: a map with node names as keys and OS objects as values
1149 @return: a dictionary with osnames as keys and as value another
1150 map, with nodes as keys and tuples of (path, status, diagnose,
1151 variants, parameters, api_versions) as values, eg::
1153 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
1154 (/srv/..., False, "invalid api")],
1155 "node2": [(/srv/..., True, "", [], [])]}
1160 # we build here the list of nodes that didn't fail the RPC (at RPC
1161 # level), so that nodes with a non-responding node daemon don't
1162 # make all OSes invalid
1163 good_nodes = [node_name for node_name in rlist
1164 if not rlist[node_name].fail_msg]
1165 for node_name, nr in rlist.items():
1166 if nr.fail_msg or not nr.payload:
1168 for (name, path, status, diagnose, variants,
1169 params, api_versions) in nr.payload:
1170 if name not in all_os:
1171 # build a list of nodes for this os containing empty lists
1172 # for each node in node_list
1174 for nname in good_nodes:
1175 all_os[name][nname] = []
1176 # convert params from [name, help] to (name, help)
1177 params = [tuple(v) for v in params]
1178 all_os[name][node_name].append((path, status, diagnose,
1179 variants, params, api_versions))
1182 def _GetQueryData(self, lu):
1183 """Computes the list of nodes and their attributes.
1186 # Locking is not used
1187 assert not (compat.any(lu.glm.is_owned(level)
1188 for level in locking.LEVELS
1189 if level != locking.LEVEL_CLUSTER) or
1190 self.do_locking or self.use_locking)
1192 valid_nodes = [node.name
1193 for node in lu.cfg.GetAllNodesInfo().values()
1194 if not node.offline and node.vm_capable]
1195 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
1196 cluster = lu.cfg.GetClusterInfo()
1200 for (os_name, os_data) in pol.items():
1201 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
1202 hidden=(os_name in cluster.hidden_os),
1203 blacklisted=(os_name in cluster.blacklisted_os))
1207 api_versions = set()
1209 for idx, osl in enumerate(os_data.values()):
1210 info.valid = bool(info.valid and osl and osl[0][1])
1214 (node_variants, node_params, node_api) = osl[0][3:6]
1217 variants.update(node_variants)
1218 parameters.update(node_params)
1219 api_versions.update(node_api)
1221 # Filter out inconsistent values
1222 variants.intersection_update(node_variants)
1223 parameters.intersection_update(node_params)
1224 api_versions.intersection_update(node_api)
1226 info.variants = list(variants)
1227 info.parameters = list(parameters)
1228 info.api_versions = list(api_versions)
1230 data[os_name] = info
1232 # Prepare data in requested order
1233 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
1237 class LUOsDiagnose(NoHooksLU):
1238 """Logical unit for OS diagnose/query.
1244 def _BuildFilter(fields, names):
1245 """Builds a filter for querying OSes.
1248 name_filter = qlang.MakeSimpleFilter("name", names)
1250 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
1251 # respective field is not requested
1252 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
1253 for fname in ["hidden", "blacklisted"]
1254 if fname not in fields]
1255 if "valid" not in fields:
1256 status_filter.append([qlang.OP_TRUE, "valid"])
1259 status_filter.insert(0, qlang.OP_AND)
1261 status_filter = None
1263 if name_filter and status_filter:
1264 return [qlang.OP_AND, name_filter, status_filter]
1268 return status_filter
1270 def CheckArguments(self):
1271 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
1272 self.op.output_fields, False)
1274 def ExpandNames(self):
1275 self.oq.ExpandNames(self)
1277 def Exec(self, feedback_fn):
1278 return self.oq.OldStyleQuery(self)
1281 class _ExtStorageQuery(_QueryBase):
1282 FIELDS = query.EXTSTORAGE_FIELDS
1284 def ExpandNames(self, lu):
1285 # Lock all nodes in shared mode
1286 # Temporary removal of locks, should be reverted later
1287 # TODO: reintroduce locks when they are lighter-weight
1288 lu.needed_locks = {}
1289 #self.share_locks[locking.LEVEL_NODE] = 1
1290 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1292 # The following variables interact with _QueryBase._GetNames
1294 self.wanted = self.names
1296 self.wanted = locking.ALL_SET
1298 self.do_locking = self.use_locking
1300 def DeclareLocks(self, lu, level):
1304 def _DiagnoseByProvider(rlist):
1305 """Remaps a per-node return list into an a per-provider per-node dictionary
1307 @param rlist: a map with node names as keys and ExtStorage objects as values
1310 @return: a dictionary with extstorage providers as keys and as
1311 value another map, with nodes as keys and tuples of
1312 (path, status, diagnose, parameters) as values, eg::
1314 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
1315 "node2": [(/srv/..., False, "missing file")]
1316 "node3": [(/srv/..., True, "", [])]
1321 # we build here the list of nodes that didn't fail the RPC (at RPC
1322 # level), so that nodes with a non-responding node daemon don't
1323 # make all OSes invalid
1324 good_nodes = [node_name for node_name in rlist
1325 if not rlist[node_name].fail_msg]
1326 for node_name, nr in rlist.items():
1327 if nr.fail_msg or not nr.payload:
1329 for (name, path, status, diagnose, params) in nr.payload:
1330 if name not in all_es:
          # build a list of nodes for this provider containing empty lists
          # for each node in node_list
1334 for nname in good_nodes:
1335 all_es[name][nname] = []
1336 # convert params from [name, help] to (name, help)
1337 params = [tuple(v) for v in params]
1338 all_es[name][node_name].append((path, status, diagnose, params))
1341 def _GetQueryData(self, lu):
1342 """Computes the list of nodes and their attributes.
1345 # Locking is not used
1346 assert not (compat.any(lu.glm.is_owned(level)
1347 for level in locking.LEVELS
1348 if level != locking.LEVEL_CLUSTER) or
1349 self.do_locking or self.use_locking)
1351 valid_nodes = [node.name
1352 for node in lu.cfg.GetAllNodesInfo().values()
1353 if not node.offline and node.vm_capable]
1354 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
1358 nodegroup_list = lu.cfg.GetNodeGroupList()
1360 for (es_name, es_data) in pol.items():
1361 # For every provider compute the nodegroup validity.
1362 # To do this we need to check the validity of each node in es_data
1363 # and then construct the corresponding nodegroup dict:
1364 # { nodegroup1: status
1365 # nodegroup2: status
1368 for nodegroup in nodegroup_list:
1369 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
1371 nodegroup_nodes = ndgrp.members
1372 nodegroup_name = ndgrp.name
1375 for node in nodegroup_nodes:
1376 if node in valid_nodes:
1377 if es_data[node] != []:
1378 node_status = es_data[node][0][1]
1379 node_statuses.append(node_status)
1381 node_statuses.append(False)
1383 if False in node_statuses:
1384 ndgrp_data[nodegroup_name] = False
1386 ndgrp_data[nodegroup_name] = True
1388 # Compute the provider's parameters
1390 for idx, esl in enumerate(es_data.values()):
1391 valid = bool(esl and esl[0][1])
1395 node_params = esl[0][3]
1398 parameters.update(node_params)
1400 # Filter out inconsistent values
1401 parameters.intersection_update(node_params)
1403 params = list(parameters)
1405 # Now fill all the info for this provider
1406 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
1407 nodegroup_status=ndgrp_data,
1410 data[es_name] = info
1412 # Prepare data in requested order
1413 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
1417 class LUExtStorageDiagnose(NoHooksLU):
1418 """Logical unit for ExtStorage diagnose/query.
1423 def CheckArguments(self):
1424 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
1425 self.op.output_fields, False)
1427 def ExpandNames(self):
1428 self.eq.ExpandNames(self)
1430 def Exec(self, feedback_fn):
1431 return self.eq.OldStyleQuery(self)
1434 class LUNodeRemove(LogicalUnit):
1435 """Logical unit for removing a node.
1438 HPATH = "node-remove"
1439 HTYPE = constants.HTYPE_NODE
1441 def BuildHooksEnv(self):
1446 "OP_TARGET": self.op.node_name,
1447 "NODE_NAME": self.op.node_name,
1450 def BuildHooksNodes(self):
1451 """Build hooks nodes.
1453 This doesn't run on the target node in the pre phase as a failed
1454 node would then be impossible to remove.
1457 all_nodes = self.cfg.GetNodeList()
1459 all_nodes.remove(self.op.node_name)
1462 return (all_nodes, all_nodes)
1464 def CheckPrereq(self):
1465 """Check prerequisites.
1468 - the node exists in the configuration
1469 - it does not have primary or secondary instances
1470 - it's not the master
1472 Any errors are signaled by raising errors.OpPrereqError.
1475 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
1476 node = self.cfg.GetNodeInfo(self.op.node_name)
1477 assert node is not None
1479 masternode = self.cfg.GetMasterNode()
1480 if node.name == masternode:
1481 raise errors.OpPrereqError("Node is the master node, failover to another"
1482 " node is required", errors.ECODE_INVAL)
1484 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
1485 if node.name in instance.all_nodes:
1486 raise errors.OpPrereqError("Instance %s is still running on the node,"
1487 " please remove first" % instance_name,
1489 self.op.node_name = node.name
1492 def Exec(self, feedback_fn):
1493 """Removes the node from the cluster.
1497 logging.info("Stopping the node daemon and removing configs from node %s",
1500 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1502 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
1505 # Promote nodes to master candidate as needed
1506 _AdjustCandidatePool(self, exceptions=[node.name])
1507 self.context.RemoveNode(node.name)
1509 # Run post hooks on the node before it's removed
1510 _RunPostHook(self, node.name)
1512 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
1513 msg = result.fail_msg
1515 self.LogWarning("Errors encountered on the remote node while leaving"
1516 " the cluster: %s", msg)
1518 # Remove node from our /etc/hosts
1519 if self.cfg.GetClusterInfo().modify_etc_hosts:
1520 master_node = self.cfg.GetMasterNode()
1521 result = self.rpc.call_etc_hosts_modify(master_node,
1522 constants.ETC_HOSTS_REMOVE,
1524 result.Raise("Can't update hosts file with new host data")
1525 _RedistributeAncillaryFiles(self)
1528 class _NodeQuery(_QueryBase):
1529 FIELDS = query.NODE_FIELDS
1531 def ExpandNames(self, lu):
1532 lu.needed_locks = {}
1533 lu.share_locks = _ShareAll()
1536 self.wanted = _GetWantedNodes(lu, self.names)
1538 self.wanted = locking.ALL_SET
1540 self.do_locking = (self.use_locking and
1541 query.NQ_LIVE in self.requested_data)
1544 # If any non-static field is requested we need to lock the nodes
1545 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
1546 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
1548 def DeclareLocks(self, lu, level):
1551 def _GetQueryData(self, lu):
1552 """Computes the list of nodes and their attributes.
1555 all_info = lu.cfg.GetAllNodesInfo()
1557 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
1559 # Gather data as requested
1560 if query.NQ_LIVE in self.requested_data:
1561 # filter out non-vm_capable nodes
1562 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
1564 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
1565 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
1566 [lu.cfg.GetHypervisorType()], es_flags)
1567 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
1568 for (name, nresult) in node_data.items()
1569 if not nresult.fail_msg and nresult.payload)
1573 if query.NQ_INST in self.requested_data:
1574 node_to_primary = dict([(name, set()) for name in nodenames])
1575 node_to_secondary = dict([(name, set()) for name in nodenames])
1577 inst_data = lu.cfg.GetAllInstancesInfo()
1579 for inst in inst_data.values():
1580 if inst.primary_node in node_to_primary:
1581 node_to_primary[inst.primary_node].add(inst.name)
1582 for secnode in inst.secondary_nodes:
1583 if secnode in node_to_secondary:
1584 node_to_secondary[secnode].add(inst.name)
1586 node_to_primary = None
1587 node_to_secondary = None
1589 if query.NQ_OOB in self.requested_data:
1590 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
1591 for name, node in all_info.iteritems())
1595 if query.NQ_GROUP in self.requested_data:
1596 groups = lu.cfg.GetAllNodeGroupsInfo()
1600 return query.NodeQueryData([all_info[name] for name in nodenames],
1601 live_data, lu.cfg.GetMasterNode(),
1602 node_to_primary, node_to_secondary, groups,
1603 oob_support, lu.cfg.GetClusterInfo())
1606 class LUNodeQuery(NoHooksLU):
1607 """Logical unit for querying nodes.
1610 # pylint: disable=W0142
1613 def CheckArguments(self):
1614 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
1615 self.op.output_fields, self.op.use_locking)
1617 def ExpandNames(self):
1618 self.nq.ExpandNames(self)
1620 def DeclareLocks(self, level):
1621 self.nq.DeclareLocks(self, level)
1623 def Exec(self, feedback_fn):
1624 return self.nq.OldStyleQuery(self)
1627 class LUNodeQueryvols(NoHooksLU):
1628 """Logical unit for getting volumes on node(s).
1632 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
1633 _FIELDS_STATIC = utils.FieldSet("node")
1635 def CheckArguments(self):
1636 _CheckOutputFields(static=self._FIELDS_STATIC,
1637 dynamic=self._FIELDS_DYNAMIC,
1638 selected=self.op.output_fields)
1640 def ExpandNames(self):
1641 self.share_locks = _ShareAll()
1644 self.needed_locks = {
1645 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
1648 self.needed_locks = {
1649 locking.LEVEL_NODE: locking.ALL_SET,
1650 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
1653 def Exec(self, feedback_fn):
1654 """Computes the list of nodes and their attributes.
1657 nodenames = self.owned_locks(locking.LEVEL_NODE)
1658 volumes = self.rpc.call_node_volumes(nodenames)
1660 ilist = self.cfg.GetAllInstancesInfo()
1661 vol2inst = _MapInstanceDisksToNodes(ilist.values())
1664 for node in nodenames:
1665 nresult = volumes[node]
1668 msg = nresult.fail_msg
1670 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
1673 node_vols = sorted(nresult.payload,
1674 key=operator.itemgetter("dev"))
1676 for vol in node_vols:
1678 for field in self.op.output_fields:
1681 elif field == "phys":
1685 elif field == "name":
1687 elif field == "size":
1688 val = int(float(vol["size"]))
1689 elif field == "instance":
1690 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
1692 raise errors.ParameterError(field)
1693 node_output.append(str(val))
1695 output.append(node_output)
1700 class LUNodeQueryStorage(NoHooksLU):
1701 """Logical unit for getting information on storage units on node(s).
1704 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
1707 def CheckArguments(self):
1708 _CheckOutputFields(static=self._FIELDS_STATIC,
1709 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
1710 selected=self.op.output_fields)
1712 def ExpandNames(self):
1713 self.share_locks = _ShareAll()
1716 self.needed_locks = {
1717 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
1720 self.needed_locks = {
1721 locking.LEVEL_NODE: locking.ALL_SET,
1722 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
1725 def Exec(self, feedback_fn):
1726 """Computes the list of nodes and their attributes.
1729 self.nodes = self.owned_locks(locking.LEVEL_NODE)
1731 # Always get name to sort by
1732 if constants.SF_NAME in self.op.output_fields:
1733 fields = self.op.output_fields[:]
1735 fields = [constants.SF_NAME] + self.op.output_fields
1737 # Never ask for node or type as it's only known to the LU
1738 for extra in [constants.SF_NODE, constants.SF_TYPE]:
1739 while extra in fields:
1740 fields.remove(extra)
1742 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
1743 name_idx = field_idx[constants.SF_NAME]
1745 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
1746 data = self.rpc.call_storage_list(self.nodes,
1747 self.op.storage_type, st_args,
1748 self.op.name, fields)
1752 for node in utils.NiceSort(self.nodes):
1753 nresult = data[node]
1757 msg = nresult.fail_msg
1759 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
1762 rows = dict([(row[name_idx], row) for row in nresult.payload])
1764 for name in utils.NiceSort(rows.keys()):
1769 for field in self.op.output_fields:
1770 if field == constants.SF_NODE:
1772 elif field == constants.SF_TYPE:
1773 val = self.op.storage_type
1774 elif field in field_idx:
1775 val = row[field_idx[field]]
1777 raise errors.ParameterError(field)
1786 class _InstanceQuery(_QueryBase):
1787 FIELDS = query.INSTANCE_FIELDS
1789 def ExpandNames(self, lu):
1790 lu.needed_locks = {}
1791 lu.share_locks = _ShareAll()
1794 self.wanted = _GetWantedInstances(lu, self.names)
1796 self.wanted = locking.ALL_SET
1798 self.do_locking = (self.use_locking and
1799 query.IQ_LIVE in self.requested_data)
1801 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
1802 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
1803 lu.needed_locks[locking.LEVEL_NODE] = []
1804 lu.needed_locks[locking.LEVEL_NETWORK] = []
1805 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1807 self.do_grouplocks = (self.do_locking and
1808 query.IQ_NODES in self.requested_data)
1810 def DeclareLocks(self, lu, level):
1812 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
1813 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
1815 # Lock all groups used by instances optimistically; this requires going
1816 # via the node before it's locked, requiring verification later on
1817 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
1819 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
1820 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
1821 elif level == locking.LEVEL_NODE:
1822 lu._LockInstancesNodes() # pylint: disable=W0212
1824 elif level == locking.LEVEL_NETWORK:
1825 lu.needed_locks[locking.LEVEL_NETWORK] = \
1827 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
1828 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
1831 def _CheckGroupLocks(lu):
1832 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
1833 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
1835 # Check if node groups for locked instances are still correct
1836 for instance_name in owned_instances:
1837 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
1839 def _GetQueryData(self, lu):
1840 """Computes the list of instances and their attributes.
1843 if self.do_grouplocks:
1844 self._CheckGroupLocks(lu)
1846 cluster = lu.cfg.GetClusterInfo()
1847 all_info = lu.cfg.GetAllInstancesInfo()
1849 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
1851 instance_list = [all_info[name] for name in instance_names]
1852 nodes = frozenset(itertools.chain(*(inst.all_nodes
1853 for inst in instance_list)))
1854 hv_list = list(set([inst.hypervisor for inst in instance_list]))
1857 wrongnode_inst = set()
1859 # Gather data as requested
1860 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
1862 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
1864 result = node_data[name]
1866 # offline nodes will be in both lists
1867 assert result.fail_msg
1868 offline_nodes.append(name)
1870 bad_nodes.append(name)
1871 elif result.payload:
1872 for inst in result.payload:
1873 if inst in all_info:
1874 if all_info[inst].primary_node == name:
1875 live_data.update(result.payload)
1877 wrongnode_inst.add(inst)
1879 # orphan instance; we don't list it here as we don't
1880 # handle this case yet in the output of instance listing
1881 logging.warning("Orphan instance '%s' found on node %s",
1883 # else no instance is alive
1887 if query.IQ_DISKUSAGE in self.requested_data:
1888 gmi = ganeti.masterd.instance
1889 disk_usage = dict((inst.name,
1890 gmi.ComputeDiskSize(inst.disk_template,
1891 [{constants.IDISK_SIZE: disk.size}
1892 for disk in inst.disks]))
1893 for inst in instance_list)
1897 if query.IQ_CONSOLE in self.requested_data:
1899 for inst in instance_list:
1900 if inst.name in live_data:
1901 # Instance is running
1902 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
1904 consinfo[inst.name] = None
1905 assert set(consinfo.keys()) == set(instance_names)
1909 if query.IQ_NODES in self.requested_data:
1910 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
1912 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
1913 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
1914 for uuid in set(map(operator.attrgetter("group"),
1920 if query.IQ_NETWORKS in self.requested_data:
1921 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
1922 for i in instance_list))
1923 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
1927 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
1928 disk_usage, offline_nodes, bad_nodes,
1929 live_data, wrongnode_inst, consinfo,
1930 nodes, groups, networks)
1933 class LUQuery(NoHooksLU):
1934 """Query for resources/items of a certain kind.
1937 # pylint: disable=W0142
1940 def CheckArguments(self):
1941 qcls = _GetQueryImplementation(self.op.what)
1943 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
1945 def ExpandNames(self):
1946 self.impl.ExpandNames(self)
1948 def DeclareLocks(self, level):
1949 self.impl.DeclareLocks(self, level)
1951 def Exec(self, feedback_fn):
1952 return self.impl.NewStyleQuery(self)
1955 class LUQueryFields(NoHooksLU):
1956 """Query for resources/items of a certain kind.
1959 # pylint: disable=W0142
1962 def CheckArguments(self):
1963 self.qcls = _GetQueryImplementation(self.op.what)
1965 def ExpandNames(self):
1966 self.needed_locks = {}
1968 def Exec(self, feedback_fn):
1969 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
1972 class LUNodeModifyStorage(NoHooksLU):
1973 """Logical unit for modifying a storage volume on a node.
1978 def CheckArguments(self):
1979 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
1981 storage_type = self.op.storage_type
1984 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
1986 raise errors.OpPrereqError("Storage units of type '%s' can not be"
1987 " modified" % storage_type,
1990 diff = set(self.op.changes.keys()) - modifiable
1992 raise errors.OpPrereqError("The following fields can not be modified for"
1993 " storage units of type '%s': %r" %
1994 (storage_type, list(diff)),
1997 def ExpandNames(self):
1998 self.needed_locks = {
1999 locking.LEVEL_NODE: self.op.node_name,
2002 def Exec(self, feedback_fn):
2003 """Computes the list of nodes and their attributes.
2006 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2007 result = self.rpc.call_storage_modify(self.op.node_name,
2008 self.op.storage_type, st_args,
2009 self.op.name, self.op.changes)
2010 result.Raise("Failed to modify storage unit '%s' on %s" %
2011 (self.op.name, self.op.node_name))
2014 class LUNodeAdd(LogicalUnit):
2015 """Logical unit for adding node to the cluster.
2019 HTYPE = constants.HTYPE_NODE
2020 _NFLAGS = ["master_capable", "vm_capable"]
2022 def CheckArguments(self):
2023 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
2024 # validate/normalize the node name
2025 self.hostname = netutils.GetHostname(name=self.op.node_name,
2026 family=self.primary_ip_family)
2027 self.op.node_name = self.hostname.name
2029 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
2030 raise errors.OpPrereqError("Cannot readd the master node",
2033 if self.op.readd and self.op.group:
2034 raise errors.OpPrereqError("Cannot pass a node group when a node is"
2035 " being readded", errors.ECODE_INVAL)
2037 def BuildHooksEnv(self):
2040 This will run on all nodes before, and on all nodes + the new node after.
2044 "OP_TARGET": self.op.node_name,
2045 "NODE_NAME": self.op.node_name,
2046 "NODE_PIP": self.op.primary_ip,
2047 "NODE_SIP": self.op.secondary_ip,
2048 "MASTER_CAPABLE": str(self.op.master_capable),
2049 "VM_CAPABLE": str(self.op.vm_capable),
2052 def BuildHooksNodes(self):
2053 """Build hooks nodes.
2056 # Exclude added node
2057 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
2058 post_nodes = pre_nodes + [self.op.node_name, ]
2060 return (pre_nodes, post_nodes)
2062 def CheckPrereq(self):
2063 """Check prerequisites.
2066 - the new node is not already in the config
2068 - its parameters (single/dual homed) match the cluster
2070 Any errors are signaled by raising errors.OpPrereqError.
2074 hostname = self.hostname
2075 node = hostname.name
2076 primary_ip = self.op.primary_ip = hostname.ip
2077 if self.op.secondary_ip is None:
2078 if self.primary_ip_family == netutils.IP6Address.family:
2079 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
2080 " IPv4 address must be given as secondary",
2082 self.op.secondary_ip = primary_ip
2084 secondary_ip = self.op.secondary_ip
2085 if not netutils.IP4Address.IsValid(secondary_ip):
2086 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
2087 " address" % secondary_ip, errors.ECODE_INVAL)
2089 node_list = cfg.GetNodeList()
2090 if not self.op.readd and node in node_list:
2091 raise errors.OpPrereqError("Node %s is already in the configuration" %
2092 node, errors.ECODE_EXISTS)
2093 elif self.op.readd and node not in node_list:
2094 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
2097 self.changed_primary_ip = False
2099 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
2100 if self.op.readd and node == existing_node_name:
2101 if existing_node.secondary_ip != secondary_ip:
2102 raise errors.OpPrereqError("Readded node doesn't have the same IP"
2103 " address configuration as before",
2105 if existing_node.primary_ip != primary_ip:
2106 self.changed_primary_ip = True
2110 if (existing_node.primary_ip == primary_ip or
2111 existing_node.secondary_ip == primary_ip or
2112 existing_node.primary_ip == secondary_ip or
2113 existing_node.secondary_ip == secondary_ip):
2114 raise errors.OpPrereqError("New node ip address(es) conflict with"
2115 " existing node %s" % existing_node.name,
2116 errors.ECODE_NOTUNIQUE)
2118 # After this 'if' block, None is no longer a valid value for the
2119 # _capable op attributes
2121 old_node = self.cfg.GetNodeInfo(node)
2122 assert old_node is not None, "Can't retrieve locked node %s" % node
2123 for attr in self._NFLAGS:
2124 if getattr(self.op, attr) is None:
2125 setattr(self.op, attr, getattr(old_node, attr))
2127 for attr in self._NFLAGS:
2128 if getattr(self.op, attr) is None:
2129 setattr(self.op, attr, True)
2131 if self.op.readd and not self.op.vm_capable:
2132 pri, sec = cfg.GetNodeInstances(node)
2134 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
2135 " flag set to false, but it already holds"
2136 " instances" % node,
2139 # check that the type of the node (single versus dual homed) is the
2140 # same as for the master
2141 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2142 master_singlehomed = myself.secondary_ip == myself.primary_ip
2143 newbie_singlehomed = secondary_ip == primary_ip
2144 if master_singlehomed != newbie_singlehomed:
2145 if master_singlehomed:
2146 raise errors.OpPrereqError("The master has no secondary ip but the"
2147 " new node has one",
2150 raise errors.OpPrereqError("The master has a secondary ip but the"
2151 " new node doesn't have one",
2154 # checks reachability
2155 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2156 raise errors.OpPrereqError("Node not reachable by ping",
2157 errors.ECODE_ENVIRON)
2159 if not newbie_singlehomed:
2160 # check reachability from my secondary ip to newbie's secondary ip
2161 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2162 source=myself.secondary_ip):
2163 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2164 " based ping to node daemon port",
2165 errors.ECODE_ENVIRON)
2172 if self.op.master_capable:
2173 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
2175 self.master_candidate = False
2178 self.new_node = old_node
2180 node_group = cfg.LookupNodeGroup(self.op.group)
2181 self.new_node = objects.Node(name=node,
2182 primary_ip=primary_ip,
2183 secondary_ip=secondary_ip,
2184 master_candidate=self.master_candidate,
2185 offline=False, drained=False,
2186 group=node_group, ndparams={})
2188 if self.op.ndparams:
2189 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2190 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
2191 "node", "cluster or group")
2193 if self.op.hv_state:
2194 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
2196 if self.op.disk_state:
2197 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
2199 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
2200 # it a property on the base class.
2201 rpcrunner = rpc.DnsOnlyRunner()
2202 result = rpcrunner.call_version([node])[node]
2203 result.Raise("Can't get version information from node %s" % node)
2204 if constants.PROTOCOL_VERSION == result.payload:
2205 logging.info("Communication to node %s fine, sw version %s match",
2206 node, result.payload)
2208 raise errors.OpPrereqError("Version mismatch master version %s,"
2209 " node version %s" %
2210 (constants.PROTOCOL_VERSION, result.payload),
2211 errors.ECODE_ENVIRON)
2213 vg_name = cfg.GetVGName()
2214 if vg_name is not None:
2215 vparams = {constants.NV_PVLIST: [vg_name]}
2216 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
2217 cname = self.cfg.GetClusterName()
2218 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
2219 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
2221 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
2222 "; ".join(errmsgs), errors.ECODE_ENVIRON)
2224 def Exec(self, feedback_fn):
2225 """Adds the new node to the cluster.
2228 new_node = self.new_node
2229 node = new_node.name
2231 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
2234 # We are adding a new node, so we assume it is powered
2235 new_node.powered = True
2237 # for re-adds, reset the offline/drained/master-candidate flags;
2238 # we need to reset here, otherwise offline would prevent RPC calls
2239 # later in the procedure; this also means that if the re-add
2240 # fails, we are left with a non-offlined, broken node
2242 new_node.drained = new_node.offline = False # pylint: disable=W0201
2243 self.LogInfo("Readding a node, the offline/drained flags were reset")
2244 # if we demote the node, we do cleanup later in the procedure
2245 new_node.master_candidate = self.master_candidate
2246 if self.changed_primary_ip:
2247 new_node.primary_ip = self.op.primary_ip
2249 # copy the master/vm_capable flags
2250 for attr in self._NFLAGS:
2251 setattr(new_node, attr, getattr(self.op, attr))
2253 # notify the user about any possible mc promotion
2254 if new_node.master_candidate:
2255 self.LogInfo("Node will be a master candidate")
2257 if self.op.ndparams:
2258 new_node.ndparams = self.op.ndparams
2260 new_node.ndparams = {}
2262 if self.op.hv_state:
2263 new_node.hv_state_static = self.new_hv_state
2265 if self.op.disk_state:
2266 new_node.disk_state_static = self.new_disk_state
2268 # Add node to our /etc/hosts, and add key to known_hosts
2269 if self.cfg.GetClusterInfo().modify_etc_hosts:
2270 master_node = self.cfg.GetMasterNode()
2271 result = self.rpc.call_etc_hosts_modify(master_node,
2272 constants.ETC_HOSTS_ADD,
2275 result.Raise("Can't update hosts file with new host data")
2277 if new_node.secondary_ip != new_node.primary_ip:
2278 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
2281 node_verify_list = [self.cfg.GetMasterNode()]
2282 node_verify_param = {
2283 constants.NV_NODELIST: ([node], {}),
2284 # TODO: do a node-net-test as well?
2287 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2288 self.cfg.GetClusterName())
2289 for verifier in node_verify_list:
2290 result[verifier].Raise("Cannot communicate with node %s" % verifier)
2291 nl_payload = result[verifier].payload[constants.NV_NODELIST]
2293 for failed in nl_payload:
2294 feedback_fn("ssh/hostname verification failed"
2295 " (checking from %s): %s" %
2296 (verifier, nl_payload[failed]))
2297 raise errors.OpExecError("ssh/hostname verification failed")
2300 _RedistributeAncillaryFiles(self)
2301 self.context.ReaddNode(new_node)
2302 # make sure we redistribute the config
2303 self.cfg.Update(new_node, feedback_fn)
2304 # and make sure the new node will not have old files around
2305 if not new_node.master_candidate:
2306 result = self.rpc.call_node_demote_from_mc(new_node.name)
2307 msg = result.fail_msg
2309 self.LogWarning("Node failed to demote itself from master"
2310 " candidate status: %s" % msg)
2312 _RedistributeAncillaryFiles(self, additional_nodes=[node],
2313 additional_vm=self.op.vm_capable)
2314 self.context.AddNode(new_node, self.proc.GetECId())
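# Illustrative sketch (not used by any LU) of the homing check done in
# LUNodeAdd.CheckPrereq above: a node is "single homed" when its secondary IP
# equals its primary IP, and a node being added must have the same homing as
# the master.  The parameters are plain strings standing in for the values the
# LU reads from the configuration.
def _ExampleHomingMatchesMaster(master_primary_ip, master_secondary_ip,
                                new_primary_ip, new_secondary_ip):
  """Return True if the new node is homed the same way as the master."""
  master_singlehomed = master_secondary_ip == master_primary_ip
  newbie_singlehomed = new_secondary_ip == new_primary_ip
  return master_singlehomed == newbie_singlehomed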
2317 class LUNodeSetParams(LogicalUnit):
2318 """Modifies the parameters of a node.
2320 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
2321 to the node role (as _ROLE_*)
2322 @cvar _R2F: a dictionary from node role to tuples of flags
2323 @cvar _FLAGS: a list of attribute names corresponding to the flags
2326 HPATH = "node-modify"
2327 HTYPE = constants.HTYPE_NODE
2329 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
2331 (True, False, False): _ROLE_CANDIDATE,
2332 (False, True, False): _ROLE_DRAINED,
2333 (False, False, True): _ROLE_OFFLINE,
2334 (False, False, False): _ROLE_REGULAR,
2336 _R2F = dict((v, k) for k, v in _F2R.items())
2337 _FLAGS = ["master_candidate", "drained", "offline"]
2339 def CheckArguments(self):
2340 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2341 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
2342 self.op.master_capable, self.op.vm_capable,
2343 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
2345 if all_mods.count(None) == len(all_mods):
2346 raise errors.OpPrereqError("Please pass at least one modification",
2348 if all_mods.count(True) > 1:
2349 raise errors.OpPrereqError("Can't set the node into more than one"
2350 " state at the same time",
2353 # Boolean value that tells us whether we might be demoting from MC
2354 self.might_demote = (self.op.master_candidate is False or
2355 self.op.offline is True or
2356 self.op.drained is True or
2357 self.op.master_capable is False)
2359 if self.op.secondary_ip:
2360 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
2361 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
2362 " address" % self.op.secondary_ip,
2365 self.lock_all = self.op.auto_promote and self.might_demote
2366 self.lock_instances = self.op.secondary_ip is not None
2368 def _InstanceFilter(self, instance):
2369 """Filter for getting affected instances.
2372 return (instance.disk_template in constants.DTS_INT_MIRROR and
2373 self.op.node_name in instance.all_nodes)
2375 def ExpandNames(self):
2377 self.needed_locks = {
2378 locking.LEVEL_NODE: locking.ALL_SET,
2380 # Block allocations when all nodes are locked
2381 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2384 self.needed_locks = {
2385 locking.LEVEL_NODE: self.op.node_name,
2388 # Since modifying a node can have severe effects on currently running
2389 # operations the resource lock is at least acquired in shared mode
2390 self.needed_locks[locking.LEVEL_NODE_RES] = \
2391 self.needed_locks[locking.LEVEL_NODE]
2393 # Get all locks except nodes in shared mode; they are not used for anything
2394 # but read-only access
2395 self.share_locks = _ShareAll()
2396 self.share_locks[locking.LEVEL_NODE] = 0
2397 self.share_locks[locking.LEVEL_NODE_RES] = 0
2398 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
2400 if self.lock_instances:
2401 self.needed_locks[locking.LEVEL_INSTANCE] = \
2402 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
2404 def BuildHooksEnv(self):
2407 This runs on the master node.
2411 "OP_TARGET": self.op.node_name,
2412 "MASTER_CANDIDATE": str(self.op.master_candidate),
2413 "OFFLINE": str(self.op.offline),
2414 "DRAINED": str(self.op.drained),
2415 "MASTER_CAPABLE": str(self.op.master_capable),
2416 "VM_CAPABLE": str(self.op.vm_capable),
2419 def BuildHooksNodes(self):
2420 """Build hooks nodes.
2423 nl = [self.cfg.GetMasterNode(), self.op.node_name]
2426 def CheckPrereq(self):
2427 """Check prerequisites.
2429 This only checks the instance list against the existing names.
2432 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2434 if self.lock_instances:
2435 affected_instances = \
2436 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
2438 # Verify instance locks
2439 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
2440 wanted_instances = frozenset(affected_instances.keys())
2441 if wanted_instances - owned_instances:
2442 raise errors.OpPrereqError("Instances affected by changing node %s's"
2443 " secondary IP address have changed since"
2444 " locks were acquired, wanted '%s', have"
2445 " '%s'; retry the operation" %
2447 utils.CommaJoin(wanted_instances),
2448 utils.CommaJoin(owned_instances)),
2451 affected_instances = None
2453 if (self.op.master_candidate is not None or
2454 self.op.drained is not None or
2455 self.op.offline is not None):
2456 # we can't change the master's node flags
2457 if self.op.node_name == self.cfg.GetMasterNode():
2458 raise errors.OpPrereqError("The master role can be changed"
2459 " only via master-failover",
2462 if self.op.master_candidate and not node.master_capable:
2463 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
2464 " it a master candidate" % node.name,
2467 if self.op.vm_capable is False:
2468 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
2470 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
2471 " the vm_capable flag" % node.name,
2474 if node.master_candidate and self.might_demote and not self.lock_all:
2475 assert not self.op.auto_promote, "auto_promote set but lock_all not"
2476 # check if, after removing the current node, we're missing master candidates
2478 (mc_remaining, mc_should, _) = \
2479 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
2480 if mc_remaining < mc_should:
2481 raise errors.OpPrereqError("Not enough master candidates, please"
2482 " pass auto promote option to allow"
2483 " promotion (--auto-promote or RAPI"
2484 " auto_promote=True)", errors.ECODE_STATE)
2486 self.old_flags = old_flags = (node.master_candidate,
2487 node.drained, node.offline)
2488 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
2489 self.old_role = old_role = self._F2R[old_flags]
2491 # Check for ineffective changes
2492 for attr in self._FLAGS:
2493 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
2494 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
2495 setattr(self.op, attr, None)
2497 # Past this point, any flag change to False means a transition
2498 # away from the respective state, as only real changes are kept
2500 # TODO: We might query the real power state if it supports OOB
2501 if _SupportsOob(self.cfg, node):
2502 if self.op.offline is False and not (node.powered or
2503 self.op.powered is True):
2504 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
2505 " offline status can be reset") %
2506 self.op.node_name, errors.ECODE_STATE)
2507 elif self.op.powered is not None:
2508 raise errors.OpPrereqError(("Unable to change powered state for node %s"
2509 " as it does not support out-of-band"
2510 " handling") % self.op.node_name,
2513 # If we're being deofflined/drained, we'll MC ourself if needed
2514 if (self.op.drained is False or self.op.offline is False or
2515 (self.op.master_capable and not node.master_capable)):
2516 if _DecideSelfPromotion(self):
2517 self.op.master_candidate = True
2518 self.LogInfo("Auto-promoting node to master candidate")
2520 # If we're no longer master capable, we'll demote ourselves from MC
2521 if self.op.master_capable is False and node.master_candidate:
2522 self.LogInfo("Demoting from master candidate")
2523 self.op.master_candidate = False
2526 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
2527 if self.op.master_candidate:
2528 new_role = self._ROLE_CANDIDATE
2529 elif self.op.drained:
2530 new_role = self._ROLE_DRAINED
2531 elif self.op.offline:
2532 new_role = self._ROLE_OFFLINE
2533 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
2534 # False is still in new flags, which means we're un-setting (the current) flag
2536 new_role = self._ROLE_REGULAR
2537 else: # no new flags, nothing, keep old role
2540 self.new_role = new_role
2542 if old_role == self._ROLE_OFFLINE and new_role != old_role:
2543 # Trying to transition out of offline status
2544 result = self.rpc.call_version([node.name])[node.name]
2546 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
2547 " to report its version: %s" %
2548 (node.name, result.fail_msg),
2551 self.LogWarning("Transitioning node from offline to online state"
2552 " without using re-add. Please make sure the node"
2555 # When changing the secondary ip, verify if this is a single-homed to
2556 # multi-homed transition or vice versa, and apply the relevant
2558 if self.op.secondary_ip:
2559 # Ok even without locking, because this can't be changed by any LU
2560 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
2561 master_singlehomed = master.secondary_ip == master.primary_ip
2562 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
2563 if self.op.force and node.name == master.name:
2564 self.LogWarning("Transitioning from single-homed to multi-homed"
2565 " cluster; all nodes will require a secondary IP"
2568 raise errors.OpPrereqError("Changing the secondary ip on a"
2569 " single-homed cluster requires the"
2570 " --force option to be passed, and the"
2571 " target node to be the master",
2573 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
2574 if self.op.force and node.name == master.name:
2575 self.LogWarning("Transitioning from multi-homed to single-homed"
2576 " cluster; secondary IP addresses will have to be"
2579 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
2580 " same as the primary IP on a multi-homed"
2581 " cluster, unless the --force option is"
2582 " passed, and the target node is the"
2583 " master", errors.ECODE_INVAL)
2585 assert not (frozenset(affected_instances) -
2586 self.owned_locks(locking.LEVEL_INSTANCE))
2589 if affected_instances:
2590 msg = ("Cannot change secondary IP address: offline node has"
2591 " instances (%s) configured to use it" %
2592 utils.CommaJoin(affected_instances.keys()))
2593 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
2595 # On online nodes, check that no instances are running, and that
2596 # the node has the new ip and we can reach it.
2597 for instance in affected_instances.values():
2598 _CheckInstanceState(self, instance, INSTANCE_DOWN,
2599 msg="cannot change secondary ip")
2601 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
2602 if master.name != node.name:
2603 # check reachability from master secondary ip to new secondary ip
2604 if not netutils.TcpPing(self.op.secondary_ip,
2605 constants.DEFAULT_NODED_PORT,
2606 source=master.secondary_ip):
2607 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2608 " based ping to node daemon port",
2609 errors.ECODE_ENVIRON)
2611 if self.op.ndparams:
2612 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
2613 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
2614 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
2615 "node", "cluster or group")
2616 self.new_ndparams = new_ndparams
2618 if self.op.hv_state:
2619 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
2620 self.node.hv_state_static)
2622 if self.op.disk_state:
2623 self.new_disk_state = \
2624 _MergeAndVerifyDiskState(self.op.disk_state,
2625 self.node.disk_state_static)
2627 def Exec(self, feedback_fn):
2632 old_role = self.old_role
2633 new_role = self.new_role
2637 if self.op.ndparams:
2638 node.ndparams = self.new_ndparams
2640 if self.op.powered is not None:
2641 node.powered = self.op.powered
2643 if self.op.hv_state:
2644 node.hv_state_static = self.new_hv_state
2646 if self.op.disk_state:
2647 node.disk_state_static = self.new_disk_state
2649 for attr in ["master_capable", "vm_capable"]:
2650 val = getattr(self.op, attr)
2652 setattr(node, attr, val)
2653 result.append((attr, str(val)))
2655 if new_role != old_role:
2656 # Tell the node to demote itself, if no longer MC and not offline
2657 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
2658 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
2660 self.LogWarning("Node failed to demote itself: %s", msg)
2662 new_flags = self._R2F[new_role]
2663 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
2665 result.append((desc, str(nf)))
2666 (node.master_candidate, node.drained, node.offline) = new_flags
2668 # we locked all nodes, we adjust the CP before updating this node
2670 _AdjustCandidatePool(self, [node.name])
2672 if self.op.secondary_ip:
2673 node.secondary_ip = self.op.secondary_ip
2674 result.append(("secondary_ip", self.op.secondary_ip))
2676 # this will trigger configuration file update, if needed
2677 self.cfg.Update(node, feedback_fn)
2679 # this will trigger job queue propagation or cleanup if the mc flag changed
2681 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
2682 self.context.ReaddNode(node)
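# Illustrative sketch (not used by any LU) of the flag/role translation used
# by LUNodeSetParams above: the (master_candidate, drained, offline) tuple
# maps to exactly one role, mirroring the _F2R table.  The string role names
# are local stand-ins for the _ROLE_* constants.
def _ExampleNodeRole(master_candidate, drained, offline):
  """Translate node flags into a symbolic role name."""
  flags_to_role = {
    (True, False, False): "candidate",
    (False, True, False): "drained",
    (False, False, True): "offline",
    (False, False, False): "regular",
    }
  return flags_to_role[(master_candidate, drained, offline)]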
2687 class LUNodePowercycle(NoHooksLU):
2688 """Powercycles a node.
2693 def CheckArguments(self):
2694 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2695 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
2696 raise errors.OpPrereqError("The node is the master and the force"
2697 " parameter was not set",
2700 def ExpandNames(self):
2701 """Locking for PowercycleNode.
2703 This is a last-resort option and shouldn't block on other
2704 jobs. Therefore, we grab no locks.
2707 self.needed_locks = {}
2709 def Exec(self, feedback_fn):
2713 result = self.rpc.call_node_powercycle(self.op.node_name,
2714 self.cfg.GetHypervisorType())
2715 result.Raise("Failed to schedule the reboot")
2716 return result.payload
2719 class LUInstanceActivateDisks(NoHooksLU):
2720 """Bring up an instance's disks.
2725 def ExpandNames(self):
2726 self._ExpandAndLockInstance()
2727 self.needed_locks[locking.LEVEL_NODE] = []
2728 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2730 def DeclareLocks(self, level):
2731 if level == locking.LEVEL_NODE:
2732 self._LockInstancesNodes()
2734 def CheckPrereq(self):
2735 """Check prerequisites.
2737 This checks that the instance is in the cluster.
2740 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2741 assert self.instance is not None, \
2742 "Cannot retrieve locked instance %s" % self.op.instance_name
2743 _CheckNodeOnline(self, self.instance.primary_node)
2745 def Exec(self, feedback_fn):
2746 """Activate the disks.
2749 disks_ok, disks_info = \
2750 _AssembleInstanceDisks(self, self.instance,
2751 ignore_size=self.op.ignore_size)
2753 raise errors.OpExecError("Cannot activate block devices")
2755 if self.op.wait_for_sync:
2756 if not _WaitForSync(self, self.instance):
2757 raise errors.OpExecError("Some disks of the instance are degraded!")
2762 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
2764 """Prepare the block devices for an instance.
2766 This sets up the block devices on all nodes.
2768 @type lu: L{LogicalUnit}
2769 @param lu: the logical unit on whose behalf we execute
2770 @type instance: L{objects.Instance}
2771 @param instance: the instance for whose disks we assemble
2772 @type disks: list of L{objects.Disk} or None
2773 @param disks: which disks to assemble (or all, if None)
2774 @type ignore_secondaries: boolean
2775 @param ignore_secondaries: if true, errors on secondary nodes
2776 won't result in an error return from the function
2777 @type ignore_size: boolean
2778 @param ignore_size: if true, the current known size of the disk
2779 will not be used during the disk activation, useful for cases
2780 when the size is wrong
2781 @return: False if the operation failed, otherwise a list of
2782 (host, instance_visible_name, node_visible_name)
2783 with the mapping from node devices to instance devices
2788 iname = instance.name
2789 disks = _ExpandCheckDisks(instance, disks)
2791 # With the two passes mechanism we try to reduce the window of
2792 # opportunity for the race condition of switching DRBD to primary
2793 # before handshaking occurred, but we do not eliminate it
2795 # The proper fix would be to wait (with some limits) until the
2796 # connection has been made and drbd transitions from WFConnection
2797 # into any other network-connected state (Connected, SyncTarget,
2800 # 1st pass, assemble on all nodes in secondary mode
2801 for idx, inst_disk in enumerate(disks):
2802 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2804 node_disk = node_disk.Copy()
2805 node_disk.UnsetSize()
2806 lu.cfg.SetDiskID(node_disk, node)
2807 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
2809 msg = result.fail_msg
2811 is_offline_secondary = (node in instance.secondary_nodes and
2813 lu.LogWarning("Could not prepare block device %s on node %s"
2814 " (is_primary=False, pass=1): %s",
2815 inst_disk.iv_name, node, msg)
2816 if not (ignore_secondaries or is_offline_secondary):
2819 # FIXME: race condition on drbd migration to primary
2821 # 2nd pass, do only the primary node
2822 for idx, inst_disk in enumerate(disks):
2825 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2826 if node != instance.primary_node:
2829 node_disk = node_disk.Copy()
2830 node_disk.UnsetSize()
2831 lu.cfg.SetDiskID(node_disk, node)
2832 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
2834 msg = result.fail_msg
2836 lu.LogWarning("Could not prepare block device %s on node %s"
2837 " (is_primary=True, pass=2): %s",
2838 inst_disk.iv_name, node, msg)
2841 dev_path = result.payload
2843 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
2845 # leave the disks configured for the primary node
2846 # this is a workaround that would be better addressed by
2847 # improving the logical/physical id handling
2849 lu.cfg.SetDiskID(disk, instance.primary_node)
2851 return disks_ok, device_info
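# Illustrative sketch (not used by any LU) of the two-pass ordering applied by
# _AssembleInstanceDisks above: every disk is first assembled on all its nodes
# in secondary mode, and only in the second pass is the device switched to
# primary on the primary node, narrowing the DRBD handshake race described in
# the comments.  The node names are hypothetical; only the visit order matters.
def _ExampleTwoPassAssemblyOrder(primary_node, nodes_per_disk):
  """Return the (node, as_primary) visit order for the given disk node lists."""
  order = []
  # 1st pass: every node of every disk, in secondary mode
  for nodes in nodes_per_disk:
    order.extend((node, False) for node in nodes)
  # 2nd pass: the primary node only, now as primary
  for nodes in nodes_per_disk:
    if primary_node in nodes:
      order.append((primary_node, True))
  return order

# e.g. _ExampleTwoPassAssemblyOrder("node1", [["node1", "node2"]]) yields
# [("node1", False), ("node2", False), ("node1", True)]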
2854 def _StartInstanceDisks(lu, instance, force):
2855 """Start the disks of an instance.
2858 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
2859 ignore_secondaries=force)
2861 _ShutdownInstanceDisks(lu, instance)
2862 if force is not None and not force:
2864 hint=("If the message above refers to a secondary node,"
2865 " you can retry the operation using '--force'"))
2866 raise errors.OpExecError("Disk consistency error")
2869 class LUInstanceDeactivateDisks(NoHooksLU):
2870 """Shutdown an instance's disks.
2875 def ExpandNames(self):
2876 self._ExpandAndLockInstance()
2877 self.needed_locks[locking.LEVEL_NODE] = []
2878 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2880 def DeclareLocks(self, level):
2881 if level == locking.LEVEL_NODE:
2882 self._LockInstancesNodes()
2884 def CheckPrereq(self):
2885 """Check prerequisites.
2887 This checks that the instance is in the cluster.
2890 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2891 assert self.instance is not None, \
2892 "Cannot retrieve locked instance %s" % self.op.instance_name
2894 def Exec(self, feedback_fn):
2895 """Deactivate the disks
2898 instance = self.instance
2900 _ShutdownInstanceDisks(self, instance)
2902 _SafeShutdownInstanceDisks(self, instance)
2905 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
2906 """Shutdown block devices of an instance.
2908 This function checks if an instance is running, before calling
2909 _ShutdownInstanceDisks.
2912 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
2913 _ShutdownInstanceDisks(lu, instance, disks=disks)
2916 def _ExpandCheckDisks(instance, disks):
2917 """Return the instance disks selected by the disks list
2919 @type disks: list of L{objects.Disk} or None
2920 @param disks: selected disks
2921 @rtype: list of L{objects.Disk}
2922 @return: selected instance disks to act on
2926 return instance.disks
2928 if not set(disks).issubset(instance.disks):
2929 raise errors.ProgrammerError("Can only act on disks belonging to the"
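# Illustrative sketch (not used by any LU) of the subset check performed by
# _ExpandCheckDisks above: acting on a disk that does not belong to the
# instance is a programmer error, so the selection must be a subset of the
# instance's own disk list.  Plain strings stand in for objects.Disk here, and
# ValueError replaces errors.ProgrammerError to keep the sketch self-contained.
def _ExampleSelectDisks(instance_disks, selected):
  """Return the selected disks, or all of them when no selection is given."""
  if not selected:
    return instance_disks
  if not set(selected).issubset(instance_disks):
    raise ValueError("Can only act on disks belonging to the instance")
  return selected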
2934 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
2935 """Shutdown block devices of an instance.
2937 This does the shutdown on all nodes of the instance.
2939 If ignore_primary is false, errors on the primary node are treated as failures; when it is true, they are ignored.
2944 disks = _ExpandCheckDisks(instance, disks)
2947 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2948 lu.cfg.SetDiskID(top_disk, node)
2949 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
2950 msg = result.fail_msg
2952 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
2953 disk.iv_name, node, msg)
2954 if ((node == instance.primary_node and not ignore_primary) or
2955 (node != instance.primary_node and not result.offline)):
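# Illustrative sketch (not used by any LU) of the failure accounting in
# _ShutdownInstanceDisks above: a shutdown error only counts against the
# overall result when it happens on the primary node (and ignore_primary is
# unset) or on a secondary node that is not offline.
def _ExampleShutdownErrorIsFatal(node, primary_node, node_offline,
                                 ignore_primary):
  """Return True if a shutdown error on C{node} should fail the operation."""
  if node == primary_node:
    return not ignore_primary
  return not node_offline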
2960 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2961 """Checks if a node has enough free memory.
2963 This function checks if a given node has the needed amount of free
2964 memory. In case the node has less memory or we cannot get the
2965 information from the node, this function raises an OpPrereqError
2968 @type lu: C{LogicalUnit}
2969 @param lu: a logical unit from which we get configuration data
2971 @param node: the node to check
2972 @type reason: C{str}
2973 @param reason: string to use in the error message
2974 @type requested: C{int}
2975 @param requested: the amount of memory in MiB to check for
2976 @type hypervisor_name: C{str}
2977 @param hypervisor_name: the hypervisor to ask for memory stats
2979 @return: node current free memory
2980 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2981 we cannot check the node
2984 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
2985 nodeinfo[node].Raise("Can't get data from node %s" % node,
2986 prereq=True, ecode=errors.ECODE_ENVIRON)
2987 (_, _, (hv_info, )) = nodeinfo[node].payload
2989 free_mem = hv_info.get("memory_free", None)
2990 if not isinstance(free_mem, int):
2991 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2992 " was '%s'" % (node, free_mem),
2993 errors.ECODE_ENVIRON)
2994 if requested > free_mem:
2995 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2996 " needed %s MiB, available %s MiB" %
2997 (node, reason, requested, free_mem),
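# Illustrative sketch (not used by any LU) of the validation performed by
# _CheckNodeFreeMemory above: the value reported by the node must be an
# integer and at least as large as the requested amount.  ValueError replaces
# errors.OpPrereqError to keep the sketch self-contained; any numbers used
# with it would be hypothetical.
def _ExampleCheckFreeMemory(reported_free_mem, requested_mem):
  """Validate a reported free-memory value (MiB) against a requirement."""
  if not isinstance(reported_free_mem, int):
    raise ValueError("Free memory is not an integer: %r" %
                     (reported_free_mem,))
  if requested_mem > reported_free_mem:
    raise ValueError("Not enough memory: needed %s MiB, available %s MiB" %
                     (requested_mem, reported_free_mem))
  return reported_free_mem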
3002 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
3003 """Checks if nodes have enough free disk space in all the VGs.
3005 This function checks if all given nodes have the needed amount of
3006 free disk. In case any node has less disk or we cannot get the
3007 information from the node, this function raises an OpPrereqError
3010 @type lu: C{LogicalUnit}
3011 @param lu: a logical unit from which we get configuration data
3012 @type nodenames: C{list}
3013 @param nodenames: the list of node names to check
3014 @type req_sizes: C{dict}
3015 @param req_sizes: the hash of vg and corresponding amount of disk in
3017 @raise errors.OpPrereqError: if the node doesn't have enough disk,
3018 or we cannot check the node
3021 for vg, req_size in req_sizes.items():
3022 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
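# Illustrative sketch (not used by any LU) of the per-VG fan-out done by
# _CheckNodesFreeDiskPerVG above: the request is a dict mapping volume-group
# name to the required amount in MiB, and every entry is checked
# independently.  The VG names and sizes in the usage note are made up.
def _ExampleForEachVolumeGroup(req_sizes, check_fn):
  """Invoke C{check_fn(vg, size)} once per requested volume group."""
  for vg, req_size in req_sizes.items():
    check_fn(vg, req_size)

# e.g. _ExampleForEachVolumeGroup({"xenvg": 10240, "fastvg": 2048}, check)
# calls check("xenvg", 10240) and check("fastvg", 2048) in some order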
3025 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
3026 """Checks if nodes have enough free disk space in the specified VG.
3028 This function checks if all given nodes have the needed amount of
3029 free disk. In case any node has less disk or we cannot get the
3030 information from the node, this function raises an OpPrereqError
3033 @type lu: C{LogicalUnit}
3034 @param lu: a logical unit from which we get configuration data
3035 @type nodenames: C{list}
3036 @param nodenames: the list of node names to check
3038 @param vg: the volume group to check
3039 @type requested: C{int}
3040 @param requested: the amount of disk in MiB to check for
3041 @raise errors.OpPrereqError: if the node doesn't have enough disk,
3042 or we cannot check the node
3045 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
3046 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
3047 for node in nodenames:
3048 info = nodeinfo[node]
3049 info.Raise("Cannot get current information from node %s" % node,
3050 prereq=True, ecode=errors.ECODE_ENVIRON)
3051 (_, (vg_info, ), _) = info.payload
3052 vg_free = vg_info.get("vg_free", None)
3053 if not isinstance(vg_free, int):
3054 raise errors.OpPrereqError("Can't compute free disk space on node"
3055 " %s for vg %s, result was '%s'" %
3056 (node, vg, vg_free), errors.ECODE_ENVIRON)
3057 if requested > vg_free:
3058 raise errors.OpPrereqError("Not enough disk space on target node %s"
3059 " vg %s: required %d MiB, available %d MiB" %
3060 (node, vg, requested, vg_free),
3064 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
3065 """Checks if nodes have enough physical CPUs
3067 This function checks if all given nodes have the needed number of
3068 physical CPUs. In case any node has fewer CPUs or we cannot get the
3069 information from the node, this function raises an OpPrereqError
3072 @type lu: C{LogicalUnit}
3073 @param lu: a logical unit from which we get configuration data
3074 @type nodenames: C{list}
3075 @param nodenames: the list of node names to check
3076 @type requested: C{int}
3077 @param requested: the minimum acceptable number of physical CPUs
3078 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
3079 or we cannot check the node
3082 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
3083 for node in nodenames:
3084 info = nodeinfo[node]
3085 info.Raise("Cannot get current information from node %s" % node,
3086 prereq=True, ecode=errors.ECODE_ENVIRON)
3087 (_, _, (hv_info, )) = info.payload
3088 num_cpus = hv_info.get("cpu_total", None)
3089 if not isinstance(num_cpus, int):
3090 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
3091 " on node %s, result was '%s'" %
3092 (node, num_cpus), errors.ECODE_ENVIRON)
3093 if requested > num_cpus:
3094 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
3095 "required" % (node, num_cpus, requested),
3099 class LUInstanceStartup(LogicalUnit):
3100 """Starts an instance.
3103 HPATH = "instance-start"
3104 HTYPE = constants.HTYPE_INSTANCE
3107 def CheckArguments(self):
3109 if self.op.beparams:
3110 # fill the beparams dict
3111 objects.UpgradeBeParams(self.op.beparams)
3112 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3114 def ExpandNames(self):
3115 self._ExpandAndLockInstance()
3116 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3118 def DeclareLocks(self, level):
3119 if level == locking.LEVEL_NODE_RES:
3120 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
3122 def BuildHooksEnv(self):
3125 This runs on master, primary and secondary nodes of the instance.
3129 "FORCE": self.op.force,
3132 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3136 def BuildHooksNodes(self):
3137 """Build hooks nodes.
3140 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3143 def CheckPrereq(self):
3144 """Check prerequisites.
3146 This checks that the instance is in the cluster.
3149 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3150 assert self.instance is not None, \
3151 "Cannot retrieve locked instance %s" % self.op.instance_name
3154 if self.op.hvparams:
3155 # check hypervisor parameter syntax (locally)
3156 cluster = self.cfg.GetClusterInfo()
3157 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
3158 filled_hvp = cluster.FillHV(instance)
3159 filled_hvp.update(self.op.hvparams)
3160 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
3161 hv_type.CheckParameterSyntax(filled_hvp)
3162 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3164 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
3166 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
3168 if self.primary_offline and self.op.ignore_offline_nodes:
3169 self.LogWarning("Ignoring offline primary node")
3171 if self.op.hvparams or self.op.beparams:
3172 self.LogWarning("Overridden parameters are ignored")
3174 _CheckNodeOnline(self, instance.primary_node)
3176 bep = self.cfg.GetClusterInfo().FillBE(instance)
3177 bep.update(self.op.beparams)
3179 # check bridges existence
3180 _CheckInstanceBridgesExist(self, instance)
3182 remote_info = self.rpc.call_instance_info(instance.primary_node,
3184 instance.hypervisor)
3185 remote_info.Raise("Error checking node %s" % instance.primary_node,
3186 prereq=True, ecode=errors.ECODE_ENVIRON)
3187 if not remote_info.payload: # not running already
3188 _CheckNodeFreeMemory(self, instance.primary_node,
3189 "starting instance %s" % instance.name,
3190 bep[constants.BE_MINMEM], instance.hypervisor)
3192 def Exec(self, feedback_fn):
3193 """Start the instance.
3196 instance = self.instance
3197 force = self.op.force
3198 reason = self.op.reason
3200 if not self.op.no_remember:
3201 self.cfg.MarkInstanceUp(instance.name)
3203 if self.primary_offline:
3204 assert self.op.ignore_offline_nodes
3205 self.LogInfo("Primary node offline, marked instance as started")
3207 node_current = instance.primary_node
3209 _StartInstanceDisks(self, instance, force)
3212 self.rpc.call_instance_start(node_current,
3213 (instance, self.op.hvparams,
3215 self.op.startup_paused, reason)
3216 msg = result.fail_msg
3218 _ShutdownInstanceDisks(self, instance)
3219 raise errors.OpExecError("Could not start instance: %s" % msg)
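# Illustrative sketch (not used by any LU) of the parameter layering in
# LUInstanceStartup.CheckPrereq above: hypervisor parameters start from the
# cluster-filled defaults and the per-opcode overrides are applied on top
# before the merged dict is syntax-checked.  The dicts in the usage note are
# hypothetical stand-ins for cluster.FillHV() output and self.op.hvparams.
def _ExampleFillHvParams(cluster_filled, op_overrides):
  """Merge temporary hvparam overrides over the cluster-filled defaults."""
  merged = dict(cluster_filled)
  merged.update(op_overrides or {})
  return merged

# e.g. _ExampleFillHvParams({"kernel_path": "/boot/vmlinuz"},
#                           {"root_path": "/dev/xvda1"})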
3222 class LUInstanceReboot(LogicalUnit):
3223 """Reboot an instance.
3226 HPATH = "instance-reboot"
3227 HTYPE = constants.HTYPE_INSTANCE
3230 def ExpandNames(self):
3231 self._ExpandAndLockInstance()
3233 def BuildHooksEnv(self):
3236 This runs on master, primary and secondary nodes of the instance.
3240 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3241 "REBOOT_TYPE": self.op.reboot_type,
3242 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
3245 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3249 def BuildHooksNodes(self):
3250 """Build hooks nodes.
3253 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3256 def CheckPrereq(self):
3257 """Check prerequisites.
3259 This checks that the instance is in the cluster.
3262 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3263 assert self.instance is not None, \
3264 "Cannot retrieve locked instance %s" % self.op.instance_name
3265 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
3266 _CheckNodeOnline(self, instance.primary_node)
3268 # check bridges existence
3269 _CheckInstanceBridgesExist(self, instance)
3271 def Exec(self, feedback_fn):
3272 """Reboot the instance.
3275 instance = self.instance
3276 ignore_secondaries = self.op.ignore_secondaries
3277 reboot_type = self.op.reboot_type
3278 reason = self.op.reason
3280 remote_info = self.rpc.call_instance_info(instance.primary_node,
3282 instance.hypervisor)
3283 remote_info.Raise("Error checking node %s" % instance.primary_node)
3284 instance_running = bool(remote_info.payload)
3286 node_current = instance.primary_node
3288 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3289 constants.INSTANCE_REBOOT_HARD]:
3290 for disk in instance.disks:
3291 self.cfg.SetDiskID(disk, node_current)
3292 result = self.rpc.call_instance_reboot(node_current, instance,
3294 self.op.shutdown_timeout, reason)
3295 result.Raise("Could not reboot instance")
3297 if instance_running:
3298 result = self.rpc.call_instance_shutdown(node_current, instance,
3299 self.op.shutdown_timeout,
3301 result.Raise("Could not shutdown instance for full reboot")
3302 _ShutdownInstanceDisks(self, instance)
3304 self.LogInfo("Instance %s was already stopped, starting now",
3306 _StartInstanceDisks(self, instance, ignore_secondaries)
3307 result = self.rpc.call_instance_start(node_current,
3308 (instance, None, None), False,
3310 msg = result.fail_msg
3312 _ShutdownInstanceDisks(self, instance)
3313 raise errors.OpExecError("Could not start instance for"
3314 " full reboot: %s" % msg)
3316 self.cfg.MarkInstanceUp(instance.name)
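# Illustrative sketch (not used by any LU) of the decision made in
# LUInstanceReboot.Exec above: a running instance gets an in-hypervisor reboot
# for the soft/hard reboot types, while anything else (full reboot, or an
# instance that is not running) becomes a shutdown where needed followed by a
# fresh start.  The string reboot types are local stand-ins for the
# INSTANCE_REBOOT_* constants.
def _ExampleRebootSteps(instance_running, reboot_type):
  """Return the list of steps a reboot request translates into."""
  if instance_running and reboot_type in ("soft", "hard"):
    return ["reboot"]
  steps = []
  if instance_running:
    steps.extend(["shutdown", "shutdown-disks"])
  steps.extend(["start-disks", "start"])
  return steps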
3319 class LUInstanceShutdown(LogicalUnit):
3320 """Shutdown an instance.
3323 HPATH = "instance-stop"
3324 HTYPE = constants.HTYPE_INSTANCE
3327 def ExpandNames(self):
3328 self._ExpandAndLockInstance()
3330 def BuildHooksEnv(self):
3333 This runs on master, primary and secondary nodes of the instance.
3336 env = _BuildInstanceHookEnvByObject(self, self.instance)
3337 env["TIMEOUT"] = self.op.timeout
3340 def BuildHooksNodes(self):
3341 """Build hooks nodes.
3344 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3347 def CheckPrereq(self):
3348 """Check prerequisites.
3350 This checks that the instance is in the cluster.
3353 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3354 assert self.instance is not None, \
3355 "Cannot retrieve locked instance %s" % self.op.instance_name
3357 if not self.op.force:
3358 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
3360 self.LogWarning("Ignoring offline instance check")
3362 self.primary_offline = \
3363 self.cfg.GetNodeInfo(self.instance.primary_node).offline
3365 if self.primary_offline and self.op.ignore_offline_nodes:
3366 self.LogWarning("Ignoring offline primary node")
3368 _CheckNodeOnline(self, self.instance.primary_node)
3370 def Exec(self, feedback_fn):
3371 """Shutdown the instance.
3374 instance = self.instance
3375 node_current = instance.primary_node
3376 timeout = self.op.timeout
3377 reason = self.op.reason
3379 # If the instance is offline we shouldn't mark it as down, as that
3380 # resets the offline flag.
3381 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
3382 self.cfg.MarkInstanceDown(instance.name)
3384 if self.primary_offline:
3385 assert self.op.ignore_offline_nodes
3386 self.LogInfo("Primary node offline, marked instance as stopped")
3388 result = self.rpc.call_instance_shutdown(node_current, instance, timeout,
3390 msg = result.fail_msg
3392 self.LogWarning("Could not shutdown instance: %s", msg)
3394 _ShutdownInstanceDisks(self, instance)
3397 class LUInstanceReinstall(LogicalUnit):
3398 """Reinstall an instance.
3401 HPATH = "instance-reinstall"
3402 HTYPE = constants.HTYPE_INSTANCE
3405 def ExpandNames(self):
3406 self._ExpandAndLockInstance()
3408 def BuildHooksEnv(self):
3411 This runs on master, primary and secondary nodes of the instance.
3414 return _BuildInstanceHookEnvByObject(self, self.instance)
3416 def BuildHooksNodes(self):
3417 """Build hooks nodes.
3420 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3423 def CheckPrereq(self):
3424 """Check prerequisites.
3426 This checks that the instance is in the cluster and is not running.
3429 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3430 assert instance is not None, \
3431 "Cannot retrieve locked instance %s" % self.op.instance_name
3432 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
3433 " offline, cannot reinstall")
3435 if instance.disk_template == constants.DT_DISKLESS:
3436 raise errors.OpPrereqError("Instance '%s' has no disks" %
3437 self.op.instance_name,
3439 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
3441 if self.op.os_type is not None:
3443 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
3444 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
3445 instance_os = self.op.os_type
3447 instance_os = instance.os
3449 nodelist = list(instance.all_nodes)
3451 if self.op.osparams:
3452 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
3453 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
3454 self.os_inst = i_osdict # the new dict (without defaults)
3458 self.instance = instance
3460 def Exec(self, feedback_fn):
3461 """Reinstall the instance.
3464 inst = self.instance
3466 if self.op.os_type is not None:
3467 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3468 inst.os = self.op.os_type
3469 # Write to configuration
3470 self.cfg.Update(inst, feedback_fn)
3472 _StartInstanceDisks(self, inst, None)
3474 feedback_fn("Running the instance OS create scripts...")
3475 # FIXME: pass debug option from opcode to backend
3476 result = self.rpc.call_instance_os_add(inst.primary_node,
3477 (inst, self.os_inst), True,
3478 self.op.debug_level)
3479 result.Raise("Could not install OS for instance %s on node %s" %
3480 (inst.name, inst.primary_node))
3482 _ShutdownInstanceDisks(self, inst)
3485 class LUInstanceRecreateDisks(LogicalUnit):
3486 """Recreate an instance's missing disks.
3489 HPATH = "instance-recreate-disks"
3490 HTYPE = constants.HTYPE_INSTANCE
3493 _MODIFYABLE = compat.UniqueFrozenset([
3494 constants.IDISK_SIZE,
3495 constants.IDISK_MODE,
3498 # New or changed disk parameters may have different semantics
3499 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
3500 constants.IDISK_ADOPT,
3502 # TODO: Implement support changing VG while recreating
3504 constants.IDISK_METAVG,
3505 constants.IDISK_PROVIDER,
3506 constants.IDISK_NAME,
3509 def _RunAllocator(self):
3510 """Run the allocator based on input opcode.
3513 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
3516 # The allocator should actually run in "relocate" mode, but current
3517 # allocators don't support relocating all the nodes of an instance at
3518 # the same time. As a workaround we use "allocate" mode, but this is
3519 # suboptimal for two reasons:
3520 # - The instance name passed to the allocator is present in the list of
3521 # existing instances, so there could be a conflict within the
3522 # internal structures of the allocator. This doesn't happen with the
3523 # current allocators, but it's a liability.
3524 # - The allocator counts the resources used by the instance twice: once
3525 # because the instance exists already, and once because it tries to
3526 # allocate a new instance.
3527 # The allocator could choose some of the nodes on which the instance is
3528 # running, but that's not a problem. If the instance nodes are broken,
3529 # they should already be marked as drained or offline, and hence
3530 # skipped by the allocator. If instance disks have been lost for other
3531 # reasons, then recreating the disks on the same nodes should be fine.
3532 disk_template = self.instance.disk_template
3533 spindle_use = be_full[constants.BE_SPINDLE_USE]
3534 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
3535 disk_template=disk_template,
3536 tags=list(self.instance.GetTags()),
3537 os=self.instance.os,
3539 vcpus=be_full[constants.BE_VCPUS],
3540 memory=be_full[constants.BE_MAXMEM],
3541 spindle_use=spindle_use,
3542 disks=[{constants.IDISK_SIZE: d.size,
3543 constants.IDISK_MODE: d.mode}
3544 for d in self.instance.disks],
3545 hypervisor=self.instance.hypervisor,
3546 node_whitelist=None)
3547 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
3549 ial.Run(self.op.iallocator)
3551 assert req.RequiredNodes() == len(self.instance.all_nodes)
3554 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
3555 " %s" % (self.op.iallocator, ial.info),
3558 self.op.nodes = ial.result
3559 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
3560 self.op.instance_name, self.op.iallocator,
3561 utils.CommaJoin(ial.result))
3563 def CheckArguments(self):
3564 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
3565 # Normalize and convert deprecated list of disk indices
3566 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
3568 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
3570 raise errors.OpPrereqError("Some disks have been specified more than"
3571 " once: %s" % utils.CommaJoin(duplicates),
3574 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
3575 # when neither iallocator nor nodes are specified
3576 if self.op.iallocator or self.op.nodes:
3577 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
3579 for (idx, params) in self.op.disks:
3580 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
3581 unsupported = frozenset(params.keys()) - self._MODIFYABLE
3583 raise errors.OpPrereqError("Parameters for disk %s try to change"
3584 " unmodifyable parameter(s): %s" %
3585 (idx, utils.CommaJoin(unsupported)),
3588 def ExpandNames(self):
3589 self._ExpandAndLockInstance()
3590 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3593 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
3594 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
3596 self.needed_locks[locking.LEVEL_NODE] = []
3597 if self.op.iallocator:
3598 # iallocator will select a new node in the same group
3599 self.needed_locks[locking.LEVEL_NODEGROUP] = []
3600 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
3602 self.needed_locks[locking.LEVEL_NODE_RES] = []
3604 def DeclareLocks(self, level):
3605 if level == locking.LEVEL_NODEGROUP:
3606 assert self.op.iallocator is not None
3607 assert not self.op.nodes
3608 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3609 self.share_locks[locking.LEVEL_NODEGROUP] = 1
3610 # Lock the primary group used by the instance optimistically; this
3611 # requires going via the node before it's locked, requiring
3612 # verification later on
3613 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3614 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
3616 elif level == locking.LEVEL_NODE:
3617 # If an allocator is used, then we lock all the nodes in the current
3618 # instance group, as we don't know yet which ones will be selected;
3619 # if we replace the nodes without using an allocator, locks are
3620 # already declared in ExpandNames; otherwise, we need to lock all the
3621 # instance nodes for disk re-creation
3622 if self.op.iallocator:
3623 assert not self.op.nodes
3624 assert not self.needed_locks[locking.LEVEL_NODE]
3625 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
3627 # Lock member nodes of the group of the primary node
3628 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
3629 self.needed_locks[locking.LEVEL_NODE].extend(
3630 self.cfg.GetNodeGroup(group_uuid).members)
3632 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
3633 elif not self.op.nodes:
3634 self._LockInstancesNodes(primary_only=False)
3635 elif level == locking.LEVEL_NODE_RES:
3637 self.needed_locks[locking.LEVEL_NODE_RES] = \
3638 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
3640 def BuildHooksEnv(self):
3643 This runs on master, primary and secondary nodes of the instance.
3646 return _BuildInstanceHookEnvByObject(self, self.instance)
3648 def BuildHooksNodes(self):
3649 """Build hooks nodes.
3652 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3655 def CheckPrereq(self):
3656 """Check prerequisites.
3658 This checks that the instance is in the cluster and is not running.
3661 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3662 assert instance is not None, \
3663 "Cannot retrieve locked instance %s" % self.op.instance_name
3665 if len(self.op.nodes) != len(instance.all_nodes):
3666 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
3667 " %d replacement nodes were specified" %
3668 (instance.name, len(instance.all_nodes),
3669 len(self.op.nodes)),
3671 assert instance.disk_template != constants.DT_DRBD8 or \
3672 len(self.op.nodes) == 2
3673 assert instance.disk_template != constants.DT_PLAIN or \
3674 len(self.op.nodes) == 1
3675 primary_node = self.op.nodes[0]
3677 primary_node = instance.primary_node
3678 if not self.op.iallocator:
3679 _CheckNodeOnline(self, primary_node)
3681 if instance.disk_template == constants.DT_DISKLESS:
3682 raise errors.OpPrereqError("Instance '%s' has no disks" %
3683 self.op.instance_name, errors.ECODE_INVAL)
3685 # Verify if node group locks are still correct
3686 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
3688 # Node group locks are acquired only for the primary node (and only
3689 # when the allocator is used)
3690 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
3693 # if we replace nodes *and* the old primary is offline, we don't
3694 # check the instance state
3695 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
3696 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
3697 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
3698 msg="cannot recreate disks")
3701 self.disks = dict(self.op.disks)
3703 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
3705 maxidx = max(self.disks.keys())
3706 if maxidx >= len(instance.disks):
3707 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
3710 if ((self.op.nodes or self.op.iallocator) and
3711 sorted(self.disks.keys()) != range(len(instance.disks))):
3712 raise errors.OpPrereqError("Can't recreate disks partially and"
3713 " change the nodes at the same time",
3716 self.instance = instance
3718 if self.op.iallocator:
3719 self._RunAllocator()
3720 # Release unneeded node and node resource locks
3721 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
3722 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
3723 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
3725 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
3727 def Exec(self, feedback_fn):
3728 """Recreate the disks.
3731 instance = self.instance
3733 assert (self.owned_locks(locking.LEVEL_NODE) ==
3734 self.owned_locks(locking.LEVEL_NODE_RES))
3737 mods = [] # keeps track of needed changes
3739 for idx, disk in enumerate(instance.disks):
3741 changes = self.disks[idx]
3743 # Disk should not be recreated
3747 # update secondaries for disks, if needed
3748 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
3749 # need to update the nodes and minors
3750 assert len(self.op.nodes) == 2
3751 assert len(disk.logical_id) == 6 # otherwise disk internals
3753 (_, _, old_port, _, _, old_secret) = disk.logical_id
3754 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
3755 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
3756 new_minors[0], new_minors[1], old_secret)
3757 assert len(disk.logical_id) == len(new_id)
3761 mods.append((idx, new_id, changes))
3763 # now that we have passed all asserts above, we can apply the mods
3764 # in a single run (to avoid partial changes)
3765 for idx, new_id, changes in mods:
3766 disk = instance.disks[idx]
3767 if new_id is not None:
3768 assert disk.dev_type == constants.LD_DRBD8
3769 disk.logical_id = new_id
3771 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
3772 mode=changes.get(constants.IDISK_MODE, None))
3774 # change primary node, if needed
3776 instance.primary_node = self.op.nodes[0]
3777 self.LogWarning("Changing the instance's nodes, you will have to"
3778 " remove any disks left on the older nodes manually")
3781 self.cfg.Update(instance, feedback_fn)
3783 # All touched nodes must be locked
3784 mylocks = self.owned_locks(locking.LEVEL_NODE)
3785 assert mylocks.issuperset(frozenset(instance.all_nodes))
3786 _CreateDisks(self, instance, to_skip=to_skip)
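# Note on the DRBD8 handling in Exec() above (a clarifying sketch, not new
# behaviour): a DRBD8 disk's logical_id is the 6-tuple
#   (node_a, node_b, port, minor_a, minor_b, shared_secret)
# so recreating the disks on new nodes only replaces the node names and the
# freshly allocated minors while keeping the old port and secret, roughly:
#   (_, _, old_port, _, _, old_secret) = disk.logical_id
#   disk.logical_id = (self.op.nodes[0], self.op.nodes[1], old_port,
#                      new_minors[0], new_minors[1], old_secret)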
3789 class LUInstanceRename(LogicalUnit):
3790 """Rename an instance.
3793 HPATH = "instance-rename"
3794 HTYPE = constants.HTYPE_INSTANCE
3796 def CheckArguments(self):
3800 if self.op.ip_check and not self.op.name_check:
3801 # TODO: make the ip check more flexible and not depend on the name check
3802 raise errors.OpPrereqError("IP address check requires a name check",
3805 def BuildHooksEnv(self):
3808 This runs on master, primary and secondary nodes of the instance.
3811 env = _BuildInstanceHookEnvByObject(self, self.instance)
3812 env["INSTANCE_NEW_NAME"] = self.op.new_name
3815 def BuildHooksNodes(self):
3816 """Build hooks nodes.
3819 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3822 def CheckPrereq(self):
3823 """Check prerequisites.
3825 This checks that the instance is in the cluster and is not running.
3828 self.op.instance_name = _ExpandInstanceName(self.cfg,
3829 self.op.instance_name)
3830 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3831 assert instance is not None
3832 _CheckNodeOnline(self, instance.primary_node)
3833 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
3834 msg="cannot rename")
3835 self.instance = instance
3837 new_name = self.op.new_name
3838 if self.op.name_check:
3839 hostname = _CheckHostnameSane(self, new_name)
3840 new_name = self.op.new_name = hostname.name
3841 if (self.op.ip_check and
3842 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
3843 raise errors.OpPrereqError("IP %s of instance %s already in use" %
3844 (hostname.ip, new_name),
3845 errors.ECODE_NOTUNIQUE)
3847 instance_list = self.cfg.GetInstanceList()
3848 if new_name in instance_list and new_name != instance.name:
3849 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3850 new_name, errors.ECODE_EXISTS)
3852 def Exec(self, feedback_fn):
3853 """Rename the instance.
3856 inst = self.instance
3857 old_name = inst.name
3859 rename_file_storage = False
3860 if (inst.disk_template in constants.DTS_FILEBASED and
3861 self.op.new_name != inst.name):
3862 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3863 rename_file_storage = True
3865 self.cfg.RenameInstance(inst.name, self.op.new_name)
3866 # Change the instance lock. This is definitely safe while we hold the BGL.
3867 # Otherwise the new lock would have to be added in acquired mode.
3869 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
3870 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
3871 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3873 # re-read the instance from the configuration after rename
3874 inst = self.cfg.GetInstanceInfo(self.op.new_name)
3876 if rename_file_storage:
3877 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3878 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3879 old_file_storage_dir,
3880 new_file_storage_dir)
3881 result.Raise("Could not rename on node %s directory '%s' to '%s'"
3882 " (but the instance has been renamed in Ganeti)" %
3883 (inst.primary_node, old_file_storage_dir,
3884 new_file_storage_dir))
3886 _StartInstanceDisks(self, inst, None)
3887 # update info on disks
3888 info = _GetInstanceInfoText(inst)
3889 for (idx, disk) in enumerate(inst.disks):
3890 for node in inst.all_nodes:
3891 self.cfg.SetDiskID(disk, node)
3892 result = self.rpc.call_blockdev_setinfo(node, disk, info)
3894 self.LogWarning("Error setting info on node %s for disk %s: %s",
3895 node, idx, result.fail_msg)
3897 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3898 old_name, self.op.debug_level)
3899 msg = result.fail_msg
3901 msg = ("Could not run OS rename script for instance %s on node %s"
3902 " (but the instance has been renamed in Ganeti): %s" %
3903 (inst.name, inst.primary_node, msg))
3904 self.LogWarning(msg)
3906 _ShutdownInstanceDisks(self, inst)
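# Rename flow in Exec() above, summarised: rename the instance in the
# configuration, swap the per-instance lock to the new name, rename the
# file storage directory for file-based disk templates, refresh the
# metadata on every disk, then start the disks, run the OS rename script on
# the primary node and shut the disks down again.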
3911 class LUInstanceRemove(LogicalUnit):
3912 """Remove an instance.
3915 HPATH = "instance-remove"
3916 HTYPE = constants.HTYPE_INSTANCE
3919 def ExpandNames(self):
3920 self._ExpandAndLockInstance()
3921 self.needed_locks[locking.LEVEL_NODE] = []
3922 self.needed_locks[locking.LEVEL_NODE_RES] = []
3923 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3925 def DeclareLocks(self, level):
3926 if level == locking.LEVEL_NODE:
3927 self._LockInstancesNodes()
3928 elif level == locking.LEVEL_NODE_RES:
3930 self.needed_locks[locking.LEVEL_NODE_RES] = \
3931 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
3933 def BuildHooksEnv(self):
3936 This runs on master, primary and secondary nodes of the instance.
3939 env = _BuildInstanceHookEnvByObject(self, self.instance)
3940 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
3943 def BuildHooksNodes(self):
3944 """Build hooks nodes.
3947 nl = [self.cfg.GetMasterNode()]
3948 nl_post = list(self.instance.all_nodes) + nl
3949 return (nl, nl_post)
3951 def CheckPrereq(self):
3952 """Check prerequisites.
3954 This checks that the instance is in the cluster.
3957 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3958 assert self.instance is not None, \
3959 "Cannot retrieve locked instance %s" % self.op.instance_name
3961 def Exec(self, feedback_fn):
3962 """Remove the instance.
3965 instance = self.instance
3966 logging.info("Shutting down instance %s on node %s",
3967 instance.name, instance.primary_node)
3969 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
3970 self.op.shutdown_timeout,
3972 msg = result.fail_msg
3974 if self.op.ignore_failures:
3975 feedback_fn("Warning: can't shutdown instance: %s" % msg)
3977 raise errors.OpExecError("Could not shutdown instance %s on"
3979 (instance.name, instance.primary_node, msg))
3981 assert (self.owned_locks(locking.LEVEL_NODE) ==
3982 self.owned_locks(locking.LEVEL_NODE_RES))
3983 assert not (set(instance.all_nodes) -
3984 self.owned_locks(locking.LEVEL_NODE)), \
3985 "Not owning correct locks"
3987 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
3990 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
3991 """Utility function to remove an instance.
3994 logging.info("Removing block devices for instance %s", instance.name)
3996 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
3997 if not ignore_failures:
3998 raise errors.OpExecError("Can't remove instance's disks")
3999 feedback_fn("Warning: can't remove instance's disks")
4001 logging.info("Removing instance %s out of cluster config", instance.name)
4003 lu.cfg.RemoveInstance(instance.name)
4005 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4006 "Instance lock removal conflict"
4008 # Remove lock for the instance
4009 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4012 class LUInstanceQuery(NoHooksLU):
4013 """Logical unit for querying instances.
4016 # pylint: disable=W0142
4019 def CheckArguments(self):
4020 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
4021 self.op.output_fields, self.op.use_locking)
4023 def ExpandNames(self):
4024 self.iq.ExpandNames(self)
4026 def DeclareLocks(self, level):
4027 self.iq.DeclareLocks(self, level)
4029 def Exec(self, feedback_fn):
4030 return self.iq.OldStyleQuery(self)
4033 def _ExpandNamesForMigration(lu):
4034 """Expands names for use with L{TLMigrateInstance}.
4036 @type lu: L{LogicalUnit}
4039 if lu.op.target_node is not None:
4040 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
4042 lu.needed_locks[locking.LEVEL_NODE] = []
4043 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4045 lu.needed_locks[locking.LEVEL_NODE_RES] = []
4046 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
4048 # The node allocation lock is actually only needed for externally replicated
4049 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
4050 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
4053 def _DeclareLocksForMigration(lu, level):
4054 """Declares locks for L{TLMigrateInstance}.
4056 @type lu: L{LogicalUnit}
4057 @param level: Lock level
4060 if level == locking.LEVEL_NODE_ALLOC:
4061 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4063 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
4065 # Node locks are already declared here rather than at LEVEL_NODE as we need
4066 # the instance object anyway to declare the node allocation lock.
4067 if instance.disk_template in constants.DTS_EXT_MIRROR:
4068 if lu.op.target_node is None:
4069 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4070 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4072 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
4074 del lu.recalculate_locks[locking.LEVEL_NODE]
4076 lu._LockInstancesNodes() # pylint: disable=W0212
4078 elif level == locking.LEVEL_NODE:
4079 # Node locks are declared together with the node allocation lock
4080 assert (lu.needed_locks[locking.LEVEL_NODE] or
4081 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
4083 elif level == locking.LEVEL_NODE_RES:
4085 lu.needed_locks[locking.LEVEL_NODE_RES] = \
4086 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
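# Typical usage of the two helpers above (this is the pattern followed by
# LUInstanceFailover and LUInstanceMigrate below, shown here only as a
# sketch):
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     _ExpandNamesForMigration(self)
#     self._migrater = TLMigrateInstance(self, self.op.instance_name, ...)
#     self.tasklets = [self._migrater]
#
#   def DeclareLocks(self, level):
#     _DeclareLocksForMigration(self, level)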
4089 class LUInstanceFailover(LogicalUnit):
4090 """Failover an instance.
4093 HPATH = "instance-failover"
4094 HTYPE = constants.HTYPE_INSTANCE
4097 def CheckArguments(self):
4098 """Check the arguments.
4101 self.iallocator = getattr(self.op, "iallocator", None)
4102 self.target_node = getattr(self.op, "target_node", None)
4104 def ExpandNames(self):
4105 self._ExpandAndLockInstance()
4106 _ExpandNamesForMigration(self)
4109 TLMigrateInstance(self, self.op.instance_name, False, True, False,
4110 self.op.ignore_consistency, True,
4111 self.op.shutdown_timeout, self.op.ignore_ipolicy)
4113 self.tasklets = [self._migrater]
4115 def DeclareLocks(self, level):
4116 _DeclareLocksForMigration(self, level)
4118 def BuildHooksEnv(self):
4121 This runs on master, primary and secondary nodes of the instance.
4124 instance = self._migrater.instance
4125 source_node = instance.primary_node
4126 target_node = self.op.target_node
4128 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4129 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4130 "OLD_PRIMARY": source_node,
4131 "NEW_PRIMARY": target_node,
4134 if instance.disk_template in constants.DTS_INT_MIRROR:
4135 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
4136 env["NEW_SECONDARY"] = source_node
4138 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
4140 env.update(_BuildInstanceHookEnvByObject(self, instance))
4144 def BuildHooksNodes(self):
4145 """Build hooks nodes.
4148 instance = self._migrater.instance
4149 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4150 return (nl, nl + [instance.primary_node])
4153 class LUInstanceMigrate(LogicalUnit):
4154 """Migrate an instance.
4156 This is migration without shutting down, compared to the failover,
4157 which is done with shutdown.
4160 HPATH = "instance-migrate"
4161 HTYPE = constants.HTYPE_INSTANCE
4164 def ExpandNames(self):
4165 self._ExpandAndLockInstance()
4166 _ExpandNamesForMigration(self)
4169 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
4170 False, self.op.allow_failover, False,
4171 self.op.allow_runtime_changes,
4172 constants.DEFAULT_SHUTDOWN_TIMEOUT,
4173 self.op.ignore_ipolicy)
4175 self.tasklets = [self._migrater]
4177 def DeclareLocks(self, level):
4178 _DeclareLocksForMigration(self, level)
4180 def BuildHooksEnv(self):
4183 This runs on master, primary and secondary nodes of the instance.
4186 instance = self._migrater.instance
4187 source_node = instance.primary_node
4188 target_node = self.op.target_node
4189 env = _BuildInstanceHookEnvByObject(self, instance)
4191 "MIGRATE_LIVE": self._migrater.live,
4192 "MIGRATE_CLEANUP": self.op.cleanup,
4193 "OLD_PRIMARY": source_node,
4194 "NEW_PRIMARY": target_node,
4195 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
4198 if instance.disk_template in constants.DTS_INT_MIRROR:
4199 env["OLD_SECONDARY"] = target_node
4200 env["NEW_SECONDARY"] = source_node
4202 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
4206 def BuildHooksNodes(self):
4207 """Build hooks nodes.
4210 instance = self._migrater.instance
4211 snodes = list(instance.secondary_nodes)
4212 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
4216 class LUInstanceMove(LogicalUnit):
4217 """Move an instance by data-copying.
4220 HPATH = "instance-move"
4221 HTYPE = constants.HTYPE_INSTANCE
4224 def ExpandNames(self):
4225 self._ExpandAndLockInstance()
4226 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4227 self.op.target_node = target_node
4228 self.needed_locks[locking.LEVEL_NODE] = [target_node]
4229 self.needed_locks[locking.LEVEL_NODE_RES] = []
4230 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4232 def DeclareLocks(self, level):
4233 if level == locking.LEVEL_NODE:
4234 self._LockInstancesNodes(primary_only=True)
4235 elif level == locking.LEVEL_NODE_RES:
4237 self.needed_locks[locking.LEVEL_NODE_RES] = \
4238 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
4240 def BuildHooksEnv(self):
4243 This runs on master, primary and secondary nodes of the instance.
4247 "TARGET_NODE": self.op.target_node,
4248 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4250 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4253 def BuildHooksNodes(self):
4254 """Build hooks nodes.
4258 self.cfg.GetMasterNode(),
4259 self.instance.primary_node,
4260 self.op.target_node,
4264 def CheckPrereq(self):
4265 """Check prerequisites.
4267 This checks that the instance is in the cluster.
4270 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4271 assert self.instance is not None, \
4272 "Cannot retrieve locked instance %s" % self.op.instance_name
4274 if instance.disk_template not in constants.DTS_COPYABLE:
4275 raise errors.OpPrereqError("Disk template %s not suitable for copying" %
4276 instance.disk_template, errors.ECODE_STATE)
4278 node = self.cfg.GetNodeInfo(self.op.target_node)
4279 assert node is not None, \
4280 "Cannot retrieve locked node %s" % self.op.target_node
4282 self.target_node = target_node = node.name
4284 if target_node == instance.primary_node:
4285 raise errors.OpPrereqError("Instance %s is already on the node %s" %
4286 (instance.name, target_node),
4289 bep = self.cfg.GetClusterInfo().FillBE(instance)
4291 for idx, dsk in enumerate(instance.disks):
4292 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4293 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4294 " cannot copy" % idx, errors.ECODE_STATE)
4296 _CheckNodeOnline(self, target_node)
4297 _CheckNodeNotDrained(self, target_node)
4298 _CheckNodeVmCapable(self, target_node)
4299 cluster = self.cfg.GetClusterInfo()
4300 group_info = self.cfg.GetNodeGroup(node.group)
4301 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
4302 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
4303 ignore=self.op.ignore_ipolicy)
4305 if instance.admin_state == constants.ADMINST_UP:
4306 # check memory requirements on the secondary node
4307 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4308 instance.name, bep[constants.BE_MAXMEM],
4309 instance.hypervisor)
4311 self.LogInfo("Not checking memory on the secondary node as"
4312 " instance will not be started")
4314 # check bridge existence
4315 _CheckInstanceBridgesExist(self, instance, node=target_node)
4317 def Exec(self, feedback_fn):
4318 """Move an instance.
4320 The move is done by shutting it down on its present node, copying
4321 the data over (slow) and starting it on the new node.
4324 instance = self.instance
4326 source_node = instance.primary_node
4327 target_node = self.target_node
4329 self.LogInfo("Shutting down instance %s on source node %s",
4330 instance.name, source_node)
4332 assert (self.owned_locks(locking.LEVEL_NODE) ==
4333 self.owned_locks(locking.LEVEL_NODE_RES))
4335 result = self.rpc.call_instance_shutdown(source_node, instance,
4336 self.op.shutdown_timeout,
4338 msg = result.fail_msg
4340 if self.op.ignore_consistency:
4341 self.LogWarning("Could not shutdown instance %s on node %s."
4342 " Proceeding anyway. Please make sure node"
4343 " %s is down. Error details: %s",
4344 instance.name, source_node, source_node, msg)
4346 raise errors.OpExecError("Could not shutdown instance %s on"
4348 (instance.name, source_node, msg))
4350 # create the target disks
4352 _CreateDisks(self, instance, target_node=target_node)
4353 except errors.OpExecError:
4354 self.LogWarning("Device creation failed")
4355 self.cfg.ReleaseDRBDMinors(instance.name)
4358 cluster_name = self.cfg.GetClusterInfo().cluster_name
4361 # activate, get path, copy the data over
4362 for idx, disk in enumerate(instance.disks):
4363 self.LogInfo("Copying data for disk %d", idx)
4364 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
4365 instance.name, True, idx)
4367 self.LogWarning("Can't assemble newly created disk %d: %s",
4368 idx, result.fail_msg)
4369 errs.append(result.fail_msg)
4371 dev_path = result.payload
4372 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
4373 target_node, dev_path,
4376 self.LogWarning("Can't copy data over for disk %d: %s",
4377 idx, result.fail_msg)
4378 errs.append(result.fail_msg)
4382 self.LogWarning("Some disks failed to copy, aborting")
4384 _RemoveDisks(self, instance, target_node=target_node)
4386 self.cfg.ReleaseDRBDMinors(instance.name)
4387 raise errors.OpExecError("Errors during disk copy: %s" %
4390 instance.primary_node = target_node
4391 self.cfg.Update(instance, feedback_fn)
4393 self.LogInfo("Removing the disks on the original node")
4394 _RemoveDisks(self, instance, target_node=source_node)
4396 # Only start the instance if it's marked as up
4397 if instance.admin_state == constants.ADMINST_UP:
4398 self.LogInfo("Starting instance %s on node %s",
4399 instance.name, target_node)
4401 disks_ok, _ = _AssembleInstanceDisks(self, instance,
4402 ignore_secondaries=True)
4404 _ShutdownInstanceDisks(self, instance)
4405 raise errors.OpExecError("Can't activate the instance's disks")
4407 result = self.rpc.call_instance_start(target_node,
4408 (instance, None, None), False,
4410 msg = result.fail_msg
4412 _ShutdownInstanceDisks(self, instance)
4413 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4414 (instance.name, target_node, msg))
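# Summary of the move performed in Exec() above: shut the instance down on
# the source node, create fresh disks on the target node, assemble each new
# disk there and copy the data over via blockdev_export, switch
# instance.primary_node to the target, remove the old disks and finally
# restart the instance if it was marked as up.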
4417 class LUNodeMigrate(LogicalUnit):
4418 """Migrate all instances from a node.
4421 HPATH = "node-migrate"
4422 HTYPE = constants.HTYPE_NODE
4425 def CheckArguments(self):
4428 def ExpandNames(self):
4429 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4431 self.share_locks = _ShareAll()
4432 self.needed_locks = {
4433 locking.LEVEL_NODE: [self.op.node_name],
4436 def BuildHooksEnv(self):
4439 This runs on the master, the primary and all the secondaries.
4443 "NODE_NAME": self.op.node_name,
4444 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
4447 def BuildHooksNodes(self):
4448 """Build hooks nodes.
4451 nl = [self.cfg.GetMasterNode()]
4454 def CheckPrereq(self):
4457 def Exec(self, feedback_fn):
4458 # Prepare jobs for migrating instances
4459 allow_runtime_changes = self.op.allow_runtime_changes
4461 [opcodes.OpInstanceMigrate(instance_name=inst.name,
4464 iallocator=self.op.iallocator,
4465 target_node=self.op.target_node,
4466 allow_runtime_changes=allow_runtime_changes,
4467 ignore_ipolicy=self.op.ignore_ipolicy)]
4468 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
4470 # TODO: Run iallocator in this opcode and pass correct placement options to
4471 # OpInstanceMigrate. Since other jobs can modify the cluster between
4472 # running the iallocator and the actual migration, a good consistency model
4473 # will have to be found.
4475 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
4476 frozenset([self.op.node_name]))
4478 return ResultWithJobs(jobs)
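# LUNodeMigrate does not migrate anything itself: it builds one
# single-opcode OpInstanceMigrate job per primary instance on the node and
# hands them back via ResultWithJobs, so each instance migration then runs
# as its own job.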
4481 class TLMigrateInstance(Tasklet):
4482 """Tasklet class for instance migration.
4485 @ivar live: whether the migration will be done live or non-live;
4486 this variable is initialized only after CheckPrereq has run
4487 @type cleanup: boolean
4488 @ivar cleanup: Whether we clean up from a failed migration
4489 @type iallocator: string
4490 @ivar iallocator: The iallocator used to determine target_node
4491 @type target_node: string
4492 @ivar target_node: If given, the target_node to reallocate the instance to
4493 @type failover: boolean
4494 @ivar failover: Whether operation results in failover or migration
4495 @type fallback: boolean
4496 @ivar fallback: Whether fallback to failover is allowed if migration not
4498 @type ignore_consistency: boolean
4499 @ivar ignore_consistency: Whether we should ignore consistency between source
4501 @type shutdown_timeout: int
4502 @ivar shutdown_timeout: In case of failover, timeout of the shutdown
4503 @type ignore_ipolicy: bool
4504 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
4509 _MIGRATION_POLL_INTERVAL = 1 # seconds
4510 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
4512 def __init__(self, lu, instance_name, cleanup, failover, fallback,
4513 ignore_consistency, allow_runtime_changes, shutdown_timeout,
4515 """Initializes this class.
4518 Tasklet.__init__(self, lu)
4521 self.instance_name = instance_name
4522 self.cleanup = cleanup
4523 self.live = False # will be overridden later
4524 self.failover = failover
4525 self.fallback = fallback
4526 self.ignore_consistency = ignore_consistency
4527 self.shutdown_timeout = shutdown_timeout
4528 self.ignore_ipolicy = ignore_ipolicy
4529 self.allow_runtime_changes = allow_runtime_changes
4531 def CheckPrereq(self):
4532 """Check prerequisites.
4534 This checks that the instance is in the cluster.
4537 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
4538 instance = self.cfg.GetInstanceInfo(instance_name)
4539 assert instance is not None
4540 self.instance = instance
4541 cluster = self.cfg.GetClusterInfo()
4543 if (not self.cleanup and
4544 not instance.admin_state == constants.ADMINST_UP and
4545 not self.failover and self.fallback):
4546 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
4547 " switching to failover")
4548 self.failover = True
4550 if instance.disk_template not in constants.DTS_MIRRORED:
4555 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
4556 " %s" % (instance.disk_template, text),
4559 if instance.disk_template in constants.DTS_EXT_MIRROR:
4560 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
4562 if self.lu.op.iallocator:
4563 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
4564 self._RunAllocator()
4566 # We set self.target_node as it is required by
4568 self.target_node = self.lu.op.target_node
4570 # Check that the target node is correct in terms of instance policy
4571 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
4572 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
4573 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
4575 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
4576 ignore=self.ignore_ipolicy)
4578 # self.target_node is already populated, either directly or by the
4580 target_node = self.target_node
4581 if self.target_node == instance.primary_node:
4582 raise errors.OpPrereqError("Cannot migrate instance %s"
4583 " to its primary (%s)" %
4584 (instance.name, instance.primary_node),
4587 if len(self.lu.tasklets) == 1:
4588 # It is safe to release locks only when we're the only tasklet
4590 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
4591 keep=[instance.primary_node, self.target_node])
4592 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
4595 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
4597 secondary_nodes = instance.secondary_nodes
4598 if not secondary_nodes:
4599 raise errors.ConfigurationError("No secondary node but using"
4600 " %s disk template" %
4601 instance.disk_template)
4602 target_node = secondary_nodes[0]
4603 if self.lu.op.iallocator or (self.lu.op.target_node and
4604 self.lu.op.target_node != target_node):
4606 text = "failed over"
4609 raise errors.OpPrereqError("Instances with disk template %s cannot"
4610 " be %s to arbitrary nodes"
4611 " (neither an iallocator nor a target"
4612 " node can be passed)" %
4613 (instance.disk_template, text),
4615 nodeinfo = self.cfg.GetNodeInfo(target_node)
4616 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
4617 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
4619 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
4620 ignore=self.ignore_ipolicy)
4622 i_be = cluster.FillBE(instance)
4624 # check memory requirements on the secondary node
4625 if (not self.cleanup and
4626 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
4627 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
4628 "migrating instance %s" %
4630 i_be[constants.BE_MINMEM],
4631 instance.hypervisor)
4633 self.lu.LogInfo("Not checking memory on the secondary node as"
4634 " instance will not be started")
4636 # check if failover must be forced instead of migration
4637 if (not self.cleanup and not self.failover and
4638 i_be[constants.BE_ALWAYS_FAILOVER]):
4639 self.lu.LogInfo("Instance configured to always failover; fallback"
4641 self.failover = True
4643 # check bridge existence
4644 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
4646 if not self.cleanup:
4647 _CheckNodeNotDrained(self.lu, target_node)
4648 if not self.failover:
4649 result = self.rpc.call_instance_migratable(instance.primary_node,
4651 if result.fail_msg and self.fallback:
4652 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
4654 self.failover = True
4656 result.Raise("Can't migrate, please use failover",
4657 prereq=True, ecode=errors.ECODE_STATE)
4659 assert not (self.failover and self.cleanup)
4661 if not self.failover:
4662 if self.lu.op.live is not None and self.lu.op.mode is not None:
4663 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
4664 " parameters are accepted",
4666 if self.lu.op.live is not None:
4668 self.lu.op.mode = constants.HT_MIGRATION_LIVE
4670 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
4671 # reset the 'live' parameter to None so that repeated
4672 # invocations of CheckPrereq do not raise an exception
4673 self.lu.op.live = None
4674 elif self.lu.op.mode is None:
4675 # read the default value from the hypervisor
4676 i_hv = cluster.FillHV(self.instance, skip_globals=False)
4677 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
4679 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
4681 # Failover is never live
4684 if not (self.failover or self.cleanup):
4685 remote_info = self.rpc.call_instance_info(instance.primary_node,
4687 instance.hypervisor)
4688 remote_info.Raise("Error checking instance on node %s" %
4689 instance.primary_node)
4690 instance_running = bool(remote_info.payload)
4691 if instance_running:
4692 self.current_mem = int(remote_info.payload["memory"])
4694 def _RunAllocator(self):
4695 """Run the allocator based on input opcode.
4698 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
4700 # FIXME: add a self.ignore_ipolicy option
4701 req = iallocator.IAReqRelocate(name=self.instance_name,
4702 relocate_from=[self.instance.primary_node])
4703 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
4705 ial.Run(self.lu.op.iallocator)
4708 raise errors.OpPrereqError("Can't compute nodes using"
4709 " iallocator '%s': %s" %
4710 (self.lu.op.iallocator, ial.info),
4712 self.target_node = ial.result[0]
4713 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4714 self.instance_name, self.lu.op.iallocator,
4715 utils.CommaJoin(ial.result))
4717 def _WaitUntilSync(self):
4718 """Poll with custom rpc for disk sync.
4720 This uses our own step-based rpc call.
4723 self.feedback_fn("* wait until resync is done")
4727 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4729 (self.instance.disks,
4732 for node, nres in result.items():
4733 nres.Raise("Cannot resync disks on node %s" % node)
4734 node_done, node_percent = nres.payload
4735 all_done = all_done and node_done
4736 if node_percent is not None:
4737 min_percent = min(min_percent, node_percent)
4739 if min_percent < 100:
4740 self.feedback_fn(" - progress: %.1f%%" % min_percent)
4743 def _EnsureSecondary(self, node):
4744 """Demote a node to secondary.
4747 self.feedback_fn("* switching node %s to secondary mode" % node)
4749 for dev in self.instance.disks:
4750 self.cfg.SetDiskID(dev, node)
4752 result = self.rpc.call_blockdev_close(node, self.instance.name,
4753 self.instance.disks)
4754 result.Raise("Cannot change disk to secondary on node %s" % node)
4756 def _GoStandalone(self):
4757 """Disconnect from the network.
4760 self.feedback_fn("* changing into standalone mode")
4761 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4762 self.instance.disks)
4763 for node, nres in result.items():
4764 nres.Raise("Cannot disconnect disks node %s" % node)
4766 def _GoReconnect(self, multimaster):
4767 """Reconnect to the network.
4773 msg = "single-master"
4774 self.feedback_fn("* changing disks into %s mode" % msg)
4775 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4776 (self.instance.disks, self.instance),
4777 self.instance.name, multimaster)
4778 for node, nres in result.items():
4779 nres.Raise("Cannot change disks config on node %s" % node)
4781 def _ExecCleanup(self):
4782 """Try to cleanup after a failed migration.
4784 The cleanup is done by:
4785 - check that the instance is running only on one node
4786 (and update the config if needed)
4787 - change disks on its secondary node to secondary
4788 - wait until disks are fully synchronized
4789 - disconnect from the network
4790 - change disks into single-master mode
4791 - wait again until disks are fully synchronized
4794 instance = self.instance
4795 target_node = self.target_node
4796 source_node = self.source_node
4798 # check running on only one node
4799 self.feedback_fn("* checking where the instance actually runs"
4800 " (if this hangs, the hypervisor might be in"
4802 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4803 for node, result in ins_l.items():
4804 result.Raise("Can't contact node %s" % node)
4806 runningon_source = instance.name in ins_l[source_node].payload
4807 runningon_target = instance.name in ins_l[target_node].payload
4809 if runningon_source and runningon_target:
4810 raise errors.OpExecError("Instance seems to be running on two nodes,"
4811 " or the hypervisor is confused; you will have"
4812 " to ensure manually that it runs only on one"
4813 " and restart this operation")
4815 if not (runningon_source or runningon_target):
4816 raise errors.OpExecError("Instance does not seem to be running at all;"
4817 " in this case it's safer to repair by"
4818 " running 'gnt-instance stop' to ensure disk"
4819 " shutdown, and then restarting it")
4821 if runningon_target:
4822 # the migration has actually succeeded, we need to update the config
4823 self.feedback_fn("* instance running on secondary node (%s),"
4824 " updating config" % target_node)
4825 instance.primary_node = target_node
4826 self.cfg.Update(instance, self.feedback_fn)
4827 demoted_node = source_node
4829 self.feedback_fn("* instance confirmed to be running on its"
4830 " primary node (%s)" % source_node)
4831 demoted_node = target_node
4833 if instance.disk_template in constants.DTS_INT_MIRROR:
4834 self._EnsureSecondary(demoted_node)
4836 self._WaitUntilSync()
4837 except errors.OpExecError:
4838 # we ignore errors here, since if the device is standalone, it
4839 # won't be able to sync
4841 self._GoStandalone()
4842 self._GoReconnect(False)
4843 self._WaitUntilSync()
4845 self.feedback_fn("* done")
4847 def _RevertDiskStatus(self):
4848 """Try to revert the disk status after a failed migration.
4851 target_node = self.target_node
4852 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
4856 self._EnsureSecondary(target_node)
4857 self._GoStandalone()
4858 self._GoReconnect(False)
4859 self._WaitUntilSync()
4860 except errors.OpExecError, err:
4861 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
4862 " please try to recover the instance manually;"
4863 " error '%s'" % str(err))
4865 def _AbortMigration(self):
4866 """Call the hypervisor code to abort a started migration.
4869 instance = self.instance
4870 target_node = self.target_node
4871 source_node = self.source_node
4872 migration_info = self.migration_info
4874 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
4878 abort_msg = abort_result.fail_msg
4880 logging.error("Aborting migration failed on target node %s: %s",
4881 target_node, abort_msg)
4882 # Don't raise an exception here, as we still have to try to revert the
4883 # disk status, even if this step failed.
4885 abort_result = self.rpc.call_instance_finalize_migration_src(
4886 source_node, instance, False, self.live)
4887 abort_msg = abort_result.fail_msg
4889 logging.error("Aborting migration failed on source node %s: %s",
4890 source_node, abort_msg)
4892 def _ExecMigration(self):
4893 """Migrate an instance.
4895 The migrate is done by:
4896 - change the disks into dual-master mode
4897 - wait until disks are fully synchronized again
4898 - migrate the instance
4899 - change disks on the new secondary node (the old primary) to secondary
4900 - wait until disks are fully synchronized
4901 - change disks into single-master mode
4904 instance = self.instance
4905 target_node = self.target_node
4906 source_node = self.source_node
4908 # Check for hypervisor version mismatch and warn the user.
4909 nodeinfo = self.rpc.call_node_info([source_node, target_node],
4910 None, [self.instance.hypervisor], False)
4911 for ninfo in nodeinfo.values():
4912 ninfo.Raise("Unable to retrieve node information from node '%s'" %
4914 (_, _, (src_info, )) = nodeinfo[source_node].payload
4915 (_, _, (dst_info, )) = nodeinfo[target_node].payload
4917 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
4918 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
4919 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
4920 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
4921 if src_version != dst_version:
4922 self.feedback_fn("* warning: hypervisor version mismatch between"
4923 " source (%s) and target (%s) node" %
4924 (src_version, dst_version))
4926 self.feedback_fn("* checking disk consistency between source and target")
4927 for (idx, dev) in enumerate(instance.disks):
4928 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
4929 raise errors.OpExecError("Disk %s is degraded or not fully"
4930 " synchronized on target node,"
4931 " aborting migration" % idx)
4933 if self.current_mem > self.tgt_free_mem:
4934 if not self.allow_runtime_changes:
4935 raise errors.OpExecError("Memory ballooning not allowed and not enough"
4936 " free memory to fit instance %s on target"
4937 " node %s (have %dMB, need %dMB)" %
4938 (instance.name, target_node,
4939 self.tgt_free_mem, self.current_mem))
4940 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
4941 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
4944 rpcres.Raise("Cannot modify instance runtime memory")
4946 # First get the migration information from the remote node
4947 result = self.rpc.call_migration_info(source_node, instance)
4948 msg = result.fail_msg
4950 log_err = ("Failed fetching source migration information from %s: %s" %
4952 logging.error(log_err)
4953 raise errors.OpExecError(log_err)
4955 self.migration_info = migration_info = result.payload
4957 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
4958 # Then switch the disks to master/master mode
4959 self._EnsureSecondary(target_node)
4960 self._GoStandalone()
4961 self._GoReconnect(True)
4962 self._WaitUntilSync()
4964 self.feedback_fn("* preparing %s to accept the instance" % target_node)
4965 result = self.rpc.call_accept_instance(target_node,
4968 self.nodes_ip[target_node])
4970 msg = result.fail_msg
4972 logging.error("Instance pre-migration failed, trying to revert"
4973 " disk status: %s", msg)
4974 self.feedback_fn("Pre-migration failed, aborting")
4975 self._AbortMigration()
4976 self._RevertDiskStatus()
4977 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4978 (instance.name, msg))
4980 self.feedback_fn("* migrating instance to %s" % target_node)
4981 result = self.rpc.call_instance_migrate(source_node, instance,
4982 self.nodes_ip[target_node],
4984 msg = result.fail_msg
4986 logging.error("Instance migration failed, trying to revert"
4987 " disk status: %s", msg)
4988 self.feedback_fn("Migration failed, aborting")
4989 self._AbortMigration()
4990 self._RevertDiskStatus()
4991 raise errors.OpExecError("Could not migrate instance %s: %s" %
4992 (instance.name, msg))
4994 self.feedback_fn("* starting memory transfer")
4995 last_feedback = time.time()
4997 result = self.rpc.call_instance_get_migration_status(source_node,
4999 msg = result.fail_msg
5000 ms = result.payload # MigrationStatus instance
5001 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
5002 logging.error("Instance migration failed, trying to revert"
5003 " disk status: %s", msg)
5004 self.feedback_fn("Migration failed, aborting")
5005 self._AbortMigration()
5006 self._RevertDiskStatus()
5008 msg = "hypervisor returned failure"
5009 raise errors.OpExecError("Could not migrate instance %s: %s" %
5010 (instance.name, msg))
5012 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
5013 self.feedback_fn("* memory transfer complete")
5016 if (utils.TimeoutExpired(last_feedback,
5017 self._MIGRATION_FEEDBACK_INTERVAL) and
5018 ms.transferred_ram is not None):
5019 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
5020 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
5021 last_feedback = time.time()
5023 time.sleep(self._MIGRATION_POLL_INTERVAL)
5025 result = self.rpc.call_instance_finalize_migration_src(source_node,
5029 msg = result.fail_msg
5031 logging.error("Instance migration succeeded, but finalization failed"
5032 " on the source node: %s", msg)
5033 raise errors.OpExecError("Could not finalize instance migration: %s" %
5036 instance.primary_node = target_node
5038 # distribute new instance config to the other nodes
5039 self.cfg.Update(instance, self.feedback_fn)
5041 result = self.rpc.call_instance_finalize_migration_dst(target_node,
5045 msg = result.fail_msg
5047 logging.error("Instance migration succeeded, but finalization failed"
5048 " on the target node: %s", msg)
5049 raise errors.OpExecError("Could not finalize instance migration: %s" %
5052 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
5053 self._EnsureSecondary(source_node)
5054 self._WaitUntilSync()
5055 self._GoStandalone()
5056 self._GoReconnect(False)
5057 self._WaitUntilSync()
5059 # If the instance's disk template is `rbd' or `ext' and there was a
5060 # successful migration, unmap the device from the source node.
5061 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
5062 disks = _ExpandCheckDisks(instance, instance.disks)
5063 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
5065 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
5066 msg = result.fail_msg
5068 logging.error("Migration was successful, but couldn't unmap the"
5069 " block device %s on source node %s: %s",
5070 disk.iv_name, source_node, msg)
5071 logging.error("You need to unmap the device %s manually on %s",
5072 disk.iv_name, source_node)
5074 self.feedback_fn("* done")
5076 def _ExecFailover(self):
5077 """Failover an instance.
5079 The failover is done by shutting it down on its present node and
5080 starting it on the secondary.
5083 instance = self.instance
5084 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5086 source_node = instance.primary_node
5087 target_node = self.target_node
5089 if instance.admin_state == constants.ADMINST_UP:
5090 self.feedback_fn("* checking disk consistency between source and target")
5091 for (idx, dev) in enumerate(instance.disks):
5092 # for drbd, these are drbd over lvm
5093 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
5095 if primary_node.offline:
5096 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
5098 (primary_node.name, idx, target_node))
5099 elif not self.ignore_consistency:
5100 raise errors.OpExecError("Disk %s is degraded on target node,"
5101 " aborting failover" % idx)
5103 self.feedback_fn("* not checking disk consistency as instance is not"
5106 self.feedback_fn("* shutting down instance on source node")
5107 logging.info("Shutting down instance %s on node %s",
5108 instance.name, source_node)
5110 result = self.rpc.call_instance_shutdown(source_node, instance,
5111 self.shutdown_timeout,
5113 msg = result.fail_msg
5115 if self.ignore_consistency or primary_node.offline:
5116 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
5117 " proceeding anyway; please make sure node"
5118 " %s is down; error details: %s",
5119 instance.name, source_node, source_node, msg)
5121 raise errors.OpExecError("Could not shutdown instance %s on"
5123 (instance.name, source_node, msg))
5125 self.feedback_fn("* deactivating the instance's disks on source node")
5126 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
5127 raise errors.OpExecError("Can't shut down the instance's disks")
5129 instance.primary_node = target_node
5130 # distribute new instance config to the other nodes
5131 self.cfg.Update(instance, self.feedback_fn)
5133 # Only start the instance if it's marked as up
5134 if instance.admin_state == constants.ADMINST_UP:
5135 self.feedback_fn("* activating the instance's disks on target node %s" %
5137 logging.info("Starting instance %s on node %s",
5138 instance.name, target_node)
5140 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
5141 ignore_secondaries=True)
5143 _ShutdownInstanceDisks(self.lu, instance)
5144 raise errors.OpExecError("Can't activate the instance's disks")
5146 self.feedback_fn("* starting the instance on the target node %s" %
5148 result = self.rpc.call_instance_start(target_node, (instance, None, None),
5149 False, self.lu.op.reason)
5150 msg = result.fail_msg
5152 _ShutdownInstanceDisks(self.lu, instance)
5153 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5154 (instance.name, target_node, msg))
5156 def Exec(self, feedback_fn):
5157 """Perform the migration.
5160 self.feedback_fn = feedback_fn
5161 self.source_node = self.instance.primary_node
5163 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
5164 if self.instance.disk_template in constants.DTS_INT_MIRROR:
5165 self.target_node = self.instance.secondary_nodes[0]
5166 # Otherwise self.target_node has been populated either
5167 # directly, or through an iallocator.
5169 self.all_nodes = [self.source_node, self.target_node]
5170 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
5171 in self.cfg.GetMultiNodeInfo(self.all_nodes))
5174 feedback_fn("Failover instance %s" % self.instance.name)
5175 self._ExecFailover()
5177 feedback_fn("Migrating instance %s" % self.instance.name)
5180 return self._ExecCleanup()
5182 return self._ExecMigration()
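# Dispatch in Exec() above: self.failover selects _ExecFailover(), otherwise
# self.cleanup selects _ExecCleanup() and the default path is
# _ExecMigration(); all three rely on source_node, target_node, all_nodes
# and nodes_ip being filled in first.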
5185 def _CreateBlockDev(lu, node, instance, device, force_create, info,
5187 """Wrapper around L{_CreateBlockDevInner}.
5189 This method annotates the root device first.
5192 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
5193 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
5194 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
5195 force_open, excl_stor)
5198 def _CreateBlockDevInner(lu, node, instance, device, force_create,
5199 info, force_open, excl_stor):
5200 """Create a tree of block devices on a given node.
5202 If this device type has to be created on secondaries, create it and
5205 If not, just recurse to children keeping the same 'force' value.
5207 @attention: The device has to be annotated already.
5209 @param lu: the lu on whose behalf we execute
5210 @param node: the node on which to create the device
5211 @type instance: L{objects.Instance}
5212 @param instance: the instance which owns the device
5213 @type device: L{objects.Disk}
5214 @param device: the device to create
5215 @type force_create: boolean
5216 @param force_create: whether to force creation of this device; this
5217 will be changed to True whenever we find a device which has
5218 CreateOnSecondary() attribute
5219 @param info: the extra 'metadata' we should attach to the device
5220 (this will be represented as a LVM tag)
5221 @type force_open: boolean
5222 @param force_open: this parameter will be passed to the
5223 L{backend.BlockdevCreate} function where it specifies
5224 whether we run on primary or not, and it affects both
5225 the child assembly and the device's own Open() execution
5226 @type excl_stor: boolean
5227 @param excl_stor: Whether exclusive_storage is active for the node
5229 @return: list of created devices
5231 created_devices = []
5233 if device.CreateOnSecondary():
5237 for child in device.children:
5238 devs = _CreateBlockDevInner(lu, node, instance, child, force_create,
5239 info, force_open, excl_stor)
5240 created_devices.extend(devs)
5242 if not force_create:
5243 return created_devices
5245 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
5247 # The device has been completely created, so there is no point in keeping
5248 # its subdevices in the list. We just add the device itself instead.
5249 created_devices = [(node, device)]
5250 return created_devices
5252 except errors.DeviceCreationError, e:
5253 e.created_devices.extend(created_devices)
5255 except errors.OpExecError, e:
5256 raise errors.DeviceCreationError(str(e), created_devices)
5259 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
5261 """Create a single block device on a given node.
5263 This will not recurse over children of the device, so they must be
5266 @param lu: the lu on whose behalf we execute
5267 @param node: the node on which to create the device
5268 @type instance: L{objects.Instance}
5269 @param instance: the instance which owns the device
5270 @type device: L{objects.Disk}
5271 @param device: the device to create
5272 @param info: the extra 'metadata' we should attach to the device
5273 (this will be represented as a LVM tag)
5274 @type force_open: boolean
5275 @param force_open: this parameter will be passed to the
5276 L{backend.BlockdevCreate} function where it specifies
5277 whether we run on primary or not, and it affects both
5278 the child assembly and the device's own Open() execution
5279 @type excl_stor: boolean
5280 @param excl_stor: Whether exclusive_storage is active for the node
5283 lu.cfg.SetDiskID(device, node)
5284 result = lu.rpc.call_blockdev_create(node, device, device.size,
5285 instance.name, force_open, info,
5287 result.Raise("Can't create block device %s on"
5288 " node %s for instance %s" % (device, node, instance.name))
5289 if device.physical_id is None:
5290 device.physical_id = result.payload
5293 def _GenerateUniqueNames(lu, exts):
5294 """Generate suitable LV names.
5296 This will generate logical volume names for the given instance.
5301 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5302 results.append("%s%s" % (new_id, val))
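# Illustration (the ids below are placeholders; GenerateUniqueID() returns
# whatever unique string the configuration generates): calling
# _GenerateUniqueNames(lu, [".disk0", ".disk1"]) yields names of the form
#   ["<uuid0>.disk0", "<uuid1>.disk1"]
# which _GenerateDiskTemplate() below further suffixes with "_data"/"_meta"
# for DRBD volumes.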
5306 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
5307 iv_name, p_minor, s_minor):
5308 """Generate a drbd8 device complete with its children.
5311 assert len(vgnames) == len(names) == 2
5312 port = lu.cfg.AllocatePort()
5313 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5315 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5316 logical_id=(vgnames[0], names[0]),
5318 dev_data.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5319 dev_meta = objects.Disk(dev_type=constants.LD_LV,
5320 size=constants.DRBD_META_SIZE,
5321 logical_id=(vgnames[1], names[1]),
5323 dev_meta.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5324 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5325 logical_id=(primary, secondary, port,
5328 children=[dev_data, dev_meta],
5329 iv_name=iv_name, params={})
5330 drbd_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
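# The tree built above is a single LD_DRBD8 device whose logical_id is the
# 6-tuple (primary, secondary, port, p_minor, s_minor, shared_secret) and
# whose two LD_LV children are the data volume of the requested size and a
# DRBD_META_SIZE metadata volume, each carrying its own generated UUID.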
5334 _DISK_TEMPLATE_NAME_PREFIX = {
5335 constants.DT_PLAIN: "",
5336 constants.DT_RBD: ".rbd",
5337 constants.DT_EXT: ".ext",
5341 _DISK_TEMPLATE_DEVICE_TYPE = {
5342 constants.DT_PLAIN: constants.LD_LV,
5343 constants.DT_FILE: constants.LD_FILE,
5344 constants.DT_SHARED_FILE: constants.LD_FILE,
5345 constants.DT_BLOCK: constants.LD_BLOCKDEV,
5346 constants.DT_RBD: constants.LD_RBD,
5347 constants.DT_EXT: constants.LD_EXT,
5351 def _GenerateDiskTemplate(
5352 lu, template_name, instance_name, primary_node, secondary_nodes,
5353 disk_info, file_storage_dir, file_driver, base_index,
5354 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
5355 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
5356 """Generate the entire disk layout for a given template type.
5359 vgname = lu.cfg.GetVGName()
5360 disk_count = len(disk_info)
5363 if template_name == constants.DT_DISKLESS:
5365 elif template_name == constants.DT_DRBD8:
5366 if len(secondary_nodes) != 1:
5367 raise errors.ProgrammerError("Wrong template configuration")
5368 remote_node = secondary_nodes[0]
5369 minors = lu.cfg.AllocateDRBDMinor(
5370 [primary_node, remote_node] * len(disk_info), instance_name)
5372 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
5374 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
5377 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5378 for i in range(disk_count)]):
5379 names.append(lv_prefix + "_data")
5380 names.append(lv_prefix + "_meta")
5381 for idx, disk in enumerate(disk_info):
5382 disk_index = idx + base_index
5383 data_vg = disk.get(constants.IDISK_VG, vgname)
5384 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
5385 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5386 disk[constants.IDISK_SIZE],
5388 names[idx * 2:idx * 2 + 2],
5389 "disk/%d" % disk_index,
5390 minors[idx * 2], minors[idx * 2 + 1])
5391 disk_dev.mode = disk[constants.IDISK_MODE]
5392 disk_dev.name = disk.get(constants.IDISK_NAME, None)
5393 disks.append(disk_dev)
5396 raise errors.ProgrammerError("Wrong template configuration")
5398 if template_name == constants.DT_FILE:
5400 elif template_name == constants.DT_SHARED_FILE:
5401 _req_shr_file_storage()
5403 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
5404 if name_prefix is None:
5407 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
5408 (name_prefix, base_index + i)
5409 for i in range(disk_count)])
5411 if template_name == constants.DT_PLAIN:
5413 def logical_id_fn(idx, _, disk):
5414 vg = disk.get(constants.IDISK_VG, vgname)
5415 return (vg, names[idx])
5417 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
5419 lambda _, disk_index, disk: (file_driver,
5420 "%s/disk%d" % (file_storage_dir,
5422 elif template_name == constants.DT_BLOCK:
5424 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
5425 disk[constants.IDISK_ADOPT])
5426 elif template_name == constants.DT_RBD:
5427 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
5428 elif template_name == constants.DT_EXT:
5429 def logical_id_fn(idx, _, disk):
5430 provider = disk.get(constants.IDISK_PROVIDER, None)
5431 if provider is None:
5432 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
5433 " not found", constants.DT_EXT,
5434 constants.IDISK_PROVIDER)
5435 return (provider, names[idx])
5437 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
5439 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
5441 for idx, disk in enumerate(disk_info):
5443 # Only for the Ext template, add disk_info to params
5444 if template_name == constants.DT_EXT:
5445 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
5447 if key not in constants.IDISK_PARAMS:
5448 params[key] = disk[key]
5449 disk_index = idx + base_index
5450 size = disk[constants.IDISK_SIZE]
5451 feedback_fn("* disk %s, size %s" %
5452 (disk_index, utils.FormatUnit(size, "h")))
5453 disk_dev = objects.Disk(dev_type=dev_type, size=size,
5454 logical_id=logical_id_fn(idx, disk_index, disk),
5455 iv_name="disk/%d" % disk_index,
5456 mode=disk[constants.IDISK_MODE],
5458 disk_dev.name = disk.get(constants.IDISK_NAME, None)
5459 disk_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5460 disks.append(disk_dev)
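# logical_id shapes produced above, per template (summarising the
# logical_id_fn variants): plain -> (vg, name), file/sharedfile ->
# (file_driver, "<file_storage_dir>/disk<N>"), blockdev ->
# (BLOCKDEV_DRIVER_MANUAL, adopted device), rbd -> ("rbd", name),
# ext -> (provider, name); DRBD8 disks are built via _GenerateDRBD8Branch
# instead.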
5465 def _GetInstanceInfoText(instance):
5466 """Compute the text that should be added to the disk's metadata.
5469 return "originstname+%s" % instance.name
5472 def _CalcEta(time_taken, written, total_size):
5473 """Calculates the ETA based on size written and total size.
5475 @param time_taken: The time taken so far
5476 @param written: amount written so far
5477 @param total_size: The total size of data to be written
5478 @return: The remaining time in seconds
5481 avg_time = time_taken / float(written)
5482 return (total_size - written) * avg_time
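# Worked example for _CalcEta(): with time_taken=120s and 1024 of 4096
# units written (e.g. MiB), the average cost is 120/1024 ~= 0.117 s/unit,
# so the returned ETA is (4096 - 1024) * 0.117 ~= 360 seconds.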
5485 def _WipeDisks(lu, instance, disks=None):
5486 """Wipes instance disks.
5488 @type lu: L{LogicalUnit}
5489 @param lu: the logical unit on whose behalf we execute
5490 @type instance: L{objects.Instance}
5491 @param instance: the instance whose disks we should create
5492 @type disks: None or list of tuple of (number, L{objects.Disk}, number)
5493 @param disks: Disk details; tuple contains disk index, disk object and the
5497 node = instance.primary_node
5500 disks = [(idx, disk, 0)
5501 for (idx, disk) in enumerate(instance.disks)]
5503 for (_, device, _) in disks:
5504 lu.cfg.SetDiskID(device, node)
5506 logging.info("Pausing synchronization of disks of instance '%s'",
5508 result = lu.rpc.call_blockdev_pause_resume_sync(node,
5509 (map(compat.snd, disks),
5512 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
5514 for idx, success in enumerate(result.payload):
5516 logging.warn("Pausing synchronization of disk %s of instance '%s'"
5517 " failed", idx, instance.name)
5520 for (idx, device, offset) in disks:
5521 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
5522 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
5524 int(min(constants.MAX_WIPE_CHUNK,
5525 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
5529 start_time = time.time()
5534 info_text = (" (from %s to %s)" %
5535 (utils.FormatUnit(offset, "h"),
5536 utils.FormatUnit(size, "h")))
5538 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
5540 logging.info("Wiping disk %d for instance %s on node %s using"
5541 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
5543 while offset < size:
5544 wipe_size = min(wipe_chunk_size, size - offset)
5546 logging.debug("Wiping disk %d, offset %s, chunk %s",
5547 idx, offset, wipe_size)
5549 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
5551 result.Raise("Could not wipe disk %d at offset %d for size %d" %
5552 (idx, offset, wipe_size))
5556 if now - last_output >= 60:
5557 eta = _CalcEta(now - start_time, offset, size)
5558 lu.LogInfo(" - done: %.1f%% ETA: %s",
5559 offset / float(size) * 100, utils.FormatSeconds(eta))
5562 logging.info("Resuming synchronization of disks for instance '%s'",
5565 result = lu.rpc.call_blockdev_pause_resume_sync(node,
5566 (map(compat.snd, disks),
5571 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
5572 node, result.fail_msg)
5574 for idx, success in enumerate(result.payload):
5576 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
5577 " failed", idx, instance.name)
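# Chunk sizing used by _WipeDisks() above: each write is
#   min(MAX_WIPE_CHUNK, disk_size * MIN_WIPE_CHUNK_PERCENT / 100.0)
# so, purely as an illustration (assuming a 10% minimum percentage and a
# 1024 MiB maximum chunk, values not taken from this file), a 4096 MiB disk
# is wiped in ~409 MiB chunks while very large disks are capped at the
# maximum chunk size.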
5580 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5581 """Create all disks for an instance.
5583 This abstracts away some work from AddInstance.
5585 @type lu: L{LogicalUnit}
5586 @param lu: the logical unit on whose behalf we execute
5587 @type instance: L{objects.Instance}
5588 @param instance: the instance whose disks we should create
5590 @param to_skip: list of indices to skip
5591 @type target_node: string
5592 @param target_node: if passed, overrides the target node for creation
5594 @return: the success of the creation
5597 info = _GetInstanceInfoText(instance)
5598 if target_node is None:
5599 pnode = instance.primary_node
5600 all_nodes = instance.all_nodes
5605 if instance.disk_template in constants.DTS_FILEBASED:
5606 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5607 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5609 result.Raise("Failed to create directory '%s' on"
5610 " node %s" % (file_storage_dir, pnode))
5613 # Note: this needs to be kept in sync with adding of disks in
5614 # LUInstanceSetParams
5615 for idx, device in enumerate(instance.disks):
5616 if to_skip and idx in to_skip:
5618 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
5620 for node in all_nodes:
5621 f_create = node == pnode
5623 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5624 disks_created.append((node, device))
5625 except errors.OpExecError:
5626 logging.warning("Creating disk %s for instance '%s' failed",
5628 except errors.DeviceCreationError, e:
5629 logging.warning("Creating disk %s for instance '%s' failed",
5631 disks_created.extend(e.created_devices)
5632 for (node, disk) in disks_created:
5633 lu.cfg.SetDiskID(disk, node)
5634 result = lu.rpc.call_blockdev_remove(node, disk)
5636 logging.warning("Failed to remove newly-created disk %s on node %s:"
5637 " %s", device, node, result.fail_msg)
5638 raise errors.OpExecError(e.message)
5641 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
5642 """Remove all disks for an instance.
5644 This abstracts away some work from `AddInstance()` and
5645 `RemoveInstance()`. Note that in case some of the devices couldn't
5646 be removed, the removal will continue with the other ones.
5648 @type lu: L{LogicalUnit}
5649 @param lu: the logical unit on whose behalf we execute
5650 @type instance: L{objects.Instance}
5651 @param instance: the instance whose disks we should remove
5652 @type target_node: string
5653 @param target_node: used to override the node on which to remove the disks
5655 @return: the success of the removal
5658 logging.info("Removing block devices for instance %s", instance.name)
5661 ports_to_release = set()
5662 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
5663 for (idx, device) in enumerate(anno_disks):
5665 edata = [(target_node, device)]
5667 edata = device.ComputeNodeTree(instance.primary_node)
5668 for node, disk in edata:
5669 lu.cfg.SetDiskID(disk, node)
5670 result = lu.rpc.call_blockdev_remove(node, disk)
5672 lu.LogWarning("Could not remove disk %s on node %s,"
5673 " continuing anyway: %s", idx, node, result.fail_msg)
5674 if not (result.offline and node != instance.primary_node):
5677 # if this is a DRBD disk, return its port to the pool
5678 if device.dev_type in constants.LDS_DRBD:
5679 ports_to_release.add(device.logical_id[2])
5681 if all_result or ignore_failures:
5682 for port in ports_to_release:
5683 lu.cfg.AddTcpUdpPort(port)
5685 if instance.disk_template in constants.DTS_FILEBASED:
5686 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5690 tgt = instance.primary_node
5691 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5693 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5694 file_storage_dir, instance.primary_node, result.fail_msg)
5700 def _ComputeDiskSizePerVG(disk_template, disks):
5701 """Compute disk size requirements in the volume group
5704 def _compute(disks, payload):
5705 """Universal algorithm.
5710 vgs[disk[constants.IDISK_VG]] = \
5711 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
5715 # Required free disk space as a function of disk and swap space
5717 constants.DT_DISKLESS: {},
5718 constants.DT_PLAIN: _compute(disks, 0),
5719 # 128 MB are added for drbd metadata for each disk
5720 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
5721 constants.DT_FILE: {},
5722 constants.DT_SHARED_FILE: {},
5725 if disk_template not in req_size_dict:
5726 raise errors.ProgrammerError("Disk template '%s' size requirement"
5727 " is unknown" % disk_template)
5729 return req_size_dict[disk_template]
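# Illustration: for two disks of 1024 MiB and 2048 MiB in volume group
# "xenvg" (a common default name), DT_PLAIN needs 1024 + 2048 = 3072 MiB in
# that VG, DT_DRBD8 additionally needs the 128 MiB of DRBD metadata per
# disk (3072 + 2 * 128 = 3328 MiB), and the file-based and diskless
# templates need no VG space at all.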
5732 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
5733 """Wrapper around IAReqInstanceAlloc.
5735 @param op: The instance opcode
5736 @param disks: The computed disks
5737 @param nics: The computed nics
5738 @param beparams: The fully filled beparams
5739 @param node_whitelist: List of nodes which should appear as online to the
5740 allocator (unless the node is already marked offline)
5742 @returns: A filled L{iallocator.IAReqInstanceAlloc}
5745 spindle_use = beparams[constants.BE_SPINDLE_USE]
5746 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
5747 disk_template=op.disk_template,
5750 vcpus=beparams[constants.BE_VCPUS],
5751 memory=beparams[constants.BE_MAXMEM],
5752 spindle_use=spindle_use,
5754 nics=[n.ToDict() for n in nics],
5755 hypervisor=op.hypervisor,
5756 node_whitelist=node_whitelist)
5759 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
5760 """Computes the nics.
5762 @param op: The instance opcode
5763 @param cluster: Cluster configuration object
5764 @param default_ip: The default ip to assign
5765 @param cfg: An instance of the configuration object
5766 @param ec_id: Execution context ID
5768 @returns: The built NIC objects
5773 nic_mode_req = nic.get(constants.INIC_MODE, None)
5774 nic_mode = nic_mode_req
5775 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
5776 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5778 net = nic.get(constants.INIC_NETWORK, None)
5779 link = nic.get(constants.NIC_LINK, None)
5780 ip = nic.get(constants.INIC_IP, None)
5782 if net is None or net.lower() == constants.VALUE_NONE:
5785 if nic_mode_req is not None or link is not None:
5786 raise errors.OpPrereqError("If network is given, no mode or link"
5787 " is allowed to be passed",
5790 # ip validity checks
5791 if ip is None or ip.lower() == constants.VALUE_NONE:
5793 elif ip.lower() == constants.VALUE_AUTO:
5794 if not op.name_check:
5795 raise errors.OpPrereqError("IP address set to auto but name checks"
5796 " have been skipped",
5800 # We defer pool operations until later, so that the iallocator has
5801 # filled in the instance's node(s)
5802 if ip.lower() == constants.NIC_IP_POOL:
5804 raise errors.OpPrereqError("if ip=pool, parameter network"
5805 " must be passed too",
5808 elif not netutils.IPAddress.IsValid(ip):
5809 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
5814 # TODO: check the ip address for uniqueness
5815 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5816 raise errors.OpPrereqError("Routed nic mode requires an ip address",
5819 # MAC address verification
5820 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
5821 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5822 mac = utils.NormalizeAndValidateMac(mac)
5825 # TODO: We need to factor this out
5826 cfg.ReserveMAC(mac, ec_id)
5827 except errors.ReservationError:
5828 raise errors.OpPrereqError("MAC address %s already in use"
5829 " in cluster" % mac,
5830 errors.ECODE_NOTUNIQUE)
5832 # Build nic parameters
5835 nicparams[constants.NIC_MODE] = nic_mode
5837 nicparams[constants.NIC_LINK] = link
5839 check_params = cluster.SimpleFillNIC(nicparams)
5840 objects.NIC.CheckParameterSyntax(check_params)
5841 net_uuid = cfg.LookupNetwork(net)
5842 name = nic.get(constants.INIC_NAME, None)
5843 if name is not None and name.lower() == constants.VALUE_NONE:
5845 nic_obj = objects.NIC(mac=mac, ip=nic_ip, name=name,
5846 network=net_uuid, nicparams=nicparams)
5847 nic_obj.uuid = cfg.GenerateUniqueID(ec_id)
5848 nics.append(nic_obj)
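# Illustration (rough sketch): an input nic dict such as
#   {constants.INIC_MAC: constants.VALUE_AUTO, constants.INIC_IP: None}
# with no network becomes an objects.NIC whose effective mode and link come
# from the cluster-wide nicparams defaults and whose MAC is still "auto"
# at this point; the real MAC is only generated later, in CheckPrereq, via
# cfg.GenerateMAC().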
5853 def _ComputeDisks(op, default_vg):
5854 """Computes the instance disks.
5856 @param op: The instance opcode
5857 @param default_vg: The default_vg to assume
5859 @return: The computed disks
5863 for disk in op.disks:
5864 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
5865 if mode not in constants.DISK_ACCESS_SET:
5866 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5867 mode, errors.ECODE_INVAL)
5868 size = disk.get(constants.IDISK_SIZE, None)
5870 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
5873 except (TypeError, ValueError):
5874 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
5877 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
5878 if ext_provider and op.disk_template != constants.DT_EXT:
5879 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
5880 " disk template, not %s" %
5881 (constants.IDISK_PROVIDER, constants.DT_EXT,
5882 op.disk_template), errors.ECODE_INVAL)
5884 data_vg = disk.get(constants.IDISK_VG, default_vg)
5885 name = disk.get(constants.IDISK_NAME, None)
5886 if name is not None and name.lower() == constants.VALUE_NONE:
5889 constants.IDISK_SIZE: size,
5890 constants.IDISK_MODE: mode,
5891 constants.IDISK_VG: data_vg,
5892 constants.IDISK_NAME: name,
5895 if constants.IDISK_METAVG in disk:
5896 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
5897 if constants.IDISK_ADOPT in disk:
5898 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
5900 # For extstorage, demand the `provider' option and add any
5901 # additional parameters (ext-params) to the dict
5902 if op.disk_template == constants.DT_EXT:
5904 new_disk[constants.IDISK_PROVIDER] = ext_provider
5906 if key not in constants.IDISK_PARAMS:
5907 new_disk[key] = disk[key]
5909 raise errors.OpPrereqError("Missing provider for template '%s'" %
5910 constants.DT_EXT, errors.ECODE_INVAL)
5912 disks.append(new_disk)
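# Illustration (rough sketch): a request entry such as
#   {constants.IDISK_SIZE: 10240}
# for a DT_PLAIN instance is normalized to roughly
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: default_vg, constants.IDISK_NAME: None}
# with IDISK_METAVG and IDISK_ADOPT copied through only when supplied.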
5917 def _ComputeFullBeParams(op, cluster):
5918 """Computes the full beparams.
5920 @param op: The instance opcode
5921 @param cluster: The cluster config object
5923 @return: The fully filled beparams
5926 default_beparams = cluster.beparams[constants.PP_DEFAULT]
5927 for param, value in op.beparams.iteritems():
5928 if value == constants.VALUE_AUTO:
5929 op.beparams[param] = default_beparams[param]
5930 objects.UpgradeBeParams(op.beparams)
5931 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
5932 return cluster.SimpleFillBE(op.beparams)
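# Illustration: with op.beparams of
#   {constants.BE_VCPUS: constants.VALUE_AUTO, constants.BE_MAXMEM: 2048}
# the "auto" vcpus value is first replaced by the cluster default, and
# SimpleFillBE() then returns the complete beparams dict (memory limits,
# vcpus, auto_balance, ...) actually used for the new instance.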
5935 def _CheckOpportunisticLocking(op):
5936 """Generate error if opportunistic locking is not possible.
5939 if op.opportunistic_locking and not op.iallocator:
5940 raise errors.OpPrereqError("Opportunistic locking is only available in"
5941 " combination with an instance allocator",
5945 class LUInstanceCreate(LogicalUnit):
5946 """Create an instance.
5949 HPATH = "instance-add"
5950 HTYPE = constants.HTYPE_INSTANCE
5953 def CheckArguments(self):
5957 # do not require name_check to ease forward/backward compatibility
5959 if self.op.no_install and self.op.start:
5960 self.LogInfo("No-installation mode selected, disabling startup")
5961 self.op.start = False
5962 # validate/normalize the instance name
5963 self.op.instance_name = \
5964 netutils.Hostname.GetNormalizedName(self.op.instance_name)
5966 if self.op.ip_check and not self.op.name_check:
5967 # TODO: make the ip check more flexible and not depend on the name check
5968 raise errors.OpPrereqError("Cannot do IP address check without a name"
5969 " check", errors.ECODE_INVAL)
5971 # check nics' parameter names
5972 for nic in self.op.nics:
5973 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
5974 # check that NIC's parameters names are unique and valid
5975 utils.ValidateDeviceNames("NIC", self.op.nics)
5977 # check that disk's names are unique and valid
5978 utils.ValidateDeviceNames("disk", self.op.disks)
5980 cluster = self.cfg.GetClusterInfo()
5981 if not self.op.disk_template in cluster.enabled_disk_templates:
5982 raise errors.OpPrereqError("Cannot create an instance with disk template"
5983 " '%s', because it is not enabled in the"
5984 " cluster. Enabled disk templates are: %s." %
5985 (self.op.disk_template,
5986 ",".join(cluster.enabled_disk_templates)))
5988 # check disks. parameter names and consistent adopt/no-adopt strategy
5989 has_adopt = has_no_adopt = False
5990 for disk in self.op.disks:
5991 if self.op.disk_template != constants.DT_EXT:
5992 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
5993 if constants.IDISK_ADOPT in disk:
5997 if has_adopt and has_no_adopt:
5998 raise errors.OpPrereqError("Either all disks are adopted or none is",
6001 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6002 raise errors.OpPrereqError("Disk adoption is not supported for the"
6003 " '%s' disk template" %
6004 self.op.disk_template,
6006 if self.op.iallocator is not None:
6007 raise errors.OpPrereqError("Disk adoption not allowed with an"
6008 " iallocator script", errors.ECODE_INVAL)
6009 if self.op.mode == constants.INSTANCE_IMPORT:
6010 raise errors.OpPrereqError("Disk adoption not allowed for"
6011 " instance import", errors.ECODE_INVAL)
6013 if self.op.disk_template in constants.DTS_MUST_ADOPT:
6014 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
6015 " but no 'adopt' parameter given" %
6016 self.op.disk_template,
6019 self.adopt_disks = has_adopt
6021 # instance name verification
6022 if self.op.name_check:
6023 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
6024 self.op.instance_name = self.hostname1.name
6025 # used in CheckPrereq for ip ping check
6026 self.check_ip = self.hostname1.ip
6028 self.check_ip = None
6030 # file storage checks
6031 if (self.op.file_driver and
6032 not self.op.file_driver in constants.FILE_DRIVER):
6033 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6034 self.op.file_driver, errors.ECODE_INVAL)
6036 if self.op.disk_template == constants.DT_FILE:
6037 opcodes.RequireFileStorage()
6038 elif self.op.disk_template == constants.DT_SHARED_FILE:
6039 opcodes.RequireSharedFileStorage()
6041 ### Node/iallocator related checks
6042 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6044 if self.op.pnode is not None:
6045 if self.op.disk_template in constants.DTS_INT_MIRROR:
6046 if self.op.snode is None:
6047 raise errors.OpPrereqError("The networked disk templates need"
6048 " a mirror node", errors.ECODE_INVAL)
6050 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6052 self.op.snode = None
6054 _CheckOpportunisticLocking(self.op)
6056 self._cds = _GetClusterDomainSecret()
6058 if self.op.mode == constants.INSTANCE_IMPORT:
6059 # On import force_variant must be True, because if we forced it at
6060 # initial install, our only chance when importing it back is that it
6062 self.op.force_variant = True
6064 if self.op.no_install:
6065 self.LogInfo("No-installation mode has no effect during import")
6067 elif self.op.mode == constants.INSTANCE_CREATE:
6068 if self.op.os_type is None:
6069 raise errors.OpPrereqError("No guest OS specified",
6071 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6072 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6073 " installation" % self.op.os_type,
6075 if self.op.disk_template is None:
6076 raise errors.OpPrereqError("No disk template specified",
6079 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6080 # Check handshake to ensure both clusters have the same domain secret
6081 src_handshake = self.op.source_handshake
6082 if not src_handshake:
6083 raise errors.OpPrereqError("Missing source handshake",
6086 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6089 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6092 # Load and check source CA
6093 self.source_x509_ca_pem = self.op.source_x509_ca
6094 if not self.source_x509_ca_pem:
6095 raise errors.OpPrereqError("Missing source X509 CA",
6099 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6101 except OpenSSL.crypto.Error, err:
6102 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6103 (err, ), errors.ECODE_INVAL)
6105 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6106 if errcode is not None:
6107 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6110 self.source_x509_ca = cert
6112 src_instance_name = self.op.source_instance_name
6113 if not src_instance_name:
6114 raise errors.OpPrereqError("Missing source instance name",
6117 self.source_instance_name = \
6118 netutils.GetHostname(name=src_instance_name).name
6121 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6122 self.op.mode, errors.ECODE_INVAL)
6124 def ExpandNames(self):
6125 """ExpandNames for CreateInstance.
6127 Figure out the right locks for instance creation.
6130 self.needed_locks = {}
6132 instance_name = self.op.instance_name
6133 # this is just a preventive check, but someone might still add this
6134 # instance in the meantime, and creation will fail at lock-add time
6135 if instance_name in self.cfg.GetInstanceList():
6136 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6137 instance_name, errors.ECODE_EXISTS)
6139 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6141 if self.op.iallocator:
6142 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
6143 # specifying a group on instance creation and then selecting nodes from
6145 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6146 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
6148 if self.op.opportunistic_locking:
6149 self.opportunistic_locks[locking.LEVEL_NODE] = True
6150 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
6152 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6153 nodelist = [self.op.pnode]
6154 if self.op.snode is not None:
6155 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6156 nodelist.append(self.op.snode)
6157 self.needed_locks[locking.LEVEL_NODE] = nodelist
6159 # in case of import lock the source node too
6160 if self.op.mode == constants.INSTANCE_IMPORT:
6161 src_node = self.op.src_node
6162 src_path = self.op.src_path
6164 if src_path is None:
6165 self.op.src_path = src_path = self.op.instance_name
6167 if src_node is None:
6168 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6169 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
6170 self.op.src_node = None
6171 if os.path.isabs(src_path):
6172 raise errors.OpPrereqError("Importing an instance from a path"
6173 " requires a source node option",
6176 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6177 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6178 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6179 if not os.path.isabs(src_path):
6180 self.op.src_path = src_path = \
6181 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
6183 self.needed_locks[locking.LEVEL_NODE_RES] = \
6184 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
6186 def _RunAllocator(self):
6187 """Run the allocator based on input opcode.
6190 if self.op.opportunistic_locking:
6191 # Only consider nodes for which a lock is held
6192 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
6194 node_whitelist = None
6196 #TODO Export network to iallocator so that it chooses a pnode
6197 # in a nodegroup that has the desired network connected to
6198 req = _CreateInstanceAllocRequest(self.op, self.disks,
6199 self.nics, self.be_full,
6201 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
6203 ial.Run(self.op.iallocator)
6206 # When opportunistic locks are used only a temporary failure is generated
6207 if self.op.opportunistic_locking:
6208 ecode = errors.ECODE_TEMP_NORES
6210 ecode = errors.ECODE_NORES
6212 raise errors.OpPrereqError("Can't compute nodes using"
6213 " iallocator '%s': %s" %
6214 (self.op.iallocator, ial.info),
6217 self.op.pnode = ial.result[0]
6218 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6219 self.op.instance_name, self.op.iallocator,
6220 utils.CommaJoin(ial.result))
6222 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
6224 if req.RequiredNodes() == 2:
6225 self.op.snode = ial.result[1]
6227 def BuildHooksEnv(self):
6230 This runs on master, primary and secondary nodes of the instance.
6234 "ADD_MODE": self.op.mode,
6236 if self.op.mode == constants.INSTANCE_IMPORT:
6237 env["SRC_NODE"] = self.op.src_node
6238 env["SRC_PATH"] = self.op.src_path
6239 env["SRC_IMAGES"] = self.src_images
6241 env.update(_BuildInstanceHookEnv(
6242 name=self.op.instance_name,
6243 primary_node=self.op.pnode,
6244 secondary_nodes=self.secondaries,
6245 status=self.op.start,
6246 os_type=self.op.os_type,
6247 minmem=self.be_full[constants.BE_MINMEM],
6248 maxmem=self.be_full[constants.BE_MAXMEM],
6249 vcpus=self.be_full[constants.BE_VCPUS],
6250 nics=_NICListToTuple(self, self.nics),
6251 disk_template=self.op.disk_template,
6252 disks=[(d[constants.IDISK_NAME], d[constants.IDISK_SIZE],
6253 d[constants.IDISK_MODE]) for d in self.disks],
6256 hypervisor_name=self.op.hypervisor,
6262 def BuildHooksNodes(self):
6263 """Build hooks nodes.
6266 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
6269 def _ReadExportInfo(self):
6270 """Reads the export information from disk.
6272 It will override the opcode source node and path with the actual
6273 information, if these two were not specified before.
6275 @return: the export information
6278 assert self.op.mode == constants.INSTANCE_IMPORT
6280 src_node = self.op.src_node
6281 src_path = self.op.src_path
6283 if src_node is None:
6284 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
6285 exp_list = self.rpc.call_export_list(locked_nodes)
6287 for node in exp_list:
6288 if exp_list[node].fail_msg:
6290 if src_path in exp_list[node].payload:
6292 self.op.src_node = src_node = node
6293 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
6297 raise errors.OpPrereqError("No export found for relative path %s" %
6298 src_path, errors.ECODE_INVAL)
6300 _CheckNodeOnline(self, src_node)
6301 result = self.rpc.call_export_info(src_node, src_path)
6302 result.Raise("No export or invalid export found in dir %s" % src_path)
6304 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6305 if not export_info.has_section(constants.INISECT_EXP):
6306 raise errors.ProgrammerError("Corrupted export config",
6307 errors.ECODE_ENVIRON)
6309 ei_version = export_info.get(constants.INISECT_EXP, "version")
6310 if (int(ei_version) != constants.EXPORT_VERSION):
6311 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6312 (ei_version, constants.EXPORT_VERSION),
6313 errors.ECODE_ENVIRON)
6316 def _ReadExportParams(self, einfo):
6317 """Use export parameters as defaults.
6319 In case the opcode doesn't specify (i.e. override) some instance
6320 parameters, try to use them from the export information, if
6324 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6326 if self.op.disk_template is None:
6327 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6328 self.op.disk_template = einfo.get(constants.INISECT_INS,
6330 if self.op.disk_template not in constants.DISK_TEMPLATES:
6331 raise errors.OpPrereqError("Disk template specified in configuration"
6332 " file is not one of the allowed values:"
6334 " ".join(constants.DISK_TEMPLATES),
6337 raise errors.OpPrereqError("No disk template specified and the export"
6338 " is missing the disk_template information",
6341 if not self.op.disks:
6343 # TODO: import the disk iv_name too
6344 for idx in range(constants.MAX_DISKS):
6345 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
6346 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6347 disks.append({constants.IDISK_SIZE: disk_sz})
6348 self.op.disks = disks
6349 if not disks and self.op.disk_template != constants.DT_DISKLESS:
6350 raise errors.OpPrereqError("No disk info specified and the export"
6351 " is missing the disk information",
6354 if not self.op.nics:
6356 for idx in range(constants.MAX_NICS):
6357 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
6359 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6360 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6367 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
6368 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
6370 if (self.op.hypervisor is None and
6371 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6372 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6374 if einfo.has_section(constants.INISECT_HYP):
6375 # use the export parameters but do not override the ones
6376 # specified by the user
6377 for name, value in einfo.items(constants.INISECT_HYP):
6378 if name not in self.op.hvparams:
6379 self.op.hvparams[name] = value
6381 if einfo.has_section(constants.INISECT_BEP):
6382 # use the parameters, without overriding
6383 for name, value in einfo.items(constants.INISECT_BEP):
6384 if name not in self.op.beparams:
6385 self.op.beparams[name] = value
6386 # Compatibility for the old "memory" be param
6387 if name == constants.BE_MEMORY:
6388 if constants.BE_MAXMEM not in self.op.beparams:
6389 self.op.beparams[constants.BE_MAXMEM] = value
6390 if constants.BE_MINMEM not in self.op.beparams:
6391 self.op.beparams[constants.BE_MINMEM] = value
6393 # try to read the parameters old style, from the main section
6394 for name in constants.BES_PARAMETERS:
6395 if (name not in self.op.beparams and
6396 einfo.has_option(constants.INISECT_INS, name)):
6397 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6399 if einfo.has_section(constants.INISECT_OSP):
6400 # use the parameters, without overriding
6401 for name, value in einfo.items(constants.INISECT_OSP):
6402 if name not in self.op.osparams:
6403 self.op.osparams[name] = value
6405 def _RevertToDefaults(self, cluster):
6406 """Revert the instance parameters to the default values.
6410 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6411 for name in self.op.hvparams.keys():
6412 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6413 del self.op.hvparams[name]
6415 be_defs = cluster.SimpleFillBE({})
6416 for name in self.op.beparams.keys():
6417 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6418 del self.op.beparams[name]
6420 nic_defs = cluster.SimpleFillNIC({})
6421 for nic in self.op.nics:
6422 for name in constants.NICS_PARAMETERS:
6423 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6426 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6427 for name in self.op.osparams.keys():
6428 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6429 del self.op.osparams[name]
6431 def _CalculateFileStorageDir(self):
6432 """Calculate final instance file storage dir.
6435 # file storage dir calculation/check
6436 self.instance_file_storage_dir = None
6437 if self.op.disk_template in constants.DTS_FILEBASED:
6438 # build the full file storage dir path
6441 if self.op.disk_template == constants.DT_SHARED_FILE:
6442 get_fsd_fn = self.cfg.GetSharedFileStorageDir
6444 get_fsd_fn = self.cfg.GetFileStorageDir
6446 cfg_storagedir = get_fsd_fn()
6447 if not cfg_storagedir:
6448 raise errors.OpPrereqError("Cluster file storage dir not defined",
6450 joinargs.append(cfg_storagedir)
6452 if self.op.file_storage_dir is not None:
6453 joinargs.append(self.op.file_storage_dir)
6455 joinargs.append(self.op.instance_name)
6457 # pylint: disable=W0142
6458 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
6460 def CheckPrereq(self): # pylint: disable=R0914
6461 """Check prerequisites.
6464 self._CalculateFileStorageDir()
6466 if self.op.mode == constants.INSTANCE_IMPORT:
6467 export_info = self._ReadExportInfo()
6468 self._ReadExportParams(export_info)
6469 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
6471 self._old_instance_name = None
6473 if (not self.cfg.GetVGName() and
6474 self.op.disk_template not in constants.DTS_NOT_LVM):
6475 raise errors.OpPrereqError("Cluster does not support lvm-based"
6476 " instances", errors.ECODE_STATE)
6478 if (self.op.hypervisor is None or
6479 self.op.hypervisor == constants.VALUE_AUTO):
6480 self.op.hypervisor = self.cfg.GetHypervisorType()
6482 cluster = self.cfg.GetClusterInfo()
6483 enabled_hvs = cluster.enabled_hypervisors
6484 if self.op.hypervisor not in enabled_hvs:
6485 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6487 (self.op.hypervisor, ",".join(enabled_hvs)),
6490 # Check tag validity
6491 for tag in self.op.tags:
6492 objects.TaggableObject.ValidateTag(tag)
6494 # check hypervisor parameter syntax (locally)
6495 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6496 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6498 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
6499 hv_type.CheckParameterSyntax(filled_hvp)
6500 self.hv_full = filled_hvp
6501 # check that we don't specify global parameters on an instance
6502 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
6503 "instance", "cluster")
6505 # fill and remember the beparams dict
6506 self.be_full = _ComputeFullBeParams(self.op, cluster)
6508 # build os parameters
6509 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6511 # now that hvp/bep are in final format, let's reset to defaults,
6513 if self.op.identify_defaults:
6514 self._RevertToDefaults(cluster)
6517 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
6518 self.proc.GetECId())
6520 # disk checks/pre-build
6521 default_vg = self.cfg.GetVGName()
6522 self.disks = _ComputeDisks(self.op, default_vg)
6524 if self.op.mode == constants.INSTANCE_IMPORT:
6526 for idx in range(len(self.disks)):
6527 option = "disk%d_dump" % idx
6528 if export_info.has_option(constants.INISECT_INS, option):
6529 # FIXME: are the old os-es, disk sizes, etc. useful?
6530 export_name = export_info.get(constants.INISECT_INS, option)
6531 image = utils.PathJoin(self.op.src_path, export_name)
6532 disk_images.append(image)
6534 disk_images.append(False)
6536 self.src_images = disk_images
6538 if self.op.instance_name == self._old_instance_name:
6539 for idx, nic in enumerate(self.nics):
6540 if nic.mac == constants.VALUE_AUTO:
6541 nic_mac_ini = "nic%d_mac" % idx
6542 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6544 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6546 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6547 if self.op.ip_check:
6548 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6549 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6550 (self.check_ip, self.op.instance_name),
6551 errors.ECODE_NOTUNIQUE)
6553 #### mac address generation
6554 # By generating the MAC address here, both the allocator and the hooks get
6555 # the real final mac address rather than the 'auto' or 'generate' value.
6556 # There is a race condition between the generation and the instance object
6557 # creation, which means that we know the mac is valid now, but we're not
6558 # sure it will be when we actually add the instance. If things go bad
6559 # adding the instance will abort because of a duplicate mac, and the
6560 # creation job will fail.
6561 for nic in self.nics:
6562 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6563 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
6567 if self.op.iallocator is not None:
6568 self._RunAllocator()
6570 # Release all unneeded node locks
6571 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
6572 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
6573 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
6574 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
6576 assert (self.owned_locks(locking.LEVEL_NODE) ==
6577 self.owned_locks(locking.LEVEL_NODE_RES)), \
6578 "Node locks differ from node resource locks"
6580 #### node related checks
6582 # check primary node
6583 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6584 assert self.pnode is not None, \
6585 "Cannot retrieve locked node %s" % self.op.pnode
6587 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6588 pnode.name, errors.ECODE_STATE)
6590 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6591 pnode.name, errors.ECODE_STATE)
6592 if not pnode.vm_capable:
6593 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
6594 " '%s'" % pnode.name, errors.ECODE_STATE)
6596 self.secondaries = []
6598 # Fill in any IPs from IP pools. This must happen here, because we need to
6599 # know the nic's primary node, as specified by the iallocator
6600 for idx, nic in enumerate(self.nics):
6601 net_uuid = nic.network
6602 if net_uuid is not None:
6603 nobj = self.cfg.GetNetwork(net_uuid)
6604 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
6605 if netparams is None:
6606 raise errors.OpPrereqError("No netparams found for network"
6607 " %s. Propably not connected to"
6608 " node's %s nodegroup" %
6609 (nobj.name, self.pnode.name),
6611 self.LogInfo("NIC/%d inherits netparams %s" %
6612 (idx, netparams.values()))
6613 nic.nicparams = dict(netparams)
6614 if nic.ip is not None:
6615 if nic.ip.lower() == constants.NIC_IP_POOL:
6617 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
6618 except errors.ReservationError:
6619 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
6620 " from the address pool" % idx,
6622 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
6625 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
6626 except errors.ReservationError:
6627 raise errors.OpPrereqError("IP address %s already in use"
6628 " or does not belong to network %s" %
6629 (nic.ip, nobj.name),
6630 errors.ECODE_NOTUNIQUE)
6632 # net is None, ip None or given
6633 elif self.op.conflicts_check:
6634 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
6636 # mirror node verification
6637 if self.op.disk_template in constants.DTS_INT_MIRROR:
6638 if self.op.snode == pnode.name:
6639 raise errors.OpPrereqError("The secondary node cannot be the"
6640 " primary node", errors.ECODE_INVAL)
6641 _CheckNodeOnline(self, self.op.snode)
6642 _CheckNodeNotDrained(self, self.op.snode)
6643 _CheckNodeVmCapable(self, self.op.snode)
6644 self.secondaries.append(self.op.snode)
6646 snode = self.cfg.GetNodeInfo(self.op.snode)
6647 if pnode.group != snode.group:
6648 self.LogWarning("The primary and secondary nodes are in two"
6649 " different node groups; the disk parameters"
6650 " from the first disk's node group will be"
6653 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
6655 if self.op.disk_template in constants.DTS_INT_MIRROR:
6657 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
6658 if compat.any(map(has_es, nodes)):
6659 raise errors.OpPrereqError("Disk template %s not supported with"
6660 " exclusive storage" % self.op.disk_template,
6663 nodenames = [pnode.name] + self.secondaries
6665 if not self.adopt_disks:
6666 if self.op.disk_template == constants.DT_RBD:
6667 # _CheckRADOSFreeSpace() is just a placeholder.
6668 # Any function that checks prerequisites can be placed here.
6669 # Check if there is enough space on the RADOS cluster.
6670 _CheckRADOSFreeSpace()
6671 elif self.op.disk_template == constants.DT_EXT:
6672 # FIXME: Function that checks prereqs if needed
6675 # Check lv size requirements, if not adopting
6676 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
6677 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
6679 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
6680 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
6681 disk[constants.IDISK_ADOPT])
6682 for disk in self.disks])
6683 if len(all_lvs) != len(self.disks):
6684 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6686 for lv_name in all_lvs:
6688 # FIXME: lv_name here is "vg/lv"; we need to ensure that other calls
6689 # to ReserveLV use the same syntax
6690 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6691 except errors.ReservationError:
6692 raise errors.OpPrereqError("LV named %s used by another instance" %
6693 lv_name, errors.ECODE_NOTUNIQUE)
6695 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
6696 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
6698 node_lvs = self.rpc.call_lv_list([pnode.name],
6699 vg_names.payload.keys())[pnode.name]
6700 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6701 node_lvs = node_lvs.payload
6703 delta = all_lvs.difference(node_lvs.keys())
6705 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6706 utils.CommaJoin(delta),
6708 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6710 raise errors.OpPrereqError("Online logical volumes found, cannot"
6711 " adopt: %s" % utils.CommaJoin(online_lvs),
6713 # update the size of disk based on what is found
6714 for dsk in self.disks:
6715 dsk[constants.IDISK_SIZE] = \
6716 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
6717 dsk[constants.IDISK_ADOPT])][0]))
6719 elif self.op.disk_template == constants.DT_BLOCK:
6720 # Normalize and de-duplicate device paths
6721 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
6722 for disk in self.disks])
6723 if len(all_disks) != len(self.disks):
6724 raise errors.OpPrereqError("Duplicate disk names given for adoption",
6726 baddisks = [d for d in all_disks
6727 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
6729 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
6730 " cannot be adopted" %
6731 (utils.CommaJoin(baddisks),
6732 constants.ADOPTABLE_BLOCKDEV_ROOT),
6735 node_disks = self.rpc.call_bdev_sizes([pnode.name],
6736 list(all_disks))[pnode.name]
6737 node_disks.Raise("Cannot get block device information from node %s" %
6739 node_disks = node_disks.payload
6740 delta = all_disks.difference(node_disks.keys())
6742 raise errors.OpPrereqError("Missing block device(s): %s" %
6743 utils.CommaJoin(delta),
6745 for dsk in self.disks:
6746 dsk[constants.IDISK_SIZE] = \
6747 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
6749 # Verify instance specs
6750 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
6752 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
6753 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
6754 constants.ISPEC_DISK_COUNT: len(self.disks),
6755 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
6756 for disk in self.disks],
6757 constants.ISPEC_NIC_COUNT: len(self.nics),
6758 constants.ISPEC_SPINDLE_USE: spindle_use,
6761 group_info = self.cfg.GetNodeGroup(pnode.group)
6762 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
6763 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
6764 self.op.disk_template)
6765 if not self.op.ignore_ipolicy and res:
6766 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
6767 (pnode.group, group_info.name, utils.CommaJoin(res)))
6768 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
6770 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6772 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6773 # check OS parameters (remotely)
6774 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
6776 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6778 #TODO: _CheckExtParams (remotely)
6779 # Check parameters for extstorage
6781 # memory check on primary node
6782 #TODO(dynmem): use MINMEM for checking
6784 _CheckNodeFreeMemory(self, self.pnode.name,
6785 "creating instance %s" % self.op.instance_name,
6786 self.be_full[constants.BE_MAXMEM],
6789 self.dry_run_result = list(nodenames)
6791 def Exec(self, feedback_fn):
6792 """Create and add the instance to the cluster.
6795 instance = self.op.instance_name
6796 pnode_name = self.pnode.name
6798 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
6799 self.owned_locks(locking.LEVEL_NODE)), \
6800 "Node locks differ from node resource locks"
6801 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
6803 ht_kind = self.op.hypervisor
6804 if ht_kind in constants.HTS_REQ_PORT:
6805 network_port = self.cfg.AllocatePort()
6809 # This is ugly, but we have a chicken-and-egg problem here:
6810 # We can only take the group disk parameters, as the instance
6811 # has no disks yet (we are generating them right here).
6812 node = self.cfg.GetNodeInfo(pnode_name)
6813 nodegroup = self.cfg.GetNodeGroup(node.group)
6814 disks = _GenerateDiskTemplate(self,
6815 self.op.disk_template,
6816 instance, pnode_name,
6819 self.instance_file_storage_dir,
6820 self.op.file_driver,
6823 self.cfg.GetGroupDiskParams(nodegroup))
6825 iobj = objects.Instance(name=instance, os=self.op.os_type,
6826 primary_node=pnode_name,
6827 nics=self.nics, disks=disks,
6828 disk_template=self.op.disk_template,
6829 admin_state=constants.ADMINST_DOWN,
6830 network_port=network_port,
6831 beparams=self.op.beparams,
6832 hvparams=self.op.hvparams,
6833 hypervisor=self.op.hypervisor,
6834 osparams=self.op.osparams,
6838 for tag in self.op.tags:
6841 if self.adopt_disks:
6842 if self.op.disk_template == constants.DT_PLAIN:
6843 # rename LVs to the newly-generated names; we need to construct
6844 # 'fake' LV disks with the old data, plus the new unique_id
6845 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6847 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6848 rename_to.append(t_dsk.logical_id)
6849 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
6850 self.cfg.SetDiskID(t_dsk, pnode_name)
6851 result = self.rpc.call_blockdev_rename(pnode_name,
6852 zip(tmp_disks, rename_to))
6853 result.Raise("Failed to rename adopted LVs")
6855 feedback_fn("* creating instance disks...")
6857 _CreateDisks(self, iobj)
6858 except errors.OpExecError:
6859 self.LogWarning("Device creation failed")
6860 self.cfg.ReleaseDRBDMinors(instance)
6863 feedback_fn("adding instance %s to cluster config" % instance)
6865 self.cfg.AddInstance(iobj, self.proc.GetECId())
6867 # Declare that we don't want to remove the instance lock anymore, as we've
6868 # added the instance to the config
6869 del self.remove_locks[locking.LEVEL_INSTANCE]
6871 if self.op.mode == constants.INSTANCE_IMPORT:
6872 # Release unused nodes
6873 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
6876 _ReleaseLocks(self, locking.LEVEL_NODE)
6879 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
6880 feedback_fn("* wiping instance disks...")
6882 _WipeDisks(self, iobj)
6883 except errors.OpExecError, err:
6884 logging.exception("Wiping disks failed")
6885 self.LogWarning("Wiping instance disks failed (%s)", err)
6889 # Something is already wrong with the disks, don't do anything else
6891 elif self.op.wait_for_sync:
6892 disk_abort = not _WaitForSync(self, iobj)
6893 elif iobj.disk_template in constants.DTS_INT_MIRROR:
6894 # make sure the disks are not degraded (still sync-ing is ok)
6895 feedback_fn("* checking mirrors status")
6896 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6901 _RemoveDisks(self, iobj)
6902 self.cfg.RemoveInstance(iobj.name)
6903 # Make sure the instance lock gets removed
6904 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6905 raise errors.OpExecError("There are some degraded disks for"
6908 # Release all node resource locks
6909 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
6911 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6912 # we need to set the disks ID to the primary node, since the
6913 # preceding code might or might not have done it, depending on
6914 # disk template and other options
6915 for disk in iobj.disks:
6916 self.cfg.SetDiskID(disk, pnode_name)
6917 if self.op.mode == constants.INSTANCE_CREATE:
6918 if not self.op.no_install:
6919 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
6920 not self.op.wait_for_sync)
6922 feedback_fn("* pausing disk sync to install instance OS")
6923 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
6926 for idx, success in enumerate(result.payload):
6928 logging.warn("pause-sync of instance %s for disk %d failed",
6931 feedback_fn("* running the instance OS create scripts...")
6932 # FIXME: pass debug option from opcode to backend
6934 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
6935 self.op.debug_level)
6937 feedback_fn("* resuming disk sync")
6938 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
6941 for idx, success in enumerate(result.payload):
6943 logging.warn("resume-sync of instance %s for disk %d failed",
6946 os_add_result.Raise("Could not add os for instance %s"
6947 " on node %s" % (instance, pnode_name))
6950 if self.op.mode == constants.INSTANCE_IMPORT:
6951 feedback_fn("* running the instance OS import scripts...")
6955 for idx, image in enumerate(self.src_images):
6959 # FIXME: pass debug option from opcode to backend
6960 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
6961 constants.IEIO_FILE, (image, ),
6962 constants.IEIO_SCRIPT,
6963 (iobj.disks[idx], idx),
6965 transfers.append(dt)
6968 masterd.instance.TransferInstanceData(self, feedback_fn,
6969 self.op.src_node, pnode_name,
6970 self.pnode.secondary_ip,
6972 if not compat.all(import_result):
6973 self.LogWarning("Some disks for instance %s on node %s were not"
6974 " imported successfully" % (instance, pnode_name))
6976 rename_from = self._old_instance_name
6978 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6979 feedback_fn("* preparing remote import...")
6980 # The source cluster will stop the instance before attempting to make
6981 # a connection. In some cases stopping an instance can take a long
6982 # time, hence the shutdown timeout is added to the connection
6984 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
6985 self.op.source_shutdown_timeout)
6986 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
6988 assert iobj.primary_node == self.pnode.name
6990 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
6991 self.source_x509_ca,
6992 self._cds, timeouts)
6993 if not compat.all(disk_results):
6994 # TODO: Should the instance still be started, even if some disks
6995 # failed to import (valid for local imports, too)?
6996 self.LogWarning("Some disks for instance %s on node %s were not"
6997 " imported successfully" % (instance, pnode_name))
6999 rename_from = self.source_instance_name
7002 # also checked in the prereq part
7003 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7006 # Run rename script on newly imported instance
7007 assert iobj.name == instance
7008 feedback_fn("Running rename script for %s" % instance)
7009 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7011 self.op.debug_level)
7013 self.LogWarning("Failed to run rename script for %s on node"
7014 " %s: %s" % (instance, pnode_name, result.fail_msg))
7016 assert not self.owned_locks(locking.LEVEL_NODE_RES)
7019 iobj.admin_state = constants.ADMINST_UP
7020 self.cfg.Update(iobj, feedback_fn)
7021 logging.info("Starting instance %s on node %s", instance, pnode_name)
7022 feedback_fn("* starting instance...")
7023 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
7024 False, self.op.reason)
7025 result.Raise("Could not start instance")
7027 return list(iobj.all_nodes)
7030 class LUInstanceMultiAlloc(NoHooksLU):
7031 """Allocates multiple instances at the same time.
7036 def CheckArguments(self):
7041 for inst in self.op.instances:
7042 if inst.iallocator is not None:
7043 raise errors.OpPrereqError("iallocator is not allowed to be set on"
7044 " instance objects", errors.ECODE_INVAL)
7045 nodes.append(bool(inst.pnode))
7046 if inst.disk_template in constants.DTS_INT_MIRROR:
7047 nodes.append(bool(inst.snode))
7049 has_nodes = compat.any(nodes)
7050 if compat.all(nodes) ^ has_nodes:
7051 raise errors.OpPrereqError("There are instance objects providing"
7052 " pnode/snode while others do not",
7055 if self.op.iallocator is None:
7056 default_iallocator = self.cfg.GetDefaultIAllocator()
7057 if default_iallocator and has_nodes:
7058 self.op.iallocator = default_iallocator
7060 raise errors.OpPrereqError("No iallocator or nodes on the instances"
7061 " given and no cluster-wide default"
7062 " iallocator found; please specify either"
7063 " an iallocator or nodes on the instances"
7064 " or set a cluster-wide default iallocator",
7067 _CheckOpportunisticLocking(self.op)
7069 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
7071 raise errors.OpPrereqError("There are duplicate instance names: %s" %
7072 utils.CommaJoin(dups), errors.ECODE_INVAL)
7074 def ExpandNames(self):
7075 """Calculate the locks.
7078 self.share_locks = _ShareAll()
7079 self.needed_locks = {
7080 # iallocator will select nodes and even if no iallocator is used,
7081 # collisions with LUInstanceCreate should be avoided
7082 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
7085 if self.op.iallocator:
7086 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7087 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
7089 if self.op.opportunistic_locking:
7090 self.opportunistic_locks[locking.LEVEL_NODE] = True
7091 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
7094 for inst in self.op.instances:
7095 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
7096 nodeslist.append(inst.pnode)
7097 if inst.snode is not None:
7098 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
7099 nodeslist.append(inst.snode)
7101 self.needed_locks[locking.LEVEL_NODE] = nodeslist
7102 # Lock resources of instance's primary and secondary nodes (copy to
7103 # prevent accidental modification)
7104 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
7106 def CheckPrereq(self):
7107 """Check prerequisite.
7110 cluster = self.cfg.GetClusterInfo()
7111 default_vg = self.cfg.GetVGName()
7112 ec_id = self.proc.GetECId()
7114 if self.op.opportunistic_locking:
7115 # Only consider nodes for which a lock is held
7116 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
7118 node_whitelist = None
7120 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
7121 _ComputeNics(op, cluster, None,
7123 _ComputeFullBeParams(op, cluster),
7125 for op in self.op.instances]
7127 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
7128 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7130 ial.Run(self.op.iallocator)
7133 raise errors.OpPrereqError("Can't compute nodes using"
7134 " iallocator '%s': %s" %
7135 (self.op.iallocator, ial.info),
7138 self.ia_result = ial.result
7141 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
7142 constants.JOB_IDS_KEY: [],
7145 def _ConstructPartialResult(self):
7146 """Contructs the partial result.
7149 (allocatable, failed) = self.ia_result
7151 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
7152 map(compat.fst, allocatable),
7153 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
7156 def Exec(self, feedback_fn):
7157 """Executes the opcode.
7160 op2inst = dict((op.instance_name, op) for op in self.op.instances)
7161 (allocatable, failed) = self.ia_result
7164 for (name, nodes) in allocatable:
7165 op = op2inst.pop(name)
7168 (op.pnode, op.snode) = nodes
7174 missing = set(op2inst.keys()) - set(failed)
7175 assert not missing, \
7176 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
7178 return ResultWithJobs(jobs, **self._ConstructPartialResult())
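# Illustration (rough sketch): the returned value combines the submitted
# per-instance creation jobs (under constants.JOB_IDS_KEY) with the partial
# result built above, i.e. the instance names the iallocator could place
# (OpInstanceMultiAlloc.ALLOCATABLE_KEY) and those it could not
# (OpInstanceMultiAlloc.FAILED_KEY).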
7181 def _CheckRADOSFreeSpace():
7182 """Compute disk size requirements inside the RADOS cluster.
7185 # For the RADOS cluster we assume there is always enough space.
7189 class LUInstanceConsole(NoHooksLU):
7190 """Connect to an instance's console.
7192 This is somewhat special in that it returns the command line that
7193 you need to run on the master node in order to connect to the
7199 def ExpandNames(self):
7200 self.share_locks = _ShareAll()
7201 self._ExpandAndLockInstance()
7203 def CheckPrereq(self):
7204 """Check prerequisites.
7206 This checks that the instance is in the cluster.
7209 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7210 assert self.instance is not None, \
7211 "Cannot retrieve locked instance %s" % self.op.instance_name
7212 _CheckNodeOnline(self, self.instance.primary_node)
7214 def Exec(self, feedback_fn):
7215 """Connect to the console of an instance
7218 instance = self.instance
7219 node = instance.primary_node
7221 node_insts = self.rpc.call_instance_list([node],
7222 [instance.hypervisor])[node]
7223 node_insts.Raise("Can't get node information from %s" % node)
7225 if instance.name not in node_insts.payload:
7226 if instance.admin_state == constants.ADMINST_UP:
7227 state = constants.INSTST_ERRORDOWN
7228 elif instance.admin_state == constants.ADMINST_DOWN:
7229 state = constants.INSTST_ADMINDOWN
7231 state = constants.INSTST_ADMINOFFLINE
7232 raise errors.OpExecError("Instance %s is not running (state %s)" %
7233 (instance.name, state))
7235 logging.debug("Connecting to console of %s on %s", instance.name, node)
7237 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
7240 def _GetInstanceConsole(cluster, instance):
7241 """Returns console information for an instance.
7243 @type cluster: L{objects.Cluster}
7244 @type instance: L{objects.Instance}
7248 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
7249 # beparams and hvparams are passed separately, to avoid editing the
7250 # instance and then saving the defaults in the instance itself.
7251 hvparams = cluster.FillHV(instance)
7252 beparams = cluster.FillBE(instance)
7253 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7255 assert console.instance == instance.name
7256 assert console.Validate()
7258 return console.ToDict()
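# Illustration (rough sketch): LUInstanceConsole.Exec above ends with
#   _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
# and the dict produced by console.ToDict() is what the command-line client
# ("gnt-instance console") turns into the command to run on the master node,
# as described in the LUInstanceConsole docstring.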
7261 class LUInstanceReplaceDisks(LogicalUnit):
7262 """Replace the disks of an instance.
7265 HPATH = "mirrors-replace"
7266 HTYPE = constants.HTYPE_INSTANCE
7269 def CheckArguments(self):
7273 remote_node = self.op.remote_node
7274 ialloc = self.op.iallocator
7275 if self.op.mode == constants.REPLACE_DISK_CHG:
7276 if remote_node is None and ialloc is None:
7277 raise errors.OpPrereqError("When changing the secondary either an"
7278 " iallocator script must be used or the"
7279 " new node given", errors.ECODE_INVAL)
7281 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
7283 elif remote_node is not None or ialloc is not None:
7284 # Not replacing the secondary
7285 raise errors.OpPrereqError("The iallocator and new node options can"
7286 " only be used when changing the"
7287 " secondary node", errors.ECODE_INVAL)
7289 def ExpandNames(self):
7290 self._ExpandAndLockInstance()
7292 assert locking.LEVEL_NODE not in self.needed_locks
7293 assert locking.LEVEL_NODE_RES not in self.needed_locks
7294 assert locking.LEVEL_NODEGROUP not in self.needed_locks
7296 assert self.op.iallocator is None or self.op.remote_node is None, \
7297 "Conflicting options"
7299 if self.op.remote_node is not None:
7300 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7302 # Warning: do not remove the locking of the new secondary here
7303 # unless DRBD8.AddChildren is changed to work in parallel;
7304 # currently it doesn't since parallel invocations of
7305 # FindUnusedMinor will conflict
7306 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7307 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7309 self.needed_locks[locking.LEVEL_NODE] = []
7310 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7312 if self.op.iallocator is not None:
7313 # iallocator will select a new node in the same group
7314 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7315 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7317 self.needed_locks[locking.LEVEL_NODE_RES] = []
7319 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7320 self.op.iallocator, self.op.remote_node,
7321 self.op.disks, self.op.early_release,
7322 self.op.ignore_ipolicy)
7324 self.tasklets = [self.replacer]
7326 def DeclareLocks(self, level):
7327 if level == locking.LEVEL_NODEGROUP:
7328 assert self.op.remote_node is None
7329 assert self.op.iallocator is not None
7330 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7332 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7333 # Lock all groups used by instance optimistically; this requires going
7334 # via the node before it's locked, requiring verification later on
7335 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7336 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
7338 elif level == locking.LEVEL_NODE:
7339 if self.op.iallocator is not None:
7340 assert self.op.remote_node is None
7341 assert not self.needed_locks[locking.LEVEL_NODE]
7342 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7344 # Lock member nodes of all locked groups
7345 self.needed_locks[locking.LEVEL_NODE] = \
7346 [node_name
7347 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
7348 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
7350 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7352 self._LockInstancesNodes()
7354 elif level == locking.LEVEL_NODE_RES:
7356 self.needed_locks[locking.LEVEL_NODE_RES] = \
7357 self.needed_locks[locking.LEVEL_NODE]
7359 def BuildHooksEnv(self):
7362 This runs on the master, the primary and all the secondaries.
7365 instance = self.replacer.instance
7367 "MODE": self.op.mode,
7368 "NEW_SECONDARY": self.op.remote_node,
7369 "OLD_SECONDARY": instance.secondary_nodes[0],
7371 env.update(_BuildInstanceHookEnvByObject(self, instance))
7374 def BuildHooksNodes(self):
7375 """Build hooks nodes.
7378 instance = self.replacer.instance
7380 self.cfg.GetMasterNode(),
7381 instance.primary_node,
7383 if self.op.remote_node is not None:
7384 nl.append(self.op.remote_node)
7387 def CheckPrereq(self):
7388 """Check prerequisites.
7391 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
7392 self.op.iallocator is None)
7394 # Verify if node group locks are still correct
7395 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7397 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
7399 return LogicalUnit.CheckPrereq(self)
7402 class TLReplaceDisks(Tasklet):
7403 """Replaces disks for an instance.
7405 Note: Locking is not within the scope of this class.
7408 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7409 disks, early_release, ignore_ipolicy):
7410 """Initializes this class.
7413 Tasklet.__init__(self, lu)
7416 self.instance_name = instance_name
7418 self.iallocator_name = iallocator_name
7419 self.remote_node = remote_node
7421 self.early_release = early_release
7422 self.ignore_ipolicy = ignore_ipolicy
7425 self.instance = None
7426 self.new_node = None
7427 self.target_node = None
7428 self.other_node = None
7429 self.remote_node_info = None
7430 self.node_secondary_ip = None
7433 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7434 """Compute a new secondary node using an IAllocator.
7437 req = iallocator.IAReqRelocate(name=instance_name,
7438 relocate_from=list(relocate_from))
7439 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
7441 ial.Run(iallocator_name)
7444 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7445 " %s" % (iallocator_name, ial.info),
7448 remote_node_name = ial.result[0]
7450 lu.LogInfo("Selected new secondary for instance '%s': %s",
7451 instance_name, remote_node_name)
7453 return remote_node_name
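# Illustrative call (assumed values, not from the original code): for an
# instance whose current secondary is "node2", something like
#   _RunAllocator(lu, "hail", "instance1.example.com", ["node2"])
# asks the iallocator for a relocation target and returns the chosen node
# name, e.g. "node3".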
7455 def _FindFaultyDisks(self, node_name):
7456 """Wrapper for L{_FindFaultyInstanceDisks}.
7459 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7462 def _CheckDisksActivated(self, instance):
7463 """Checks if the instance disks are activated.
7465 @param instance: The instance to check disks
7466 @return: True if they are activated, False otherwise
7469 nodes = instance.all_nodes
7471 for idx, dev in enumerate(instance.disks):
7473 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
7474 self.cfg.SetDiskID(dev, node)
7476 result = _BlockdevFind(self, node, dev, instance)
7480 elif result.fail_msg or not result.payload:
7485 def CheckPrereq(self):
7486 """Check prerequisites.
7488 This checks that the instance is in the cluster.
7491 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7492 assert instance is not None, \
7493 "Cannot retrieve locked instance %s" % self.instance_name
7495 if instance.disk_template != constants.DT_DRBD8:
7496 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7497 " instances", errors.ECODE_INVAL)
7499 if len(instance.secondary_nodes) != 1:
7500 raise errors.OpPrereqError("The instance has a strange layout,"
7501 " expected one secondary but found %d" %
7502 len(instance.secondary_nodes),
7505 instance = self.instance
7506 secondary_node = instance.secondary_nodes[0]
7508 if self.iallocator_name is None:
7509 remote_node = self.remote_node
7511 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7512 instance.name, instance.secondary_nodes)
7514 if remote_node is None:
7515 self.remote_node_info = None
7517 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
7518 "Remote node '%s' is not locked" % remote_node
7520 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7521 assert self.remote_node_info is not None, \
7522 "Cannot retrieve locked node %s" % remote_node
7524 if remote_node == self.instance.primary_node:
7525 raise errors.OpPrereqError("The specified node is the primary node of"
7526 " the instance", errors.ECODE_INVAL)
7528 if remote_node == secondary_node:
7529 raise errors.OpPrereqError("The specified node is already the"
7530 " secondary node of the instance",
7533 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7534 constants.REPLACE_DISK_CHG):
7535 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7538 if self.mode == constants.REPLACE_DISK_AUTO:
7539 if not self._CheckDisksActivated(instance):
7540 raise errors.OpPrereqError("Please run activate-disks on instance %s"
7541 " first" % self.instance_name,
7543 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7544 faulty_secondary = self._FindFaultyDisks(secondary_node)
7546 if faulty_primary and faulty_secondary:
7547 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7548 " one node and can not be repaired"
7549 " automatically" % self.instance_name,
7553 self.disks = faulty_primary
7554 self.target_node = instance.primary_node
7555 self.other_node = secondary_node
7556 check_nodes = [self.target_node, self.other_node]
7557 elif faulty_secondary:
7558 self.disks = faulty_secondary
7559 self.target_node = secondary_node
7560 self.other_node = instance.primary_node
7561 check_nodes = [self.target_node, self.other_node]
7567 # Non-automatic modes
7568 if self.mode == constants.REPLACE_DISK_PRI:
7569 self.target_node = instance.primary_node
7570 self.other_node = secondary_node
7571 check_nodes = [self.target_node, self.other_node]
7573 elif self.mode == constants.REPLACE_DISK_SEC:
7574 self.target_node = secondary_node
7575 self.other_node = instance.primary_node
7576 check_nodes = [self.target_node, self.other_node]
7578 elif self.mode == constants.REPLACE_DISK_CHG:
7579 self.new_node = remote_node
7580 self.other_node = instance.primary_node
7581 self.target_node = secondary_node
7582 check_nodes = [self.new_node, self.other_node]
7584 _CheckNodeNotDrained(self.lu, remote_node)
7585 _CheckNodeVmCapable(self.lu, remote_node)
7587 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7588 assert old_node_info is not None
7589 if old_node_info.offline and not self.early_release:
7590 # doesn't make sense to delay the release
7591 self.early_release = True
7592 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7593 " early-release mode", secondary_node)
7596 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7599 # If not specified all disks should be replaced
7601 self.disks = range(len(self.instance.disks))
7603 # TODO: This is ugly, but right now we can't distinguish between an
7604 # internally submitted opcode and an external one. We should fix that.
7605 if self.remote_node_info:
7606 # We change the node, lets verify it still meets instance policy
7607 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
7608 cluster = self.cfg.GetClusterInfo()
7609 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
7611 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
7612 self.cfg, ignore=self.ignore_ipolicy)
7614 for node in check_nodes:
7615 _CheckNodeOnline(self.lu, node)
7617 touched_nodes = frozenset(node_name for node_name in [self.new_node,
7618 self.other_node,
7619 self.target_node]
7620 if node_name is not None)
7622 # Release unneeded node and node resource locks
7623 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
7624 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
7625 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
7627 # Release any owned node group
7628 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
7630 # Check whether disks are valid
7631 for disk_idx in self.disks:
7632 instance.FindDisk(disk_idx)
7634 # Get secondary node IP addresses
7635 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
7636 in self.cfg.GetMultiNodeInfo(touched_nodes))
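# node_secondary_ip maps every node still locked after the releases above
# (primary, secondary and, when changing the secondary, the new node) to its
# secondary IP, which is used later when disconnecting/reattaching DRBD over
# the replication network.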
7638 def Exec(self, feedback_fn):
7639 """Execute disk replacement.
7641 This dispatches the disk replacement to the appropriate handler.
7645 # Verify owned locks before starting operation
7646 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
7647 assert set(owned_nodes) == set(self.node_secondary_ip), \
7648 ("Incorrect node locks, owning %s, expected %s" %
7649 (owned_nodes, self.node_secondary_ip.keys()))
7650 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
7651 self.lu.owned_locks(locking.LEVEL_NODE_RES))
7652 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7654 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
7655 assert list(owned_instances) == [self.instance_name], \
7656 "Instance '%s' not locked" % self.instance_name
7658 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
7659 "Should not own any node group lock at this point"
7662 feedback_fn("No disks need replacement for instance '%s'" %
7666 feedback_fn("Replacing disk(s) %s for instance '%s'" %
7667 (utils.CommaJoin(self.disks), self.instance.name))
7668 feedback_fn("Current primary node: %s" % self.instance.primary_node)
7669 feedback_fn("Current seconary node: %s" %
7670 utils.CommaJoin(self.instance.secondary_nodes))
7672 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
7674 # Activate the instance disks if we're replacing them on a down instance
7676 _StartInstanceDisks(self.lu, self.instance, True)
7679 # Should we replace the secondary node?
7680 if self.new_node is not None:
7681 fn = self._ExecDrbd8Secondary
7683 fn = self._ExecDrbd8DiskOnly
7685 result = fn(feedback_fn)
7687 # Deactivate the instance disks if we're replacing them on a
7690 _SafeShutdownInstanceDisks(self.lu, self.instance)
7692 assert not self.lu.owned_locks(locking.LEVEL_NODE)
7695 # Verify owned locks
7696 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
7697 nodes = frozenset(self.node_secondary_ip)
7698 assert ((self.early_release and not owned_nodes) or
7699 (not self.early_release and not (set(owned_nodes) - nodes))), \
7700 ("Not owning the correct locks, early_release=%s, owned=%r,"
7701 " nodes=%r" % (self.early_release, owned_nodes, nodes))
7705 def _CheckVolumeGroup(self, nodes):
7706 self.lu.LogInfo("Checking volume groups")
7708 vgname = self.cfg.GetVGName()
7710 # Make sure volume group exists on all involved nodes
7711 results = self.rpc.call_vg_list(nodes)
7713 raise errors.OpExecError("Can't list volume groups on the nodes")
7717 res.Raise("Error checking node %s" % node)
7718 if vgname not in res.payload:
7719 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7722 def _CheckDisksExistence(self, nodes):
7723 # Check disk existence
7724 for idx, dev in enumerate(self.instance.disks):
7725 if idx not in self.disks:
7729 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
7730 self.cfg.SetDiskID(dev, node)
7732 result = _BlockdevFind(self, node, dev, self.instance)
7734 msg = result.fail_msg
7735 if msg or not result.payload:
7737 msg = "disk not found"
7738 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7741 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7742 for idx, dev in enumerate(self.instance.disks):
7743 if idx not in self.disks:
7746 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7749 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
7750 on_primary, ldisk=ldisk):
7751 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7752 " replace disks for instance %s" %
7753 (node_name, self.instance.name))
7755 def _CreateNewStorage(self, node_name):
7756 """Create new storage on the primary or secondary node.
7758 This is only used for same-node replaces, not for changing the
7759 secondary node, hence we don't want to modify the existing disk.
7764 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
7765 for idx, dev in enumerate(disks):
7766 if idx not in self.disks:
7769 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
7771 self.cfg.SetDiskID(dev, node_name)
7773 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7774 names = _GenerateUniqueNames(self.lu, lv_names)
7776 (data_disk, meta_disk) = dev.children
7777 vg_data = data_disk.logical_id[0]
7778 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7779 logical_id=(vg_data, names[0]),
7780 params=data_disk.params)
7781 vg_meta = meta_disk.logical_id[0]
7782 lv_meta = objects.Disk(dev_type=constants.LD_LV,
7783 size=constants.DRBD_META_SIZE,
7784 logical_id=(vg_meta, names[1]),
7785 params=meta_disk.params)
7787 new_lvs = [lv_data, lv_meta]
7788 old_lvs = [child.Copy() for child in dev.children]
7789 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7790 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
7792 # we pass force_create=True to force the LVM creation
7793 for new_lv in new_lvs:
7794 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
7795 _GetInstanceInfoText(self.instance), False,
7800 def _CheckDevices(self, node_name, iv_names):
7801 for name, (dev, _, _) in iv_names.iteritems():
7802 self.cfg.SetDiskID(dev, node_name)
7804 result = _BlockdevFind(self, node_name, dev, self.instance)
7806 msg = result.fail_msg
7807 if msg or not result.payload:
7809 msg = "disk not found"
7810 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7813 if result.payload.is_degraded:
7814 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7816 def _RemoveOldStorage(self, node_name, iv_names):
7817 for name, (_, old_lvs, _) in iv_names.iteritems():
7818 self.lu.LogInfo("Remove logical volumes for %s", name)
7821 self.cfg.SetDiskID(lv, node_name)
7823 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7825 self.lu.LogWarning("Can't remove old LV: %s", msg,
7826 hint="remove unused LVs manually")
7828 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
7829 """Replace a disk on the primary or secondary for DRBD 8.
7831 The algorithm for replace is quite complicated:
7833 1. for each disk to be replaced:
7835 1. create new LVs on the target node with unique names
7836 1. detach old LVs from the drbd device
7837 1. rename old LVs to name_replaced.<time_t>
7838 1. rename new LVs to old LVs
7839 1. attach the new LVs (with the old names now) to the drbd device
7841 1. wait for sync across all devices
7843 1. for each modified disk:
7845 1. remove old LVs (which have the name name_replaced.<time_t>)
7847 Failures are not very well handled.
7852 # Step: check device activation
7853 self.lu.LogStep(1, steps_total, "Check device existence")
7854 self._CheckDisksExistence([self.other_node, self.target_node])
7855 self._CheckVolumeGroup([self.target_node, self.other_node])
7857 # Step: check other node consistency
7858 self.lu.LogStep(2, steps_total, "Check peer consistency")
7859 self._CheckDisksConsistency(self.other_node,
7860 self.other_node == self.instance.primary_node,
7863 # Step: create new storage
7864 self.lu.LogStep(3, steps_total, "Allocate new storage")
7865 iv_names = self._CreateNewStorage(self.target_node)
7867 # Step: for each lv, detach+rename*2+attach
7868 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7869 for dev, old_lvs, new_lvs in iv_names.itervalues():
7870 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
7872 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7874 result.Raise("Can't detach drbd from local storage on node"
7875 " %s for device %s" % (self.target_node, dev.iv_name))
7877 #cfg.Update(instance)
7879 # ok, we created the new LVs, so now we know we have the needed
7880 # storage; as such, we proceed on the target node to rename
7881 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7882 # using the assumption that logical_id == physical_id (which in
7883 # turn is the unique_id on that node)
7885 # FIXME(iustin): use a better name for the replaced LVs
7886 temp_suffix = int(time.time())
7887 ren_fn = lambda d, suff: (d.physical_id[0],
7888 d.physical_id[1] + "_replaced-%s" % suff)
7890 # Build the rename list based on what LVs exist on the node
7891 rename_old_to_new = []
7892 for to_ren in old_lvs:
7893 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7894 if not result.fail_msg and result.payload:
7896 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7898 self.lu.LogInfo("Renaming the old LVs on the target node")
7899 result = self.rpc.call_blockdev_rename(self.target_node,
7901 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7903 # Now we rename the new LVs to the old LVs
7904 self.lu.LogInfo("Renaming the new LVs on the target node")
7905 rename_new_to_old = [(new, old.physical_id)
7906 for old, new in zip(old_lvs, new_lvs)]
7907 result = self.rpc.call_blockdev_rename(self.target_node,
7909 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7911 # Intermediate steps of in-memory modifications
7912 for old, new in zip(old_lvs, new_lvs):
7913 new.logical_id = old.logical_id
7914 self.cfg.SetDiskID(new, self.target_node)
7916 # We need to modify old_lvs so that removal later removes the
7917 # right LVs, not the newly added ones; note that old_lvs is a copy here
7919 for disk in old_lvs:
7920 disk.logical_id = ren_fn(disk, temp_suffix)
7921 self.cfg.SetDiskID(disk, self.target_node)
7923 # Now that the new lvs have the old name, we can add them to the device
7924 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
7925 result = self.rpc.call_blockdev_addchildren(self.target_node,
7926 (dev, self.instance), new_lvs)
7927 msg = result.fail_msg
7929 for new_lv in new_lvs:
7930 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7933 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7934 hint=("cleanup manually the unused logical"
7936 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7938 cstep = itertools.count(5)
7940 if self.early_release:
7941 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
7942 self._RemoveOldStorage(self.target_node, iv_names)
7943 # TODO: Check if releasing locks early still makes sense
7944 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
7946 # Release all resource locks except those used by the instance
7947 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
7948 keep=self.node_secondary_ip.keys())
7950 # Release all node locks while waiting for sync
7951 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
7953 # TODO: Can the instance lock be downgraded here? Take the optional disk
7954 # shutdown in the caller into consideration.
7957 # This can fail as the old devices are degraded and _WaitForSync
7958 # does a combined result over all disks, so we don't check its return value
7959 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
7960 _WaitForSync(self.lu, self.instance)
7962 # Check all devices manually
7963 self._CheckDevices(self.instance.primary_node, iv_names)
7965 # Step: remove old storage
7966 if not self.early_release:
7967 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
7968 self._RemoveOldStorage(self.target_node, iv_names)
7970 def _ExecDrbd8Secondary(self, feedback_fn):
7971 """Replace the secondary node for DRBD 8.
7973 The algorithm for replace is quite complicated:
7974 - for all disks of the instance:
7975 - create new LVs on the new node with same names
7976 - shutdown the drbd device on the old secondary
7977 - disconnect the drbd network on the primary
7978 - create the drbd device on the new secondary
7979 - network attach the drbd on the primary, using an artifice:
7980 the drbd code for Attach() will connect to the network if it
7981 finds a device which is connected to the good local disks but
7983 - wait for sync across all devices
7984 - remove all disks from the old secondary
7986 Failures are not very well handled.
7991 pnode = self.instance.primary_node
7993 # Step: check device activation
7994 self.lu.LogStep(1, steps_total, "Check device existence")
7995 self._CheckDisksExistence([self.instance.primary_node])
7996 self._CheckVolumeGroup([self.instance.primary_node])
7998 # Step: check other node consistency
7999 self.lu.LogStep(2, steps_total, "Check peer consistency")
8000 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8002 # Step: create new storage
8003 self.lu.LogStep(3, steps_total, "Allocate new storage")
8004 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
8005 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
8006 for idx, dev in enumerate(disks):
8007 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8008 (self.new_node, idx))
8009 # we pass force_create=True to force LVM creation
8010 for new_lv in dev.children:
8011 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
8012 True, _GetInstanceInfoText(self.instance), False,
8015 # Step 4: drbd minors and drbd setup changes
8016 # after this, we must manually remove the drbd minors on both the
8017 # error and the success paths
8018 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8019 minors = self.cfg.AllocateDRBDMinor([self.new_node
8020 for dev in self.instance.disks],
8022 logging.debug("Allocated minors %r", minors)
8025 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8026 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8027 (self.new_node, idx))
8028 # create new devices on new_node; note that we create two IDs:
8029 # one without port, so the drbd will be activated without
8030 # networking information on the new node at this stage, and one
8031 # with network, for the later activation in step 4
8032 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8033 if self.instance.primary_node == o_node1:
8034 p_minor = o_minor1
8035 else:
8036 assert self.instance.primary_node == o_node2, "Three-node instance?"
8037 p_minor = o_minor2
8039 new_alone_id = (self.instance.primary_node, self.new_node, None,
8040 p_minor, new_minor, o_secret)
8041 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8042 p_minor, new_minor, o_secret)
8044 iv_names[idx] = (dev, dev.children, new_net_id)
8045 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8047 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8048 logical_id=new_alone_id,
8049 children=dev.children,
8052 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
8055 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
8057 _GetInstanceInfoText(self.instance), False,
8059 except errors.GenericError:
8060 self.cfg.ReleaseDRBDMinors(self.instance.name)
8063 # We have new devices, shutdown the drbd on the old secondary
8064 for idx, dev in enumerate(self.instance.disks):
8065 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
8066 self.cfg.SetDiskID(dev, self.target_node)
8067 msg = self.rpc.call_blockdev_shutdown(self.target_node,
8068 (dev, self.instance)).fail_msg
8070 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8071 "node: %s" % (idx, msg),
8072 hint=("Please cleanup this device manually as"
8073 " soon as possible"))
8075 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8076 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
8077 self.instance.disks)[pnode]
8079 msg = result.fail_msg
8081 # detaches didn't succeed (unlikely)
8082 self.cfg.ReleaseDRBDMinors(self.instance.name)
8083 raise errors.OpExecError("Can't detach the disks from the network on"
8084 " old node: %s" % (msg,))
8086 # if we managed to detach at least one, we update all the disks of
8087 # the instance to point to the new secondary
8088 self.lu.LogInfo("Updating instance configuration")
8089 for dev, _, new_logical_id in iv_names.itervalues():
8090 dev.logical_id = new_logical_id
8091 self.cfg.SetDiskID(dev, self.instance.primary_node)
8093 self.cfg.Update(self.instance, feedback_fn)
8095 # Release all node locks (the configuration has been updated)
8096 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
8098 # and now perform the drbd attach
8099 self.lu.LogInfo("Attaching primary drbds to new secondary"
8100 " (standalone => connected)")
8101 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8103 self.node_secondary_ip,
8104 (self.instance.disks, self.instance),
8107 for to_node, to_result in result.items():
8108 msg = to_result.fail_msg
8110 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8112 hint=("please do a gnt-instance info to see the"
8113 " status of disks"))
8115 cstep = itertools.count(5)
8117 if self.early_release:
8118 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8119 self._RemoveOldStorage(self.target_node, iv_names)
8120 # TODO: Check if releasing locks early still makes sense
8121 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
8123 # Release all resource locks except those used by the instance
8124 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
8125 keep=self.node_secondary_ip.keys())
8127 # TODO: Can the instance lock be downgraded here? Take the optional disk
8128 # shutdown in the caller into consideration.
8131 # This can fail as the old devices are degraded and _WaitForSync
8132 # does a combined result over all disks, so we don't check its return value
8133 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
8134 _WaitForSync(self.lu, self.instance)
8136 # Check all devices manually
8137 self._CheckDevices(self.instance.primary_node, iv_names)
8139 # Step: remove old storage
8140 if not self.early_release:
8141 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8142 self._RemoveOldStorage(self.target_node, iv_names)
8145 class LURepairNodeStorage(NoHooksLU):
8146 """Repairs the volume group on a node.
8151 def CheckArguments(self):
8152 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8154 storage_type = self.op.storage_type
8156 if (constants.SO_FIX_CONSISTENCY not in
8157 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8158 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8159 " repaired" % storage_type,
8162 def ExpandNames(self):
8163 self.needed_locks = {
8164 locking.LEVEL_NODE: [self.op.node_name],
8167 def _CheckFaultyDisks(self, instance, node_name):
8168 """Ensure faulty disks abort the opcode or at least warn."""
8170 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8172 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8173 " node '%s'" % (instance.name, node_name),
8175 except errors.OpPrereqError, err:
8176 if self.op.ignore_consistency:
8177 self.LogWarning(str(err.args[0]))
8181 def CheckPrereq(self):
8182 """Check prerequisites.
8185 # Check whether any instance on this node has faulty disks
8186 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8187 if inst.admin_state != constants.ADMINST_UP:
8189 check_nodes = set(inst.all_nodes)
8190 check_nodes.discard(self.op.node_name)
8191 for inst_node_name in check_nodes:
8192 self._CheckFaultyDisks(inst, inst_node_name)
8194 def Exec(self, feedback_fn):
8195 feedback_fn("Repairing storage unit '%s' on %s ..." %
8196 (self.op.name, self.op.node_name))
8198 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8199 result = self.rpc.call_storage_execute(self.op.node_name,
8200 self.op.storage_type, st_args,
8202 constants.SO_FIX_CONSISTENCY)
8203 result.Raise("Failed to repair storage unit '%s' on %s" %
8204 (self.op.name, self.op.node_name))
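# Conceptually this LU is driven by an opcodes.OpRepairNodeStorage(...) call
# carrying node_name, storage_type and name; only storage types that list
# SO_FIX_CONSISTENCY among their valid operations (see CheckArguments above)
# can be repaired this way.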
8207 class LUNodeEvacuate(NoHooksLU):
8208 """Evacuates instances off a list of nodes.
8213 _MODE2IALLOCATOR = {
8214 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
8215 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
8216 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
8218 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
8219 assert (frozenset(_MODE2IALLOCATOR.values()) ==
8220 constants.IALLOCATOR_NEVAC_MODES)
8222 def CheckArguments(self):
8223 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8225 def ExpandNames(self):
8226 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8228 if self.op.remote_node is not None:
8229 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8230 assert self.op.remote_node
8232 if self.op.remote_node == self.op.node_name:
8233 raise errors.OpPrereqError("Can not use evacuated node as a new"
8234 " secondary node", errors.ECODE_INVAL)
8236 if self.op.mode != constants.NODE_EVAC_SEC:
8237 raise errors.OpPrereqError("Without the use of an iallocator only"
8238 " secondary instances can be evacuated",
8242 self.share_locks = _ShareAll()
8243 self.needed_locks = {
8244 locking.LEVEL_INSTANCE: [],
8245 locking.LEVEL_NODEGROUP: [],
8246 locking.LEVEL_NODE: [],
8249 # Determine nodes (via group) optimistically, needs verification once locks
8250 # have been acquired
8251 self.lock_nodes = self._DetermineNodes()
8253 def _DetermineNodes(self):
8254 """Gets the list of nodes to operate on.
8257 if self.op.remote_node is None:
8258 # Iallocator will choose any node(s) in the same group
8259 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
8261 group_nodes = frozenset([self.op.remote_node])
8263 # Determine nodes to be locked
8264 return set([self.op.node_name]) | group_nodes
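# The node being evacuated is always part of the returned set, so its lock is
# held for the whole operation together with the potential target nodes.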
8266 def _DetermineInstances(self):
8267 """Builds list of instances to operate on.
8270 assert self.op.mode in constants.NODE_EVAC_MODES
8272 if self.op.mode == constants.NODE_EVAC_PRI:
8273 # Primary instances only
8274 inst_fn = _GetNodePrimaryInstances
8275 assert self.op.remote_node is None, \
8276 "Evacuating primary instances requires iallocator"
8277 elif self.op.mode == constants.NODE_EVAC_SEC:
8278 # Secondary instances only
8279 inst_fn = _GetNodeSecondaryInstances
8282 assert self.op.mode == constants.NODE_EVAC_ALL
8283 inst_fn = _GetNodeInstances
8284 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
8286 raise errors.OpPrereqError("Due to an issue with the iallocator"
8287 " interface it is not possible to evacuate"
8288 " all instances at once; specify explicitly"
8289 " whether to evacuate primary or secondary"
8293 return inst_fn(self.cfg, self.op.node_name)
8295 def DeclareLocks(self, level):
8296 if level == locking.LEVEL_INSTANCE:
8297 # Lock instances optimistically, needs verification once node and group
8298 # locks have been acquired
8299 self.needed_locks[locking.LEVEL_INSTANCE] = \
8300 set(i.name for i in self._DetermineInstances())
8302 elif level == locking.LEVEL_NODEGROUP:
8303 # Lock node groups for all potential target nodes optimistically, needs
8304 # verification once nodes have been acquired
8305 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8306 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
8308 elif level == locking.LEVEL_NODE:
8309 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
8311 def CheckPrereq(self):
8313 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
8314 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
8315 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
8317 need_nodes = self._DetermineNodes()
8319 if not owned_nodes.issuperset(need_nodes):
8320 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
8321 " locks were acquired, current nodes are"
8322 " are '%s', used to be '%s'; retry the"
8325 utils.CommaJoin(need_nodes),
8326 utils.CommaJoin(owned_nodes)),
8329 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
8330 if owned_groups != wanted_groups:
8331 raise errors.OpExecError("Node groups changed since locks were acquired,"
8332 " current groups are '%s', used to be '%s';"
8333 " retry the operation" %
8334 (utils.CommaJoin(wanted_groups),
8335 utils.CommaJoin(owned_groups)))
8337 # Determine affected instances
8338 self.instances = self._DetermineInstances()
8339 self.instance_names = [i.name for i in self.instances]
8341 if set(self.instance_names) != owned_instances:
8342 raise errors.OpExecError("Instances on node '%s' changed since locks"
8343 " were acquired, current instances are '%s',"
8344 " used to be '%s'; retry the operation" %
8346 utils.CommaJoin(self.instance_names),
8347 utils.CommaJoin(owned_instances)))
8349 if self.instance_names:
8350 self.LogInfo("Evacuating instances from node '%s': %s",
8352 utils.CommaJoin(utils.NiceSort(self.instance_names)))
8354 self.LogInfo("No instances to evacuate from node '%s'",
8357 if self.op.remote_node is not None:
8358 for i in self.instances:
8359 if i.primary_node == self.op.remote_node:
8360 raise errors.OpPrereqError("Node %s is the primary node of"
8361 " instance %s, cannot use it as"
8363 (self.op.remote_node, i.name),
8366 def Exec(self, feedback_fn):
8367 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
8369 if not self.instance_names:
8370 # No instances to evacuate
8373 elif self.op.iallocator is not None:
8374 # TODO: Implement relocation to other group
8375 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
8376 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
8377 instances=list(self.instance_names))
8378 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8380 ial.Run(self.op.iallocator)
8383 raise errors.OpPrereqError("Can't compute node evacuation using"
8384 " iallocator '%s': %s" %
8385 (self.op.iallocator, ial.info),
8388 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
8390 elif self.op.remote_node is not None:
8391 assert self.op.mode == constants.NODE_EVAC_SEC
8393 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
8394 remote_node=self.op.remote_node,
8396 mode=constants.REPLACE_DISK_CHG,
8397 early_release=self.op.early_release)]
8398 for instance_name in self.instance_names]
8401 raise errors.ProgrammerError("No iallocator or remote node")
8403 return ResultWithJobs(jobs)
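# Two execution paths, both returning jobs rather than moving anything
# directly: with an iallocator the evacuation plan comes from the allocator
# and is turned into jobs by _LoadNodeEvacResult; with an explicit remote
# node, one OpInstanceReplaceDisks(mode=REPLACE_DISK_CHG) job is submitted
# per affected instance.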
8406 def _DiskSizeInBytesToMebibytes(lu, size):
8407 """Converts a disk size in bytes to mebibytes.
8409 Warns and rounds up if the size isn't an even multiple of 1 MiB.
8412 (mib, remainder) = divmod(size, 1024 * 1024)
8415 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
8416 " to not overwrite existing data (%s bytes will not be"
8417 " wiped)", (1024 * 1024) - remainder)
8423 class LUInstanceGrowDisk(LogicalUnit):
8424 """Grow a disk of an instance.
8428 HTYPE = constants.HTYPE_INSTANCE
8431 def ExpandNames(self):
8432 self._ExpandAndLockInstance()
8433 self.needed_locks[locking.LEVEL_NODE] = []
8434 self.needed_locks[locking.LEVEL_NODE_RES] = []
8435 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8436 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8438 def DeclareLocks(self, level):
8439 if level == locking.LEVEL_NODE:
8440 self._LockInstancesNodes()
8441 elif level == locking.LEVEL_NODE_RES:
8443 self.needed_locks[locking.LEVEL_NODE_RES] = \
8444 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8446 def BuildHooksEnv(self):
8449 This runs on the master, the primary and all the secondaries.
8453 "DISK": self.op.disk,
8454 "AMOUNT": self.op.amount,
8455 "ABSOLUTE": self.op.absolute,
8457 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8460 def BuildHooksNodes(self):
8461 """Build hooks nodes.
8464 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8467 def CheckPrereq(self):
8468 """Check prerequisites.
8470 This checks that the instance is in the cluster.
8473 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8474 assert instance is not None, \
8475 "Cannot retrieve locked instance %s" % self.op.instance_name
8476 nodenames = list(instance.all_nodes)
8477 for node in nodenames:
8478 _CheckNodeOnline(self, node)
8480 self.instance = instance
8482 if instance.disk_template not in constants.DTS_GROWABLE:
8483 raise errors.OpPrereqError("Instance's disk layout does not support"
8484 " growing", errors.ECODE_INVAL)
8486 self.disk = instance.FindDisk(self.op.disk)
8488 if self.op.absolute:
8489 self.target = self.op.amount
8490 self.delta = self.target - self.disk.size
8492 raise errors.OpPrereqError("Requested size (%s) is smaller than "
8493 "current disk size (%s)" %
8494 (utils.FormatUnit(self.target, "h"),
8495 utils.FormatUnit(self.disk.size, "h")),
8498 self.delta = self.op.amount
8499 self.target = self.disk.size + self.delta
8501 raise errors.OpPrereqError("Requested increment (%s) is negative" %
8502 utils.FormatUnit(self.delta, "h"),
8505 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
8507 def _CheckDiskSpace(self, nodenames, req_vgspace):
8508 template = self.instance.disk_template
8509 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
8510 # TODO: check the free disk space for file, when that feature will be
8512 nodes = map(self.cfg.GetNodeInfo, nodenames)
8513 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
8516 # With exclusive storage we need to do something smarter than just looking
8517 # at free space; for now, let's simply abort the operation.
8518 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
8519 " is enabled", errors.ECODE_STATE)
8520 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
8522 def Exec(self, feedback_fn):
8523 """Execute disk grow.
8526 instance = self.instance
8529 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
8530 assert (self.owned_locks(locking.LEVEL_NODE) ==
8531 self.owned_locks(locking.LEVEL_NODE_RES))
8533 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
8535 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8537 raise errors.OpExecError("Cannot activate block device to grow")
8539 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
8540 (self.op.disk, instance.name,
8541 utils.FormatUnit(self.delta, "h"),
8542 utils.FormatUnit(self.target, "h")))
8544 # First run all grow ops in dry-run mode
8545 for node in instance.all_nodes:
8546 self.cfg.SetDiskID(disk, node)
8547 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8549 result.Raise("Dry-run grow request failed to node %s" % node)
8552 # Get disk size from primary node for wiping
8553 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
8554 result.Raise("Failed to retrieve disk size from node '%s'" %
8555 instance.primary_node)
8557 (disk_size_in_bytes, ) = result.payload
8559 if disk_size_in_bytes is None:
8560 raise errors.OpExecError("Failed to retrieve disk size from primary"
8561 " node '%s'" % instance.primary_node)
8563 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
8565 assert old_disk_size >= disk.size, \
8566 ("Retrieved disk size too small (got %s, should be at least %s)" %
8567 (old_disk_size, disk.size))
8569 old_disk_size = None
8571 # We know that (as far as we can test) operations across different
8572 # nodes will succeed, time to run it for real on the backing storage
8573 for node in instance.all_nodes:
8574 self.cfg.SetDiskID(disk, node)
8575 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8577 result.Raise("Grow request failed to node %s" % node)
8579 # And now execute it for logical storage, on the primary node
8580 node = instance.primary_node
8581 self.cfg.SetDiskID(disk, node)
8582 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8584 result.Raise("Grow request failed to node %s" % node)
8586 disk.RecordGrow(self.delta)
8587 self.cfg.Update(instance, feedback_fn)
8589 # Changes have been recorded, release node lock
8590 _ReleaseLocks(self, locking.LEVEL_NODE)
8592 # Downgrade lock while waiting for sync
8593 self.glm.downgrade(locking.LEVEL_INSTANCE)
8595 assert wipe_disks ^ (old_disk_size is None)
8598 assert instance.disks[self.op.disk] == disk
8600 # Wipe newly added disk space
8601 _WipeDisks(self, instance,
8602 disks=[(self.op.disk, disk, old_disk_size)])
8604 if self.op.wait_for_sync:
8605 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8607 self.LogWarning("Disk syncing has not returned a good status; check"
8609 if instance.admin_state != constants.ADMINST_UP:
8610 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8611 elif instance.admin_state != constants.ADMINST_UP:
8612 self.LogWarning("Not shutting down the disk even if the instance is"
8613 " not supposed to be running because no wait for"
8614 " sync mode was requested")
8616 assert self.owned_locks(locking.LEVEL_NODE_RES)
8617 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
8620 class LUInstanceQueryData(NoHooksLU):
8621 """Query runtime instance data.
8626 def ExpandNames(self):
8627 self.needed_locks = {}
8629 # Use locking if requested or when non-static information is wanted
8630 if not (self.op.static or self.op.use_locking):
8631 self.LogWarning("Non-static data requested, locks need to be acquired")
8632 self.op.use_locking = True
8634 if self.op.instances or not self.op.use_locking:
8635 # Expand instance names right here
8636 self.wanted_names = _GetWantedInstances(self, self.op.instances)
8638 # Will use acquired locks
8639 self.wanted_names = None
8641 if self.op.use_locking:
8642 self.share_locks = _ShareAll()
8644 if self.wanted_names is None:
8645 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8647 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8649 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8650 self.needed_locks[locking.LEVEL_NODE] = []
8651 self.needed_locks[locking.LEVEL_NETWORK] = []
8652 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8654 def DeclareLocks(self, level):
8655 if self.op.use_locking:
8656 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
8657 if level == locking.LEVEL_NODEGROUP:
8659 # Lock all groups used by instances optimistically; this requires going
8660 # via the node before it's locked, requiring verification later on
8661 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8662 frozenset(group_uuid
8663 for instance_name in owned_instances
8665 self.cfg.GetInstanceNodeGroups(instance_name))
8667 elif level == locking.LEVEL_NODE:
8668 self._LockInstancesNodes()
8670 elif level == locking.LEVEL_NETWORK:
8671 self.needed_locks[locking.LEVEL_NETWORK] = \
8673 for instance_name in owned_instances
8675 self.cfg.GetInstanceNetworks(instance_name))
8677 def CheckPrereq(self):
8678 """Check prerequisites.
8680 This only checks the optional instance list against the existing names.
8683 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
8684 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
8685 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
8686 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
8688 if self.wanted_names is None:
8689 assert self.op.use_locking, "Locking was not used"
8690 self.wanted_names = owned_instances
8692 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
8694 if self.op.use_locking:
8695 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
8698 assert not (owned_instances or owned_groups or
8699 owned_nodes or owned_networks)
8701 self.wanted_instances = instances.values()
8703 def _ComputeBlockdevStatus(self, node, instance, dev):
8704 """Returns the status of a block device
8707 if self.op.static or not node:
8710 self.cfg.SetDiskID(dev, node)
8712 result = self.rpc.call_blockdev_find(node, dev)
8716 result.Raise("Can't compute disk status for %s" % instance.name)
8718 status = result.payload
8722 return (status.dev_path, status.major, status.minor,
8723 status.sync_percent, status.estimated_time,
8724 status.is_degraded, status.ldisk_status)
8726 def _ComputeDiskStatus(self, instance, snode, dev):
8727 """Compute block device status.
8730 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
8732 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
8734 def _ComputeDiskStatusInner(self, instance, snode, dev):
8735 """Compute block device status.
8737 @attention: The device has to be annotated already.
8740 if dev.dev_type in constants.LDS_DRBD:
8741 # we change the snode then (otherwise we use the one passed in)
8742 if dev.logical_id[0] == instance.primary_node:
8743 snode = dev.logical_id[1]
8745 snode = dev.logical_id[0]
8747 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8749 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
8752 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
8759 "iv_name": dev.iv_name,
8760 "dev_type": dev.dev_type,
8761 "logical_id": dev.logical_id,
8762 "physical_id": dev.physical_id,
8763 "pstatus": dev_pstatus,
8764 "sstatus": dev_sstatus,
8765 "children": dev_children,
8772 def Exec(self, feedback_fn):
8773 """Gather and return data"""
8776 cluster = self.cfg.GetClusterInfo()
8778 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
8779 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
8781 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
8782 for node in nodes.values()))
8784 group2name_fn = lambda uuid: groups[uuid].name
8785 for instance in self.wanted_instances:
8786 pnode = nodes[instance.primary_node]
8788 if self.op.static or pnode.offline:
8791 self.LogWarning("Primary node %s is marked offline, returning static"
8792 " information only for instance %s" %
8793 (pnode.name, instance.name))
8795 remote_info = self.rpc.call_instance_info(instance.primary_node,
8797 instance.hypervisor)
8798 remote_info.Raise("Error checking node %s" % instance.primary_node)
8799 remote_info = remote_info.payload
8800 if remote_info and "state" in remote_info:
8803 if instance.admin_state == constants.ADMINST_UP:
8804 remote_state = "down"
8806 remote_state = instance.admin_state
8808 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
8811 snodes_group_uuids = [nodes[snode_name].group
8812 for snode_name in instance.secondary_nodes]
8814 result[instance.name] = {
8815 "name": instance.name,
8816 "config_state": instance.admin_state,
8817 "run_state": remote_state,
8818 "pnode": instance.primary_node,
8819 "pnode_group_uuid": pnode.group,
8820 "pnode_group_name": group2name_fn(pnode.group),
8821 "snodes": instance.secondary_nodes,
8822 "snodes_group_uuids": snodes_group_uuids,
8823 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
8825 # this happens to be the same format used for hooks
8826 "nics": _NICListToTuple(self, instance.nics),
8827 "disk_template": instance.disk_template,
8829 "hypervisor": instance.hypervisor,
8830 "network_port": instance.network_port,
8831 "hv_instance": instance.hvparams,
8832 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8833 "be_instance": instance.beparams,
8834 "be_actual": cluster.FillBE(instance),
8835 "os_instance": instance.osparams,
8836 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8837 "serial_no": instance.serial_no,
8838 "mtime": instance.mtime,
8839 "ctime": instance.ctime,
8840 "uuid": instance.uuid,
8846 def PrepareContainerMods(mods, private_fn):
8847 """Prepares a list of container modifications by adding a private data field.
8849 @type mods: list of tuples; (operation, index, parameters)
8850 @param mods: List of modifications
8851 @type private_fn: callable or None
8852 @param private_fn: Callable for constructing a private data field for a
8857 if private_fn is None:
8862 return [(op, idx, params, fn()) for (op, idx, params) in mods]
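# Illustrative sketch (assumed values): with
#   mods = [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})]
# and private_fn=None, the result is
#   [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}, None)]
# i.e. every modification simply gains a fourth, private-data slot.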
8865 def GetItemFromContainer(identifier, kind, container):
8866 """Return the item refered by the identifier.
8868 @type identifier: string
8869 @param identifier: Item index or name or UUID
8871 @param kind: One-word item description
8872 @type container: list
8873 @param container: Container to get the item from
8878 idx = int(identifier)
8881 absidx = len(container) - 1
8883 raise IndexError("Not accepting negative indices other than -1")
8884 elif idx > len(container):
8885 raise IndexError("Got %s index %s, but there are only %s" %
8886 (kind, idx, len(container)))
8889 return (absidx, container[idx])
8893 for idx, item in enumerate(container):
8894 if item.uuid == identifier or item.name == identifier:
8897 raise errors.OpPrereqError("Cannot find %s with identifier %s" %
8898 (kind, identifier), errors.ECODE_NOENT)
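# Examples (illustrative only): GetItemFromContainer("2", "nic", nics)
# returns (2, nics[2]); GetItemFromContainer("-1", "disk", disks) returns the
# last disk as (len(disks) - 1, disks[-1]); a non-numeric identifier is
# matched against each item's uuid and name attributes instead.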
8901 #: Type description for changes as returned by L{ApplyContainerMods}'s
8903 _TApplyContModsCbChanges = \
8904 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
8910 def ApplyContainerMods(kind, container, chgdesc, mods,
8911 create_fn, modify_fn, remove_fn):
8912 """Applies descriptions in C{mods} to C{container}.
8915 @param kind: One-word item description
8916 @type container: list
8917 @param container: Container to modify
8918 @type chgdesc: None or list
8919 @param chgdesc: List of applied changes
8921 @param mods: Modifications as returned by L{PrepareContainerMods}
8922 @type create_fn: callable
8923 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
8924 receives absolute item index, parameters and private data object as added
8925 by L{PrepareContainerMods}, returns tuple containing new item and changes
8927 @type modify_fn: callable
8928 @param modify_fn: Callback for modifying an existing item
8929 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
8930 and private data object as added by L{PrepareContainerMods}, returns
8932 @type remove_fn: callable
8933 @param remove_fn: Callback on removing item; receives absolute item index,
8934 item and private data object as added by L{PrepareContainerMods}
8937 for (op, identifier, params, private) in mods:
8940 if op == constants.DDM_ADD:
8941 # Calculate where item will be added
8942 # When adding an item, identifier can only be an index
8944 idx = int(identifier)
8946 raise errors.OpPrereqError("Only possitive integer or -1 is accepted as"
8947 " identifier for %s" % constants.DDM_ADD,
8950 addidx = len(container)
8953 raise IndexError("Not accepting negative indices other than -1")
8954 elif idx > len(container):
8955 raise IndexError("Got %s index %s, but there are only %s" %
8956 (kind, idx, len(container)))
8959 if create_fn is None:
8962 (item, changes) = create_fn(addidx, params, private)
8965 container.append(item)
8968 assert idx <= len(container)
8969 # list.insert does so before the specified index
8970 container.insert(idx, item)
8972 # Retrieve existing item
8973 (absidx, item) = GetItemFromContainer(identifier, kind, container)
8975 if op == constants.DDM_REMOVE:
8978 if remove_fn is not None:
8979 remove_fn(absidx, item, private)
8981 changes = [("%s/%s" % (kind, absidx), "remove")]
8983 assert container[absidx] == item
8984 del container[absidx]
8985 elif op == constants.DDM_MODIFY:
8986 if modify_fn is not None:
8987 changes = modify_fn(absidx, item, params, private)
8989 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
8991 assert _TApplyContModsCbChanges(changes)
8993 if not (chgdesc is None or changes is None):
8994 chgdesc.extend(changes)
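# Minimal usage sketch (callback name assumed): to append one NIC,
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, nic_params)], None)
#   ApplyContainerMods("nic", instance.nics, chgdesc, mods,
#                      create_nic_fn, None, None)
# appends the item returned by create_nic_fn and records its changes in
# chgdesc.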
8997 def _UpdateIvNames(base_index, disks):
8998 """Updates the C{iv_name} attribute of disks.
9000 @type disks: list of L{objects.Disk}
9003 for (idx, disk) in enumerate(disks):
9004 disk.iv_name = "disk/%s" % (base_index + idx, )
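# For example, _UpdateIvNames(1, disks) renames the given disks to "disk/1",
# "disk/2", ... in order, keeping iv_name in sync with the disks' positions.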
9007 class _InstNicModPrivate:
9008 """Data structure for network interface modifications.
9010 Used by L{LUInstanceSetParams}.
9018 class LUInstanceSetParams(LogicalUnit):
9019 """Modifies an instances's parameters.
9022 HPATH = "instance-modify"
9023 HTYPE = constants.HTYPE_INSTANCE
9027 def _UpgradeDiskNicMods(kind, mods, verify_fn):
9028 assert ht.TList(mods)
9029 assert not mods or len(mods[0]) in (2, 3)
9031 if mods and len(mods[0]) == 2:
9035 for op, params in mods:
9036 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
9037 result.append((op, -1, params))
9041 raise errors.OpPrereqError("Only one %s add or remove operation is"
9042 " supported at a time" % kind,
9045 result.append((constants.DDM_MODIFY, op, params))
9047 assert verify_fn(result)
9054 def _CheckMods(kind, mods, key_types, item_fn):
9055 """Ensures requested disk/NIC modifications are valid.
9058 for (op, _, params) in mods:
9059 assert ht.TDict(params)
9061 # If 'key_types' is an empty dict, we assume we have an
9062 # 'ext' template and thus do not ForceDictType
9064 utils.ForceDictType(params, key_types)
9066 if op == constants.DDM_REMOVE:
9068 raise errors.OpPrereqError("No settings should be passed when"
9069 " removing a %s" % kind,
9071 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
9074 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
9077 def _VerifyDiskModification(op, params):
9078 """Verifies a disk modification.
9081 if op == constants.DDM_ADD:
9082 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9083 if mode not in constants.DISK_ACCESS_SET:
9084 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9087 size = params.get(constants.IDISK_SIZE, None)
9089 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
9090 constants.IDISK_SIZE, errors.ECODE_INVAL)
9094 except (TypeError, ValueError), err:
9095 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
9098 params[constants.IDISK_SIZE] = size
9099 name = params.get(constants.IDISK_NAME, None)
9100 if name is not None and name.lower() == constants.VALUE_NONE:
9101 params[constants.IDISK_NAME] = None
9103 elif op == constants.DDM_MODIFY:
9104 if constants.IDISK_SIZE in params:
9105 raise errors.OpPrereqError("Disk size change not possible, use"
9106 " grow-disk", errors.ECODE_INVAL)
9108 raise errors.OpPrereqError("Disk modification doesn't support"
9109 " additional arbitrary parameters",
9111 name = params.get(constants.IDISK_NAME, None)
9112 if name is not None and name.lower() == constants.VALUE_NONE:
9113 params[constants.IDISK_NAME] = None
9116 def _VerifyNicModification(op, params):
9117 """Verifies a network interface modification.
9120 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
9121 ip = params.get(constants.INIC_IP, None)
9122 name = params.get(constants.INIC_NAME, None)
9123 req_net = params.get(constants.INIC_NETWORK, None)
9124 link = params.get(constants.NIC_LINK, None)
9125 mode = params.get(constants.NIC_MODE, None)
9126 if name is not None and name.lower() == constants.VALUE_NONE:
9127 params[constants.INIC_NAME] = None
9128 if req_net is not None:
9129 if req_net.lower() == constants.VALUE_NONE:
9130 params[constants.INIC_NETWORK] = None
9132 elif link is not None or mode is not None:
9133 raise errors.OpPrereqError("If network is given"
9134 " mode or link should not",
9137 if op == constants.DDM_ADD:
9138 macaddr = params.get(constants.INIC_MAC, None)
9140 params[constants.INIC_MAC] = constants.VALUE_AUTO
9143 if ip.lower() == constants.VALUE_NONE:
9144 params[constants.INIC_IP] = None
9146 if ip.lower() == constants.NIC_IP_POOL:
9147 if op == constants.DDM_ADD and req_net is None:
9148 raise errors.OpPrereqError("If ip=pool, parameter network"
9152 if not netutils.IPAddress.IsValid(ip):
9153 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9156 if constants.INIC_MAC in params:
9157 macaddr = params[constants.INIC_MAC]
9158 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9159 macaddr = utils.NormalizeAndValidateMac(macaddr)
9161 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
9162 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9163 " modifying an existing NIC",
9166 def CheckArguments(self):
9167 if not (self.op.nics or self.op.disks or self.op.disk_template or
9168 self.op.hvparams or self.op.beparams or self.op.os_name or
9169 self.op.offline is not None or self.op.runtime_mem or
9171 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9173 if self.op.hvparams:
9174 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
9175 "hypervisor", "instance", "cluster")
9177 self.op.disks = self._UpgradeDiskNicMods(
9178 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
9179 self.op.nics = self._UpgradeDiskNicMods(
9180 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
9182 if self.op.disks and self.op.disk_template is not None:
9183 raise errors.OpPrereqError("Disk template conversion and other disk"
9184 " changes not supported at the same time",
9187 if (self.op.disk_template and
9188 self.op.disk_template in constants.DTS_INT_MIRROR and
9189 self.op.remote_node is None):
9190 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9191 " one requires specifying a secondary node",
9194 # Check NIC modifications
9195 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
9196 self._VerifyNicModification)
9199 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9201 def ExpandNames(self):
9202 self._ExpandAndLockInstance()
9203 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9204 # Can't even acquire node locks in shared mode as upcoming changes in
9205 # Ganeti 2.6 will start to modify the node object on disk conversion
9206 self.needed_locks[locking.LEVEL_NODE] = []
9207 self.needed_locks[locking.LEVEL_NODE_RES] = []
9208 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9209 # Lock the node group (shared) so we can look up the ipolicy
9210 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9212 def DeclareLocks(self, level):
9213 if level == locking.LEVEL_NODEGROUP:
9214 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9215 # Acquire locks for the instance's nodegroups optimistically. Needs
9216 # to be verified in CheckPrereq
9217 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9218 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9219 elif level == locking.LEVEL_NODE:
9220 self._LockInstancesNodes()
9221 if self.op.disk_template and self.op.remote_node:
9222 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9223 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9224 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
9226 self.needed_locks[locking.LEVEL_NODE_RES] = \
9227 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9229 def BuildHooksEnv(self):
9232 This runs on the master, primary and secondaries.
9236 if constants.BE_MINMEM in self.be_new:
9237 args["minmem"] = self.be_new[constants.BE_MINMEM]
9238 if constants.BE_MAXMEM in self.be_new:
9239 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
9240 if constants.BE_VCPUS in self.be_new:
9241 args["vcpus"] = self.be_new[constants.BE_VCPUS]
9242 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9243 # information at all.
9245 if self._new_nics is not None:
9248 for nic in self._new_nics:
9249 n = copy.deepcopy(nic)
9250 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
9251 n.nicparams = nicparams
9252 nics.append(_NICToTuple(self, n))
9256 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9257 if self.op.disk_template:
9258 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9259 if self.op.runtime_mem:
9260 env["RUNTIME_MEMORY"] = self.op.runtime_mem
9264 def BuildHooksNodes(self):
9265 """Build hooks nodes.
9268 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9271 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
9272 old_params, cluster, pnode):
9274 update_params_dict = dict([(key, params[key])
9275 for key in constants.NICS_PARAMETERS
9278 req_link = update_params_dict.get(constants.NIC_LINK, None)
9279 req_mode = update_params_dict.get(constants.NIC_MODE, None)
9282 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
9283 if new_net_uuid_or_name:
9284 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
9285 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
9288 old_net_obj = self.cfg.GetNetwork(old_net_uuid)
9291 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
9293 raise errors.OpPrereqError("No netparams found for the network"
9294 " %s, probably not connected" %
9295 new_net_obj.name, errors.ECODE_INVAL)
9296 new_params = dict(netparams)
9298 new_params = _GetUpdatedParams(old_params, update_params_dict)
9300 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
9302 new_filled_params = cluster.SimpleFillNIC(new_params)
9303 objects.NIC.CheckParameterSyntax(new_filled_params)
9305 new_mode = new_filled_params[constants.NIC_MODE]
9306 if new_mode == constants.NIC_MODE_BRIDGED:
9307 bridge = new_filled_params[constants.NIC_LINK]
9308 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
9310 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
9312 self.warn.append(msg)
9314 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9316 elif new_mode == constants.NIC_MODE_ROUTED:
9317 ip = params.get(constants.INIC_IP, old_ip)
9319 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
9320 " on a routed NIC", errors.ECODE_INVAL)
9322 elif new_mode == constants.NIC_MODE_OVS:
9323 # TODO: check OVS link
9324 self.LogInfo("OVS links are currently not checked for correctness")
9326 if constants.INIC_MAC in params:
9327 mac = params[constants.INIC_MAC]
9329 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
9331 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9332 # otherwise generate the MAC address
9333 params[constants.INIC_MAC] = \
9334 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
9336 # or validate/reserve the current one
9338 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9339 except errors.ReservationError:
9340 raise errors.OpPrereqError("MAC address '%s' already in use"
9341 " in cluster" % mac,
9342 errors.ECODE_NOTUNIQUE)
9343 elif new_net_uuid != old_net_uuid:
9345 def get_net_prefix(net_uuid):
9348 nobj = self.cfg.GetNetwork(net_uuid)
9349 mac_prefix = nobj.mac_prefix
9353 new_prefix = get_net_prefix(new_net_uuid)
9354 old_prefix = get_net_prefix(old_net_uuid)
9355 if old_prefix != new_prefix:
9356 params[constants.INIC_MAC] = \
9357 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
9359 # if there is a change in (ip, network) tuple
9360 new_ip = params.get(constants.INIC_IP, old_ip)
9361 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
9363 # if IP is pool then require a network and generate one IP
9364 if new_ip.lower() == constants.NIC_IP_POOL:
9367 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
9368 except errors.ReservationError:
9369 raise errors.OpPrereqError("Unable to get a free IP"
9370 " from the address pool",
9372 self.LogInfo("Chose IP %s from network %s",
9375 params[constants.INIC_IP] = new_ip
9377 raise errors.OpPrereqError("ip=pool, but no network found",
9379 # Reserve the new IP in the new network, if any
9382 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
9383 self.LogInfo("Reserving IP %s in network %s",
9384 new_ip, new_net_obj.name)
9385 except errors.ReservationError:
9386 raise errors.OpPrereqError("IP %s not available in network %s" %
9387 (new_ip, new_net_obj.name),
9388 errors.ECODE_NOTUNIQUE)
9389 # new network is None so check if new IP is a conflicting IP
9390 elif self.op.conflicts_check:
9391 _CheckForConflictingIp(self, new_ip, pnode)
9393 # release old IP if old network is not None
9394 if old_ip and old_net_uuid:
9396 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
9397 except errors.AddressPoolError:
9398 logging.warning("Release IP %s not contained in network %s",
9399 old_ip, old_net_obj.name)
9401 # there are no changes in (ip, network) tuple and old network is not None
9402 elif (old_net_uuid is not None and
9403 (req_link is not None or req_mode is not None)):
9404 raise errors.OpPrereqError("Not allowed to change link or mode of"
9405 " a NIC that is connected to a network",
9408 private.params = new_params
9409 private.filled = new_filled_params
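# Illustrative sketch (assumption, for documentation only): moving a NIC to
# another network is requested through OpInstanceSetParams; the instance and
# network names, and the exact (op, index, params) tuple layout shown, are
# assumptions for illustration.
#
#   opcodes.OpInstanceSetParams(
#     instance_name="inst1.example.com",
#     nics=[(constants.DDM_MODIFY, 0, {constants.INIC_NETWORK: "prod-net"})])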
9411 def _PreCheckDiskTemplate(self, pnode_info):
9412 """CheckPrereq checks related to a new disk template."""
9413 # Arguments are passed to avoid configuration lookups
9414 instance = self.instance
9415 pnode = instance.primary_node
9416 cluster = self.cluster
9417 if instance.disk_template == self.op.disk_template:
9418 raise errors.OpPrereqError("Instance already has disk template %s" %
9419 instance.disk_template, errors.ECODE_INVAL)
9421 if (instance.disk_template,
9422 self.op.disk_template) not in self._DISK_CONVERSIONS:
9423 raise errors.OpPrereqError("Unsupported disk template conversion from"
9424 " %s to %s" % (instance.disk_template,
9425 self.op.disk_template),
9427 _CheckInstanceState(self, instance, INSTANCE_DOWN,
9428 msg="cannot change disk template")
9429 if self.op.disk_template in constants.DTS_INT_MIRROR:
9430 if self.op.remote_node == pnode:
9431 raise errors.OpPrereqError("Given new secondary node %s is the same"
9432 " as the primary node of the instance" %
9433 self.op.remote_node, errors.ECODE_STATE)
9434 _CheckNodeOnline(self, self.op.remote_node)
9435 _CheckNodeNotDrained(self, self.op.remote_node)
9436 # FIXME: here we assume that the old instance type is DT_PLAIN
9437 assert instance.disk_template == constants.DT_PLAIN
9438 disks = [{constants.IDISK_SIZE: d.size,
9439 constants.IDISK_VG: d.logical_id[0]}
9440 for d in instance.disks]
9441 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9442 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9444 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
9445 snode_group = self.cfg.GetNodeGroup(snode_info.group)
9446 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
9448 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
9449 ignore=self.op.ignore_ipolicy)
9450 if pnode_info.group != snode_info.group:
9451 self.LogWarning("The primary and secondary nodes are in two"
9452 " different node groups; the disk parameters"
9453 " from the first disk's node group will be"
9456 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
9457 # Make sure none of the nodes require exclusive storage
9458 nodes = [pnode_info]
9459 if self.op.disk_template in constants.DTS_INT_MIRROR:
9461 nodes.append(snode_info)
9462 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
9463 if compat.any(map(has_es, nodes)):
9464 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
9465 " storage is enabled" % (instance.disk_template,
9466 self.op.disk_template))
9467 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
9469 def CheckPrereq(self):
9470 """Check prerequisites.
9472 This only checks the instance list against the existing names.
9475 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
9476 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9478 cluster = self.cluster = self.cfg.GetClusterInfo()
9479 assert self.instance is not None, \
9480 "Cannot retrieve locked instance %s" % self.op.instance_name
9482 pnode = instance.primary_node
9486 if (self.op.pnode is not None and self.op.pnode != pnode and
9488 # verify that the instance is not up
9489 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9490 instance.hypervisor)
9491 if instance_info.fail_msg:
9492 self.warn.append("Can't get instance runtime information: %s" %
9493 instance_info.fail_msg)
9494 elif instance_info.payload:
9495 raise errors.OpPrereqError("Instance is still running on %s" % pnode,
9498 assert pnode in self.owned_locks(locking.LEVEL_NODE)
9499 nodelist = list(instance.all_nodes)
9500 pnode_info = self.cfg.GetNodeInfo(pnode)
9501 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
9503 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9504 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
9505 group_info = self.cfg.GetNodeGroup(pnode_info.group)
9507 # dictionary with instance information after the modification
9510 # Check disk modifications. This is done here and not in CheckArguments
9511 # (as with NICs), because we need to know the instance's disk template
9512 if instance.disk_template == constants.DT_EXT:
9513 self._CheckMods("disk", self.op.disks, {},
9514 self._VerifyDiskModification)
9516 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
9517 self._VerifyDiskModification)
9519 # Prepare disk/NIC modifications
9520 self.diskmod = PrepareContainerMods(self.op.disks, None)
9521 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
9523 # Check the validity of the `provider' parameter
9524 if instance.disk_template in constants.DT_EXT:
9525 for mod in self.diskmod:
9526 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
9527 if mod[0] == constants.DDM_ADD:
9528 if ext_provider is None:
9529 raise errors.OpPrereqError("Instance template is '%s' and parameter"
9530 " '%s' missing, during disk add" %
9532 constants.IDISK_PROVIDER),
9534 elif mod[0] == constants.DDM_MODIFY:
9536 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
9538 constants.IDISK_PROVIDER,
9541 for mod in self.diskmod:
9542 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
9543 if ext_provider is not None:
9544 raise errors.OpPrereqError("Parameter '%s' is only valid for"
9545 " instances of type '%s'" %
9546 (constants.IDISK_PROVIDER,
9551 if self.op.os_name and not self.op.force:
9552 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9553 self.op.force_variant)
9554 instance_os = self.op.os_name
9556 instance_os = instance.os
9558 assert not (self.op.disk_template and self.op.disks), \
9559 "Can't modify disk template and apply disk changes at the same time"
9561 if self.op.disk_template:
9562 self._PreCheckDiskTemplate(pnode_info)
9564 # hvparams processing
9565 if self.op.hvparams:
9566 hv_type = instance.hypervisor
9567 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9568 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9569 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9572 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
9573 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9574 self.hv_proposed = self.hv_new = hv_new # the new actual values
9575 self.hv_inst = i_hvdict # the new dict (without defaults)
9577 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
9579 self.hv_new = self.hv_inst = {}
9581 # beparams processing
9582 if self.op.beparams:
9583 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9585 objects.UpgradeBeParams(i_bedict)
9586 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9587 be_new = cluster.SimpleFillBE(i_bedict)
9588 self.be_proposed = self.be_new = be_new # the new actual values
9589 self.be_inst = i_bedict # the new dict (without defaults)
9591 self.be_new = self.be_inst = {}
9592 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
9593 be_old = cluster.FillBE(instance)
9595 # CPU param validation -- checking every time a parameter is
9596 # changed to cover all cases where either CPU mask or vcpus have
9598 if (constants.BE_VCPUS in self.be_proposed and
9599 constants.HV_CPU_MASK in self.hv_proposed):
9601 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
9602 # Verify mask is consistent with number of vCPUs. Can skip this
9603 # test if only 1 entry in the CPU mask, which means same mask
9604 # is applied to all vCPUs.
9605 if (len(cpu_list) > 1 and
9606 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
9607 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
9609 (self.be_proposed[constants.BE_VCPUS],
9610 self.hv_proposed[constants.HV_CPU_MASK]),
9613 # Only perform this test if a new CPU mask is given
9614 if constants.HV_CPU_MASK in self.hv_new:
9615 # Calculate the largest CPU number requested
9616 max_requested_cpu = max(map(max, cpu_list))
9617 # Check that all of the instance's nodes have enough physical CPUs to
9618 # satisfy the requested CPU mask
9619 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
9620 max_requested_cpu + 1, instance.hypervisor)
9622 # osparams processing
9623 if self.op.osparams:
9624 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9625 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9626 self.os_inst = i_osdict # the new dict (without defaults)
9630 #TODO(dynmem): do the appropriate check involving MINMEM
9631 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
9632 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
9633 mem_check_list = [pnode]
9634 if be_new[constants.BE_AUTO_BALANCE]:
9635 # either we changed auto_balance to yes or it was from before
9636 mem_check_list.extend(instance.secondary_nodes)
9637 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9638 instance.hypervisor)
9639 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9640 [instance.hypervisor], False)
9641 pninfo = nodeinfo[pnode]
9642 msg = pninfo.fail_msg
9644 # Assume the primary node is unreachable and go ahead
9645 self.warn.append("Can't get info from primary node %s: %s" %
9648 (_, _, (pnhvinfo, )) = pninfo.payload
9649 if not isinstance(pnhvinfo.get("memory_free", None), int):
9650 self.warn.append("Node data from primary node %s doesn't contain"
9651 " free memory information" % pnode)
9652 elif instance_info.fail_msg:
9653 self.warn.append("Can't get instance runtime information: %s" %
9654 instance_info.fail_msg)
9656 if instance_info.payload:
9657 current_mem = int(instance_info.payload["memory"])
9659 # Assume instance not running
9660 # (there is a slight race condition here, but it's not very
9661 # probable, and we have no other way to check)
9662 # TODO: Describe race condition
9664 #TODO(dynmem): do the appropriate check involving MINMEM
9665 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
9666 pnhvinfo["memory_free"])
9668 raise errors.OpPrereqError("This change will prevent the instance"
9669 " from starting, due to %d MB of memory"
9670 " missing on its primary node" %
9671 miss_mem, errors.ECODE_NORES)
9673 if be_new[constants.BE_AUTO_BALANCE]:
9674 for node, nres in nodeinfo.items():
9675 if node not in instance.secondary_nodes:
9677 nres.Raise("Can't get info from secondary node %s" % node,
9678 prereq=True, ecode=errors.ECODE_STATE)
9679 (_, _, (nhvinfo, )) = nres.payload
9680 if not isinstance(nhvinfo.get("memory_free", None), int):
9681 raise errors.OpPrereqError("Secondary node %s didn't return free"
9682 " memory information" % node,
9684 #TODO(dynmem): do the appropriate check involving MINMEM
9685 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
9686 raise errors.OpPrereqError("This change will prevent the instance"
9687 " from failover to its secondary node"
9688 " %s, due to not enough memory" % node,
9691 if self.op.runtime_mem:
9692 remote_info = self.rpc.call_instance_info(instance.primary_node,
9694 instance.hypervisor)
9695 remote_info.Raise("Error checking node %s" % instance.primary_node)
9696 if not remote_info.payload: # not running already
9697 raise errors.OpPrereqError("Instance %s is not running" %
9698 instance.name, errors.ECODE_STATE)
9700 current_memory = remote_info.payload["memory"]
9701 if (not self.op.force and
9702 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
9703 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
9704 raise errors.OpPrereqError("Instance %s must have memory between %d"
9705 " and %d MB of memory unless --force is"
9708 self.be_proposed[constants.BE_MINMEM],
9709 self.be_proposed[constants.BE_MAXMEM]),
9712 delta = self.op.runtime_mem - current_memory
9714 _CheckNodeFreeMemory(self, instance.primary_node,
9715 "ballooning memory for instance %s" %
9716 instance.name, delta, instance.hypervisor)
9718 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9719 raise errors.OpPrereqError("Disk operations not supported for"
9720 " diskless instances", errors.ECODE_INVAL)
9722 def _PrepareNicCreate(_, params, private):
9723 self._PrepareNicModification(params, private, None, None,
9727 def _PrepareNicMod(_, nic, params, private):
9728 self._PrepareNicModification(params, private, nic.ip, nic.network,
9729 nic.nicparams, cluster, pnode)
9732 def _PrepareNicRemove(_, params, __):
9734 net = params.network
9735 if net is not None and ip is not None:
9736 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
9738 # Verify NIC changes (operating on copy)
9739 nics = instance.nics[:]
9740 ApplyContainerMods("NIC", nics, None, self.nicmod,
9741 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
9742 if len(nics) > constants.MAX_NICS:
9743 raise errors.OpPrereqError("Instance has too many network interfaces"
9744 " (%d), cannot add more" % constants.MAX_NICS,
9747 def _PrepareDiskMod(_, disk, params, __):
9748 disk.name = params.get(constants.IDISK_NAME, None)
9750 # Verify disk changes (operating on a copy)
9751 disks = copy.deepcopy(instance.disks)
9752 ApplyContainerMods("disk", disks, None, self.diskmod, None, _PrepareDiskMod,
9754 utils.ValidateDeviceNames("disk", disks)
9755 if len(disks) > constants.MAX_DISKS:
9756 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
9757 " more" % constants.MAX_DISKS,
9759 disk_sizes = [disk.size for disk in instance.disks]
9760 disk_sizes.extend(params["size"] for (op, idx, params, private) in
9761 self.diskmod if op == constants.DDM_ADD)
9762 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
9763 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
9765 if self.op.offline is not None and self.op.offline:
9766 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
9767 msg="can't change to offline")
9769 # Pre-compute NIC changes (necessary to use result in hooks)
9770 self._nic_chgdesc = []
9772 # Operate on copies as this is still in prereq
9773 nics = [nic.Copy() for nic in instance.nics]
9774 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
9775 self._CreateNewNic, self._ApplyNicMods, None)
9776 # Verify that NIC names are unique and valid
9777 utils.ValidateDeviceNames("NIC", nics)
9778 self._new_nics = nics
9779 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
9781 self._new_nics = None
9782 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
9784 if not self.op.ignore_ipolicy:
9785 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
9788 # Fill ispec with backend parameters
9789 ispec[constants.ISPEC_SPINDLE_USE] = \
9790 self.be_new.get(constants.BE_SPINDLE_USE, None)
9791 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
9794 # Copy ispec to verify parameters with min/max values separately
9795 if self.op.disk_template:
9796 new_disk_template = self.op.disk_template
9798 new_disk_template = instance.disk_template
9799 ispec_max = ispec.copy()
9800 ispec_max[constants.ISPEC_MEM_SIZE] = \
9801 self.be_new.get(constants.BE_MAXMEM, None)
9802 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
9804 ispec_min = ispec.copy()
9805 ispec_min[constants.ISPEC_MEM_SIZE] = \
9806 self.be_new.get(constants.BE_MINMEM, None)
9807 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
9810 if (res_max or res_min):
9811 # FIXME: Improve error message by including information about whether
9812 # the upper or lower limit of the parameter fails the ipolicy.
9813 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
9814 (group_info, group_info.name,
9815 utils.CommaJoin(set(res_max + res_min))))
9816 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9818 def _ConvertPlainToDrbd(self, feedback_fn):
9819 """Converts an instance from plain to drbd.
9822 feedback_fn("Converting template to drbd")
9823 instance = self.instance
9824 pnode = instance.primary_node
9825 snode = self.op.remote_node
9827 assert instance.disk_template == constants.DT_PLAIN
9829 # create a fake disk info for _GenerateDiskTemplate
9830 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
9831 constants.IDISK_VG: d.logical_id[0],
9832 constants.IDISK_NAME: d.name}
9833 for d in instance.disks]
9834 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9835 instance.name, pnode, [snode],
9836 disk_info, None, None, 0, feedback_fn,
9838 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
9840 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
9841 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
9842 info = _GetInstanceInfoText(instance)
9843 feedback_fn("Creating additional volumes...")
9844 # first, create the missing data and meta devices
9845 for disk in anno_disks:
9846 # unfortunately this is... not too nice
9847 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9848 info, True, p_excl_stor)
9849 for child in disk.children:
9850 _CreateSingleBlockDev(self, snode, instance, child, info, True,
9852 # at this stage, all new LVs have been created, we can rename the old ones
9854 feedback_fn("Renaming original volumes...")
9855 rename_list = [(o, n.children[0].logical_id)
9856 for (o, n) in zip(instance.disks, new_disks)]
9857 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9858 result.Raise("Failed to rename original LVs")
9860 feedback_fn("Initializing DRBD devices...")
9861 # all child devices are in place, we can now create the DRBD devices
9863 for disk in anno_disks:
9864 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
9865 f_create = node == pnode
9866 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
9868 except errors.GenericError, e:
9869 feedback_fn("Initializing of DRBD devices failed;"
9870 " renaming back original volumes...")
9871 for disk in new_disks:
9872 self.cfg.SetDiskID(disk, pnode)
9873 rename_back_list = [(n.children[0], o.logical_id)
9874 for (n, o) in zip(new_disks, instance.disks)]
9875 result = self.rpc.call_blockdev_rename(pnode, rename_back_list)
9876 result.Raise("Failed to rename LVs back after error %s" % str(e))
9879 # at this point, the instance has been modified
9880 instance.disk_template = constants.DT_DRBD8
9881 instance.disks = new_disks
9882 self.cfg.Update(instance, feedback_fn)
9884 # Release node locks while waiting for sync
9885 _ReleaseLocks(self, locking.LEVEL_NODE)
9887 # disks are created, waiting for sync
9888 disk_abort = not _WaitForSync(self, instance,
9889 oneshot=not self.op.wait_for_sync)
9891 raise errors.OpExecError("There are some degraded disks for"
9892 " this instance, please cleanup manually")
9894 # Node resource locks will be released by caller
9896 def _ConvertDrbdToPlain(self, feedback_fn):
9897 """Converts an instance from drbd to plain.
9900 instance = self.instance
9902 assert len(instance.secondary_nodes) == 1
9903 assert instance.disk_template == constants.DT_DRBD8
9905 pnode = instance.primary_node
9906 snode = instance.secondary_nodes[0]
9907 feedback_fn("Converting template to plain")
9909 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
9910 new_disks = [d.children[0] for d in instance.disks]
9912 # copy over size, mode and name
9913 for parent, child in zip(old_disks, new_disks):
9914 child.size = parent.size
9915 child.mode = parent.mode
9916 child.name = parent.name
9918 # this is a DRBD disk, return its port to the pool
9919 # NOTE: this must be done right before the call to cfg.Update!
9920 for disk in old_disks:
9921 tcp_port = disk.logical_id[2]
9922 self.cfg.AddTcpUdpPort(tcp_port)
9924 # update instance structure
9925 instance.disks = new_disks
9926 instance.disk_template = constants.DT_PLAIN
9927 _UpdateIvNames(0, instance.disks)
9928 self.cfg.Update(instance, feedback_fn)
9930 # Release locks in case removing disks takes a while
9931 _ReleaseLocks(self, locking.LEVEL_NODE)
9933 feedback_fn("Removing volumes on the secondary node...")
9934 for disk in old_disks:
9935 self.cfg.SetDiskID(disk, snode)
9936 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9938 self.LogWarning("Could not remove block device %s on node %s,"
9939 " continuing anyway: %s", disk.iv_name, snode, msg)
9941 feedback_fn("Removing unneeded volumes on the primary node...")
9942 for idx, disk in enumerate(old_disks):
9943 meta = disk.children[1]
9944 self.cfg.SetDiskID(meta, pnode)
9945 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9947 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9948 " continuing anyway: %s", idx, pnode, msg)
9950 def _CreateNewDisk(self, idx, params, _):
9951 """Creates a new disk.
9954 instance = self.instance
9957 if instance.disk_template in constants.DTS_FILEBASED:
9958 (file_driver, file_path) = instance.disks[0].logical_id
9959 file_path = os.path.dirname(file_path)
9960 else:
9961 file_driver = file_path = None
9964 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
9965 instance.primary_node, instance.secondary_nodes,
9966 [params], file_path, file_driver, idx,
9967 self.Log, self.diskparams)[0]
9969 info = _GetInstanceInfoText(instance)
9971 logging.info("Creating volume %s for instance %s",
9972 disk.iv_name, instance.name)
9973 # Note: this needs to be kept in sync with _CreateDisks
9975 for node in instance.all_nodes:
9976 f_create = (node == instance.primary_node)
9977 try:
9978 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
9979 except errors.OpExecError, err:
9980 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
9981 disk.iv_name, disk, node, err)
9983 if self.cluster.prealloc_wipe_disks:
9985 _WipeDisks(self, instance,
9986 disks=[(idx, disk, 0)])
9989 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
9993 def _ModifyDisk(idx, disk, params, _):
9998 mode = params.get(constants.IDISK_MODE, None)
10001 changes.append(("disk.mode/%d" % idx, disk.mode))
10003 name = params.get(constants.IDISK_NAME, None)
10005 changes.append(("disk.name/%d" % idx, disk.name))
10009 def _RemoveDisk(self, idx, root, _):
10013 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
10014 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
10015 self.cfg.SetDiskID(disk, node)
10016 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10018 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
10019 " continuing anyway", idx, node, msg)
10021 # if this is a DRBD disk, return its port to the pool
10022 if root.dev_type in constants.LDS_DRBD:
10023 self.cfg.AddTcpUdpPort(root.logical_id[2])
10025 def _CreateNewNic(self, idx, params, private):
10026 """Creates data structure for a new network interface.
10029 mac = params[constants.INIC_MAC]
10030 ip = params.get(constants.INIC_IP, None)
10031 net = params.get(constants.INIC_NETWORK, None)
10032 name = params.get(constants.INIC_NAME, None)
10033 net_uuid = self.cfg.LookupNetwork(net)
10034 #TODO: not private.filled?? can a nic have no nicparams??
10035 nicparams = private.filled
10036 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, name=name,
10037 nicparams=nicparams)
10038 nobj.uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10042 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
10043 (mac, ip, private.filled[constants.NIC_MODE],
10044 private.filled[constants.NIC_LINK],
10048 def _ApplyNicMods(self, idx, nic, params, private):
10049 """Modifies a network interface.
10054 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NAME]:
10055 if key in params:
10056 changes.append(("nic.%s/%d" % (key, idx), params[key]))
10057 setattr(nic, key, params[key])
10059 new_net = params.get(constants.INIC_NETWORK, nic.network)
10060 new_net_uuid = self.cfg.LookupNetwork(new_net)
10061 if new_net_uuid != nic.network:
10062 changes.append(("nic.network/%d" % idx, new_net))
10063 nic.network = new_net_uuid
10066 nic.nicparams = private.filled
10068 for (key, val) in nic.nicparams.items():
10069 changes.append(("nic.%s/%d" % (key, idx), val))
10073 def Exec(self, feedback_fn):
10074 """Modifies an instance.
10076 All parameters take effect only at the next restart of the instance.
10079 # Process here the warnings from CheckPrereq, as we don't have a
10080 # feedback_fn there.
10081 # TODO: Replace with self.LogWarning
10082 for warn in self.warn:
10083 feedback_fn("WARNING: %s" % warn)
10085 assert ((self.op.disk_template is None) ^
10086 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
10087 "Not owning any node resource locks"
10090 instance = self.instance
10093 if self.op.pnode:
10094 instance.primary_node = self.op.pnode
10097 if self.op.runtime_mem:
10098 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
10100 self.op.runtime_mem)
10101 rpcres.Raise("Cannot modify instance runtime memory")
10102 result.append(("runtime_memory", self.op.runtime_mem))
10104 # Apply disk changes
10105 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
10106 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
10107 _UpdateIvNames(0, instance.disks)
10109 if self.op.disk_template:
10111 check_nodes = set(instance.all_nodes)
10112 if self.op.remote_node:
10113 check_nodes.add(self.op.remote_node)
10114 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
10115 owned = self.owned_locks(level)
10116 assert not (check_nodes - owned), \
10117 ("Not owning the correct locks, owning %r, expected at least %r" %
10118 (owned, check_nodes))
10120 r_shut = _ShutdownInstanceDisks(self, instance)
10122 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10123 " proceed with disk template conversion")
10124 mode = (instance.disk_template, self.op.disk_template)
10126 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10128 self.cfg.ReleaseDRBDMinors(instance.name)
10130 result.append(("disk_template", self.op.disk_template))
10132 assert instance.disk_template == self.op.disk_template, \
10133 ("Expected disk template '%s', found '%s'" %
10134 (self.op.disk_template, instance.disk_template))
10136 # Release node and resource locks if there are any (they might already have
10137 # been released during disk conversion)
10138 _ReleaseLocks(self, locking.LEVEL_NODE)
10139 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10141 # Apply NIC changes
10142 if self._new_nics is not None:
10143 instance.nics = self._new_nics
10144 result.extend(self._nic_chgdesc)
10147 if self.op.hvparams:
10148 instance.hvparams = self.hv_inst
10149 for key, val in self.op.hvparams.iteritems():
10150 result.append(("hv/%s" % key, val))
10153 if self.op.beparams:
10154 instance.beparams = self.be_inst
10155 for key, val in self.op.beparams.iteritems():
10156 result.append(("be/%s" % key, val))
10159 if self.op.os_name:
10160 instance.os = self.op.os_name
10163 if self.op.osparams:
10164 instance.osparams = self.os_inst
10165 for key, val in self.op.osparams.iteritems():
10166 result.append(("os/%s" % key, val))
10168 if self.op.offline is None:
10171 elif self.op.offline:
10172 # Mark instance as offline
10173 self.cfg.MarkInstanceOffline(instance.name)
10174 result.append(("admin_state", constants.ADMINST_OFFLINE))
10176 # Mark instance as online, but stopped
10177 self.cfg.MarkInstanceDown(instance.name)
10178 result.append(("admin_state", constants.ADMINST_DOWN))
10180 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
10182 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
10183 self.owned_locks(locking.LEVEL_NODE)), \
10184 "All node locks should have been released by now"
10188 _DISK_CONVERSIONS = {
10189 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10190 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10191 }
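# Illustrative sketch (assumption, not part of the original module): a
# plain -> drbd conversion is normally requested through OpInstanceSetParams;
# the instance and node names below are made up.
#
#   opcodes.OpInstanceSetParams(instance_name="inst1.example.com",
#                               disk_template=constants.DT_DRBD8,
#                               remote_node="node2.example.com")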
10194 class LUInstanceChangeGroup(LogicalUnit):
10195 HPATH = "instance-change-group"
10196 HTYPE = constants.HTYPE_INSTANCE
10199 def ExpandNames(self):
10200 self.share_locks = _ShareAll()
10202 self.needed_locks = {
10203 locking.LEVEL_NODEGROUP: [],
10204 locking.LEVEL_NODE: [],
10205 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
10208 self._ExpandAndLockInstance()
10210 if self.op.target_groups:
10211 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
10212 self.op.target_groups)
10214 self.req_target_uuids = None
10216 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
10218 def DeclareLocks(self, level):
10219 if level == locking.LEVEL_NODEGROUP:
10220 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10222 if self.req_target_uuids:
10223 lock_groups = set(self.req_target_uuids)
10225 # Lock all groups used by instance optimistically; this requires going
10226 # via the node before it's locked, requiring verification later on
10227 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10228 lock_groups.update(instance_groups)
10230 # No target groups, need to lock all of them
10231 lock_groups = locking.ALL_SET
10233 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
10235 elif level == locking.LEVEL_NODE:
10236 if self.req_target_uuids:
10237 # Lock all nodes used by instances
10238 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10239 self._LockInstancesNodes()
10241 # Lock all nodes in all potential target groups
10242 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
10243 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
10244 member_nodes = [node_name
10245 for group in lock_groups
10246 for node_name in self.cfg.GetNodeGroup(group).members]
10247 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
10249 # Lock all nodes as all groups are potential targets
10250 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10252 def CheckPrereq(self):
10253 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
10254 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
10255 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
10257 assert (self.req_target_uuids is None or
10258 owned_groups.issuperset(self.req_target_uuids))
10259 assert owned_instances == set([self.op.instance_name])
10261 # Get instance information
10262 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10264 # Check if node groups for locked instance are still correct
10265 assert owned_nodes.issuperset(self.instance.all_nodes), \
10266 ("Instance %s's nodes changed while we kept the lock" %
10267 self.op.instance_name)
10269 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
10272 if self.req_target_uuids:
10273 # User requested specific target groups
10274 self.target_uuids = frozenset(self.req_target_uuids)
10276 # All groups except those used by the instance are potential targets
10277 self.target_uuids = owned_groups - inst_groups
10279 conflicting_groups = self.target_uuids & inst_groups
10280 if conflicting_groups:
10281 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
10282 " used by the instance '%s'" %
10283 (utils.CommaJoin(conflicting_groups),
10284 self.op.instance_name),
10285 errors.ECODE_INVAL)
10287 if not self.target_uuids:
10288 raise errors.OpPrereqError("There are no possible target groups",
10289 errors.ECODE_INVAL)
10291 def BuildHooksEnv(self):
10292 """Build hooks env.
10295 assert self.target_uuids
10298 "TARGET_GROUPS": " ".join(self.target_uuids),
10301 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10305 def BuildHooksNodes(self):
10306 """Build hooks nodes.
10309 mn = self.cfg.GetMasterNode()
10310 return ([mn], [mn])
10312 def Exec(self, feedback_fn):
10313 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
10315 assert instances == [self.op.instance_name], "Instance not locked"
10317 req = iallocator.IAReqGroupChange(instances=instances,
10318 target_groups=list(self.target_uuids))
10319 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10321 ial.Run(self.op.iallocator)
10323 if not ial.success:
10324 raise errors.OpPrereqError("Can't compute solution for changing group of"
10325 " instance '%s' using iallocator '%s': %s" %
10326 (self.op.instance_name, self.op.iallocator,
10327 ial.info), errors.ECODE_NORES)
10329 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
10331 self.LogInfo("Iallocator returned %s job(s) for changing group of"
10332 " instance '%s'", len(jobs), self.op.instance_name)
10334 return ResultWithJobs(jobs)
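# Illustrative sketch (assumption): a group change is requested through the
# corresponding opcode; the opcode name and field values below mirror the
# self.op attributes used above but are assumptions for illustration.
#
#   opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                 target_groups=["group2"],
#                                 iallocator="hail")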
10337 class LUBackupQuery(NoHooksLU):
10338 """Query the exports list
10343 def CheckArguments(self):
10344 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
10345 ["node", "export"], self.op.use_locking)
10347 def ExpandNames(self):
10348 self.expq.ExpandNames(self)
10350 def DeclareLocks(self, level):
10351 self.expq.DeclareLocks(self, level)
10353 def Exec(self, feedback_fn):
10356 for (node, expname) in self.expq.OldStyleQuery(self):
10357 if expname is None:
10358 result[node] = False
10360 result.setdefault(node, []).append(expname)
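# Illustrative sketch (not part of the original module): shape of the
# old-style result built above, mapping each node name to either False (the
# node could not be queried) or the list of export names found on it, e.g.
#
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}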
10365 class _ExportQuery(_QueryBase):
10366 FIELDS = query.EXPORT_FIELDS
10368 #: The node name is not a unique key for this query
10369 SORT_FIELD = "node"
10371 def ExpandNames(self, lu):
10372 lu.needed_locks = {}
10374 # The following variables interact with _QueryBase._GetNames
10376 self.wanted = _GetWantedNodes(lu, self.names)
10378 self.wanted = locking.ALL_SET
10380 self.do_locking = self.use_locking
10382 if self.do_locking:
10383 lu.share_locks = _ShareAll()
10384 lu.needed_locks = {
10385 locking.LEVEL_NODE: self.wanted,
10389 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10391 def DeclareLocks(self, lu, level):
10394 def _GetQueryData(self, lu):
10395 """Computes the list of nodes and their attributes.
10398 # Locking is not used
10400 assert not (compat.any(lu.glm.is_owned(level)
10401 for level in locking.LEVELS
10402 if level != locking.LEVEL_CLUSTER) or
10403 self.do_locking or self.use_locking)
10405 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
10409 for (node, nres) in lu.rpc.call_export_list(nodes).items():
10411 result.append((node, None))
10413 result.extend((node, expname) for expname in nres.payload)
10418 class LUBackupPrepare(NoHooksLU):
10419 """Prepares an instance for an export and returns useful information.
10424 def ExpandNames(self):
10425 self._ExpandAndLockInstance()
10427 def CheckPrereq(self):
10428 """Check prerequisites.
10431 instance_name = self.op.instance_name
10433 self.instance = self.cfg.GetInstanceInfo(instance_name)
10434 assert self.instance is not None, \
10435 "Cannot retrieve locked instance %s" % self.op.instance_name
10436 _CheckNodeOnline(self, self.instance.primary_node)
10438 self._cds = _GetClusterDomainSecret()
10440 def Exec(self, feedback_fn):
10441 """Prepares an instance for an export.
10444 instance = self.instance
10446 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10447 salt = utils.GenerateSecret(8)
10449 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10450 result = self.rpc.call_x509_cert_create(instance.primary_node,
10451 constants.RIE_CERT_VALIDITY)
10452 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10454 (name, cert_pem) = result.payload
10456 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10460 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10461 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10463 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10469 class LUBackupExport(LogicalUnit):
10470 """Export an instance to an image in the cluster.
10473 HPATH = "instance-export"
10474 HTYPE = constants.HTYPE_INSTANCE
10477 def CheckArguments(self):
10478 """Check the arguments.
10481 self.x509_key_name = self.op.x509_key_name
10482 self.dest_x509_ca_pem = self.op.destination_x509_ca
10484 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10485 if not self.x509_key_name:
10486 raise errors.OpPrereqError("Missing X509 key name for encryption",
10487 errors.ECODE_INVAL)
10489 if not self.dest_x509_ca_pem:
10490 raise errors.OpPrereqError("Missing destination X509 CA",
10491 errors.ECODE_INVAL)
10493 def ExpandNames(self):
10494 self._ExpandAndLockInstance()
10496 # Lock all nodes for local exports
10497 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10498 # FIXME: lock only instance primary and destination node
10500 # Sad but true, for now we have to lock all nodes, as we don't know where
10501 # the previous export might be, and in this LU we search for it and
10502 # remove it from its current node. In the future we could fix this by:
10503 # - making a tasklet to search (share-lock all), then create the
10504 # new one, then one to remove, after
10505 # - removing the removal operation altogether
10506 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10508 # Allocations should be stopped while this LU runs with node locks, but
10509 # it doesn't have to be exclusive
10510 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
10511 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10513 def DeclareLocks(self, level):
10514 """Last minute lock declaration."""
10515 # All nodes are locked anyway, so nothing to do here.
10517 def BuildHooksEnv(self):
10518 """Build hooks env.
10520 This will run on the master, primary node and target node.
10524 "EXPORT_MODE": self.op.mode,
10525 "EXPORT_NODE": self.op.target_node,
10526 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10527 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10528 # TODO: Generic function for boolean env variables
10529 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10532 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10536 def BuildHooksNodes(self):
10537 """Build hooks nodes.
10540 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10542 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10543 nl.append(self.op.target_node)
10547 def CheckPrereq(self):
10548 """Check prerequisites.
10550 This checks that the instance and node names are valid.
10553 instance_name = self.op.instance_name
10555 self.instance = self.cfg.GetInstanceInfo(instance_name)
10556 assert self.instance is not None, \
10557 "Cannot retrieve locked instance %s" % self.op.instance_name
10558 _CheckNodeOnline(self, self.instance.primary_node)
10560 if (self.op.remove_instance and
10561 self.instance.admin_state == constants.ADMINST_UP and
10562 not self.op.shutdown):
10563 raise errors.OpPrereqError("Can not remove instance without shutting it"
10564 " down before", errors.ECODE_STATE)
10566 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10567 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10568 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10569 assert self.dst_node is not None
10571 _CheckNodeOnline(self, self.dst_node.name)
10572 _CheckNodeNotDrained(self, self.dst_node.name)
10575 self.dest_disk_info = None
10576 self.dest_x509_ca = None
10578 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10579 self.dst_node = None
10581 if len(self.op.target_node) != len(self.instance.disks):
10582 raise errors.OpPrereqError(("Received destination information for %s"
10583 " disks, but instance %s has %s disks") %
10584 (len(self.op.target_node), instance_name,
10585 len(self.instance.disks)),
10586 errors.ECODE_INVAL)
10588 cds = _GetClusterDomainSecret()
10590 # Check X509 key name
10592 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10593 except (TypeError, ValueError), err:
10594 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
10595 errors.ECODE_INVAL)
10597 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10598 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10599 errors.ECODE_INVAL)
10601 # Load and verify CA
10603 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10604 except OpenSSL.crypto.Error, err:
10605 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10606 (err, ), errors.ECODE_INVAL)
10608 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10609 if errcode is not None:
10610 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10611 (msg, ), errors.ECODE_INVAL)
10613 self.dest_x509_ca = cert
10615 # Verify target information
10617 for idx, disk_data in enumerate(self.op.target_node):
10619 (host, port, magic) = \
10620 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10621 except errors.GenericError, err:
10622 raise errors.OpPrereqError("Target info for disk %s: %s" %
10623 (idx, err), errors.ECODE_INVAL)
10625 disk_info.append((host, port, magic))
10627 assert len(disk_info) == len(self.op.target_node)
10628 self.dest_disk_info = disk_info
10631 raise errors.ProgrammerError("Unhandled export mode %r" %
10634 # instance disk type verification
10635 # TODO: Implement export support for file-based disks
10636 for disk in self.instance.disks:
10637 if disk.dev_type == constants.LD_FILE:
10638 raise errors.OpPrereqError("Export not supported for instances with"
10639 " file-based disks", errors.ECODE_INVAL)
10641 def _CleanupExports(self, feedback_fn):
10642 """Removes exports of current instance from all other nodes.
10644 If an instance in a cluster with nodes A..D was exported to node C, its
10645 exports will be removed from the nodes A, B and D.
10648 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10650 nodelist = self.cfg.GetNodeList()
10651 nodelist.remove(self.dst_node.name)
10653 # on one-node clusters nodelist will be empty after the removal
10654 # if we proceed, the backup would be removed because OpBackupQuery
10655 # substitutes an empty list with the full cluster node list.
10656 iname = self.instance.name
10658 feedback_fn("Removing old exports for instance %s" % iname)
10659 exportlist = self.rpc.call_export_list(nodelist)
10660 for node in exportlist:
10661 if exportlist[node].fail_msg:
10663 if iname in exportlist[node].payload:
10664 msg = self.rpc.call_export_remove(node, iname).fail_msg
10666 self.LogWarning("Could not remove older export for instance %s"
10667 " on node %s: %s", iname, node, msg)
10669 def Exec(self, feedback_fn):
10670 """Export an instance to an image in the cluster.
10673 assert self.op.mode in constants.EXPORT_MODES
10675 instance = self.instance
10676 src_node = instance.primary_node
10678 if self.op.shutdown:
10679 # shutdown the instance, but not the disks
10680 feedback_fn("Shutting down instance %s" % instance.name)
10681 result = self.rpc.call_instance_shutdown(src_node, instance,
10682 self.op.shutdown_timeout,
10684 # TODO: Maybe ignore failures if ignore_remove_failures is set
10685 result.Raise("Could not shutdown instance %s on"
10686 " node %s" % (instance.name, src_node))
10688 # set the disks ID correctly since call_instance_start needs the
10689 # correct drbd minor to create the symlinks
10690 for disk in instance.disks:
10691 self.cfg.SetDiskID(disk, src_node)
10693 activate_disks = (instance.admin_state != constants.ADMINST_UP)
10696 # Activate the instance disks if we're exporting a stopped instance
10697 feedback_fn("Activating disks for %s" % instance.name)
10698 _StartInstanceDisks(self, instance, None)
10701 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10704 helper.CreateSnapshots()
10706 if (self.op.shutdown and
10707 instance.admin_state == constants.ADMINST_UP and
10708 not self.op.remove_instance):
10709 assert not activate_disks
10710 feedback_fn("Starting instance %s" % instance.name)
10711 result = self.rpc.call_instance_start(src_node,
10712 (instance, None, None), False,
10714 msg = result.fail_msg
10716 feedback_fn("Failed to start instance: %s" % msg)
10717 _ShutdownInstanceDisks(self, instance)
10718 raise errors.OpExecError("Could not start instance: %s" % msg)
10720 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10721 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10722 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10723 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10724 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10726 (key_name, _, _) = self.x509_key_name
10729 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10732 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10733 key_name, dest_ca_pem,
10738 # Check for backwards compatibility
10739 assert len(dresults) == len(instance.disks)
10740 assert compat.all(isinstance(i, bool) for i in dresults), \
10741 "Not all results are boolean: %r" % dresults
10745 feedback_fn("Deactivating disks for %s" % instance.name)
10746 _ShutdownInstanceDisks(self, instance)
10748 if not (compat.all(dresults) and fin_resu):
10751 failures.append("export finalization")
10752 if not compat.all(dresults):
10753 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10755 failures.append("disk export: disk(s) %s" % fdsk)
10757 raise errors.OpExecError("Export failed, errors in %s" %
10758 utils.CommaJoin(failures))
10760 # At this point, the export was successful, we can cleanup/finish
10762 # Remove instance if requested
10763 if self.op.remove_instance:
10764 feedback_fn("Removing instance %s" % instance.name)
10765 _RemoveInstance(self, feedback_fn, instance,
10766 self.op.ignore_remove_failures)
10768 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10769 self._CleanupExports(feedback_fn)
10771 return fin_resu, dresults
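# Illustrative sketch (assumption, not part of the original module): a local
# export as driven by this LU; the opcode name and the instance/node names
# are assumptions for illustration.
#
#   opcodes.OpBackupExport(instance_name="inst1.example.com",
#                          target_node="node3.example.com",
#                          mode=constants.EXPORT_MODE_LOCAL,
#                          shutdown=True)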
10774 class LUBackupRemove(NoHooksLU):
10775 """Remove exports related to the named instance.
10780 def ExpandNames(self):
10781 self.needed_locks = {
10782 # We need all nodes to be locked in order for RemoveExport to work, but
10783 # we don't need to lock the instance itself, as nothing will happen to it
10784 # (and we can remove exports also for a removed instance)
10785 locking.LEVEL_NODE: locking.ALL_SET,
10787 # Removing backups is quick, so blocking allocations is justified
10788 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
10791 # Allocations should be stopped while this LU runs with node locks, but it
10792 # doesn't have to be exclusive
10793 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
10795 def Exec(self, feedback_fn):
10796 """Remove any export.
10799 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10800 # If the instance was not found we'll try with the name that was passed in.
10801 # This will only work if it was an FQDN, though.
10803 if not instance_name:
10805 instance_name = self.op.instance_name
10807 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10808 exportlist = self.rpc.call_export_list(locked_nodes)
10810 for node in exportlist:
10811 msg = exportlist[node].fail_msg
10813 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10815 if instance_name in exportlist[node].payload:
10817 result = self.rpc.call_export_remove(node, instance_name)
10818 msg = result.fail_msg
10820 logging.error("Could not remove export for instance %s"
10821 " on node %s: %s", instance_name, node, msg)
10823 if fqdn_warn and not found:
10824 feedback_fn("Export not found. If trying to remove an export belonging"
10825 " to a deleted instance please use its Fully Qualified"
10829 class LURestrictedCommand(NoHooksLU):
10830 """Logical unit for executing restricted commands.
10835 def ExpandNames(self):
10837 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10839 self.needed_locks = {
10840 locking.LEVEL_NODE: self.op.nodes,
10842 self.share_locks = {
10843 locking.LEVEL_NODE: not self.op.use_locking,
10846 def CheckPrereq(self):
10847 """Check prerequisites.
10851 def Exec(self, feedback_fn):
10852 """Execute restricted command and return output.
10855 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
10857 # Check if correct locks are held
10858 assert set(self.op.nodes).issubset(owned_nodes)
10860 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
10864 for node_name in self.op.nodes:
10865 nres = rpcres[node_name]
10867 msg = ("Command '%s' on node '%s' failed: %s" %
10868 (self.op.command, node_name, nres.fail_msg))
10869 result.append((False, msg))
10871 result.append((True, nres.payload))
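# Illustrative sketch (not part of the original module): shape of the result
# built above, one (success, output-or-error) tuple per requested node, e.g.
#
#   [(True, "command output"),
#    (False, "Command 'x' on node 'node2' failed: ...")]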
10876 #: Query type implementations
10877 _QUERY_IMPL = {
10878 constants.QR_CLUSTER: _ClusterQuery,
10879 constants.QR_INSTANCE: _InstanceQuery,
10880 constants.QR_NODE: _NodeQuery,
10881 constants.QR_GROUP: _GroupQuery,
10882 constants.QR_NETWORK: _NetworkQuery,
10883 constants.QR_OS: _OsQuery,
10884 constants.QR_EXTSTORAGE: _ExtStorageQuery,
10885 constants.QR_EXPORT: _ExportQuery,
10886 }
10888 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
10891 def _GetQueryImplementation(name):
10892 """Returns the implemtnation for a query type.
10894 @param name: Query type, must be one of L{constants.QR_VIA_OP}
10897 try:
10898 return _QUERY_IMPL[name]
10899 except KeyError:
10900 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
10901 errors.ECODE_INVAL)
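# Illustrative usage sketch (not part of the original module):
#
#   qcls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   # an unknown resource name raises OpPrereqError with ECODE_INVAL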
10904 def _CheckForConflictingIp(lu, ip, node):
10905 """In case of conflicting IP address raise error.
10908 @param ip: IP address
10910 @param node: node name
10913 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
10914 if conf_net is not None:
10915 raise errors.OpPrereqError(("The requested IP address (%s) belongs to"
10916 " network %s, but the target NIC does not." %
10918 errors.ECODE_STATE)
10920 return (None, None)
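# Illustrative usage sketch (not part of the original module): this helper is
# called from the NIC checks above, e.g.
#
#   _CheckForConflictingIp(self, "192.0.2.10", pnode)
#
# It raises OpPrereqError(ECODE_STATE) when the address falls inside a
# network connected to the node's group while the NIC is not on that network.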