4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
# NOTE(review): this listing is elided (gaps in the embedded line numbers);
# the comments below describe only what the visible code shows.
49 class LogicalUnit(object):
50 """Logical Unit base class.
52 Subclasses must follow these rules:
53 - implement ExpandNames
54 - implement CheckPrereq (except when tasklets are used)
55 - implement Exec (except when tasklets are used)
56 - implement BuildHooksEnv
57 - redefine HPATH and HTYPE
58 - optionally redefine their run requirements:
59 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
61 Note that all commands require root permissions.
63 @ivar dry_run_result: the value (if any) that will be returned to the caller
64 in dry-run mode (signalled by opcode dry_run parameter)
72 def __init__(self, processor, op, context, rpc):
73 """Constructor for LogicalUnit.
75 This needs to be overridden in derived classes in order to check op
# Cache references to the cluster configuration and the execution context.
81 self.cfg = context.cfg
82 self.context = context
84 # Dicts used to declare locking needs to mcpu
85 self.needed_locks = None
86 self.acquired_locks = {}
# share_locks maps each locking level to 0 (exclusive) by default.
87 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
89 self.remove_locks = {}
90 # Used to force good behavior when calling helper functions
91 self.recalculate_locks = {}
# Logging helpers are borrowed from the processor so all LUs log uniformly.
94 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
95 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
96 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
98 self.dry_run_result = None
99 # support for generic debug attribute
# Normalize op.debug_level to an int, defaulting to 0 when absent/invalid.
100 if (not hasattr(self.op, "debug_level") or
101 not isinstance(self.op.debug_level, int)):
102 self.op.debug_level = 0
# Validate that every opcode parameter listed in _OP_REQP is present.
# NOTE(review): the None-check guarding the raise appears elided here.
107 for attr_name in self._OP_REQP:
108 attr_val = getattr(op, attr_name, None)
110 raise errors.OpPrereqError("Required parameter '%s' missing" %
111 attr_name, errors.ECODE_INVAL)
113 self.CheckArguments()
# Lazily-created SshRunner, exposed via the 'ssh' property below.
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)
125 def CheckArguments(self):
126 """Check syntactic validity for the opcode arguments.
128 This method is for doing a simple syntactic check and ensure
129 validity of opcode parameters, without any cluster-related
130 checks. While the same can be accomplished in ExpandNames and/or
131 CheckPrereq, doing these separate is better because:
133 - ExpandNames is left as as purely a lock-related function
134 - CheckPrereq is run after we have acquired locks (and possible
137 The function is allowed to change the self.op attribute so that
138 later methods can no longer worry about missing parameters.
143 def ExpandNames(self):
144 """Expand names for this LU.
146 This method is called before starting to execute the opcode, and it should
147 update all the parameters of the opcode to their canonical form (e.g. a
148 short node name must be fully expanded after this method has successfully
149 completed). This way locking, hooks, logging, ecc. can work correctly.
151 LUs which implement this method must also populate the self.needed_locks
152 member, as a dict with lock levels as keys, and a list of needed lock names
155 - use an empty dict if you don't need any lock
156 - if you don't need any lock at a particular level omit that level
157 - don't put anything for the BGL level
158 - if you want all locks at a level use locking.ALL_SET as a value
160 If you need to share locks (rather than acquire them exclusively) at one
161 level you can modify self.share_locks, setting a true value (usually 1) for
162 that level. By default locks are not shared.
164 This function can also define a list of tasklets, which then will be
165 executed in order instead of the usual LU-level CheckPrereq and Exec
166 functions, if those are not defined by the LU.
170 # Acquire all nodes and one instance
171 self.needed_locks = {
172 locking.LEVEL_NODE: locking.ALL_SET,
173 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
175 # Acquire just two nodes
176 self.needed_locks = {
177 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
180 self.needed_locks = {} # No, you can't leave it to the default value None
183 # The implementation of this method is mandatory only if the new LU is
184 # concurrent, so that old LUs don't need to be changed all at the same
187 self.needed_locks = {} # Exclusive LUs don't need locks.
# Base class implementation: subclasses must override this method.
189 raise NotImplementedError
191 def DeclareLocks(self, level):
192 """Declare LU locking needs for a level
194 While most LUs can just declare their locking needs at ExpandNames time,
195 sometimes there's the need to calculate some locks after having acquired
196 the ones before. This function is called just before acquiring locks at a
197 particular level, but after acquiring the ones at lower levels, and permits
198 such calculations. It can be used to modify self.needed_locks, and by
199 default it does nothing.
201 This function is only called if you have something already set in
202 self.needed_locks for the level.
204 @param level: Locking level which is going to be locked
205 @type level: member of ganeti.locking.LEVELS
209 def CheckPrereq(self):
210 """Check prerequisites for this LU.
212 This method should check that the prerequisites for the execution
213 of this LU are fulfilled. It can do internode communication, but
214 it should be idempotent - no cluster or system changes are
217 The method should raise errors.OpPrereqError in case something is
218 not fulfilled. Its return value is ignored.
220 This method should also update all the parameters of the opcode to
221 their canonical form if it hasn't been done by ExpandNames before.
# When tasklets are in use, delegate the prerequisite checks to each tasklet.
224 if self.tasklets is not None:
225 for (idx, tl) in enumerate(self.tasklets):
226 logging.debug("Checking prerequisites for tasklet %s/%s",
227 idx + 1, len(self.tasklets))
# Without tasklets, subclasses must override CheckPrereq themselves.
230 raise NotImplementedError
232 def Exec(self, feedback_fn):
235 This method should implement the actual work. It should raise
236 errors.OpExecError for failures that are somewhat dealt with in
# Same delegation pattern as CheckPrereq: tasklets run in declaration order.
240 if self.tasklets is not None:
241 for (idx, tl) in enumerate(self.tasklets):
242 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
245 raise NotImplementedError
247 def BuildHooksEnv(self):
248 """Build hooks environment for this LU.
250 This method should return a three-node tuple consisting of: a dict
251 containing the environment that will be used for running the
252 specific hook for this LU, a list of node names on which the hook
253 should run before the execution, and a list of node names on which
254 the hook should run after the execution.
256 The keys of the dict must not have 'GANETI_' prefixed as this will
257 be handled in the hooks runner. Also note additional keys will be
258 added by the hooks runner. If the LU doesn't define any
259 environment, an empty dict (and not None) should be returned.
261 No nodes should be returned as an empty list (and not None).
263 Note that if the HPATH for a LU class is None, this function will
267 raise NotImplementedError
269 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
270 """Notify the LU about the results of its hooks.
272 This method is called every time a hooks phase is executed, and notifies
273 the Logical Unit about the hooks' result. The LU can then use it to alter
274 its result based on the hooks. By default the method does nothing and the
275 previous result is passed back unchanged but any LU can define it if it
276 wants to use the local cluster hook-scripts somehow.
278 @param phase: one of L{constants.HOOKS_PHASE_POST} or
279 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
280 @param hook_results: the results of the multi-node hooks rpc call
281 @param feedback_fn: function used send feedback back to the caller
282 @param lu_result: the previous Exec result this LU had, or None
284 @return: the new Exec result, based on the previous result
288 # API must be kept, thus we ignore the unused argument and could
289 # be a function warnings
290 # pylint: disable-msg=W0613,R0201
293 def _ExpandAndLockInstance(self):
294 """Helper function to expand and lock an instance.
296 Many LUs that work on an instance take its name in self.op.instance_name
297 and need to expand it and then declare the expanded name for locking. This
298 function does it, and then updates self.op.instance_name to the expanded
299 name. It also initializes needed_locks as a dict, if this hasn't been done
# Initialize needed_locks lazily so this helper works from ExpandNames.
303 if self.needed_locks is None:
304 self.needed_locks = {}
# Refuse to overwrite instance-level locks a caller already declared.
306 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
307 "_ExpandAndLockInstance called with instance-level locks set"
308 self.op.instance_name = _ExpandInstanceName(self.cfg,
309 self.op.instance_name)
310 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
312 def _LockInstancesNodes(self, primary_only=False):
313 """Helper function to declare instances' nodes for locking.
315 This function should be called after locking one or more instances to lock
316 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
317 with all primary or secondary nodes for instances already locked and
318 present in self.needed_locks[locking.LEVEL_INSTANCE].
320 It should be called from DeclareLocks, and for safety only works if
321 self.recalculate_locks[locking.LEVEL_NODE] is set.
323 In the future it may grow parameters to just lock some instance's nodes, or
324 to just lock primaries or secondary nodes, if needed.
326 If should be called in DeclareLocks in a way similar to::
328 if level == locking.LEVEL_NODE:
329 self._LockInstancesNodes()
331 @type primary_only: boolean
332 @param primary_only: only lock primary nodes of locked instances
335 assert locking.LEVEL_NODE in self.recalculate_locks, \
336 "_LockInstancesNodes helper function called with no nodes to recalculate"
338 # TODO: check if we're really been called with the instance locks held
340 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
341 # future we might want to have different behaviors depending on the value
342 # of self.recalculate_locks[locking.LEVEL_NODE]
# NOTE(review): the initialization of wanted_nodes appears elided here.
344 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
345 instance = self.context.cfg.GetInstanceInfo(instance_name)
346 wanted_nodes.append(instance.primary_node)
# Secondary nodes are added only when primary_only is false (guard elided).
348 wanted_nodes.extend(instance.secondary_nodes)
# Honor the declared recalculation mode: replace or append node locks.
350 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
351 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
352 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
353 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
# Drop the marker so the helper cannot be invoked twice by mistake.
355 del self.recalculate_locks[locking.LEVEL_NODE]
358 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
359 """Simple LU which runs no hooks.
361 This LU is intended as a parent for other LogicalUnits which will
362 run no hooks, in order to reduce duplicate code.
# NOTE(review): HPATH/HTYPE redefinitions appear elided from this listing.
368 def BuildHooksEnv(self):
369 """Empty BuildHooksEnv for NoHooksLu.
371 This just raises an error.
# Hooks must never run for a NoHooksLU, so reaching this is a programmer bug.
374 assert False, "BuildHooksEnv called for NoHooksLUs"
# NOTE(review): the 'class Tasklet(object):' header line appears elided
# from this listing; the docstring and methods below belong to it.
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
385 - Implement CheckPrereq
389 def __init__(self, lu):
396 def CheckPrereq(self):
397 """Check prerequisites for this tasklets.
399 This method should check whether the prerequisites for the execution of
400 this tasklet are fulfilled. It can do internode communication, but it
401 should be idempotent - no cluster or system changes are allowed.
403 The method should raise errors.OpPrereqError in case something is not
404 fulfilled. Its return value is ignored.
406 This method should also update all parameters to their canonical form if it
407 hasn't been done before.
# Abstract method: concrete tasklets must override.
410 raise NotImplementedError
412 def Exec(self, feedback_fn):
413 """Execute the tasklet.
415 This method should implement the actual work. It should raise
416 errors.OpExecError for failures that are somewhat dealt with in code, or
# Abstract method: concrete tasklets must override.
420 raise NotImplementedError
423 def _GetWantedNodes(lu, nodes):
424 """Returns list of checked and expanded node names.
426 @type lu: L{LogicalUnit}
427 @param lu: the logical unit on whose behalf we execute
429 @param nodes: list of node names or None for all nodes
431 @return: the list of nodes, sorted
432 @raise errors.ProgrammerError: if the nodes parameter is wrong type
# A non-list argument is a caller error, reported as an OpPrereqError.
435 if not isinstance(nodes, list):
436 raise errors.OpPrereqError("Invalid argument type 'nodes'",
# NOTE(review): the emptiness guard for this raise appears elided here.
440 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
441 " non-empty list of nodes whose name is to be expanded.")
# Expand each (possibly short) node name and return them NiceSort-ed.
443 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
444 return utils.NiceSort(wanted)
447 def _GetWantedInstances(lu, instances):
448 """Returns list of checked and expanded instance names.
450 @type lu: L{LogicalUnit}
451 @param lu: the logical unit on whose behalf we execute
452 @type instances: list
453 @param instances: list of instance names or None for all instances
455 @return: the list of instances, sorted
456 @raise errors.OpPrereqError: if the instances parameter is wrong type
457 @raise errors.OpPrereqError: if any of the passed instances is not found
# NOTE(review): the 'instances is not None' branch structure and the final
# return appear elided here; as listed, line 467 would clobber line 465.
460 if not isinstance(instances, list):
461 raise errors.OpPrereqError("Invalid argument type 'instances'",
465 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
467 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
471 def _CheckOutputFields(static, dynamic, selected):
472 """Checks whether all selected fields are valid.
474 @type static: L{utils.FieldSet}
475 @param static: static fields set
476 @type dynamic: L{utils.FieldSet}
477 @param dynamic: dynamic fields set
# NOTE(review): the construction of the combined field set 'f' appears
# elided here; 'delta' holds the selected fields not matching any known one.
484 delta = f.NonMatching(selected)
486 raise errors.OpPrereqError("Unknown output fields selected: %s"
487 % ",".join(delta), errors.ECODE_INVAL)
490 def _CheckBooleanOpField(op, name):
491 """Validates boolean opcode parameters.
493 This will ensure that an opcode parameter is either a boolean value,
494 or None (but that it always exists).
# Read the attribute defensively; absent attributes become None.
497 val = getattr(op, name, None)
498 if not (val is None or isinstance(val, bool)):
499 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
500 (name, str(val)), errors.ECODE_INVAL)
# Write the (possibly defaulted) value back so the attribute always exists.
501 setattr(op, name, val)
504 def _CheckGlobalHvParams(params):
505 """Validates that given hypervisor params are not global ones.
507 This will ensure that instances don't get customised versions of
# Any overlap with the cluster-global hypervisor parameters is an error.
511 used_globals = constants.HVC_GLOBALS.intersection(params)
# NOTE(review): the truthiness check on used_globals appears elided here.
513 msg = ("The following hypervisor parameters are global and cannot"
514 " be customized at instance level, please modify them at"
515 " cluster level: %s" % utils.CommaJoin(used_globals))
516 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
519 def _CheckNodeOnline(lu, node):
520 """Ensure that a given node is online.
522 @param lu: the LU on behalf of which we make the check
523 @param node: the node to check
524 @raise errors.OpPrereqError: if the node is offline
# Consult the configuration's view of the node's offline flag.
527 if lu.cfg.GetNodeInfo(node).offline:
528 raise errors.OpPrereqError("Can't use offline node %s" % node,
532 def _CheckNodeNotDrained(lu, node):
533 """Ensure that a given node is not drained.
535 @param lu: the LU on behalf of which we make the check
536 @param node: the node to check
537 @raise errors.OpPrereqError: if the node is drained
# Consult the configuration's view of the node's drained flag.
540 if lu.cfg.GetNodeInfo(node).drained:
541 raise errors.OpPrereqError("Can't use drained node %s" % node,
545 def _CheckNodeHasOS(lu, node, os_name, force_variant):
546 """Ensure that a node supports a given OS.
548 @param lu: the LU on behalf of which we make the check
549 @param node: the node to check
550 @param os_name: the OS to query about
551 @param force_variant: whether to ignore variant errors
552 @raise errors.OpPrereqError: if the node is not supporting the OS
# Ask the node, via RPC, whether it knows the requested OS.
555 result = lu.rpc.call_os_get(node, os_name)
556 result.Raise("OS '%s' not in supported OS list for node %s" %
558 prereq=True, ecode=errors.ECODE_INVAL)
# Variant validation can be skipped by callers that force the variant.
559 if not force_variant:
560 _CheckOSVariant(result.payload, os_name)
563 def _CheckDiskTemplate(template):
564 """Ensure a given disk template is valid.
# Reject anything not in the known set of disk templates.
567 if template not in constants.DISK_TEMPLATES:
568 msg = ("Invalid disk template name '%s', valid templates are: %s" %
569 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
570 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
# File-based storage is a compile-time option; refuse it when disabled.
571 if template == constants.DT_FILE and not constants.ENABLE_FILE_STORAGE:
572 raise errors.OpPrereqError("File storage disabled at configure time",
576 def _CheckInstanceDown(lu, instance, reason):
577 """Ensure that an instance is not running."""
# First check the configured (intended) state of the instance.
578 if instance.admin_up:
579 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
580 (instance.name, reason), errors.ECODE_STATE)
# Then check the runtime state by asking the primary node's hypervisor.
582 pnode = instance.primary_node
583 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
584 ins_l.Raise("Can't contact node %s for instance information" % pnode,
585 prereq=True, ecode=errors.ECODE_ENVIRON)
587 if instance.name in ins_l.payload:
588 raise errors.OpPrereqError("Instance %s is running, %s" %
589 (instance.name, reason), errors.ECODE_STATE)
592 def _ExpandItemName(fn, name, kind):
593 """Expand an item name.
595 @param fn: the function to use for expansion
596 @param name: requested item name
597 @param kind: text description ('Node' or 'Instance')
598 @return: the resolved (full) name
599 @raise errors.OpPrereqError: if the item is not found
# NOTE(review): the 'full_name = fn(name)' call and the final return appear
# elided from this listing; a None result means the name could not be found.
603 if full_name is None:
604 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
619 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
620 memory, vcpus, nics, disk_template, disks,
621 bep, hvp, hypervisor_name):
622 """Builds instance related env variables for hooks
624 This builds the hook environment from individual variables.
627 @param name: the name of the instance
628 @type primary_node: string
629 @param primary_node: the name of the instance's primary node
630 @type secondary_nodes: list
631 @param secondary_nodes: list of secondary nodes as strings
632 @type os_type: string
633 @param os_type: the name of the instance's OS
634 @type status: boolean
635 @param status: the should_run status of the instance
637 @param memory: the memory size of the instance
639 @param vcpus: the count of VCPUs the instance has
641 @param nics: list of tuples (ip, mac, mode, link) representing
642 the NICs the instance has
643 @type disk_template: string
644 @param disk_template: the disk template of the instance
646 @param disks: the list of (size, mode) pairs
648 @param bep: the backend parameters for the instance
650 @param hvp: the hypervisor parameters for the instance
651 @type hypervisor_name: string
652 @param hypervisor_name: the hypervisor for the instance
654 @return: the hook environment for this instance
# NOTE(review): the derivation of str_status and the start of the env dict
# literal appear elided from this listing.
663 "INSTANCE_NAME": name,
664 "INSTANCE_PRIMARY": primary_node,
665 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
666 "INSTANCE_OS_TYPE": os_type,
667 "INSTANCE_STATUS": str_status,
668 "INSTANCE_MEMORY": memory,
669 "INSTANCE_VCPUS": vcpus,
670 "INSTANCE_DISK_TEMPLATE": disk_template,
671 "INSTANCE_HYPERVISOR": hypervisor_name,
# Per-NIC variables are emitted with the NIC index embedded in the key.
675 nic_count = len(nics)
676 for idx, (ip, mac, mode, link) in enumerate(nics):
679 env["INSTANCE_NIC%d_IP" % idx] = ip
680 env["INSTANCE_NIC%d_MAC" % idx] = mac
681 env["INSTANCE_NIC%d_MODE" % idx] = mode
682 env["INSTANCE_NIC%d_LINK" % idx] = link
# For bridged NICs the link doubles as the bridge name.
683 if mode == constants.NIC_MODE_BRIDGED:
684 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
688 env["INSTANCE_NIC_COUNT"] = nic_count
# Per-disk variables follow the same indexed-key pattern.
691 disk_count = len(disks)
692 for idx, (size, mode) in enumerate(disks):
693 env["INSTANCE_DISK%d_SIZE" % idx] = size
694 env["INSTANCE_DISK%d_MODE" % idx] = mode
698 env["INSTANCE_DISK_COUNT"] = disk_count
# Backend (BE) and hypervisor (HV) parameters are exported verbatim.
700 for source, kind in [(bep, "BE"), (hvp, "HV")]:
701 for key, value in source.items():
702 env["INSTANCE_%s_%s" % (kind, key)] = value
707 def _NICListToTuple(lu, nics):
708 """Build a list of nic information tuples.
710 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
711 value in LUQueryInstanceData.
713 @type lu: L{LogicalUnit}
714 @param lu: the logical unit on whose behalf we execute
715 @type nics: list of L{objects.NIC}
716 @param nics: list of nics to convert to hooks tuples
# Cluster-level NIC defaults; per-NIC parameters are layered on top below.
720 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
# NOTE(review): the loop header and the ip/mac extraction appear elided
# from this listing.
724 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
725 mode = filled_params[constants.NIC_MODE]
726 link = filled_params[constants.NIC_LINK]
727 hooks_nics.append((ip, mac, mode, link))
731 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
732 """Builds instance related env variables for hooks from an object.
734 @type lu: L{LogicalUnit}
735 @param lu: the logical unit on whose behalf we execute
736 @type instance: L{objects.Instance}
737 @param instance: the instance for which we should build the
740 @param override: dictionary with key/values that will override
743 @return: the hook environment dictionary
# Resolve the instance's effective backend/hypervisor parameters.
746 cluster = lu.cfg.GetClusterInfo()
747 bep = cluster.FillBE(instance)
748 hvp = cluster.FillHV(instance)
# Keyword arguments for _BuildInstanceHookEnv, built from the instance object.
750 'name': instance.name,
751 'primary_node': instance.primary_node,
752 'secondary_nodes': instance.secondary_nodes,
753 'os_type': instance.os,
754 'status': instance.admin_up,
755 'memory': bep[constants.BE_MEMORY],
756 'vcpus': bep[constants.BE_VCPUS],
757 'nics': _NICListToTuple(lu, instance.nics),
758 'disk_template': instance.disk_template,
759 'disks': [(disk.size, disk.mode) for disk in instance.disks],
762 'hypervisor_name': instance.hypervisor,
# Caller-supplied overrides win over the values derived above.
765 args.update(override)
766 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
769 def _AdjustCandidatePool(lu, exceptions):
770 """Adjust the candidate pool after node operations.
# Let the configuration promote nodes as needed; mod_list holds promotions.
773 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
775 lu.LogInfo("Promoted nodes to master candidate role: %s",
776 utils.CommaJoin(node.name for node in mod_list))
# Re-add each promoted node so the running context picks up the new role.
777 for name in mod_list:
778 lu.context.ReaddNode(name)
779 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
781 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
785 def _DecideSelfPromotion(lu, exceptions=None):
786 """Decide whether I should promote myself as a master candidate.
789 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
790 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
791 # the new node will increase mc_max with one, so:
792 mc_should = min(mc_should + 1, cp_size)
# Promote only if the current candidate count is below the desired count.
793 return mc_now < mc_should
796 def _CheckNicsBridgesExist(lu, target_nics, target_node,
797 profile=constants.PP_DEFAULT):
798 """Check that the brigdes needed by a list of nics exist.
# Layer per-NIC parameters over the cluster defaults for the given profile.
801 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
802 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
803 for nic in target_nics]
# Only bridged NICs need their link (bridge) verified on the target node.
804 brlist = [params[constants.NIC_LINK] for params in paramslist
805 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
807 result = lu.rpc.call_bridges_exist(target_node, brlist)
808 result.Raise("Error checking bridges on destination node '%s'" %
809 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
812 def _CheckInstanceBridgesExist(lu, instance, node=None):
813 """Check that the brigdes needed by an instance exist.
# NOTE(review): the 'if node is None:' guard appears elided here; the
# default target is the instance's primary node.
817 node = instance.primary_node
818 _CheckNicsBridgesExist(lu, instance.nics, node)
821 def _CheckOSVariant(os_obj, name):
822 """Check whether an OS name conforms to the os variants specification.
824 @type os_obj: L{objects.OS}
825 @param os_obj: OS object to check
827 @param name: OS name passed by the user, to check for validity
# OSes without declared variants accept any plain name.
830 if not os_obj.supported_variants:
# The variant is the part after the first '+' in the user-supplied name.
833 variant = name.split("+", 1)[1]
# NOTE(review): the guard checking that a '+' separator is present appears
# elided before this raise.
835 raise errors.OpPrereqError("OS name must include a variant",
838 if variant not in os_obj.supported_variants:
839 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
842 def _GetNodeInstancesInner(cfg, fn):
843 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
846 def _GetNodeInstances(cfg, node_name):
847 """Returns a list of all primary and secondary instances on a node.
# Matches any instance that lists node_name among all of its nodes.
851 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
854 def _GetNodePrimaryInstances(cfg, node_name):
855 """Returns primary instances on a node.
# Matches only instances whose primary node is node_name.
858 return _GetNodeInstancesInner(cfg,
859 lambda inst: node_name == inst.primary_node)
862 def _GetNodeSecondaryInstances(cfg, node_name):
863 """Returns secondary instances on a node.
# Matches only instances that list node_name among their secondary nodes.
866 return _GetNodeInstancesInner(cfg,
867 lambda inst: node_name in inst.secondary_nodes)
870 def _GetStorageTypeArgs(cfg, storage_type):
871 """Returns the arguments for a storage type.
874 # Special case for file storage
875 if storage_type == constants.ST_FILE:
876 # storage.FileStorage wants a list of storage directories
877 return [[cfg.GetFileStorageDir()]]
# NOTE(review): the fallback return for other storage types appears elided.
882 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
# Attach node-specific disk IDs before querying mirror status remotely.
885 for dev in instance.disks:
886 cfg.SetDiskID(dev, node_name)
888 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
889 result.Raise("Failed to get disk status from node %s" % node_name,
890 prereq=prereq, ecode=errors.ECODE_ENVIRON)
# Collect the indices of disks whose local-disk status is faulty.
892 for idx, bdev_status in enumerate(result.payload):
893 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
899 def _FormatTimestamp(secs):
900 """Formats a Unix timestamp in UTC.
# NOTE(review): the original docstring said "with the local timezone", but
# time.gmtime() converts to UTC, so the text above was corrected.
903 return time.strftime("%F %T %Z", time.gmtime(secs))
906 class LUPostInitCluster(LogicalUnit):
907 """Logical unit for running hooks after cluster initialization.
910 HPATH = "cluster-init"
911 HTYPE = constants.HTYPE_CLUSTER
914 def BuildHooksEnv(self):
# The only hook variable is the cluster name; the hook targets the master.
918 env = {"OP_TARGET": self.cfg.GetClusterName()}
919 mn = self.cfg.GetMasterNode()
922 def CheckPrereq(self):
923 """No prerequisites to check.
928 def Exec(self, feedback_fn):
935 class LUDestroyCluster(LogicalUnit):
936 """Logical unit for destroying the cluster.
939 HPATH = "cluster-destroy"
940 HTYPE = constants.HTYPE_CLUSTER
943 def BuildHooksEnv(self):
947 env = {"OP_TARGET": self.cfg.GetClusterName()}
950 def CheckPrereq(self):
951 """Check prerequisites.
953 This checks whether the cluster is empty.
955 Any errors are signaled by raising errors.OpPrereqError.
958 master = self.cfg.GetMasterNode()
# The cluster may only be destroyed when the master is its sole node.
960 nodelist = self.cfg.GetNodeList()
961 if len(nodelist) != 1 or nodelist[0] != master:
962 raise errors.OpPrereqError("There are still %d node(s) in"
963 " this cluster." % (len(nodelist) - 1),
# No instances may remain either (emptiness guard appears elided).
965 instancelist = self.cfg.GetInstanceList()
967 raise errors.OpPrereqError("There are still %d instance(s) in"
968 " this cluster." % len(instancelist),
971 def Exec(self, feedback_fn):
972 """Destroys the cluster.
975 master = self.cfg.GetMasterNode()
976 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
978 # Run post hooks on master node before it's removed
979 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
981 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
983 # pylint: disable-msg=W0702
# Best-effort hooks: failures are logged as warnings, not fatal errors.
984 self.LogWarning("Errors occurred running hooks on %s" % master)
986 result = self.rpc.call_node_stop_master(master, False)
987 result.Raise("Could not disable the master role")
# Back up the Ganeti SSH key pair before tearing the cluster down.
990 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
991 utils.CreateBackup(priv_key)
992 utils.CreateBackup(pub_key)
997 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
998 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
999 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1000 """Verifies certificate details for LUVerifyCluster.
# NOTE(review): the 'if expired:' guard for this branch appears elided.
1004 msg = "Certificate %s is expired" % filename
# Enrich the expiry message with whatever validity bounds are available.
1006 if not_before is not None and not_after is not None:
1007 msg += (" (valid from %s to %s)" %
1008 (_FormatTimestamp(not_before),
1009 _FormatTimestamp(not_after)))
1010 elif not_before is not None:
1011 msg += " (valid from %s)" % _FormatTimestamp(not_before)
1012 elif not_after is not None:
1013 msg += " (valid until %s)" % _FormatTimestamp(not_after)
1015 return (LUVerifyCluster.ETYPE_ERROR, msg)
# A certificate that is not yet valid only warrants a warning.
1017 elif not_before is not None and not_before > now:
1018 return (LUVerifyCluster.ETYPE_WARNING,
1019 "Certificate %s not yet valid (valid from %s)" %
1020 (filename, _FormatTimestamp(not_before)))
# Otherwise classify by how many whole days remain until expiry.
1022 elif not_after is not None:
1023 remaining_days = int((not_after - now) / (24 * 3600))
1025 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1027 if remaining_days <= error_days:
1028 return (LUVerifyCluster.ETYPE_ERROR, msg)
1030 if remaining_days <= warn_days:
1031 return (LUVerifyCluster.ETYPE_WARNING, msg)
1036 def _VerifyCertificate(filename):
1037 """Verifies a certificate for LUVerifyCluster.
1039 @type filename: string
1040 @param filename: Path to PEM file
# NOTE(review): the 'try:' opening this handler appears elided here.
1044 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1045 utils.ReadFile(filename))
# An unreadable/unparsable certificate is reported as a verify error
# rather than propagating the exception.
1046 except Exception, err: # pylint: disable-msg=W0703
1047 return (LUVerifyCluster.ETYPE_ERROR,
1048 "Failed to load X509 certificate %s: %s" % (filename, err))
1050 # Depending on the pyOpenSSL version, this can just return (None, None)
1051 (not_before, not_after) = utils.GetX509CertValidity(cert)
1053 return _VerifyCertificateInner(filename, cert.has_expired(),
1054 not_before, not_after, time.time())
1057 class LUVerifyCluster(LogicalUnit):
1058 """Verifies the cluster status.
1061 HPATH = "cluster-verify"
1062 HTYPE = constants.HTYPE_CLUSTER
1063 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
# Error-target categories used to classify verification problems.
1066 TCLUSTER = "cluster"
1068 TINSTANCE = "instance"
# (category, code) pairs identifying each possible verification error.
1070 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1071 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1072 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1073 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1074 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1075 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
# NOTE(review): the line below duplicates the EINSTANCEMISSINGDISK
# assignment above; the second assignment is redundant and could be removed.
1076 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1077 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1078 ENODEDRBD = (TNODE, "ENODEDRBD")
1079 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1080 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1081 ENODEHV = (TNODE, "ENODEHV")
1082 ENODELVM = (TNODE, "ENODELVM")
1083 ENODEN1 = (TNODE, "ENODEN1")
1084 ENODENET = (TNODE, "ENODENET")
1085 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1086 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1087 ENODERPC = (TNODE, "ENODERPC")
1088 ENODESSH = (TNODE, "ENODESSH")
1089 ENODEVERSION = (TNODE, "ENODEVERSION")
1090 ENODESETUP = (TNODE, "ENODESETUP")
1091 ENODETIME = (TNODE, "ENODETIME")
# Keys/values used when formatting error messages in _Error.
1093 ETYPE_FIELD = "code"
1094 ETYPE_ERROR = "ERROR"
1095 ETYPE_WARNING = "WARNING"
1097 class NodeImage(object):
1098 """A class representing the logical and physical status of a node.
1100 @ivar volumes: a structure as returned from
1101 L{ganeti.backend.GetVolumeList} (runtime)
1102 @ivar instances: a list of running instances (runtime)
1103 @ivar pinst: list of configured primary instances (config)
1104 @ivar sinst: list of configured secondary instances (config)
1105 @ivar sbp: diction of {secondary-node: list of instances} of all peers
1106 of this node (config)
1107 @ivar mfree: free memory, as reported by hypervisor (runtime)
1108 @ivar dfree: free disk, as reported by the node (runtime)
1109 @ivar offline: the offline status (config)
1110 @type rpc_fail: boolean
1111 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1112 not whether the individual keys were correct) (runtime)
1113 @type lvm_fail: boolean
1114 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1115 @type hyp_fail: boolean
1116 @ivar hyp_fail: whether the RPC call didn't return the instance list
1117 @type ghost: boolean
1118 @ivar ghost: whether this is a known node or not (config)
1121 def __init__(self, offline=False):
# NOTE(review): initialization of volumes/instances/pinst/sinst/etc.
# appears elided from this listing; only the flags below are visible.
1129 self.offline = offline
1130 self.rpc_fail = False
1131 self.lvm_fail = False
1132 self.hyp_fail = False
1135 def ExpandNames(self):
1136 self.needed_locks = {
1137 locking.LEVEL_NODE: locking.ALL_SET,
1138 locking.LEVEL_INSTANCE: locking.ALL_SET,
1140 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    # severity defaults to ERROR unless overridden via the ETYPE_FIELD kwarg
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    # first complete the msg
    # then format the whole message
    if self.op.error_codes:
      # NOTE(review): etxt/itype are never assigned in the visible code —
      # the `(itype, etxt) = ecode` unpack (and the msg % args expansion)
      # appear to have been lost from this listing; confirm against the
      # original file
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
      # NOTE(review): this second format was presumably the `else:` branch
      # (human-readable form) — the else line is missing here
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)
  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    # simulate-errors mode forces every check to be treated as failed
    cond = bool(cond) or self.op.debug_simulate_errors
    # NOTE(review): an `if cond:` guard appears to be missing before the
    # _Error call — as written every invocation would log; confirm against
    # the original file
    self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:
      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the respose)

    """
    # NOTE(review): `node` is used below but never bound in the visible
    # code — a `node = ninfo.name` line appears to have been elided
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    # NOTE(review): an early `return False` on `test` appears to be missing

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        # a non-None per-hypervisor result is an error message
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    # NOTE(review): this call is unterminated in the listing — the argument
    # joining `test` (and the final `return True`) were elided
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    # NOTE(review): `node = ninfo.name` appears to have been elided here
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    # NOTE(review): the `try:` opening this handler was elided from the
    # listing, as was the `return` after reporting invalid time
    ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")

    # node time must fall inside the RPC window, extended by the allowed
    # clock skew on either side
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    # NOTE(review): the `else: ntime_diff = None` arm and the final
    # `ntime_diff)` argument closing this call were elided
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data (volume groups and physical volumes).

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    # NOTE(review): `node = ninfo.name` and the early return for
    # `vg_name is None` appear to have been elided from this listing
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    # NOTE(review): the `test = not vglist` assignment is missing here
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                          constants.MIN_VG_SIZE)
    # CheckVolumeGroupSize returns an error string on failure, None on ok
    _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    # check that ':' is not present in PV names, since it's a
    # special character for lvcreate (denotes the range of PEs to
    # use on the PV)
    for _, pvname, owner_vg in pvlist:
      test = ":" in pvname
      _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
               " '%s' of VG '%s'", pvname, owner_vg)
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity (ssh and tcp tests).

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    # NOTE(review): `node = ninfo.name` appears to have been elided here
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    # a non-empty NODELIST payload maps failing peer -> error message
    if nresult[constants.NV_NODELIST]:
      for a_node, a_msg in nresult[constants.NV_NODELIST].items():
        _ErrorIf(True, self.ENODESSH, node,
                 "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if nresult[constants.NV_NODENETTEST]:
      nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
      # NOTE(review): the `for anode in nlist:` loop header was elided —
      # `anode` below is otherwise unbound
      _ErrorIf(True, self.ENODENET, node,
               "tcp communication with node '%s': %s",
               anode, nresult[constants.NV_NODENETTEST][anode])
  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    @param instance: the instance name
    @param instanceconfig: the instance configuration object
    @param node_image: dict of node name to L{NodeImage}

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        # NOTE(review): the `continue` implementing this skip was elided
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      # NOTE(review): this call is unterminated — the final `node_current)`
      # argument was elided from the listing
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",

    # an instance should be running only on its primary node
    for node, n_img in node_image.items():
      if (not node == node_current):
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @param node_vol_should: dict of node name to expected volume lists
    @param node_image: dict of node name to L{NodeImage}

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        # NOTE(review): the `continue` implementing this skip was elided
      for volume in n_img.volumes:
        # a volume is orphaned when no configured instance should have it
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
1402 def _VerifyOrphanInstances(self, instancelist, node_image):
1403 """Verify the list of running instances.
1405 This checks what instances are running but unknown to the cluster.
1408 for node, n_img in node_image.items():
1409 for o_inst in n_img.instances:
1410 test = o_inst not in instancelist
1411 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1412 "instance %s on node %s should not exist", o_inst, node)
1414 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1415 """Verify N+1 Memory Resilience.
1417 Check that if one single node dies we can still start all the
1418 instances it was primary for.
1421 for node, n_img in node_image.items():
1422 # This code checks that every node which is now listed as
1423 # secondary has enough memory to host all instances it is
1424 # supposed to should a single other node in the cluster fail.
1425 # FIXME: not ready for failover to an arbitrary node
1426 # FIXME: does not support file-backed instances
1427 # WARNING: we currently take into account down instances as well
1428 # as up ones, considering that even if they're down someone
1429 # might want to start them even in the event of a node failure.
1430 for prinode, instances in n_img.sbp.items():
1432 for instance in instances:
1433 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1434 if bep[constants.BE_AUTO_BALANCE]:
1435 needed_mem += bep[constants.BE_MEMORY]
1436 test = n_img.mfree < needed_mem
1437 self._ErrorIf(test, self.ENODEN1, node,
1438 "not enough memory on to accommodate"
1439 " failovers should peer node %s fail", prinode)
1441 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1443 """Verifies and computes the node required file checksums.
1445 @type ninfo: L{objects.Node}
1446 @param ninfo: the node to check
1447 @param nresult: the remote results for the node
1448 @param file_list: required list of files
1449 @param local_cksum: dictionary of local files and their checksums
1450 @param master_files: list of files that only masters should have
1454 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1456 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1457 test = not isinstance(remote_cksum, dict)
1458 _ErrorIf(test, self.ENODEFILECHECK, node,
1459 "node hasn't returned file checksum data")
1463 for file_name in file_list:
1464 node_is_mc = ninfo.master_candidate
1465 must_have = (file_name not in master_files) or node_is_mc
1467 test1 = file_name not in remote_cksum
1469 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1471 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1472 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1473 "file '%s' missing", file_name)
1474 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1475 "file '%s' has wrong checksum", file_name)
1476 # not candidate and this is not a must-have file
1477 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1478 "file '%s' should not exist on non master"
1479 " candidates (and the file is outdated)", file_name)
1480 # all good, except non-master/non-must have combination
1481 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1482 "file '%s' should not exist"
1483 " on non master candidates", file_name)
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    # NOTE(review): `node = ninfo.name` and the `node_drbd = {}` init
    # appear to have been elided from this listing
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      # NOTE(review): the `if test:` / `else:` pair selecting between
      # these two assignments was elided
      node_drbd[minor] = (instance, False)
      instance = instanceinfo[instance]
      node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    # we cannot check drbd status
    # NOTE(review): the `return` implementing the comment above was elided

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    # NOTE(review): `node = ninfo.name` appears to have been elided here
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # pessimistic default: flag LVM as failed until proven otherwise below
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    # NOTE(review): the leading `if` branch of this chain (presumably the
    # `vg_name is None` case) was elided from the listing
    elif isinstance(lvdata, basestring):
      # a string payload is an error message from the node
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    # NOTE(review): the `else:` introducing the success path was elided
    nimg.volumes = lvdata
    nimg.lvm_fail = False
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    # NOTE(review): the `if test:` / `else:` pair selecting between the
    # two assignments below was elided from this listing
    nimg.hyp_fail = True
    nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    # NOTE(review): `node = ninfo.name` appears to have been elided here
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    # NOTE(review): the `if not test:` guard and `try:` wrapping this
    # conversion were elided from the listing
    nimg.mfree = int(hv_info["memory_free"])
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODERPC, node,
               "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      # NOTE(review): the `try:` wrapping this conversion was elided
      nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid LVM info, check LVM status")
1621 def CheckPrereq(self):
1622 """Check prerequisites.
1624 Transform the list of checks we're going to skip into a set and check that
1625 all its members are valid.
1628 self.skip_set = frozenset(self.op.skip_checks)
1629 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1630 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1633 def BuildHooksEnv(self):
1636 Cluster-Verify hooks just ran in the post phase and their failure makes
1637 the output be logged in the verify output and the verification to fail.
1640 all_nodes = self.cfg.GetNodeList()
1642 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1644 for node in self.cfg.GetAllNodesInfo().values():
1645 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1647 return env, [], all_nodes
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    @param feedback_fn: callable used to report progress and findings

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    # NOTE(review): this dict literal is unterminated in the listing —
    # its closing brace was elided
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,

    # LVM/DRBD checks are only relevant when a volume group is configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node: referenced by an instance but not in the config
          gnode = self.NodeImage()
          # NOTE(review): a `gnode.ghost = True` marker assignment appears
          # to have been elided here
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window for the clock-skew check
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      # NOTE(review): `node = node_i.name` appears to have been elided here
      nimg = node_image[node]

      # NOTE(review): the offline-node guard (`if node_i.offline:` plus
      # `n_offline += 1` / `continue`) around this message was elided
      feedback_fn("* Skipping offline node %s" % (node,))

      if node == master_node:
        # NOTE(review): `ntype = "master"` elided
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        # NOTE(review): the drained/regular ntype assignments and the
        # n_drained counter update were elided
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      # NOTE(review): the `if msg:` guard and `continue` around the
      # rpc_fail marking were elided
      nimg.rpc_fail = True
      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      # NOTE(review): this call is unterminated — the trailing
      # `master_files)` argument was elided
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      # NOTE(review): the `if verbose:` guard for this message was elided
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)
        # NOTE(review): the `if s_img.offline:` guard for this append
        # was elided
        inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    # NOTE(review): TYPO in the user-visible message — "oprhan" should be
    # "orphan" (runtime string; cannot be changed in a doc-only edit)
    feedback_fn("* Verifying oprhan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    # NOTE(review): the `if i_non_redundant:` / `if n_offline:` /
    # `if n_drained:` guards and the final `return not self.bad` appear
    # to have been elided from the remainder of this method
    feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
    feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        # NOTE(review): `msg = res.fail_msg` appears to have been elided
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          # NOTE(review): the lu_result override and `continue` implementing
          # the comment above were elided
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          # NOTE(review): the `if test:` guard, output indentation block and
          # the final `return lu_result` were elided from the listing
          output = indent_re.sub('  ', output)
          feedback_fn("%s" % output)
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  def ExpandNames(self):
    # NOTE(review): this dict literal is unterminated in the listing —
    # its closing brace was elided
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    # all locks are acquired in shared mode
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    # NOTE(review): the `pass` body of this method was elided

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    # NOTE(review): the `nv_dict = {}` and per-instance `inst_lvs = {}`
    # initializations were elided from the listing
    for inst in instances:
      # only running, network-mirrored instances are of interest here
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        # NOTE(review): the `continue` implementing this skip was elided
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    # NOTE(review): the `for node in nodes:` loop header and the
    # `continue` statements for the offline/error branches were elided
    node_res = node_lvs[node]
    if node_res.offline:
    msg = node_res.fail_msg
    # keep going on per-node errors, but record them for the caller
    logging.warning("Error enumerating LVs on node %s: %s", node, msg)
    res_nodes[node] = msg

    lvs = node_res.payload
    for lv_name, (_, _, lv_online) in lvs.items():
      inst = nv_dict.pop((node, lv_name), None)
      # an offline LV belonging to a known instance needs activate-disks
      if (not lv_online and inst is not None
          and inst.name not in res_instances):
        res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)
    # NOTE(review): the final `return result` was elided from the listing
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]

  def ExpandNames(self):
    # NOTE(review): this raise is unterminated in the listing — its error
    # classification argument was elided
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",

    if self.op.instances:
      # restrict the locks to the named instances and their primary nodes
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      # NOTE(review): this dict literal is unterminated — closing brace elided
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
      # NOTE(review): the `else:` introducing the all-instances branch
      # below was elided from the listing
      self.wanted_names = None
      # NOTE(review): this dict literal is also unterminated
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
    # all locks are acquired in shared mode
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    # only the primary nodes of the wanted instances need to be locked
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # no explicit list: operate on everything we managed to lock
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether any size mismatch was found and fixed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      # NOTE(review): the `if mismatch:` guard around the fix-up below
      # was elided from the listing
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   fchild.size, disk.size)
      fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    # NOTE(review): the `else: return False` arm was elided

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # NOTE(review): the `per_node_disks = {}` and `changed = []`
    # initializations were elided from the listing
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      # NOTE(review): the `for dsk in newl:` loop header was elided
      self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      # NOTE(review): the `if result.fail_msg:` guard and `continue`
      # around this warning were elided
      self.LogWarning("Failure in blockdev_getsizes call to node"
                      " %s, ignoring", node)
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        # NOTE(review): the `if size is None:` guard and `continue`
        # statements of this loop were elided
        self.LogWarning("Disk %d of instance %s did not return size"
                        " information, ignoring", idx, instance.name)
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          # NOTE(review): the `disk.size = size` assignment was elided
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    # NOTE(review): the final `return changed` was elided
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    Runs on the master before the rename and on all nodes afterwards.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    Resolves the new name and refuses the rename if nothing would
    change, or if the new IP is already live on the network.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      # a reachable IP means somebody else already owns the address
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP before changing the name, so clients don't
    # talk to a master under the wrong identity
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)
    finally:
      # always try to restart the master role, even if the update failed
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  # recurse first: any lvm-based descendant makes the whole disk
  # lvm-based, regardless of this node's own dev_type
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["vg_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    Normalizes optional opcode fields and validates the candidate pool
    size before any locks are taken.

    """
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)

    _CheckBooleanOpField(self.op, "maintain_node_health")

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    # an empty (but not None) vg_name means "disable lvm storage", which
    # is only allowed if no lvm-based instances exist
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Applies the values validated in CheckPrereq to the live cluster
    object and saves it in one final config update.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    self.cfg.Update(self.cluster, feedback_fn)
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  # the master already has all these files; don't copy to ourselves
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                   ])

  # each enabled hypervisor may contribute extra config files of its own
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  # best-effort: files missing on the master are skipped, and per-node
  # upload failures are only warned about
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    # shared lock on all nodes: we only read the config and push files
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing to check.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # a no-op Update still triggers ConfigWriter's push of config/ssconf
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  Polls the primary node's mirror status once per loop iteration and
  sleeps until all disks report fully synced (or, with C{oneshot},
  returns after a single check).

  @param lu: the calling logical unit (used for config, RPC and logging)
  @param instance: the instance whose disks to wait for
  @param oneshot: if True, only check once instead of looping
  @rtype: boolean
  @return: True if the disks are not degraded, False otherwise

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @param lu: the calling logical unit
  @param dev: the disk object to check (children are checked recursively)
  @param node: the node on which to check
  @param on_primary: whether the node is the disk's primary
  @rtype: boolean
  @return: True if the disk (and all its children) is consistent

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing to check.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "")]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        # an OS is globally valid only if its first entry is valid on
        # every good node; variants are the intersection across nodes
        valid = True
        variants = None
        for osl in os_data.values():
          valid = valid and osl and osl[0][1]
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read directly as attributes off the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields requiring a live RPC query to the node
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      # without locking, named nodes may disappear between expansion
      # and this point; report that explicitly
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          # failed RPC: dynamic fields will resolve to None below
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    # only walk the instance list if an instance-related field was asked for
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          # single-letter role: Master/Candidate/Drained/Offline/Regular
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    # precompute the LV-to-node map per instance for the "instance" field
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # find the instance owning this LV on this node, if any
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                break
            else:
              inst = None
            if inst is not None:
              val = inst.name
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    storage_type = self.op.storage_type

    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    # "name" is an optional opcode field used as a filter
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      # emit rows sorted by storage unit name
      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    Expands the node name and validates the storage type before locking.

    """
    # the expanded name must be stored back into the opcode: ExpandNames
    # locks on self.op.node_name and Exec passes it to the RPC call
    # (assigning it to an unrelated attribute would silently drop the
    # expansion result)
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type
    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the requested changes only touch fields which are
    modifiable for this storage type.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    # NOTE(review): the entries below presumably belong to an
    # ``env = {...}`` literal whose delimiters are missing here — confirm
    # against the full source.
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the new node is not already in the config
    - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    # resolve the node name via DNS; also yields the canonical name/IP
    dns_data = utils.GetHostInfo(node_name)
    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      # single-homed setup: secondary defaults to the primary IP
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
    self.op.secondary_ip = secondary_ip
    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
    # make sure the new node's IPs do not clash with any existing node
    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)
      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)
    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)
    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)
    # decide whether the node should be promoted to master candidate
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    # NOTE(review): the two assignments below look like the readd vs.
    # fresh-add branches of an if/else whose headers are missing here.
    self.new_node = self.cfg.GetNodeInfo(node)
    assert self.new_node is not None, "Can't retrieve locked node %s" % node
    self.new_node = objects.Node(name=node,
                                 primary_ip=primary_ip,
                                 secondary_ip=secondary_ip,
                                 master_candidate=self.master_candidate,
                                 offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name
    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
    self.LogInfo("Readding a node, the offline/drained flags were reset")
    # if we demote the node, we do cleanup later in the procedure
    new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
      # NOTE(review): this raise presumably sits in the else branch of the
      # version comparison above — confirm against the full source.
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))
    # setup ssh on node, if the cluster manages the ssh setup
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
        keyarray.append(utils.ReadFile(i))
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      # verify that the node indeed owns the secondary IP it claims
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    # ask the master node to verify ssh/hostname connectivity to the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      for failed in nl_payload:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")
      # NOTE(review): the readd vs. fresh-add split (if self.op.readd /
      # else) appears to be missing around the two branches below.
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # the three state flags plus auto_promote must be booleans (or None)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    # demotion from master candidate may happen explicitly or as a side
    # effect of offlining/draining
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote

  def ExpandNames(self):
    # NOTE(review): these two assignments look like the two branches of an
    # ``if self.lock_all: ... else: ...`` whose headers are missing here.
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    # NOTE(review): the entries below presumably belong to an
    # ``env = {...}`` literal whose delimiters are missing here.
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
    nl = [self.cfg.GetMasterNode(),

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",

    # an offline or drained node can't become a master candidate unless
    # that state is cleared in the same operation
    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

  def Exec(self, feedback_fn):
    """Modifies the requested node flags.

    """
    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          result.append(("master_candidate", "auto-demotion due to offline"))
          # offlining implies the node can no longer be drained
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
          # NOTE(review): presumably guarded by ``if msg:`` with
          # ``msg = rrc.fail_msg`` — lines appear missing here.
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          self.LogWarning("Node failed to demote itself: %s" % msg)
          # draining implies the node can no longer be offline
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
      self.context.ReaddNode(node)
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # powercycling the master is only allowed with the force flag
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """

  def Exec(self, feedback_fn):
    """Schedules the powercycle on the target node via RPC.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  def ExpandNames(self):
    # read-only query: no locks needed
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params
    # NOTE(review): the entries below presumably belong to a
    # ``result = {...}`` literal whose delimiters are missing here.
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",

  def ExpandNames(self):
    self.needed_locks = {}
    # validate the requested fields against the static/dynamic field sets
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        # drained queue is signalled by the existence of the drain file
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
        # NOTE(review): this raise presumably sits in an ``else:`` branch
        # whose header is missing here.
        raise errors.ParameterError(field)
      values.append(entry)
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # node locks are computed later, once the instance's nodes are known
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    # older opcodes may lack the ignore_size attribute; default it
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
        _AssembleInstanceDisks(self, self.instance,
                               ignore_size=self.op.ignore_size)
      # NOTE(review): presumably guarded by ``if not disks_ok:`` — the
      # header appears missing here.
      raise errors.OpExecError("Cannot activate block devices")
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      # drop the recorded size so activation doesn't fail on a size mismatch
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
      dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Assembles the instance's disks; on failure shuts them back down and
  raises OpExecError.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
    # NOTE(review): the block below presumably sits under
    # ``if not disks_ok:`` — the header appears missing here.
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # node locks are computed later, once the instance's nodes are known
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    # refuses to act if the instance is still running
    _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance)
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  treated as fatal (presumably reflected in the return value — lines
  appear missing here; confirm against the full source).

  """
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
        # NOTE(review): presumably guarded by ``if msg:`` — the header
        # appears missing here.
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  # a non-int answer means the node couldn't report its free memory
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    # a non-int answer means the node couldn't report its free VG space
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the entry below presumably belongs to an
    # ``env = {...}`` literal whose delimiters are missing here.
      "FORCE": self.op.force,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams (optional on the opcode, default to empty)
    self.beparams = getattr(self.op, "beparams", {})
    if not isinstance(self.beparams, dict):
      raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                 " dict" % (type(self.beparams), ),
    # fill the beparams dict
    utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
    self.op.beparams = self.beparams

    # extra hvparams (optional on the opcode, default to empty)
    self.hvparams = getattr(self.op, "hvparams", {})
    if not isinstance(self.hvparams, dict):
      raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                 " dict" % (type(self.hvparams), ),

    # check hypervisor parameter syntax (locally)
    cluster = self.cfg.GetClusterInfo()
    utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
    filled_hvp.update(self.hvparams)
    hv_type = hypervisor.GetHypervisor(instance.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
    self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      # only check free memory if the instance will actually be started
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
      # NOTE(review): presumably guarded by ``if msg:`` — the header
      # appears missing here; on failure the disks are shut back down.
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]

  def CheckArguments(self):
    """Check the arguments.

    """
    # shutdown_timeout is optional on the opcode; use the cluster default
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the entries below presumably belong to an
    # ``env = {...}`` literal whose delimiters are missing here.
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboot: delegated entirely to the node daemon
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")
      # NOTE(review): the block below (full reboot: shutdown, disk
      # cycle, restart) presumably sits in an ``else:`` branch whose
      # header is missing here.
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]

  def CheckArguments(self):
    """Check the arguments.

    """
    # timeout is optional on the opcode; use the cluster default
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    # mark down first, so a failed shutdown still leaves the intended state
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
      # NOTE(review): presumably guarded by ``if msg:`` — the header
      # appears missing here; shutdown failure is a warning, not fatal.
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
    _CheckInstanceDown(self, instance, "cannot reinstall")

    # os_type/force_variant are optional on the opcode
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # if the OS is being changed, make sure the primary node has it
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
      # NOTE(review): the block below presumably runs inside a
      # try/finally, with the disk shutdown in the finally clause — the
      # try/finally lines appear missing here.
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
      _ShutdownInstanceDisks(self, inst)
# LURecreateInstanceDisks: recreate some (or all) of a stopped instance's
# disks, e.g. after they were lost. Disks not listed in op.disks are
# skipped during creation.
# NOTE(review): the listing has elided lines (e.g. the second half of the
# disk-item validation at 4373 and BuildHooksEnv's return); code is kept
# byte-identical to the listing.
4356 class LURecreateInstanceDisks(LogicalUnit):
4357 """Recreate an instance's missing disks.
4360 HPATH = "instance-recreate-disks"
4361 HTYPE = constants.HTYPE_INSTANCE
4362 _OP_REQP = ["instance_name", "disks"]
4365 def CheckArguments(self):
4366 """Check the arguments.
# op.disks must be a list of disk indices (ints).
4369 if not isinstance(self.op.disks, list):
4370 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4371 for item in self.op.disks:
4372 if (not isinstance(item, int) or
4374 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4375 str(item), errors.ECODE_INVAL)
4377 def ExpandNames(self):
4378 self._ExpandAndLockInstance()
4380 def BuildHooksEnv(self):
4383 This runs on master, primary and secondary nodes of the instance.
4386 env = _BuildInstanceHookEnvByObject(self, self.instance)
4387 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4390 def CheckPrereq(self):
4391 """Check prerequisites.
4393 This checks that the instance is in the cluster and is not running.
4396 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4397 assert instance is not None, \
4398 "Cannot retrieve locked instance %s" % self.op.instance_name
4399 _CheckNodeOnline(self, instance.primary_node)
4401 if instance.disk_template == constants.DT_DISKLESS:
4402 raise errors.OpPrereqError("Instance '%s' has no disks" %
4403 self.op.instance_name, errors.ECODE_INVAL)
4404 _CheckInstanceDown(self, instance, "cannot recreate disks")
# An empty disk list means "recreate all disks".
4406 if not self.op.disks:
4407 self.op.disks = range(len(instance.disks))
# Reject indices beyond the instance's actual disk count.
4409 for idx in self.op.disks:
4410 if idx >= len(instance.disks):
4411 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4414 self.instance = instance
4416 def Exec(self, feedback_fn):
4417 """Recreate the disks.
# Build the skip list: every disk index NOT requested is left untouched.
4421 for idx, _ in enumerate(self.instance.disks):
4422 if idx not in self.op.disks: # disk idx has not been passed in
4426 _CreateDisks(self, self.instance, to_skip=to_skip)
# LURenameInstance: rename a stopped instance. Updates the cluster config
# and instance lock, renames the file-storage directory for file-based
# instances, and runs the OS rename script on the primary node.
# NOTE(review): the listing has elided lines (e.g. "if msg:" guards,
# BuildHooksEnv/CheckPrereq returns); code is kept byte-identical.
4429 class LURenameInstance(LogicalUnit):
4430 """Rename an instance.
4433 HPATH = "instance-rename"
4434 HTYPE = constants.HTYPE_INSTANCE
4435 _OP_REQP = ["instance_name", "new_name"]
4437 def BuildHooksEnv(self):
4440 This runs on master, primary and secondary nodes of the instance.
4443 env = _BuildInstanceHookEnvByObject(self, self.instance)
# Hooks additionally get the target name.
4444 env["INSTANCE_NEW_NAME"] = self.op.new_name
4445 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4448 def CheckPrereq(self):
4449 """Check prerequisites.
4451 This checks that the instance is in the cluster and is not running.
4454 self.op.instance_name = _ExpandInstanceName(self.cfg,
4455 self.op.instance_name)
4456 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4457 assert instance is not None
4458 _CheckNodeOnline(self, instance.primary_node)
4459 _CheckInstanceDown(self, instance, "cannot rename")
4460 self.instance = instance
4462 # new name verification
# Resolve the new name (and its IP) via the resolver; use the canonical
# form from here on.
4463 name_info = utils.GetHostInfo(self.op.new_name)
4465 self.op.new_name = new_name = name_info.name
4466 instance_list = self.cfg.GetInstanceList()
4467 if new_name in instance_list:
4468 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4469 new_name, errors.ECODE_EXISTS)
# Unless ignore_ip is set, make sure the new name's IP is not already live.
4471 if not getattr(self.op, "ignore_ip", False):
4472 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4473 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4474 (name_info.ip, new_name),
4475 errors.ECODE_NOTUNIQUE)
4478 def Exec(self, feedback_fn):
4479 """Reinstall the instance.
4482 inst = self.instance
4483 old_name = inst.name
# For file-based instances, remember the old storage dir before the
# config rename changes the logical IDs.
4485 if inst.disk_template == constants.DT_FILE:
4486 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4488 self.cfg.RenameInstance(inst.name, self.op.new_name)
4489 # Change the instance lock. This is definitely safe while we hold the BGL
4490 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4491 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4493 # re-read the instance from the configuration after rename
4494 inst = self.cfg.GetInstanceInfo(self.op.new_name)
# Rename the backing directory on the primary node to match the new name.
4496 if inst.disk_template == constants.DT_FILE:
4497 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4498 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4499 old_file_storage_dir,
4500 new_file_storage_dir)
4501 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4502 " (but the instance has been renamed in Ganeti)" %
4503 (inst.primary_node, old_file_storage_dir,
4504 new_file_storage_dir))
# Disks must be active so the OS rename script can run.
4506 _StartInstanceDisks(self, inst, None)
4508 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4509 old_name, self.op.debug_level)
4510 msg = result.fail_msg
# A failed rename script is only warned about: the Ganeti-side rename
# has already been committed at this point.
4512 msg = ("Could not run OS rename script for instance %s on node %s"
4513 " (but the instance has been renamed in Ganeti): %s" %
4514 (inst.name, inst.primary_node, msg))
4515 self.proc.LogWarning(msg)
4517 _ShutdownInstanceDisks(self, inst)
# LURemoveInstance: shut down an instance (with a configurable timeout),
# remove its disks and drop it from the cluster configuration.
# op.ignore_failures turns shutdown/disk-removal errors into warnings.
# NOTE(review): the listing has elided lines (e.g. "if msg:"/"else:"
# around 4578-4583); code is kept byte-identical to the listing.
4520 class LURemoveInstance(LogicalUnit):
4521 """Remove an instance.
4524 HPATH = "instance-remove"
4525 HTYPE = constants.HTYPE_INSTANCE
4526 _OP_REQP = ["instance_name", "ignore_failures"]
4529 def CheckArguments(self):
4530 """Check the arguments.
# shutdown_timeout is optional on the opcode; fall back to the default.
4533 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4534 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4536 def ExpandNames(self):
4537 self._ExpandAndLockInstance()
# Node locks are computed later from the instance's nodes.
4538 self.needed_locks[locking.LEVEL_NODE] = []
4539 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4541 def DeclareLocks(self, level):
4542 if level == locking.LEVEL_NODE:
4543 self._LockInstancesNodes()
4545 def BuildHooksEnv(self):
4548 This runs on master, primary and secondary nodes of the instance.
4551 env = _BuildInstanceHookEnvByObject(self, self.instance)
4552 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
# Pre-hooks run on the master only; post-hooks also on the (former)
# instance nodes.
4553 nl = [self.cfg.GetMasterNode()]
4554 nl_post = list(self.instance.all_nodes) + nl
4555 return env, nl, nl_post
4557 def CheckPrereq(self):
4558 """Check prerequisites.
4560 This checks that the instance is in the cluster.
4563 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4564 assert self.instance is not None, \
4565 "Cannot retrieve locked instance %s" % self.op.instance_name
4567 def Exec(self, feedback_fn):
4568 """Remove the instance.
4571 instance = self.instance
4572 logging.info("Shutting down instance %s on node %s",
4573 instance.name, instance.primary_node)
4575 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4576 self.shutdown_timeout)
4577 msg = result.fail_msg
# With ignore_failures a failed shutdown is only a warning; otherwise
# it aborts the removal.
4579 if self.op.ignore_failures:
4580 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4582 raise errors.OpExecError("Could not shutdown instance %s on"
4584 (instance.name, instance.primary_node, msg))
4586 logging.info("Removing block devices for instance %s", instance.name)
4588 if not _RemoveDisks(self, instance):
4589 if self.op.ignore_failures:
4590 feedback_fn("Warning: can't remove instance's disks")
4592 raise errors.OpExecError("Can't remove instance's disks")
4594 logging.info("Removing instance %s out of cluster config", instance.name)
# Drop from config and schedule the instance lock for removal.
4596 self.cfg.RemoveInstance(instance.name)
4597 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# LUQueryInstances: query instance attributes. Static fields come from the
# config; dynamic fields (oper_state, oper_ram, status) require live RPC
# to the primary nodes. Field names may be plain ("name", "os") or
# pattern-based ("disk.size/0", "nic.macs").
# NOTE(review): this listing is heavily elided (many branch bodies and
# "else:" lines are missing, per the embedded-numbering gaps); code is
# kept byte-identical to the listing.
4600 class LUQueryInstances(NoHooksLU):
4601 """Logical unit for querying instances.
4604 # pylint: disable-msg=W0142
4605 _OP_REQP = ["output_fields", "names", "use_locking"]
# Fields read directly as instance attributes.
4607 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4608 "serial_no", "ctime", "mtime", "uuid"]
# All statically-answerable fields, including regex-style parameterized
# ones and per-hypervisor/per-backend parameter fields.
4609 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4611 "disk_template", "ip", "mac", "bridge",
4612 "nic_mode", "nic_link",
4613 "sda_size", "sdb_size", "vcpus", "tags",
4614 "network_port", "beparams",
4615 r"(disk)\.(size)/([0-9]+)",
4616 r"(disk)\.(sizes)", "disk_usage",
4617 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4618 r"(nic)\.(bridge)/([0-9]+)",
4619 r"(nic)\.(macs|ips|modes|links|bridges)",
4620 r"(disk|nic)\.(count)",
4622 ] + _SIMPLE_FIELDS +
4624 for name in constants.HVS_PARAMETERS
4625 if name not in constants.HVC_GLOBALS] +
4627 for name in constants.BES_PARAMETERS])
# Fields needing a live query of the primary node.
4628 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4631 def ExpandNames(self):
# Validate the requested fields against the static/dynamic field sets.
4632 _CheckOutputFields(static=self._FIELDS_STATIC,
4633 dynamic=self._FIELDS_DYNAMIC,
4634 selected=self.op.output_fields)
4636 self.needed_locks = {}
# Queries only need shared locks.
4637 self.share_locks[locking.LEVEL_INSTANCE] = 1
4638 self.share_locks[locking.LEVEL_NODE] = 1
4641 self.wanted = _GetWantedInstances(self, self.op.names)
4643 self.wanted = locking.ALL_SET
# Lock nodes only when a node query is needed AND locking was requested.
4645 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4646 self.do_locking = self.do_node_query and self.op.use_locking
4648 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4649 self.needed_locks[locking.LEVEL_NODE] = []
4650 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4652 def DeclareLocks(self, level):
4653 if level == locking.LEVEL_NODE and self.do_locking:
4654 self._LockInstancesNodes()
# No prerequisites: all validation happened in ExpandNames.
4656 def CheckPrereq(self):
4657 """Check prerequisites.
4662 def Exec(self, feedback_fn):
4663 """Computes the list of nodes and their attributes.
4666 # pylint: disable-msg=R0912
4667 # way too many branches here
4668 all_info = self.cfg.GetAllInstancesInfo()
# Determine which instances to report and in which order: when no names
# were given, any order works; otherwise preserve the requested order
# and fail if some requested instance disappeared meanwhile.
4669 if self.wanted == locking.ALL_SET:
4670 # caller didn't specify instance names, so ordering is not important
4672 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4674 instance_names = all_info.keys()
4675 instance_names = utils.NiceSort(instance_names)
4677 # caller did specify names, so we must keep the ordering
4679 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4681 tgt_set = all_info.keys()
4682 missing = set(self.wanted).difference(tgt_set)
4684 raise errors.OpExecError("Some instances were removed before"
4685 " retrieving their data: %s" % missing)
4686 instance_names = self.wanted
4688 instance_list = [all_info[iname] for iname in instance_names]
4690 # begin data gathering
4692 nodes = frozenset([inst.primary_node for inst in instance_list])
4693 hv_list = list(set([inst.hypervisor for inst in instance_list]))
# Live data: query all primary nodes, classifying them as offline,
# unreachable ("bad") or responsive; responsive nodes contribute their
# running-instance payload.
4697 if self.do_node_query:
4699 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4701 result = node_data[name]
4703 # offline nodes will be in both lists
4704 off_nodes.append(name)
4706 bad_nodes.append(name)
4709 live_data.update(result.payload)
4710 # else no instance is alive
4712 live_data = dict([(name, {}) for name in instance_names])
4714 # end data gathering
4719 cluster = self.cfg.GetClusterInfo()
# Per-instance output: resolve each requested field to a value.
4720 for instance in instance_list:
# Filled-in hypervisor/backend/NIC parameters for this instance.
4722 i_hv = cluster.FillHV(instance, skip_globals=True)
4723 i_be = cluster.FillBE(instance)
4724 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4725 nic.nicparams) for nic in instance.nics]
4726 for field in self.op.output_fields:
4727 st_match = self._FIELDS_STATIC.Matches(field)
4728 if field in self._SIMPLE_FIELDS:
4729 val = getattr(instance, field)
4730 elif field == "pnode":
4731 val = instance.primary_node
4732 elif field == "snodes":
4733 val = list(instance.secondary_nodes)
4734 elif field == "admin_state":
4735 val = instance.admin_up
4736 elif field == "oper_state":
4737 if instance.primary_node in bad_nodes:
4740 val = bool(live_data.get(instance.name))
4741 elif field == "status":
# Status combines node reachability with admin/runtime state.
4742 if instance.primary_node in off_nodes:
4743 val = "ERROR_nodeoffline"
4744 elif instance.primary_node in bad_nodes:
4745 val = "ERROR_nodedown"
4747 running = bool(live_data.get(instance.name))
4749 if instance.admin_up:
4754 if instance.admin_up:
4758 elif field == "oper_ram":
4759 if instance.primary_node in bad_nodes:
4761 elif instance.name in live_data:
4762 val = live_data[instance.name].get("memory", "?")
4765 elif field == "vcpus":
4766 val = i_be[constants.BE_VCPUS]
4767 elif field == "disk_template":
4768 val = instance.disk_template
4771 val = instance.nics[0].ip
4774 elif field == "nic_mode":
4776 val = i_nicp[0][constants.NIC_MODE]
4779 elif field == "nic_link":
4781 val = i_nicp[0][constants.NIC_LINK]
4784 elif field == "bridge":
# "bridge" is only meaningful for bridged NIC mode.
4785 if (instance.nics and
4786 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4787 val = i_nicp[0][constants.NIC_LINK]
4790 elif field == "mac":
4792 val = instance.nics[0].mac
4795 elif field == "sda_size" or field == "sdb_size":
# Legacy fields: map 'a'/'b' to disk index 0/1.
4796 idx = ord(field[2]) - ord('a')
4798 val = instance.FindDisk(idx).size
4799 except errors.OpPrereqError:
4801 elif field == "disk_usage": # total disk usage per node
4802 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4803 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4804 elif field == "tags":
4805 val = list(instance.GetTags())
4806 elif field == "hvparams":
# Per-parameter hv/be fields (e.g. "hv/kernel_path", "be/memory").
4808 elif (field.startswith(HVPREFIX) and
4809 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4810 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4811 val = i_hv.get(field[len(HVPREFIX):], None)
4812 elif field == "beparams":
4814 elif (field.startswith(BEPREFIX) and
4815 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4816 val = i_be.get(field[len(BEPREFIX):], None)
4817 elif st_match and st_match.groups():
4818 # matches a variable list
4819 st_groups = st_match.groups()
4820 if st_groups and st_groups[0] == "disk":
4821 if st_groups[1] == "count":
4822 val = len(instance.disks)
4823 elif st_groups[1] == "sizes":
4824 val = [disk.size for disk in instance.disks]
4825 elif st_groups[1] == "size":
4827 val = instance.FindDisk(st_groups[2]).size
4828 except errors.OpPrereqError:
4831 assert False, "Unhandled disk parameter"
4832 elif st_groups[0] == "nic":
4833 if st_groups[1] == "count":
4834 val = len(instance.nics)
4835 elif st_groups[1] == "macs":
4836 val = [nic.mac for nic in instance.nics]
4837 elif st_groups[1] == "ips":
4838 val = [nic.ip for nic in instance.nics]
4839 elif st_groups[1] == "modes":
4840 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4841 elif st_groups[1] == "links":
4842 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4843 elif st_groups[1] == "bridges":
4846 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4847 val.append(nicp[constants.NIC_LINK])
# Indexed NIC fields, e.g. "nic.mac/2".
4852 nic_idx = int(st_groups[2])
4853 if nic_idx >= len(instance.nics):
4856 if st_groups[1] == "mac":
4857 val = instance.nics[nic_idx].mac
4858 elif st_groups[1] == "ip":
4859 val = instance.nics[nic_idx].ip
4860 elif st_groups[1] == "mode":
4861 val = i_nicp[nic_idx][constants.NIC_MODE]
4862 elif st_groups[1] == "link":
4863 val = i_nicp[nic_idx][constants.NIC_LINK]
4864 elif st_groups[1] == "bridge":
4865 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4866 if nic_mode == constants.NIC_MODE_BRIDGED:
4867 val = i_nicp[nic_idx][constants.NIC_LINK]
4871 assert False, "Unhandled NIC parameter"
4873 assert False, ("Declared but unhandled variable parameter '%s'" %
4876 assert False, "Declared but unhandled parameter '%s'" % field
# LUFailoverInstance: fail an instance over to its secondary node by
# shutting it down on the primary and starting it on the secondary.
# Only works for network-mirrored disk templates.
# NOTE(review): the listing has elided lines (e.g. the "env = {" opener
# before 4918 and "if msg:" guards); code is kept byte-identical.
4883 class LUFailoverInstance(LogicalUnit):
4884 """Failover an instance.
4887 HPATH = "instance-failover"
4888 HTYPE = constants.HTYPE_INSTANCE
4889 _OP_REQP = ["instance_name", "ignore_consistency"]
4892 def CheckArguments(self):
4893 """Check the arguments.
# Optional opcode attribute with a cluster default.
4896 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4897 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4899 def ExpandNames(self):
4900 self._ExpandAndLockInstance()
4901 self.needed_locks[locking.LEVEL_NODE] = []
4902 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4904 def DeclareLocks(self, level):
4905 if level == locking.LEVEL_NODE:
4906 self._LockInstancesNodes()
4908 def BuildHooksEnv(self):
4911 This runs on master, primary and secondary nodes of the instance.
4914 instance = self.instance
4915 source_node = instance.primary_node
4916 target_node = instance.secondary_nodes[0]
# OLD_*/NEW_* reflect the role swap performed by the failover.
4918 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4919 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4920 "OLD_PRIMARY": source_node,
4921 "OLD_SECONDARY": target_node,
4922 "NEW_PRIMARY": target_node,
4923 "NEW_SECONDARY": source_node,
4925 env.update(_BuildInstanceHookEnvByObject(self, instance))
4926 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4928 nl_post.append(source_node)
4929 return env, nl, nl_post
4931 def CheckPrereq(self):
4932 """Check prerequisites.
4934 This checks that the instance is in the cluster.
4937 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4938 assert self.instance is not None, \
4939 "Cannot retrieve locked instance %s" % self.op.instance_name
4941 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Failover requires a network-mirrored (e.g. DRBD) disk template.
4942 if instance.disk_template not in constants.DTS_NET_MIRROR:
4943 raise errors.OpPrereqError("Instance's disk layout is not"
4944 " network mirrored, cannot failover.",
4947 secondary_nodes = instance.secondary_nodes
4948 if not secondary_nodes:
4949 raise errors.ProgrammerError("no secondary node but using "
4950 "a mirrored disk template")
4952 target_node = secondary_nodes[0]
4953 _CheckNodeOnline(self, target_node)
4954 _CheckNodeNotDrained(self, target_node)
# Memory check only matters if the instance will be restarted.
4955 if instance.admin_up:
4956 # check memory requirements on the secondary node
4957 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4958 instance.name, bep[constants.BE_MEMORY],
4959 instance.hypervisor)
4961 self.LogInfo("Not checking memory on the secondary node as"
4962 " instance will not be started")
4964 # check bridge existance
4965 _CheckInstanceBridgesExist(self, instance, node=target_node)
4967 def Exec(self, feedback_fn):
4968 """Failover an instance.
4970 The failover is done by shutting it down on its present node and
4971 starting it on the secondary.
4974 instance = self.instance
4976 source_node = instance.primary_node
4977 target_node = instance.secondary_nodes[0]
# Refuse to fail over onto degraded disks unless explicitly overridden.
4979 if instance.admin_up:
4980 feedback_fn("* checking disk consistency between source and target")
4981 for dev in instance.disks:
4982 # for drbd, these are drbd over lvm
4983 if not _CheckDiskConsistency(self, dev, target_node, False):
4984 if not self.op.ignore_consistency:
4985 raise errors.OpExecError("Disk %s is degraded on target node,"
4986 " aborting failover." % dev.iv_name)
4988 feedback_fn("* not checking disk consistency as instance is not running")
4990 feedback_fn("* shutting down instance on source node")
4991 logging.info("Shutting down instance %s on node %s",
4992 instance.name, source_node)
4994 result = self.rpc.call_instance_shutdown(source_node, instance,
4995 self.shutdown_timeout)
4996 msg = result.fail_msg
# With ignore_consistency a failed shutdown only warns (the source node
# may be dead); otherwise abort.
4998 if self.op.ignore_consistency:
4999 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5000 " Proceeding anyway. Please make sure node"
5001 " %s is down. Error details: %s",
5002 instance.name, source_node, source_node, msg)
5004 raise errors.OpExecError("Could not shutdown instance %s on"
5006 (instance.name, source_node, msg))
5008 feedback_fn("* deactivating the instance's disks on source node")
5009 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5010 raise errors.OpExecError("Can't shut down the instance's disks.")
# Commit the new primary node to the configuration.
5012 instance.primary_node = target_node
5013 # distribute new instance config to the other nodes
5014 self.cfg.Update(instance, feedback_fn)
5016 # Only start the instance if it's marked as up
5017 if instance.admin_up:
5018 feedback_fn("* activating the instance's disks on target node")
5019 logging.info("Starting instance %s on node %s",
5020 instance.name, target_node)
5022 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5023 ignore_secondaries=True)
5025 _ShutdownInstanceDisks(self, instance)
5026 raise errors.OpExecError("Can't activate the instance's disks")
5028 feedback_fn("* starting the instance on the target node")
5029 result = self.rpc.call_instance_start(target_node, instance, None, None)
5030 msg = result.fail_msg
# On start failure, deactivate disks again before raising.
5032 _ShutdownInstanceDisks(self, instance)
5033 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5034 (instance.name, target_node, msg))
# LUMigrateInstance: live-migrate an instance to its secondary node
# without shutting it down (vs. failover, which shuts down first).
# The actual work is delegated to a TLMigrateInstance tasklet.
# NOTE(review): the listing has elided lines (e.g. the "env.update({"
# opener before 5077); code is kept byte-identical.
5037 class LUMigrateInstance(LogicalUnit):
5038 """Migrate an instance.
5040 This is migration without shutting down, compared to the failover,
5041 which is done with shutdown.
5044 HPATH = "instance-migrate"
5045 HTYPE = constants.HTYPE_INSTANCE
5046 _OP_REQP = ["instance_name", "live", "cleanup"]
5050 def ExpandNames(self):
5051 self._ExpandAndLockInstance()
5053 self.needed_locks[locking.LEVEL_NODE] = []
5054 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# All prereq checking and execution is handled by the tasklet.
5056 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5057 self.op.live, self.op.cleanup)
5058 self.tasklets = [self._migrater]
5060 def DeclareLocks(self, level):
5061 if level == locking.LEVEL_NODE:
5062 self._LockInstancesNodes()
5064 def BuildHooksEnv(self):
5067 This runs on master, primary and secondary nodes of the instance.
# Use the tasklet's resolved instance object for the hook environment.
5070 instance = self._migrater.instance
5071 source_node = instance.primary_node
5072 target_node = instance.secondary_nodes[0]
5073 env = _BuildInstanceHookEnvByObject(self, instance)
5074 env["MIGRATE_LIVE"] = self.op.live
5075 env["MIGRATE_CLEANUP"] = self.op.cleanup
5077 "OLD_PRIMARY": source_node,
5078 "OLD_SECONDARY": target_node,
5079 "NEW_PRIMARY": target_node,
5080 "NEW_SECONDARY": source_node,
5082 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5084 nl_post.append(source_node)
5085 return env, nl, nl_post
# LUMoveInstance: move an instance to an arbitrary node by shutting it
# down, creating new disks on the target, copying the data over, and
# starting it there. Only plain LVM/file disks can be moved this way.
# NOTE(review): the listing has elided lines (e.g. "env = {" before
# 5122, "try:" before 5204, "if errs:" handling); code is kept
# byte-identical to the listing.
5088 class LUMoveInstance(LogicalUnit):
5089 """Move an instance by data-copying.
5092 HPATH = "instance-move"
5093 HTYPE = constants.HTYPE_INSTANCE
5094 _OP_REQP = ["instance_name", "target_node"]
5097 def CheckArguments(self):
5098 """Check the arguments.
5101 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5102 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5104 def ExpandNames(self):
5105 self._ExpandAndLockInstance()
# Lock the explicit target node plus (via recalculation) the
# instance's primary node.
5106 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5107 self.op.target_node = target_node
5108 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5109 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5111 def DeclareLocks(self, level):
5112 if level == locking.LEVEL_NODE:
# Only the primary node matters for the move source.
5113 self._LockInstancesNodes(primary_only=True)
5115 def BuildHooksEnv(self):
5118 This runs on master, primary and secondary nodes of the instance.
5122 "TARGET_NODE": self.op.target_node,
5123 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5125 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5126 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5127 self.op.target_node]
5130 def CheckPrereq(self):
5131 """Check prerequisites.
5133 This checks that the instance is in the cluster.
5136 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5137 assert self.instance is not None, \
5138 "Cannot retrieve locked instance %s" % self.op.instance_name
5140 node = self.cfg.GetNodeInfo(self.op.target_node)
5141 assert node is not None, \
5142 "Cannot retrieve locked node %s" % self.op.target_node
5144 self.target_node = target_node = node.name
# Moving onto the current primary node is a no-op and rejected.
5146 if target_node == instance.primary_node:
5147 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5148 (instance.name, target_node),
5151 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Only simple (LV or file) disk layouts can be copied by this LU.
5153 for idx, dsk in enumerate(instance.disks):
5154 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5155 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5156 " cannot copy" % idx, errors.ECODE_STATE)
5158 _CheckNodeOnline(self, target_node)
5159 _CheckNodeNotDrained(self, target_node)
5161 if instance.admin_up:
5162 # check memory requirements on the secondary node
5163 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5164 instance.name, bep[constants.BE_MEMORY],
5165 instance.hypervisor)
5167 self.LogInfo("Not checking memory on the secondary node as"
5168 " instance will not be started")
5170 # check bridge existance
5171 _CheckInstanceBridgesExist(self, instance, node=target_node)
5173 def Exec(self, feedback_fn):
5174 """Move an instance.
5176 The move is done by shutting it down on its present node, copying
5177 the data over (slow) and starting it on the new node.
5180 instance = self.instance
5182 source_node = instance.primary_node
5183 target_node = self.target_node
5185 self.LogInfo("Shutting down instance %s on source node %s",
5186 instance.name, source_node)
5188 result = self.rpc.call_instance_shutdown(source_node, instance,
5189 self.shutdown_timeout)
5190 msg = result.fail_msg
# NOTE(review): this reads op.ignore_consistency although it is not in
# _OP_REQP for this LU — presumably copied from the failover code path;
# verify against the opcode definition.
5192 if self.op.ignore_consistency:
5193 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5194 " Proceeding anyway. Please make sure node"
5195 " %s is down. Error details: %s",
5196 instance.name, source_node, source_node, msg)
5198 raise errors.OpExecError("Could not shutdown instance %s on"
5200 (instance.name, source_node, msg))
5202 # create the target disks
5204 _CreateDisks(self, instance, target_node=target_node)
# On creation failure, roll back: remove any created target disks and
# release reserved DRBD minors before re-raising.
5205 except errors.OpExecError:
5206 self.LogWarning("Device creation failed, reverting...")
5208 _RemoveDisks(self, instance, target_node=target_node)
5210 self.cfg.ReleaseDRBDMinors(instance.name)
5213 cluster_name = self.cfg.GetClusterInfo().cluster_name
5216 # activate, get path, copy the data over
5217 for idx, disk in enumerate(instance.disks):
5218 self.LogInfo("Copying data for disk %d", idx)
5219 result = self.rpc.call_blockdev_assemble(target_node, disk,
5220 instance.name, True)
# Per-disk failures are collected in errs rather than aborting
# immediately, so all disks are attempted.
5222 self.LogWarning("Can't assemble newly created disk %d: %s",
5223 idx, result.fail_msg)
5224 errs.append(result.fail_msg)
5226 dev_path = result.payload
5227 result = self.rpc.call_blockdev_export(source_node, disk,
5228 target_node, dev_path,
5231 self.LogWarning("Can't copy data over for disk %d: %s",
5232 idx, result.fail_msg)
5233 errs.append(result.fail_msg)
# Any copy errors abort the move: remove target disks and roll back.
5237 self.LogWarning("Some disks failed to copy, aborting")
5239 _RemoveDisks(self, instance, target_node=target_node)
5241 self.cfg.ReleaseDRBDMinors(instance.name)
5242 raise errors.OpExecError("Errors during disk copy: %s" %
# Commit the node change, then drop the now-unused source disks.
5245 instance.primary_node = target_node
5246 self.cfg.Update(instance, feedback_fn)
5248 self.LogInfo("Removing the disks on the original node")
5249 _RemoveDisks(self, instance, target_node=source_node)
5251 # Only start the instance if it's marked as up
5252 if instance.admin_up:
5253 self.LogInfo("Starting instance %s on node %s",
5254 instance.name, target_node)
5256 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5257 ignore_secondaries=True)
5259 _ShutdownInstanceDisks(self, instance)
5260 raise errors.OpExecError("Can't activate the instance's disks")
5262 result = self.rpc.call_instance_start(target_node, instance, None, None)
5263 msg = result.fail_msg
5265 _ShutdownInstanceDisks(self, instance)
5266 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5267 (instance.name, target_node, msg))
# LUMigrateNode: migrate away all primary instances of a node by creating
# one TLMigrateInstance tasklet per instance.
# NOTE(review): the listing has elided lines (e.g. the "env = {" opener
# before 5314 and the tasklets/names list initializers); code is kept
# byte-identical to the listing.
5270 class LUMigrateNode(LogicalUnit):
5271 """Migrate all instances from a node.
5274 HPATH = "node-migrate"
5275 HTYPE = constants.HTYPE_NODE
5276 _OP_REQP = ["node_name", "live"]
5279 def ExpandNames(self):
5280 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5282 self.needed_locks = {
5283 locking.LEVEL_NODE: [self.op.node_name],
5286 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5288 # Create tasklets for migrating instances for all instances on this node
# One tasklet per primary instance; cleanup mode is always False here.
5292 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5293 logging.debug("Migrating instance %s", inst.name)
5294 names.append(inst.name)
5296 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5298 self.tasklets = tasklets
5300 # Declare instance locks
5301 self.needed_locks[locking.LEVEL_INSTANCE] = names
5303 def DeclareLocks(self, level):
5304 if level == locking.LEVEL_NODE:
5305 self._LockInstancesNodes()
5307 def BuildHooksEnv(self):
5310 This runs on the master, the primary and all the secondaries.
5314 "NODE_NAME": self.op.node_name,
# Hooks run on the master node only.
5317 nl = [self.cfg.GetMasterNode()]
5319 return (env, nl, nl)
5322 class TLMigrateInstance(Tasklet):
5323 def __init__(self, lu, instance_name, live, cleanup):
5324 """Initializes this class.
5327 Tasklet.__init__(self, lu)
5330 self.instance_name = instance_name
5332 self.cleanup = cleanup
5334 def CheckPrereq(self):
5335 """Check prerequisites.
5337 This checks that the instance is in the cluster.
5340 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5341 instance = self.cfg.GetInstanceInfo(instance_name)
5342 assert instance is not None
5344 if instance.disk_template != constants.DT_DRBD8:
5345 raise errors.OpPrereqError("Instance's disk layout is not"
5346 " drbd8, cannot migrate.", errors.ECODE_STATE)
5348 secondary_nodes = instance.secondary_nodes
5349 if not secondary_nodes:
5350 raise errors.ConfigurationError("No secondary node but using"
5351 " drbd8 disk template")
5353 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5355 target_node = secondary_nodes[0]
5356 # check memory requirements on the secondary node
5357 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5358 instance.name, i_be[constants.BE_MEMORY],
5359 instance.hypervisor)
5361 # check bridge existance
5362 _CheckInstanceBridgesExist(self, instance, node=target_node)
5364 if not self.cleanup:
5365 _CheckNodeNotDrained(self, target_node)
5366 result = self.rpc.call_instance_migratable(instance.primary_node,
5368 result.Raise("Can't migrate, please use failover",
5369 prereq=True, ecode=errors.ECODE_STATE)
5371 self.instance = instance
5373 def _WaitUntilSync(self):
5374 """Poll with custom rpc for disk sync.
5376 This uses our own step-based rpc call.
5379 self.feedback_fn("* wait until resync is done")
5383 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5385 self.instance.disks)
5387 for node, nres in result.items():
5388 nres.Raise("Cannot resync disks on node %s" % node)
5389 node_done, node_percent = nres.payload
5390 all_done = all_done and node_done
5391 if node_percent is not None:
5392 min_percent = min(min_percent, node_percent)
5394 if min_percent < 100:
5395 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5398 def _EnsureSecondary(self, node):
5399 """Demote a node to secondary.
5402 self.feedback_fn("* switching node %s to secondary mode" % node)
5404 for dev in self.instance.disks:
5405 self.cfg.SetDiskID(dev, node)
5407 result = self.rpc.call_blockdev_close(node, self.instance.name,
5408 self.instance.disks)
5409 result.Raise("Cannot change disk to secondary on node %s" % node)
5411 def _GoStandalone(self):
5412 """Disconnect from the network.
5415 self.feedback_fn("* changing into standalone mode")
5416 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5417 self.instance.disks)
5418 for node, nres in result.items():
5419 nres.Raise("Cannot disconnect disks node %s" % node)
5421 def _GoReconnect(self, multimaster):
5422 """Reconnect to the network.
5428 msg = "single-master"
5429 self.feedback_fn("* changing disks into %s mode" % msg)
5430 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5431 self.instance.disks,
5432 self.instance.name, multimaster)
5433 for node, nres in result.items():
5434 nres.Raise("Cannot change disks config on node %s" % node)
5436 def _ExecCleanup(self):
5437 """Try to cleanup after a failed migration.
5439 The cleanup is done by:
5440 - check that the instance is running only on one node
5441 (and update the config if needed)
5442 - change disks on its secondary node to secondary
5443 - wait until disks are fully synchronized
5444 - disconnect from the network
5445 - change disks into single-master mode
5446 - wait again until disks are fully synchronized
5449 instance = self.instance
5450 target_node = self.target_node
5451 source_node = self.source_node
5453 # check running on only one node
5454 self.feedback_fn("* checking where the instance actually runs"
5455 " (if this hangs, the hypervisor might be in"
# Ask both nodes which instances they run; contact failures are fatal.
5457 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5458 for node, result in ins_l.items():
5459 result.Raise("Can't contact node %s" % node)
5461 runningon_source = instance.name in ins_l[source_node].payload
5462 runningon_target = instance.name in ins_l[target_node].payload
# Running on both nodes at once cannot be repaired automatically.
5464 if runningon_source and runningon_target:
5465 raise errors.OpExecError("Instance seems to be running on two nodes,"
5466 " or the hypervisor is confused. You will have"
5467 " to ensure manually that it runs only on one"
5468 " and restart this operation.")
5470 if not (runningon_source or runningon_target):
5471 raise errors.OpExecError("Instance does not seem to be running at all."
5472 " In this case, it's safer to repair by"
5473 " running 'gnt-instance stop' to ensure disk"
5474 " shutdown, and then restarting it.")
5476 if runningon_target:
5477 # the migration has actually succeeded, we need to update the config
5478 self.feedback_fn("* instance running on secondary node (%s),"
5479 " updating config" % target_node)
5480 instance.primary_node = target_node
5481 self.cfg.Update(instance, self.feedback_fn)
5482 demoted_node = source_node
# NOTE(review): the `else:` line for the running-on-source case is
# elided from this listing.
5484 self.feedback_fn("* instance confirmed to be running on its"
5485 " primary node (%s)" % source_node)
5486 demoted_node = target_node
5488 self._EnsureSecondary(demoted_node)
# NOTE(review): the `try:` opening this block is elided; sync errors on
# a standalone device are expected and deliberately ignored.
5490 self._WaitUntilSync()
5491 except errors.OpExecError:
5492 # we ignore here errors, since if the device is standalone, it
5493 # won't be able to sync
5495 self._GoStandalone()
5496 self._GoReconnect(False)
5497 self._WaitUntilSync()
5499 self.feedback_fn("* done")
5501 def _RevertDiskStatus(self):
5502 """Try to revert the disk status after a failed migration.
5505 target_node = self.target_node
# NOTE(review): the `try:` opening this recovery sequence is elided from
# this listing; failures are only logged, never raised, since this runs
# on an already-failed migration path.
5507 self._EnsureSecondary(target_node)
5508 self._GoStandalone()
5509 self._GoReconnect(False)
5510 self._WaitUntilSync()
5511 except errors.OpExecError, err:
5512 self.lu.LogWarning("Migration failed and I can't reconnect the"
5513 " drives: error '%s'\n"
5514 "Please look and recover the instance status" %
5517 def _AbortMigration(self):
5518 """Call the hypervisor code to abort a started migration.
5521 instance = self.instance
5522 target_node = self.target_node
5523 migration_info = self.migration_info
# NOTE(review): the remaining call arguments and the `if abort_msg:`
# guard are elided from this listing.
5525 abort_result = self.rpc.call_finalize_migration(target_node,
5529 abort_msg = abort_result.fail_msg
5531 logging.error("Aborting migration failed on target node %s: %s",
5532 target_node, abort_msg)
5533 # Don't raise an exception here, as we still have to try to revert the
5534 # disk status, even if this step failed.
5536 def _ExecMigration(self):
5537 """Migrate an instance.
5539 The migrate is done by:
5540 - change the disks into dual-master mode
5541 - wait until disks are fully synchronized again
5542 - migrate the instance
5543 - change disks on the new secondary node (the old primary) to secondary
5544 - wait until disks are fully synchronized
5545 - change disks into single-master mode
5548 instance = self.instance
5549 target_node = self.target_node
5550 source_node = self.source_node
# Refuse to migrate on top of degraded disks.
5552 self.feedback_fn("* checking disk consistency between source and target")
5553 for dev in instance.disks:
5554 if not _CheckDiskConsistency(self, dev, target_node, False):
5555 raise errors.OpExecError("Disk %s is degraded or not fully"
5556 " synchronized on target node,"
5557 " aborting migrate." % dev.iv_name)
5559 # First get the migration information from the remote node
5560 result = self.rpc.call_migration_info(source_node, instance)
5561 msg = result.fail_msg
# NOTE(review): the `if msg:` guard and the second %-argument of the
# message below are elided from this listing.
5563 log_err = ("Failed fetching source migration information from %s: %s" %
5565 logging.error(log_err)
5566 raise errors.OpExecError(log_err)
5568 self.migration_info = migration_info = result.payload
5570 # Then switch the disks to master/master mode
5571 self._EnsureSecondary(target_node)
5572 self._GoStandalone()
5573 self._GoReconnect(True)
5574 self._WaitUntilSync()
5576 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5577 result = self.rpc.call_accept_instance(target_node,
5580 self.nodes_ip[target_node])
5582 msg = result.fail_msg
# On pre-migration failure: abort on the target, then put the disks
# back into single-master mode before raising.
5584 logging.error("Instance pre-migration failed, trying to revert"
5585 " disk status: %s", msg)
5586 self.feedback_fn("Pre-migration failed, aborting")
5587 self._AbortMigration()
5588 self._RevertDiskStatus()
5589 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5590 (instance.name, msg))
5592 self.feedback_fn("* migrating instance to %s" % target_node)
5594 result = self.rpc.call_instance_migrate(source_node, instance,
5595 self.nodes_ip[target_node],
5597 msg = result.fail_msg
5599 logging.error("Instance migration failed, trying to revert"
5600 " disk status: %s", msg)
5601 self.feedback_fn("Migration failed, aborting")
5602 self._AbortMigration()
5603 self._RevertDiskStatus()
5604 raise errors.OpExecError("Could not migrate instance %s: %s" %
5605 (instance.name, msg))
# Migration succeeded: record the new primary and push the config out.
5608 instance.primary_node = target_node
5609 # distribute new instance config to the other nodes
5610 self.cfg.Update(instance, self.feedback_fn)
5612 result = self.rpc.call_finalize_migration(target_node,
5616 msg = result.fail_msg
5618 logging.error("Instance migration succeeded, but finalization failed:"
5620 raise errors.OpExecError("Could not finalize instance migration: %s" %
# Demote the old primary and fall back to single-master mode.
5623 self._EnsureSecondary(source_node)
5624 self._WaitUntilSync()
5625 self._GoStandalone()
5626 self._GoReconnect(False)
5627 self._WaitUntilSync()
5629 self.feedback_fn("* done")
5631 def Exec(self, feedback_fn):
5632 """Perform the migration.
5635 feedback_fn("Migrating instance %s" % self.instance.name)
# Cache the feedback function so the helper methods can report progress.
5637 self.feedback_fn = feedback_fn
5639 self.source_node = self.instance.primary_node
# DRBD instances have exactly one secondary, which is the target.
5640 self.target_node = self.instance.secondary_nodes[0]
5641 self.all_nodes = [self.source_node, self.target_node]
# NOTE(review): the `self.nodes_ip = {` opener and closing brace of this
# dict literal, and the `if self.cleanup:` dispatch line, are elided.
5643 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5644 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5648 return self._ExecCleanup()
5650 return self._ExecMigration()
5653 def _CreateBlockDev(lu, node, instance, device, force_create,
5655 """Create a tree of block devices on a given node.
5657 If this device type has to be created on secondaries, create it and
5660 If not, just recurse to children keeping the same 'force' value.
5662 @param lu: the lu on whose behalf we execute
5663 @param node: the node on which to create the device
5664 @type instance: L{objects.Instance}
5665 @param instance: the instance which owns the device
5666 @type device: L{objects.Disk}
5667 @param device: the device to create
5668 @type force_create: boolean
5669 @param force_create: whether to force creation of this device; this
5670 will be change to True whenever we find a device which has
5671 CreateOnSecondary() attribute
5672 @param info: the extra 'metadata' we should attach to the device
5673 (this will be represented as a LVM tag)
5674 @type force_open: boolean
5675 @param force_open: this parameter will be passes to the
5676 L{backend.BlockdevCreate} function where it specifies
5677 whether we run on primary or not, and it affects both
5678 the child assembly and the device own Open() execution
# NOTE(review): the signature continuation (info, force_open) and the
# body line forcing creation when CreateOnSecondary() is true are elided
# from this listing.
5681 if device.CreateOnSecondary():
# Recurse depth-first so children exist before the parent device.
5685 for child in device.children:
5686 _CreateBlockDev(lu, node, instance, child, force_create,
5689 if not force_create:
5692 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5695 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5696 """Create a single block device on a given node.
5698 This will not recurse over children of the device, so they must be
5701 @param lu: the lu on whose behalf we execute
5702 @param node: the node on which to create the device
5703 @type instance: L{objects.Instance}
5704 @param instance: the instance which owns the device
5705 @type device: L{objects.Disk}
5706 @param device: the device to create
5707 @param info: the extra 'metadata' we should attach to the device
5708 (this will be represented as a LVM tag)
5709 @type force_open: boolean
5710 @param force_open: this parameter will be passes to the
5711 L{backend.BlockdevCreate} function where it specifies
5712 whether we run on primary or not, and it affects both
5713 the child assembly and the device own Open() execution
5716 lu.cfg.SetDiskID(device, node)
5717 result = lu.rpc.call_blockdev_create(node, device, device.size,
5718 instance.name, force_open, info)
5719 result.Raise("Can't create block device %s on"
5720 " node %s for instance %s" % (device, node, instance.name))
5721 if device.physical_id is None:
5722 device.physical_id = result.payload
5725 def _GenerateUniqueNames(lu, exts):
5726 """Generate a suitable LV name.
5728 This will generate a logical volume name for the given instance.
# NOTE(review): the loop header over `exts` (binding `val`), the results
# list initialization and the final return are elided from this listing.
5733 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5734 results.append("%s%s" % (new_id, val))
5738 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5740 """Generate a drbd8 device complete with its children.
# Allocate a cluster-unique DRBD port and a shared secret for the pair.
5743 port = lu.cfg.AllocatePort()
5744 vgname = lu.cfg.GetVGName()
5745 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5746 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5747 logical_id=(vgname, names[0]))
# 128 MiB metadata volume backing the DRBD device.
5748 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5749 logical_id=(vgname, names[1]))
# NOTE(review): the minor numbers/secret tail of logical_id, the
# iv_name keyword and the return statement are elided from this listing.
5750 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5751 logical_id=(primary, secondary, port,
5754 children=[dev_data, dev_meta],
5759 def _GenerateDiskTemplate(lu, template_name,
5760 instance_name, primary_node,
5761 secondary_nodes, disk_info,
5762 file_storage_dir, file_driver,
5764 """Generate the entire disk layout for a given template type.
5767 #TODO: compute space requirements
5769 vgname = lu.cfg.GetVGName()
5770 disk_count = len(disk_info)
# NOTE(review): the `disks = []` initialization, the mode assignments in
# the plain branch and the final return are elided from this listing.
5772 if template_name == constants.DT_DISKLESS:
5774 elif template_name == constants.DT_PLAIN:
# Plain LVM volumes cannot have secondaries.
5775 if len(secondary_nodes) != 0:
5776 raise errors.ProgrammerError("Wrong template configuration")
5778 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5779 for i in range(disk_count)])
5780 for idx, disk in enumerate(disk_info):
5781 disk_index = idx + base_index
5782 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5783 logical_id=(vgname, names[idx]),
5784 iv_name="disk/%d" % disk_index,
5786 disks.append(disk_dev)
5787 elif template_name == constants.DT_DRBD8:
# DRBD requires exactly one secondary node.
5788 if len(secondary_nodes) != 1:
5789 raise errors.ProgrammerError("Wrong template configuration")
5790 remote_node = secondary_nodes[0]
# Two minors per disk: one on the primary, one on the secondary.
5791 minors = lu.cfg.AllocateDRBDMinor(
5792 [primary_node, remote_node] * len(disk_info), instance_name)
5795 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5796 for i in range(disk_count)]):
# Each DRBD disk is backed by a data LV and a metadata LV.
5797 names.append(lv_prefix + "_data")
5798 names.append(lv_prefix + "_meta")
5799 for idx, disk in enumerate(disk_info):
5800 disk_index = idx + base_index
5801 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5802 disk["size"], names[idx*2:idx*2+2],
5803 "disk/%d" % disk_index,
5804 minors[idx*2], minors[idx*2+1])
5805 disk_dev.mode = disk["mode"]
5806 disks.append(disk_dev)
5807 elif template_name == constants.DT_FILE:
5808 if len(secondary_nodes) != 0:
5809 raise errors.ProgrammerError("Wrong template configuration")
5811 for idx, disk in enumerate(disk_info):
5812 disk_index = idx + base_index
5813 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5814 iv_name="disk/%d" % disk_index,
5815 logical_id=(file_driver,
5816 "%s/disk%d" % (file_storage_dir,
5819 disks.append(disk_dev)
5821 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5825 def _GetInstanceInfoText(instance):
5826 """Compute that text that should be added to the disk's metadata.
5829 return "originstname+%s" % instance.name
5832 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5833 """Create all disks for an instance.
5835 This abstracts away some work from AddInstance.
5837 @type lu: L{LogicalUnit}
5838 @param lu: the logical unit on whose behalf we execute
5839 @type instance: L{objects.Instance}
5840 @param instance: the instance whose disks we should create
5842 @param to_skip: list of indices to skip
5843 @type target_node: string
5844 @param target_node: if passed, overrides the target node for creation
5846 @return: the success of the creation
5849 info = _GetInstanceInfoText(instance)
# NOTE(review): the `else:` branch using target_node as sole node is
# elided from this listing.
5850 if target_node is None:
5851 pnode = instance.primary_node
5852 all_nodes = instance.all_nodes
# File-based instances need their storage directory created first.
5857 if instance.disk_template == constants.DT_FILE:
5858 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5859 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5861 result.Raise("Failed to create directory '%s' on"
5862 " node %s" % (file_storage_dir, pnode))
5864 # Note: this needs to be kept in sync with adding of disks in
5865 # LUSetInstanceParams
5866 for idx, device in enumerate(instance.disks):
5867 if to_skip and idx in to_skip:
5869 logging.info("Creating volume %s for instance %s",
5870 device.iv_name, instance.name)
# Only the primary node gets force-create/force-open semantics.
5872 for node in all_nodes:
5873 f_create = node == pnode
5874 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5877 def _RemoveDisks(lu, instance, target_node=None):
5878 """Remove all disks for an instance.
5880 This abstracts away some work from `AddInstance()` and
5881 `RemoveInstance()`. Note that in case some of the devices couldn't
5882 be removed, the removal will continue with the other ones (compare
5883 with `_CreateDisks()`).
5885 @type lu: L{LogicalUnit}
5886 @param lu: the logical unit on whose behalf we execute
5887 @type instance: L{objects.Instance}
5888 @param instance: the instance whose disks we should remove
5889 @type target_node: string
5890 @param target_node: used to override the node on which to remove the disks
5892 @return: the success of the removal
5895 logging.info("Removing block devices for instance %s", instance.name)
# NOTE(review): the success accumulator initialization, the
# `if target_node:` / `else:` lines, and the final return are elided
# from this listing — best-effort: failures only log warnings.
5898 for device in instance.disks:
5900 edata = [(target_node, device)]
5902 edata = device.ComputeNodeTree(instance.primary_node)
5903 for node, disk in edata:
5904 lu.cfg.SetDiskID(disk, node)
5905 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5907 lu.LogWarning("Could not remove block device %s on node %s,"
5908 " continuing anyway: %s", device.iv_name, node, msg)
# File-based instances also need their storage directory removed.
5911 if instance.disk_template == constants.DT_FILE:
5912 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5916 tgt = instance.primary_node
5917 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5919 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5920 file_storage_dir, instance.primary_node, result.fail_msg)
5926 def _ComputeDiskSize(disk_template, disks):
5927 """Compute disk size requirements in the volume group
5930 # Required free disk space as a function of disk and swap space
# NOTE(review): the `req_size_dict = {` opener and closing brace of this
# dict literal are elided from this listing. None means "no VG space
# needed" for templates not backed by LVM.
5932 constants.DT_DISKLESS: None,
5933 constants.DT_PLAIN: sum(d["size"] for d in disks),
5934 # 128 MB are added for drbd metadata for each disk
5935 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5936 constants.DT_FILE: None,
5939 if disk_template not in req_size_dict:
5940 raise errors.ProgrammerError("Disk template '%s' size requirement"
5941 " is unknown" % disk_template)
5943 return req_size_dict[disk_template]
5946 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5947 """Hypervisor parameter validation.
5949 This function abstract the hypervisor parameter validation to be
5950 used in both instance create and instance modify.
5952 @type lu: L{LogicalUnit}
5953 @param lu: the logical unit for which we check
5954 @type nodenames: list
5955 @param nodenames: the list of nodes on which we should check
5956 @type hvname: string
5957 @param hvname: the name of the hypervisor we should use
5958 @type hvparams: dict
5959 @param hvparams: the parameters which we need to check
5960 @raise errors.OpPrereqError: if the parameters are not valid
# NOTE(review): the remaining RPC arguments (hvname, hvparams) and the
# per-node result extraction/offline-skip lines are elided from this
# listing.
5963 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5966 for node in nodenames:
5970 info.Raise("Hypervisor parameter validation failed on node %s" % node)
5973 class LUCreateInstance(LogicalUnit):
5974 """Create an instance.
# HPATH/HTYPE drive the hooks environment (see BuildHooksEnv).
5977 HPATH = "instance-add"
5978 HTYPE = constants.HTYPE_INSTANCE
# _OP_REQP: opcode attributes that must be present before execution;
# NOTE(review): one continuation line of this list is elided from this
# listing — confirm against the full source.
5979 _OP_REQP = ["instance_name", "disks",
5981 "wait_for_sync", "ip_check", "nics",
5982 "hvparams", "beparams"]
5985 def CheckArguments(self):
# Normalizes/validates opcode arguments before any locking happens.
5989 # set optional parameters to none if they don't exist
5990 for attr in ["pnode", "snode", "iallocator", "hypervisor",
5992 if not hasattr(self.op, attr):
5993 setattr(self.op, attr, None)
5995 # do not require name_check to ease forward/backward compatibility
5997 if not hasattr(self.op, "name_check"):
5998 self.op.name_check = True
5999 if not hasattr(self.op, "no_install"):
6000 self.op.no_install = False
6001 if self.op.no_install and self.op.start:
6002 self.LogInfo("No-installation mode selected, disabling startup")
6003 self.op.start = False
6004 # validate/normalize the instance name
6005 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6006 if self.op.ip_check and not self.op.name_check:
6007 # TODO: make the ip check more flexible and not depend on the name check
6008 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6010 # check disk information: either all adopt, or no adopt
6011 has_adopt = has_no_adopt = False
# NOTE(review): the per-disk lines setting has_adopt/has_no_adopt are
# elided from this listing.
6012 for disk in self.op.disks:
6017 if has_adopt and has_no_adopt:
6018 raise errors.OpPrereqError("Either all disks are adopted or none is",
# Adoption only works for plain LVM volumes, without an allocator or
# an import source.
6021 if self.op.disk_template != constants.DT_PLAIN:
6022 raise errors.OpPrereqError("Disk adoption is only supported for the"
6023 " 'plain' disk template",
6025 if self.op.iallocator is not None:
6026 raise errors.OpPrereqError("Disk adoption not allowed with an"
6027 " iallocator script", errors.ECODE_INVAL)
6028 if self.op.mode == constants.INSTANCE_IMPORT:
6029 raise errors.OpPrereqError("Disk adoption not allowed for"
6030 " instance import", errors.ECODE_INVAL)
6032 self.adopt_disks = has_adopt
6034 # verify creation mode
6035 if self.op.mode not in (constants.INSTANCE_CREATE,
6036 constants.INSTANCE_IMPORT):
6037 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6038 self.op.mode, errors.ECODE_INVAL)
6040 # instance name verification
6041 if self.op.name_check:
6042 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6043 self.op.instance_name = self.hostname1.name
6044 # used in CheckPrereq for ip ping check
6045 self.check_ip = self.hostname1.ip
6047 self.check_ip = None
6049 # file storage checks
6050 if (self.op.file_driver and
6051 not self.op.file_driver in constants.FILE_DRIVER):
6052 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6053 self.op.file_driver, errors.ECODE_INVAL)
6055 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6056 raise errors.OpPrereqError("File storage directory path not absolute",
6059 ### Node/iallocator related checks
# Exactly one of iallocator/pnode must be given.
6060 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6061 raise errors.OpPrereqError("One and only one of iallocator and primary"
6062 " node must be given",
6065 if self.op.mode == constants.INSTANCE_IMPORT:
6066 # On import force_variant must be True, because if we forced it at
6067 # initial install, our only chance when importing it back is that it
6069 self.op.force_variant = True
6071 if self.op.no_install:
6072 self.LogInfo("No-installation mode has no effect during import")
6074 else: # INSTANCE_CREATE
6075 if getattr(self.op, "os_type", None) is None:
6076 raise errors.OpPrereqError("No guest OS specified",
6078 self.op.force_variant = getattr(self.op, "force_variant", False)
6079 if self.op.disk_template is None:
6080 raise errors.OpPrereqError("No disk template specified",
6083 def ExpandNames(self):
6084 """ExpandNames for CreateInstance.
6086 Figure out the right locks for instance creation.
6089 self.needed_locks = {}
6091 instance_name = self.op.instance_name
6092 # this is just a preventive check, but someone might still add this
6093 # instance in the meantime, and creation will fail at lock-add time
6094 if instance_name in self.cfg.GetInstanceList():
6095 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6096 instance_name, errors.ECODE_EXISTS)
6098 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
# With an allocator, any node could be picked, so lock them all;
# otherwise lock only the requested primary/secondary.
6100 if self.op.iallocator:
6101 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6103 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6104 nodelist = [self.op.pnode]
6105 if self.op.snode is not None:
6106 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6107 nodelist.append(self.op.snode)
6108 self.needed_locks[locking.LEVEL_NODE] = nodelist
6110 # in case of import lock the source node too
6111 if self.op.mode == constants.INSTANCE_IMPORT:
6112 src_node = getattr(self.op, "src_node", None)
6113 src_path = getattr(self.op, "src_path", None)
6115 if src_path is None:
6116 self.op.src_path = src_path = self.op.instance_name
# Unknown source node: must lock everything to search the exports.
6118 if src_node is None:
6119 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6120 self.op.src_node = None
6121 if os.path.isabs(src_path):
6122 raise errors.OpPrereqError("Importing an instance from an absolute"
6123 " path requires a source node option.",
# NOTE(review): the `else:` line for the known-source-node branch is
# elided from this listing.
6126 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6127 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6128 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6129 if not os.path.isabs(src_path):
# Relative export paths are anchored under the cluster export dir.
6130 self.op.src_path = src_path = \
6131 utils.PathJoin(constants.EXPORT_DIR, src_path)
6133 def _RunAllocator(self):
6134 """Run the allocator based on input opcode.
6137 nics = [n.ToDict() for n in self.nics]
6138 ial = IAllocator(self.cfg, self.rpc,
6139 mode=constants.IALLOCATOR_MODE_ALLOC,
6140 name=self.op.instance_name,
6141 disk_template=self.op.disk_template,
6144 vcpus=self.be_full[constants.BE_VCPUS],
6145 mem_size=self.be_full[constants.BE_MEMORY],
6148 hypervisor=self.op.hypervisor,
6151 ial.Run(self.op.iallocator)
# NOTE(review): the `if not ial.success:` guard preceding this raise is
# elided from this listing.
6154 raise errors.OpPrereqError("Can't compute nodes using"
6155 " iallocator '%s': %s" %
6156 (self.op.iallocator, ial.info),
6158 if len(ial.result) != ial.required_nodes:
6159 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6160 " of nodes (%s), required %s" %
6161 (self.op.iallocator, len(ial.result),
6162 ial.required_nodes), errors.ECODE_FAULT)
# First returned node is the primary, second (if any) the secondary.
6163 self.op.pnode = ial.result[0]
6164 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6165 self.op.instance_name, self.op.iallocator,
6166 utils.CommaJoin(ial.result))
6167 if ial.required_nodes == 2:
6168 self.op.snode = ial.result[1]
6170 def BuildHooksEnv(self):
6173 This runs on master, primary and secondary nodes of the instance.
# NOTE(review): the `env = {` opener, its closing brace, the bep/hvp
# entries and the final `return env, nl, nl` are elided from this
# listing.
6177 "ADD_MODE": self.op.mode,
6179 if self.op.mode == constants.INSTANCE_IMPORT:
6180 env["SRC_NODE"] = self.op.src_node
6181 env["SRC_PATH"] = self.op.src_path
6182 env["SRC_IMAGES"] = self.src_images
6184 env.update(_BuildInstanceHookEnv(
6185 name=self.op.instance_name,
6186 primary_node=self.op.pnode,
6187 secondary_nodes=self.secondaries,
6188 status=self.op.start,
6189 os_type=self.op.os_type,
6190 memory=self.be_full[constants.BE_MEMORY],
6191 vcpus=self.be_full[constants.BE_VCPUS],
6192 nics=_NICListToTuple(self, self.nics),
6193 disk_template=self.op.disk_template,
6194 disks=[(d["size"], d["mode"]) for d in self.disks],
6197 hypervisor_name=self.op.hypervisor,
# Hooks run on the master, the primary and the secondaries.
6200 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6204 def _ReadExportInfo(self):
6205 """Reads the export information from disk.
6207 It will override the opcode source node and path with the actual
6208 information, if these two were not specified before.
6210 @return: the export information
6213 assert self.op.mode == constants.INSTANCE_IMPORT
6215 src_node = self.op.src_node
6216 src_path = self.op.src_path
# No source node given: search every locked node's export list for the
# relative path.
6218 if src_node is None:
6219 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6220 exp_list = self.rpc.call_export_list(locked_nodes)
# NOTE(review): the found-flag initialization, the `continue`, the
# `break` after a match and the `if not found:` guard are elided from
# this listing.
6222 for node in exp_list:
6223 if exp_list[node].fail_msg:
6225 if src_path in exp_list[node].payload:
6227 self.op.src_node = src_node = node
6228 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6232 raise errors.OpPrereqError("No export found for relative path %s" %
6233 src_path, errors.ECODE_INVAL)
6235 _CheckNodeOnline(self, src_node)
6236 result = self.rpc.call_export_info(src_node, src_path)
6237 result.Raise("No export or invalid export found in dir %s" % src_path)
6239 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6240 if not export_info.has_section(constants.INISECT_EXP):
6241 raise errors.ProgrammerError("Corrupted export config",
6242 errors.ECODE_ENVIRON)
# Refuse exports written by a different export format version.
6244 ei_version = export_info.get(constants.INISECT_EXP, "version")
6245 if (int(ei_version) != constants.EXPORT_VERSION):
6246 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6247 (ei_version, constants.EXPORT_VERSION),
6248 errors.ECODE_ENVIRON)
6251 def _ReadExportParams(self, einfo):
6252 """Use export parameters as defaults.
6254 In case the opcode doesn't specify (as in override) some instance
6255 parameters, then try to use them from the export information, if
# Opcode values always win; the export only fills in what is missing.
6259 if self.op.disk_template is None:
6260 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6261 self.op.disk_template = einfo.get(constants.INISECT_INS,
6264 raise errors.OpPrereqError("No disk template specified and the export"
6265 " is missing the disk_template information",
6268 if not self.op.disks:
6269 if einfo.has_option(constants.INISECT_INS, "disk_count"):
# NOTE(review): the `disks = []` initialization and several `else:`
# lines of this method are elided from this listing.
6271 # TODO: import the disk iv_name too
6272 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6273 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6274 disks.append({"size": disk_sz})
6275 self.op.disks = disks
6277 raise errors.OpPrereqError("No disk info specified and the export"
6278 " is missing the disk information",
6281 if (not self.op.nics and
6282 einfo.has_option(constants.INISECT_INS, "nic_count")):
6284 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6286 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6287 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6292 if (self.op.hypervisor is None and
6293 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6294 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6295 if einfo.has_section(constants.INISECT_HYP):
6296 # use the export parameters but do not override the ones
6297 # specified by the user
6298 for name, value in einfo.items(constants.INISECT_HYP):
6299 if name not in self.op.hvparams:
6300 self.op.hvparams[name] = value
6302 if einfo.has_section(constants.INISECT_BEP):
6303 # use the parameters, without overriding
6304 for name, value in einfo.items(constants.INISECT_BEP):
6305 if name not in self.op.beparams:
6306 self.op.beparams[name] = value
6308 # try to read the parameters old style, from the main section
6309 for name in constants.BES_PARAMETERS:
6310 if (name not in self.op.beparams and
6311 einfo.has_option(constants.INISECT_INS, name)):
6312 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6314 def CheckPrereq(self):
6315 """Check prerequisites.
6318 if self.op.mode == constants.INSTANCE_IMPORT:
6319 export_info = self._ReadExportInfo()
6320 self._ReadExportParams(export_info)
6322 _CheckDiskTemplate(self.op.disk_template)
6324 if (not self.cfg.GetVGName() and
6325 self.op.disk_template not in constants.DTS_NOT_LVM):
6326 raise errors.OpPrereqError("Cluster does not support lvm-based"
6327 " instances", errors.ECODE_STATE)
6329 if self.op.hypervisor is None:
6330 self.op.hypervisor = self.cfg.GetHypervisorType()
6332 cluster = self.cfg.GetClusterInfo()
6333 enabled_hvs = cluster.enabled_hypervisors
6334 if self.op.hypervisor not in enabled_hvs:
6335 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6336 " cluster (%s)" % (self.op.hypervisor,
6337 ",".join(enabled_hvs)),
6340 # check hypervisor parameter syntax (locally)
6341 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6342 filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
6344 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6345 hv_type.CheckParameterSyntax(filled_hvp)
6346 self.hv_full = filled_hvp
6347 # check that we don't specify global parameters on an instance
6348 _CheckGlobalHvParams(self.op.hvparams)
6350 # fill and remember the beparams dict
6351 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6352 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6357 for idx, nic in enumerate(self.op.nics):
6358 nic_mode_req = nic.get("mode", None)
6359 nic_mode = nic_mode_req
6360 if nic_mode is None:
6361 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6363 # in routed mode, for the first nic, the default ip is 'auto'
6364 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6365 default_ip_mode = constants.VALUE_AUTO
6367 default_ip_mode = constants.VALUE_NONE
6369 # ip validity checks
6370 ip = nic.get("ip", default_ip_mode)
6371 if ip is None or ip.lower() == constants.VALUE_NONE:
6373 elif ip.lower() == constants.VALUE_AUTO:
6374 if not self.op.name_check:
6375 raise errors.OpPrereqError("IP address set to auto but name checks"
6376 " have been skipped. Aborting.",
6378 nic_ip = self.hostname1.ip
6380 if not utils.IsValidIP(ip):
6381 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6382 " like a valid IP" % ip,
6386 # TODO: check the ip address for uniqueness
6387 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6388 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6391 # MAC address verification
6392 mac = nic.get("mac", constants.VALUE_AUTO)
6393 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6394 mac = utils.NormalizeAndValidateMac(mac)
6397 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6398 except errors.ReservationError:
6399 raise errors.OpPrereqError("MAC address %s already in use"
6400 " in cluster" % mac,
6401 errors.ECODE_NOTUNIQUE)
6403 # bridge verification
6404 bridge = nic.get("bridge", None)
6405 link = nic.get("link", None)
6407 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6408 " at the same time", errors.ECODE_INVAL)
6409 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6410 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6417 nicparams[constants.NIC_MODE] = nic_mode_req
6419 nicparams[constants.NIC_LINK] = link
6421 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6423 objects.NIC.CheckParameterSyntax(check_params)
6424 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6426 # disk checks/pre-build
6428 for disk in self.op.disks:
6429 mode = disk.get("mode", constants.DISK_RDWR)
6430 if mode not in constants.DISK_ACCESS_SET:
6431 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6432 mode, errors.ECODE_INVAL)
6433 size = disk.get("size", None)
6435 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6438 except (TypeError, ValueError):
6439 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6441 new_disk = {"size": size, "mode": mode}
6443 new_disk["adopt"] = disk["adopt"]
6444 self.disks.append(new_disk)
6446 if self.op.mode == constants.INSTANCE_IMPORT:
6448 # Check that the new instance doesn't have less disks than the export
6449 instance_disks = len(self.disks)
6450 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6451 if instance_disks < export_disks:
6452 raise errors.OpPrereqError("Not enough disks to import."
6453 " (instance: %d, export: %d)" %
6454 (instance_disks, export_disks),
6457 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6459 for idx in range(export_disks):
6460 option = 'disk%d_dump' % idx
6461 if export_info.has_option(constants.INISECT_INS, option):
6462 # FIXME: are the old os-es, disk sizes, etc. useful?
6463 export_name = export_info.get(constants.INISECT_INS, option)
6464 image = utils.PathJoin(self.op.src_path, export_name)
6465 disk_images.append(image)
6467 disk_images.append(False)
6469 self.src_images = disk_images
6471 old_name = export_info.get(constants.INISECT_INS, 'name')
6473 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6474 except (TypeError, ValueError), err:
6475 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6476 " an integer: %s" % str(err),
6478 if self.op.instance_name == old_name:
6479 for idx, nic in enumerate(self.nics):
6480 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6481 nic_mac_ini = 'nic%d_mac' % idx
6482 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6484 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6486 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6487 if self.op.ip_check:
6488 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6489 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6490 (self.check_ip, self.op.instance_name),
6491 errors.ECODE_NOTUNIQUE)
6493 #### mac address generation
6494 # By generating here the mac address both the allocator and the hooks get
6495 # the real final mac address rather than the 'auto' or 'generate' value.
6496 # There is a race condition between the generation and the instance object
6497 # creation, which means that we know the mac is valid now, but we're not
6498 # sure it will be when we actually add the instance. If things go bad
6499 # adding the instance will abort because of a duplicate mac, and the
6500 # creation job will fail.
6501 for nic in self.nics:
6502 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6503 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6507 if self.op.iallocator is not None:
6508 self._RunAllocator()
6510 #### node related checks
6512 # check primary node
6513 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6514 assert self.pnode is not None, \
6515 "Cannot retrieve locked node %s" % self.op.pnode
6517 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6518 pnode.name, errors.ECODE_STATE)
6520 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6521 pnode.name, errors.ECODE_STATE)
6523 self.secondaries = []
6525 # mirror node verification
6526 if self.op.disk_template in constants.DTS_NET_MIRROR:
6527 if self.op.snode is None:
6528 raise errors.OpPrereqError("The networked disk templates need"
6529 " a mirror node", errors.ECODE_INVAL)
6530 if self.op.snode == pnode.name:
6531 raise errors.OpPrereqError("The secondary node cannot be the"
6532 " primary node.", errors.ECODE_INVAL)
6533 _CheckNodeOnline(self, self.op.snode)
6534 _CheckNodeNotDrained(self, self.op.snode)
6535 self.secondaries.append(self.op.snode)
6537 nodenames = [pnode.name] + self.secondaries
6539 req_size = _ComputeDiskSize(self.op.disk_template,
6542 # Check lv size requirements, if not adopting
6543 if req_size is not None and not self.adopt_disks:
6544 _CheckNodesFreeDisk(self, nodenames, req_size)
6546 if self.adopt_disks: # instead, we must check the adoption data
6547 all_lvs = set([i["adopt"] for i in self.disks])
6548 if len(all_lvs) != len(self.disks):
6549 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6551 for lv_name in all_lvs:
6553 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6554 except errors.ReservationError:
6555 raise errors.OpPrereqError("LV named %s used by another instance" %
6556 lv_name, errors.ECODE_NOTUNIQUE)
6558 node_lvs = self.rpc.call_lv_list([pnode.name],
6559 self.cfg.GetVGName())[pnode.name]
6560 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6561 node_lvs = node_lvs.payload
6562 delta = all_lvs.difference(node_lvs.keys())
6564 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6565 utils.CommaJoin(delta),
6567 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6569 raise errors.OpPrereqError("Online logical volumes found, cannot"
6570 " adopt: %s" % utils.CommaJoin(online_lvs),
6572 # update the size of disk based on what is found
6573 for dsk in self.disks:
6574 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6576 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6578 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6580 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6582 # memory check on primary node
6584 _CheckNodeFreeMemory(self, self.pnode.name,
6585 "creating instance %s" % self.op.instance_name,
6586 self.be_full[constants.BE_MEMORY],
6589 self.dry_run_result = list(nodenames)
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    Builds the disk set, constructs the Instance object, adds it to the
    cluster configuration, optionally waits for disk sync, runs the OS
    create/import scripts and finally starts the instance.

    @param feedback_fn: callable used to report progress to the caller

    """
    # NOTE(review): several source lines appear elided from this copy by
    # extraction (try/except scaffolding, call-argument continuations,
    # else branches).  Comments below annotate only visible statements;
    # verify against upstream before relying on this listing.
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # some hypervisors need a TCP port reserved (e.g. for a console)
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                      string_file_storage_dir, instance)

    # NOTE(review): argument list of this call is truncated in this copy
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.op.file_driver,

    # NOTE(review): keyword list of this constructor is truncated here
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      # NOTE(review): "adoped" typo in this runtime message is preserved
      result.Raise("Failed to rename adoped LVs")

    feedback_fn("* creating instance disks...")
    _CreateDisks(self, iobj)
    except errors.OpExecError:
      # disk creation failed on some node: undo and give back DRBD minors
      self.LogWarning("Device creation failed, reverting...")
      _RemoveDisks(self, iobj)
      self.cfg.ReleaseDRBDMinors(instance)

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # keep the source node locked: the import RPC below still needs it
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
      # degraded disks: roll the whole creation back
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        # FIXME: pass debug option from opcode to backend
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         self.op.debug_level)
        msg = import_result.fail_msg
        # import failures are only warned about, not fatal
        self.LogWarning("Error while importing the disk images for instance"
                        " %s on node %s: %s" % (instance, pnode_name, msg))
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"

    # mark the instance as up and persist it before starting it
    iobj.admin_up = True
    self.cfg.Update(iobj, feedback_fn)
    logging.info("Starting instance %s on node %s", instance, pnode_name)
    feedback_fn("* starting instance...")
    result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
    result.Raise("Could not start instance")

    return list(iobj.all_nodes)
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  # NOTE(review): upstream likely also sets REQ_BGL here -- line appears
  # elided from this copy, verify against the original file.
  _OP_REQP = ["instance_name"]

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    @return: the ssh command line to run on the master node in order to
        attach to the instance's console

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build the ssh command line that runs the console command on the node
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The actual work is delegated to a L{TLReplaceDisks} tasklet; this LU
  only handles argument defaults, lock declaration and hook environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]

  def CheckArguments(self):
    # fill in defaults for optional opcode attributes
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    # NOTE(review): the last argument of this call is elided in this copy
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      # the allocator may pick any node, so lock them all
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

      # NOTE(review): an else: branch appears elided before these two lines
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    # NOTE(review): the surrounding dict/list literal delimiters appear
    # elided in this copy, as does the final return statement
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
    env.update(_BuildInstanceHookEnvByObject(self, instance))
      self.cfg.GetMasterNode(),
      instance.primary_node,
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  Builds one L{TLReplaceDisks} tasklet (secondary-change mode) per
  instance that has its secondary on the given node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    # fill in defaults for optional opcode attributes
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    # NOTE(review): the last argument of this call is elided in this copy
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

      # NOTE(review): an else: branch appears elided before this raise
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    # NOTE(review): initializations of names/tasklets appear elided here
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [],
                                True, self.op.early_release)
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    # NOTE(review): the env dict literal delimiters appear elided here
      "NODE_NAME": self.op.node_name,

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)
6956 class TLReplaceDisks(Tasklet):
6957 """Replaces disks for an instance.
6959 Note: Locking is not within the scope of this class.
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    Stores the replacement parameters and pre-declares the runtime
    attributes filled in later by CheckPrereq/_CheckPrereq2.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    # NOTE(review): assignments of self.mode and self.disks appear elided
    # from this copy -- verify against upstream.
    self.instance_name = instance_name
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data (populated during CheckPrereq/_CheckPrereq2)
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
6987 def CheckArguments(mode, remote_node, iallocator):
6988 """Helper function for users of this class.
6991 # check for valid parameter combination
6992 if mode == constants.REPLACE_DISK_CHG:
6993 if remote_node is None and iallocator is None:
6994 raise errors.OpPrereqError("When changing the secondary either an"
6995 " iallocator script must be used or the"
6996 " new node given", errors.ECODE_INVAL)
6998 if remote_node is not None and iallocator is not None:
6999 raise errors.OpPrereqError("Give either the iallocator or the new"
7000 " secondary, not both", errors.ECODE_INVAL)
7002 elif remote_node is not None or iallocator is not None:
7003 # Not replacing the secondary
7004 raise errors.OpPrereqError("The iallocator and new node options can"
7005 " only be used when changing the"
7006 " secondary node", errors.ECODE_INVAL)
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    Runs the named iallocator script in relocation mode and returns the
    single node name it selects.

    """
    # NOTE(review): some call-argument and raise-continuation lines appear
    # elided from this copy -- verify against upstream.
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    # allocator failure is a prereq error, not an internal one
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),

    # a relocation must return exactly the required number of nodes
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 len(ial.result), ial.required_nodes),

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
  def _FindFaultyDisks(self, node_name):
    # Thin wrapper around the module-level helper, bound to this tasklet's
    # instance; NOTE(review): the call's closing argument line is elided
    # from this copy.
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, is DRBD8-based and
    has exactly one secondary; the second part of the checks is done in
    L{_CheckPrereq2}, possibly delayed until Exec.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    # only DRBD8 disks can be replaced with this tasklet
    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    # NOTE(review): the closing line of this raise appears elided here
    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),

    if not self.delay_iallocator:
      self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    # NOTE(review): several else-branches and statement continuations
    # appear elided from this copy -- verify against upstream.
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # choose the new secondary: either the explicit node or via allocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",

    # explicit disk lists only make sense for primary/secondary modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",

    if self.mode == constants.REPLACE_DISK_AUTO:
      # auto mode: find faulty disks and pick the node to fix
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,

        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %

    # If not specified all disks should be replaced
      self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    # NOTE(review): try/finally and some guard lines appear elided from
    # this copy -- verify against upstream.
    if self.delay_iallocator:
      # second half of the prereq checks was postponed until now
      self._CheckPrereq2()

      feedback_fn("No disks need replacement")

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
      _StartInstanceDisks(self.lu, self.instance, True)

      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

      # Deactivate the instance disks if we're replacing them on a
        _SafeShutdownInstanceDisks(self.lu, self.instance)
  def _CheckVolumeGroup(self, nodes):
    # Verify the cluster's volume group exists on every given node;
    # NOTE(review): the per-node loop header appears elided from this copy.
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
      raise errors.OpExecError("Can't list volume groups on the nodes")

      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    # Only the disks selected for replacement (self.disks) are checked;
    # NOTE(review): the continue/inner-loop lines appear elided here.
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:

        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          # empty payload with no error message also means "not found"
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    # Abort the replacement if any selected disk is degraded on node_name;
    # NOTE(review): some continuation lines appear elided from this copy.
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))
  def _CreateNewStorage(self, node_name):
    # Create fresh data+meta LV pairs on node_name for each selected disk
    # and return the iv_name -> (dev, old_lvs, new_lvs) mapping;
    # NOTE(review): the iv_names initialization and the return statement
    # appear elided from this copy.
    vgname = self.cfg.GetVGName()

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      # data LV mirrors the old size; meta LV is a fixed 128MB
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)
  def _CheckDevices(self, node_name, iv_names):
    # Verify each replaced DRBD device exists and is not degraded;
    # NOTE(review): some guard/continuation lines appear elided here.
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        # empty payload with no error message also means "not found"
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
  def _RemoveOldStorage(self, node_name, iv_names):
    # Best-effort removal of the replaced LVs: failures only warn;
    # NOTE(review): the inner loop header and if-guard appear elided here.
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
7327 def _ReleaseNodeLock(self, node_name):
7328 """Releases the lock for a given node."""
7329 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    # NOTE(review): several continuation/guard lines (steps_total, cstep,
    # if msg:, argument closers) appear elided from this copy -- verify
    # against upstream before relying on this listing.
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # the new LVs now carry the old logical ids; reflect that in config
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,

      msg = result.fail_msg
        # attach failed: try to remove the LVs we just renamed in
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
# NOTE(review): the residual per-line numbers below are not contiguous, so this
# method has lost interior lines (blanks, some statements, and continuation
# lines of wrapped calls) during extraction.  Code is kept byte-identical;
# only comments were added.  Verify against the upstream file before editing.
7466 def _ExecDrbd8Secondary(self, feedback_fn):
7467 """Replace the secondary node for DRBD 8.
7469 The algorithm for replace is quite complicated:
7470 - for all disks of the instance:
7471 - create new LVs on the new node with same names
7472 - shutdown the drbd device on the old secondary
7473 - disconnect the drbd network on the primary
7474 - create the drbd device on the new secondary
7475 - network attach the drbd on the primary, using an artifice:
7476 the drbd code for Attach() will connect to the network if it
7477 finds a device which is connected to the good local disks but
7479 - wait for sync across all devices
7480 - remove all disks from the old secondary
7482 Failures are not very well handled.
# Step 1: only the primary is checked here -- the old secondary is being
# replaced, so its state does not matter.
7487 # Step: check device activation
7488 self.lu.LogStep(1, steps_total, "Check device existence")
7489 self._CheckDisksExistence([self.instance.primary_node])
7490 self._CheckVolumeGroup([self.instance.primary_node])
7492 # Step: check other node consistency
7493 self.lu.LogStep(2, steps_total, "Check peer consistency")
7494 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7496 # Step: create new storage
7497 self.lu.LogStep(3, steps_total, "Allocate new storage")
7498 for idx, dev in enumerate(self.instance.disks):
7499 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7500 (self.new_node, idx))
7501 # we pass force_create=True to force LVM creation
7502 for new_lv in dev.children:
7503 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7504 _GetInstanceInfoText(self.instance), False)
7506 # Step 4: dbrd minors and drbd setups changes
7507 # after this, we must manually remove the drbd minors on both the
7508 # error and the success paths
7509 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7510 minors = self.cfg.AllocateDRBDMinor([self.new_node
7511 for dev in self.instance.disks],
7513 logging.debug("Allocated minors %r", minors)
7516 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7517 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7518 (self.new_node, idx))
7519 # create new devices on new_node; note that we create two IDs:
7520 # one without port, so the drbd will be activated without
7521 # networking information on the new node at this stage, and one
7522 # with network, for the latter activation in step 4
7523 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7524 if self.instance.primary_node == o_node1:
# NOTE(review): the assignments of p_minor (from o_minor1/o_minor2 depending
# on which end is the primary) are in the missing lines here -- confirm
# against upstream.
7527 assert self.instance.primary_node == o_node2, "Three-node instance?"
7530 new_alone_id = (self.instance.primary_node, self.new_node, None,
7531 p_minor, new_minor, o_secret)
7532 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7533 p_minor, new_minor, o_secret)
7535 iv_names[idx] = (dev, dev.children, new_net_id)
7536 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7538 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7539 logical_id=new_alone_id,
7540 children=dev.children,
# On a creation failure the reserved DRBD minors are released so they can
# be reused (the re-raise is in the missing lines).
7543 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7544 _GetInstanceInfoText(self.instance), False)
7545 except errors.GenericError:
7546 self.cfg.ReleaseDRBDMinors(self.instance.name)
7549 # We have new devices, shutdown the drbd on the old secondary
7550 for idx, dev in enumerate(self.instance.disks):
7551 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7552 self.cfg.SetDiskID(dev, self.target_node)
7553 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
# A shutdown failure on the old secondary is only warned about, not fatal.
7555 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7556 "node: %s" % (idx, msg),
7557 hint=("Please cleanup this device manually as"
7558 " soon as possible"))
7560 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7561 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7562 self.node_secondary_ip,
7563 self.instance.disks)\
7564 [self.instance.primary_node]
7566 msg = result.fail_msg
7568 # detaches didn't succeed (unlikely)
7569 self.cfg.ReleaseDRBDMinors(self.instance.name)
7570 raise errors.OpExecError("Can't detach the disks from the network on"
7571 " old node: %s" % (msg,))
7573 # if we managed to detach at least one, we update all the disks of
7574 # the instance to point to the new secondary
7575 self.lu.LogInfo("Updating instance configuration")
7576 for dev, _, new_logical_id in iv_names.itervalues():
7577 dev.logical_id = new_logical_id
7578 self.cfg.SetDiskID(dev, self.instance.primary_node)
7580 self.cfg.Update(self.instance, feedback_fn)
7582 # and now perform the drbd attach
7583 self.lu.LogInfo("Attaching primary drbds to new secondary"
7584 " (standalone => connected)")
7585 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7587 self.node_secondary_ip,
7588 self.instance.disks,
# Attach errors are non-fatal: the operator is pointed at gnt-instance info.
7591 for to_node, to_result in result.items():
7592 msg = to_result.fail_msg
7594 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7596 hint=("please do a gnt-instance info to see the"
7597 " status of disks"))
# With early_release the old storage is removed and all node locks dropped
# *before* waiting for sync; afterwards only WaitForSync RPCs are allowed.
7599 if self.early_release:
7600 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7602 self._RemoveOldStorage(self.target_node, iv_names)
7603 # WARNING: we release all node locks here, do not do other RPCs
7604 # than WaitForSync to the primary node
7605 self._ReleaseNodeLock([self.instance.primary_node,
7610 # This can fail as the old devices are degraded and _WaitForSync
7611 # does a combined result over all disks, so we don't check its return value
7612 self.lu.LogStep(cstep, steps_total, "Sync devices")
7614 _WaitForSync(self.lu, self.instance)
7616 # Check all devices manually
7617 self._CheckDevices(self.instance.primary_node, iv_names)
7619 # Step: remove old storage
7620 if not self.early_release:
7621 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7622 self._RemoveOldStorage(self.target_node, iv_names)
# NOTE(review): residual line numbers are non-contiguous -- interior lines of
# this class (docstring tail, REQ_BGL, parts of bodies) are missing from this
# extract.  Code kept byte-identical; comments only added.
7625 class LURepairNodeStorage(NoHooksLU):
7626 """Repairs the volume group on a node.
7629 _OP_REQP = ["node_name"]
# Expand the user-supplied node name to the full cluster node name early.
7632 def CheckArguments(self):
7633 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7635 def ExpandNames(self):
7636 self.needed_locks = {
7637 locking.LEVEL_NODE: [self.op.node_name],
# Raises OpPrereqError for faulty disks unless ignore_consistency is set,
# in which case the error is downgraded to a warning.
7640 def _CheckFaultyDisks(self, instance, node_name):
7641 """Ensure faulty disks abort the opcode or at least warn."""
7643 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7645 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7646 " node '%s'" % (instance.name, node_name),
7648 except errors.OpPrereqError, err:
7649 if self.op.ignore_consistency:
7650 self.proc.LogWarning(str(err.args[0]))
7654 def CheckPrereq(self):
7655 """Check prerequisites.
7658 storage_type = self.op.storage_type
# The storage type must support the "fix consistency" operation.
7660 if (constants.SO_FIX_CONSISTENCY not in
7661 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7662 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7663 " repaired" % storage_type,
7666 # Check whether any instance on this node has faulty disks
7667 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7668 if not inst.admin_up:
7670 check_nodes = set(inst.all_nodes)
7671 check_nodes.discard(self.op.node_name)
7672 for inst_node_name in check_nodes:
7673 self._CheckFaultyDisks(inst, inst_node_name)
7675 def Exec(self, feedback_fn):
7676 feedback_fn("Repairing storage unit '%s' on %s ..." %
7677 (self.op.name, self.op.node_name))
7679 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7680 result = self.rpc.call_storage_execute(self.op.node_name,
7681 self.op.storage_type, st_args,
7683 constants.SO_FIX_CONSISTENCY)
7684 result.Raise("Failed to repair storage unit '%s' on %s" %
7685 (self.op.name, self.op.node_name))
# NOTE(review): residual line numbers are non-contiguous -- several interior
# lines of this class are missing from this extract.  Code kept byte-identical.
7688 class LUNodeEvacuationStrategy(NoHooksLU):
7689 """Computes the node evacuation strategy.
7692 _OP_REQP = ["nodes"]
# remote_node and iallocator are optional and mutually exclusive.
7695 def CheckArguments(self):
7696 if not hasattr(self.op, "remote_node"):
7697 self.op.remote_node = None
7698 if not hasattr(self.op, "iallocator"):
7699 self.op.iallocator = None
7700 if self.op.remote_node is not None and self.op.iallocator is not None:
7701 raise errors.OpPrereqError("Give either the iallocator or the new"
7702 " secondary, not both", errors.ECODE_INVAL)
7704 def ExpandNames(self):
7705 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7706 self.needed_locks = locks = {}
# Without an explicit target node, any node could be chosen -> lock all.
7707 if self.op.remote_node is None:
7708 locks[locking.LEVEL_NODE] = locking.ALL_SET
7710 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7711 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7713 def CheckPrereq(self):
7716 def Exec(self, feedback_fn):
# With an explicit remote node: every secondary instance on the evacuated
# nodes is paired with it, unless it already is the instance's primary.
7717 if self.op.remote_node is not None:
7719 for node in self.op.nodes:
7720 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7723 if i.primary_node == self.op.remote_node:
7724 raise errors.OpPrereqError("Node %s is the primary node of"
7725 " instance %s, cannot use it as"
7727 (self.op.remote_node, i.name),
7729 result.append([i.name, self.op.remote_node])
# Otherwise ask the instance allocator for a multi-evacuation solution.
7731 ial = IAllocator(self.cfg, self.rpc,
7732 mode=constants.IALLOCATOR_MODE_MEVAC,
7733 evac_nodes=self.op.nodes)
7734 ial.Run(self.op.iallocator, validate=True)
7736 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
# NOTE(review): residual line numbers are non-contiguous -- interior lines of
# this class are missing from this extract.  Code kept byte-identical.
7742 class LUGrowDisk(LogicalUnit):
7743 """Grow a disk of an instance.
7747 HTYPE = constants.HTYPE_INSTANCE
7748 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7751 def ExpandNames(self):
7752 self._ExpandAndLockInstance()
# Node locks are recalculated later from the instance's node list.
7753 self.needed_locks[locking.LEVEL_NODE] = []
7754 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7756 def DeclareLocks(self, level):
7757 if level == locking.LEVEL_NODE:
7758 self._LockInstancesNodes()
7760 def BuildHooksEnv(self):
7763 This runs on the master, the primary and all the secondaries.
7767 "DISK": self.op.disk,
7768 "AMOUNT": self.op.amount,
7770 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7771 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7774 def CheckPrereq(self):
7775 """Check prerequisites.
7777 This checks that the instance is in the cluster.
7780 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7781 assert instance is not None, \
7782 "Cannot retrieve locked instance %s" % self.op.instance_name
7783 nodenames = list(instance.all_nodes)
7784 for node in nodenames:
7785 _CheckNodeOnline(self, node)
7788 self.instance = instance
7790 if instance.disk_template not in constants.DTS_GROWABLE:
7791 raise errors.OpPrereqError("Instance's disk layout does not support"
7792 " growing.", errors.ECODE_INVAL)
7794 self.disk = instance.FindDisk(self.op.disk)
# File-based storage is exempt from the free-disk check (see TODO below).
7796 if instance.disk_template != constants.DT_FILE:
7797 # TODO: check the free disk space for file, when that feature will be
7799 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7801 def Exec(self, feedback_fn):
7802 """Execute disk grow.
7805 instance = self.instance
# The grow RPC is issued on every node holding a copy of the disk.
7807 for node in instance.all_nodes:
7808 self.cfg.SetDiskID(disk, node)
7809 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7810 result.Raise("Grow request failed to node %s" % node)
7812 # TODO: Rewrite code to work properly
7813 # DRBD goes into sync mode for a short amount of time after executing the
7814 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7815 # calling "resize" in sync mode fails. Sleeping for a short amount of
7816 # time is a work-around.
7819 disk.RecordGrow(self.op.amount)
7820 self.cfg.Update(instance, feedback_fn)
7821 if self.op.wait_for_sync:
7822 disk_abort = not _WaitForSync(self, instance)
# A failed sync only warns; the grow itself already happened.
7824 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7825 " status.\nPlease check the instance.")
# NOTE(review): residual line numbers are non-contiguous -- interior lines of
# this class are missing from this extract.  Code kept byte-identical.
7828 class LUQueryInstanceData(NoHooksLU):
7829 """Query runtime instance data.
7832 _OP_REQP = ["instances", "static"]
7835 def ExpandNames(self):
7836 self.needed_locks = {}
# All locks are taken in shared mode: this LU is read-only.
7837 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7839 if not isinstance(self.op.instances, list):
7840 raise errors.OpPrereqError("Invalid argument type 'instances'",
# Empty instance list means "all instances" (wanted_names stays None).
7843 if self.op.instances:
7844 self.wanted_names = []
7845 for name in self.op.instances:
7846 full_name = _ExpandInstanceName(self.cfg, name)
7847 self.wanted_names.append(full_name)
7848 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7850 self.wanted_names = None
7851 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7853 self.needed_locks[locking.LEVEL_NODE] = []
7854 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7856 def DeclareLocks(self, level):
7857 if level == locking.LEVEL_NODE:
7858 self._LockInstancesNodes()
7860 def CheckPrereq(self):
7861 """Check prerequisites.
7863 This only checks the optional instance list against the existing names.
7866 if self.wanted_names is None:
7867 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7869 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7870 in self.wanted_names]
# Queries the live blockdev state via RPC unless static info was requested.
7873 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7874 """Returns the status of a block device
7877 if self.op.static or not node:
7880 self.cfg.SetDiskID(dev, node)
7882 result = self.rpc.call_blockdev_find(node, dev)
7886 result.Raise("Can't compute disk status for %s" % instance_name)
7888 status = result.payload
7892 return (status.dev_path, status.major, status.minor,
7893 status.sync_percent, status.estimated_time,
7894 status.is_degraded, status.ldisk_status)
# Recursively builds a dict describing a disk and its children.
7896 def _ComputeDiskStatus(self, instance, snode, dev):
7897 """Compute block device status.
7900 if dev.dev_type in constants.LDS_DRBD:
7901 # we change the snode then (otherwise we use the one passed in)
7902 if dev.logical_id[0] == instance.primary_node:
7903 snode = dev.logical_id[1]
7905 snode = dev.logical_id[0]
7907 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7909 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7912 dev_children = [self._ComputeDiskStatus(instance, snode, child)
7913 for child in dev.children]
7918 "iv_name": dev.iv_name,
7919 "dev_type": dev.dev_type,
7920 "logical_id": dev.logical_id,
7921 "physical_id": dev.physical_id,
7922 "pstatus": dev_pstatus,
7923 "sstatus": dev_sstatus,
7924 "children": dev_children,
7931 def Exec(self, feedback_fn):
7932 """Gather and return data"""
7935 cluster = self.cfg.GetClusterInfo()
7937 for instance in self.wanted_instances:
# Live run state is only fetched when static info was not requested.
7938 if not self.op.static:
7939 remote_info = self.rpc.call_instance_info(instance.primary_node,
7941 instance.hypervisor)
7942 remote_info.Raise("Error checking node %s" % instance.primary_node)
7943 remote_info = remote_info.payload
7944 if remote_info and "state" in remote_info:
7947 remote_state = "down"
7950 if instance.admin_up:
7953 config_state = "down"
7955 disks = [self._ComputeDiskStatus(instance, None, device)
7956 for device in instance.disks]
7959 "name": instance.name,
7960 "config_state": config_state,
7961 "run_state": remote_state,
7962 "pnode": instance.primary_node,
7963 "snodes": instance.secondary_nodes,
7965 # this happens to be the same format used for hooks
7966 "nics": _NICListToTuple(self, instance.nics),
7968 "hypervisor": instance.hypervisor,
7969 "network_port": instance.network_port,
7970 "hv_instance": instance.hvparams,
7971 "hv_actual": cluster.FillHV(instance, skip_globals=True),
7972 "be_instance": instance.beparams,
7973 "be_actual": cluster.FillBE(instance),
7974 "serial_no": instance.serial_no,
7975 "mtime": instance.mtime,
7976 "ctime": instance.ctime,
7977 "uuid": instance.uuid,
7980 result[instance.name] = idict
7985 class LUSetInstanceParams(LogicalUnit):
7986 """Modifies an instances's parameters.
7989 HPATH = "instance-modify"
7990 HTYPE = constants.HTYPE_INSTANCE
7991 _OP_REQP = ["instance_name"]
# NOTE(review): residual line numbers are non-contiguous -- interior lines of
# this method are missing from this extract.  Code kept byte-identical.
# Normalizes the opcode: fills in missing optional attributes, validates the
# per-disk and per-NIC modification lists, and rejects impossible combinations.
7994 def CheckArguments(self):
7995 if not hasattr(self.op, 'nics'):
7997 if not hasattr(self.op, 'disks'):
7999 if not hasattr(self.op, 'beparams'):
8000 self.op.beparams = {}
8001 if not hasattr(self.op, 'hvparams'):
8002 self.op.hvparams = {}
8003 if not hasattr(self.op, "disk_template"):
8004 self.op.disk_template = None
8005 if not hasattr(self.op, "remote_node"):
8006 self.op.remote_node = None
8007 if not hasattr(self.op, "os_name"):
8008 self.op.os_name = None
8009 if not hasattr(self.op, "force_variant"):
8010 self.op.force_variant = False
8011 self.op.force = getattr(self.op, "force", False)
# At least one kind of change must be requested.
8012 if not (self.op.nics or self.op.disks or self.op.disk_template or
8013 self.op.hvparams or self.op.beparams or self.op.os_name):
8014 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8016 if self.op.hvparams:
8017 _CheckGlobalHvParams(self.op.hvparams)
# Disk modifications: each entry is (op, dict) where op is DDM_ADD,
# DDM_REMOVE or an integer disk index.
8021 for disk_op, disk_dict in self.op.disks:
8022 if disk_op == constants.DDM_REMOVE:
8025 elif disk_op == constants.DDM_ADD:
8028 if not isinstance(disk_op, int):
8029 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8030 if not isinstance(disk_dict, dict):
8031 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8032 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8034 if disk_op == constants.DDM_ADD:
8035 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8036 if mode not in constants.DISK_ACCESS_SET:
8037 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8039 size = disk_dict.get('size', None)
8041 raise errors.OpPrereqError("Required disk parameter size missing",
8045 except (TypeError, ValueError), err:
8046 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8047 str(err), errors.ECODE_INVAL)
8048 disk_dict['size'] = size
8050 # modification of disk
8051 if 'size' in disk_dict:
8052 raise errors.OpPrereqError("Disk size change not possible, use"
8053 " grow-disk", errors.ECODE_INVAL)
8055 if disk_addremove > 1:
8056 raise errors.OpPrereqError("Only one disk add or remove operation"
8057 " supported at a time", errors.ECODE_INVAL)
8059 if self.op.disks and self.op.disk_template is not None:
8060 raise errors.OpPrereqError("Disk template conversion and other disk"
8061 " changes not supported at the same time",
8064 if self.op.disk_template:
8065 _CheckDiskTemplate(self.op.disk_template)
# Mirrored templates need a secondary node up-front.
8066 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8067 self.op.remote_node is None):
8068 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8069 " one requires specifying a secondary node",
# NIC modifications: same (op, dict) structure as disks above.
8074 for nic_op, nic_dict in self.op.nics:
8075 if nic_op == constants.DDM_REMOVE:
8078 elif nic_op == constants.DDM_ADD:
8081 if not isinstance(nic_op, int):
8082 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8083 if not isinstance(nic_dict, dict):
8084 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8085 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8087 # nic_dict should be a dict
8088 nic_ip = nic_dict.get('ip', None)
8089 if nic_ip is not None:
# The literal string "none" clears the IP; anything else must be valid.
8090 if nic_ip.lower() == constants.VALUE_NONE:
8091 nic_dict['ip'] = None
8093 if not utils.IsValidIP(nic_ip):
8094 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8097 nic_bridge = nic_dict.get('bridge', None)
8098 nic_link = nic_dict.get('link', None)
8099 if nic_bridge and nic_link:
8100 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8101 " at the same time", errors.ECODE_INVAL)
8102 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8103 nic_dict['bridge'] = None
8104 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8105 nic_dict['link'] = None
8107 if nic_op == constants.DDM_ADD:
8108 nic_mac = nic_dict.get('mac', None)
8110 nic_dict['mac'] = constants.VALUE_AUTO
8112 if 'mac' in nic_dict:
8113 nic_mac = nic_dict['mac']
8114 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8115 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8117 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8118 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8119 " modifying an existing nic",
8122 if nic_addremove > 1:
8123 raise errors.OpPrereqError("Only one NIC add or remove operation"
8124 " supported at a time", errors.ECODE_INVAL)
def ExpandNames(self):
  """Expand/lock the instance and prepare node-lock recalculation."""
  self._ExpandAndLockInstance()
  node_level = locking.LEVEL_NODE
  # Node locks are computed later (in DeclareLocks) from the instance's
  # own node list.
  self.needed_locks[node_level] = []
  self.recalculate_locks[node_level] = constants.LOCKS_REPLACE
def DeclareLocks(self, level):
  """Declare node locks; add the remote node for disk-template conversions."""
  if level != locking.LEVEL_NODE:
    return
  self._LockInstancesNodes()
  if self.op.disk_template and self.op.remote_node:
    remote = _ExpandNodeName(self.cfg, self.op.remote_node)
    self.op.remote_node = remote
    self.needed_locks[locking.LEVEL_NODE].append(remote)
# NOTE(review): residual line numbers are non-contiguous -- interior lines of
# this method are missing from this extract.  Code kept byte-identical.
# Builds the hooks environment reflecting the *requested* changes (memory,
# vcpus, and the post-modification NIC list).
8138 def BuildHooksEnv(self):
8141 This runs on the master, primary and secondaries.
8145 if constants.BE_MEMORY in self.be_new:
8146 args['memory'] = self.be_new[constants.BE_MEMORY]
8147 if constants.BE_VCPUS in self.be_new:
8148 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8149 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8150 # information at all.
# Build the NIC list with per-index overrides from the opcode applied.
8153 nic_override = dict(self.op.nics)
8154 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8155 for idx, nic in enumerate(self.instance.nics):
8156 if idx in nic_override:
8157 this_nic_override = nic_override[idx]
8159 this_nic_override = {}
8160 if 'ip' in this_nic_override:
8161 ip = this_nic_override['ip']
8164 if 'mac' in this_nic_override:
8165 mac = this_nic_override['mac']
8168 if idx in self.nic_pnew:
8169 nicparams = self.nic_pnew[idx]
8171 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8172 mode = nicparams[constants.NIC_MODE]
8173 link = nicparams[constants.NIC_LINK]
8174 args['nics'].append((ip, mac, mode, link))
# A pending NIC add is appended; a pending remove drops the last entry.
8175 if constants.DDM_ADD in nic_override:
8176 ip = nic_override[constants.DDM_ADD].get('ip', None)
8177 mac = nic_override[constants.DDM_ADD]['mac']
8178 nicparams = self.nic_pnew[constants.DDM_ADD]
8179 mode = nicparams[constants.NIC_MODE]
8180 link = nicparams[constants.NIC_LINK]
8181 args['nics'].append((ip, mac, mode, link))
8182 elif constants.DDM_REMOVE in nic_override:
8183 del args['nics'][-1]
8185 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8186 if self.op.disk_template:
8187 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8188 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# NOTE(review): residual line numbers are non-contiguous -- the try/except and
# else lines of the loop below are among the missing lines, as is (presumably)
# a @staticmethod decorator (the signature takes no self).  Code kept
# byte-identical; verify against upstream.
8192 def _GetUpdatedParams(old_params, update_dict,
8193 default_values, parameter_types):
8194 """Return the new params dict for the given params.
8196 @type old_params: dict
8197 @param old_params: old parameters
8198 @type update_dict: dict
8199 @param update_dict: dict containing new parameter values,
8200 or constants.VALUE_DEFAULT to reset the
8201 parameter to its default value
8202 @type default_values: dict
8203 @param default_values: default values for the filled parameters
8204 @type parameter_types: dict
8205 @param parameter_types: dict mapping target dict keys to types
8206 in constants.ENFORCEABLE_TYPES
8207 @rtype: (dict, dict)
8208 @return: (new_parameters, filled_parameters)
# Deep-copy so the caller's dict is never mutated.
8211 params_copy = copy.deepcopy(old_params)
8212 for key, val in update_dict.iteritems():
# VALUE_DEFAULT removes the key so the default shows through on filling.
8213 if val == constants.VALUE_DEFAULT:
8215 del params_copy[key]
8219 params_copy[key] = val
8220 utils.ForceDictType(params_copy, parameter_types)
8221 params_filled = objects.FillDict(default_values, params_copy)
8222 return (params_copy, params_filled)
# NOTE(review): residual line numbers are non-contiguous -- interior lines of
# this method are missing from this extract.  Code kept byte-identical.
8224 def CheckPrereq(self):
8225 """Check prerequisites.
8227 This only checks the instance list against the existing names.
8230 self.force = self.op.force
8232 # checking the new params on the primary/secondary nodes
8234 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8235 cluster = self.cluster = self.cfg.GetClusterInfo()
8236 assert self.instance is not None, \
8237 "Cannot retrieve locked instance %s" % self.op.instance_name
8238 pnode = instance.primary_node
8239 nodelist = list(instance.all_nodes)
# Disk template conversion checks: must be a supported conversion pair,
# target node usable, enough free disk, and the instance must be down.
8241 if self.op.disk_template:
8242 if instance.disk_template == self.op.disk_template:
8243 raise errors.OpPrereqError("Instance already has disk template %s" %
8244 instance.disk_template, errors.ECODE_INVAL)
8246 if (instance.disk_template,
8247 self.op.disk_template) not in self._DISK_CONVERSIONS:
8248 raise errors.OpPrereqError("Unsupported disk template conversion from"
8249 " %s to %s" % (instance.disk_template,
8250 self.op.disk_template),
8252 if self.op.disk_template in constants.DTS_NET_MIRROR:
8253 _CheckNodeOnline(self, self.op.remote_node)
8254 _CheckNodeNotDrained(self, self.op.remote_node)
8255 disks = [{"size": d.size} for d in instance.disks]
8256 required = _ComputeDiskSize(self.op.disk_template, disks)
8257 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8258 _CheckInstanceDown(self, instance, "cannot change disk template")
8260 # hvparams processing
8261 if self.op.hvparams:
8262 i_hvdict, hv_new = self._GetUpdatedParams(
8263 instance.hvparams, self.op.hvparams,
8264 cluster.hvparams[instance.hypervisor],
8265 constants.HVS_PARAMETER_TYPES)
8267 hypervisor.GetHypervisor(
8268 instance.hypervisor).CheckParameterSyntax(hv_new)
8269 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8270 self.hv_new = hv_new # the new actual values
8271 self.hv_inst = i_hvdict # the new dict (without defaults)
8273 self.hv_new = self.hv_inst = {}
8275 # beparams processing
8276 if self.op.beparams:
8277 i_bedict, be_new = self._GetUpdatedParams(
8278 instance.beparams, self.op.beparams,
8279 cluster.beparams[constants.PP_DEFAULT],
8280 constants.BES_PARAMETER_TYPES)
8281 self.be_new = be_new # the new actual values
8282 self.be_inst = i_bedict # the new dict (without defaults)
8284 self.be_new = self.be_inst = {}
# Memory checks: when growing memory (and not forced), verify the primary
# (and, with auto_balance, the secondaries) can accommodate the new size.
8288 if constants.BE_MEMORY in self.op.beparams and not self.force:
8289 mem_check_list = [pnode]
8290 if be_new[constants.BE_AUTO_BALANCE]:
8291 # either we changed auto_balance to yes or it was from before
8292 mem_check_list.extend(instance.secondary_nodes)
8293 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8294 instance.hypervisor)
8295 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8296 instance.hypervisor)
8297 pninfo = nodeinfo[pnode]
8298 msg = pninfo.fail_msg
8300 # Assume the primary node is unreachable and go ahead
8301 self.warn.append("Can't get info from primary node %s: %s" %
8303 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8304 self.warn.append("Node data from primary node %s doesn't contain"
8305 " free memory information" % pnode)
8306 elif instance_info.fail_msg:
8307 self.warn.append("Can't get instance runtime information: %s" %
8308 instance_info.fail_msg)
8310 if instance_info.payload:
8311 current_mem = int(instance_info.payload['memory'])
8313 # Assume instance not running
8314 # (there is a slight race condition here, but it's not very probable,
8315 # and we have no other way to check)
8317 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8318 pninfo.payload['memory_free'])
8320 raise errors.OpPrereqError("This change will prevent the instance"
8321 " from starting, due to %d MB of memory"
8322 " missing on its primary node" % miss_mem,
8325 if be_new[constants.BE_AUTO_BALANCE]:
8326 for node, nres in nodeinfo.items():
8327 if node not in instance.secondary_nodes:
8331 self.warn.append("Can't get info from secondary node %s: %s" %
8333 elif not isinstance(nres.payload.get('memory_free', None), int):
8334 self.warn.append("Secondary node %s didn't return free"
8335 " memory information" % node)
8336 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8337 self.warn.append("Not enough memory to failover instance to"
8338 " secondary node %s" % node)
# NIC modifications: validate indices, compute the new (partial and filled)
# nicparams, and check bridges / routed-mode IPs / MAC uniqueness.
8343 for nic_op, nic_dict in self.op.nics:
8344 if nic_op == constants.DDM_REMOVE:
8345 if not instance.nics:
8346 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8349 if nic_op != constants.DDM_ADD:
8351 if not instance.nics:
8352 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8353 " no NICs" % nic_op,
8355 if nic_op < 0 or nic_op >= len(instance.nics):
8356 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8358 (nic_op, len(instance.nics) - 1),
8360 old_nic_params = instance.nics[nic_op].nicparams
8361 old_nic_ip = instance.nics[nic_op].ip
8366 update_params_dict = dict([(key, nic_dict[key])
8367 for key in constants.NICS_PARAMETERS
8368 if key in nic_dict])
# 'bridge' is legacy shorthand for the 'link' nicparam.
8370 if 'bridge' in nic_dict:
8371 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8373 new_nic_params, new_filled_nic_params = \
8374 self._GetUpdatedParams(old_nic_params, update_params_dict,
8375 cluster.nicparams[constants.PP_DEFAULT],
8376 constants.NICS_PARAMETER_TYPES)
8377 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8378 self.nic_pinst[nic_op] = new_nic_params
8379 self.nic_pnew[nic_op] = new_filled_nic_params
8380 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8382 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8383 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8384 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8386 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8388 self.warn.append(msg)
8390 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8391 if new_nic_mode == constants.NIC_MODE_ROUTED:
8392 if 'ip' in nic_dict:
8393 nic_ip = nic_dict['ip']
8397 raise errors.OpPrereqError('Cannot set the nic ip to None'
8398 ' on a routed nic', errors.ECODE_INVAL)
8399 if 'mac' in nic_dict:
8400 nic_mac = nic_dict['mac']
8402 raise errors.OpPrereqError('Cannot set the nic mac to None',
8404 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8405 # otherwise generate the mac
8406 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8408 # or validate/reserve the current one
8410 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8411 except errors.ReservationError:
8412 raise errors.OpPrereqError("MAC address %s already in use"
8413 " in cluster" % nic_mac,
8414 errors.ECODE_NOTUNIQUE)
# DISK changes
8417 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8418 raise errors.OpPrereqError("Disk operations not supported for"
8419 " diskless instances",
8421 for disk_op, _ in self.op.disks:
8422 if disk_op == constants.DDM_REMOVE:
8423 if len(instance.disks) == 1:
8424 raise errors.OpPrereqError("Cannot remove the last disk of"
8425 " an instance", errors.ECODE_INVAL)
8426 _CheckInstanceDown(self, instance, "cannot remove disks")
# NOTE(review): len(instance.nics) below looks like a bug -- a disk-count
# limit should check len(instance.disks).  Confirm and fix upstream.
8428 if (disk_op == constants.DDM_ADD and
8429 len(instance.nics) >= constants.MAX_DISKS):
8430 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8431 " add more" % constants.MAX_DISKS,
8433 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8435 if disk_op < 0 or disk_op >= len(instance.disks):
8436 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8438 (disk_op, len(instance.disks)),
8442 if self.op.os_name and not self.op.force:
8443 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8444 self.op.force_variant)
# NOTE(review): residual line numbers are non-contiguous -- interior lines of
# this method are missing from this extract.  Code kept byte-identical.
8448 def _ConvertPlainToDrbd(self, feedback_fn):
8449 """Converts an instance from plain to drbd.
8452 feedback_fn("Converting template to drbd")
8453 instance = self.instance
8454 pnode = instance.primary_node
8455 snode = self.op.remote_node
8457 # create a fake disk info for _GenerateDiskTemplate
8458 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8459 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8460 instance.name, pnode, [snode],
8461 disk_info, None, None, 0)
8462 info = _GetInstanceInfoText(instance)
# NOTE(review): "aditional" is a typo in a user-visible message; left as-is
# here since this edit changes comments only.
8463 feedback_fn("Creating aditional volumes...")
8464 # first, create the missing data and meta devices
8465 for disk in new_disks:
8466 # unfortunately this is... not too nice
# On the primary only the meta LV (children[1]) is new -- the data LV is
# the existing plain volume; on the secondary both children are created.
8467 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8469 for child in disk.children:
8470 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8471 # at this stage, all new LVs have been created, we can rename the
8473 feedback_fn("Renaming original volumes...")
8474 rename_list = [(o, n.children[0].logical_id)
8475 for (o, n) in zip(instance.disks, new_disks)]
8476 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8477 result.Raise("Failed to rename original LVs")
8479 feedback_fn("Initializing DRBD devices...")
8480 # all child devices are in place, we can now create the DRBD devices
8481 for disk in new_disks:
8482 for node in [pnode, snode]:
# f_create is True only on the primary: the device already holds the data.
8483 f_create = node == pnode
8484 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8486 # at this point, the instance has been modified
8487 instance.disk_template = constants.DT_DRBD8
8488 instance.disks = new_disks
8489 self.cfg.Update(instance, feedback_fn)
8491 # disks are created, waiting for sync
8492 disk_abort = not _WaitForSync(self, instance)
8494 raise errors.OpExecError("There are some degraded disks for"
8495 " this instance, please cleanup manually")
def _ConvertDrbdToPlain(self, feedback_fn):
  """Converts an instance from drbd to plain.

  Keeps the data children of the DRBD devices as the new plain disks,
  updates the configuration, then best-effort removes the now-unneeded
  volumes (all disks on the secondary, metadata on the primary).

  @param feedback_fn: function used to report progress to the caller

  """
  instance = self.instance
  # the conversion only makes sense for a single-secondary DRBD instance
  assert len(instance.secondary_nodes) == 1
  pnode = instance.primary_node
  snode = instance.secondary_nodes[0]
  feedback_fn("Converting template to plain")

  old_disks = instance.disks
  # child 0 of a DRBD disk is the data LV; it becomes the plain disk
  new_disks = [d.children[0] for d in old_disks]

  # copy over size and mode
  for parent, child in zip(old_disks, new_disks):
    child.size = parent.size
    child.mode = parent.mode

  # update instance structure
  instance.disks = new_disks
  instance.disk_template = constants.DT_PLAIN
  self.cfg.Update(instance, feedback_fn)

  # removals below are best-effort: a failure only logs a warning, the
  # config has already been updated to the new template
  feedback_fn("Removing volumes on the secondary node...")
  for disk in old_disks:
    self.cfg.SetDiskID(disk, snode)
    msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
    if msg:
      self.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", disk.iv_name, snode, msg)

  feedback_fn("Removing unneeded volumes on the primary node...")
  for idx, disk in enumerate(old_disks):
    # child 1 of a DRBD disk is the metadata LV
    meta = disk.children[1]
    self.cfg.SetDiskID(meta, pnode)
    msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
    if msg:
      self.LogWarning("Could not remove metadata for disk %d on node %s,"
                      " continuing anyway: %s", idx, pnode, msg)
8538 def Exec(self, feedback_fn):
8539 """Modifies an instance.
8541 All parameters take effect only at the next restart of the instance.
8544 # Process here the warnings from CheckPrereq, as we don't have a
8545 # feedback_fn there.
8546 for warn in self.warn:
8547 feedback_fn("WARNING: %s" % warn)
8550 instance = self.instance
8552 for disk_op, disk_dict in self.op.disks:
8553 if disk_op == constants.DDM_REMOVE:
8554 # remove the last disk
8555 device = instance.disks.pop()
8556 device_idx = len(instance.disks)
8557 for node, disk in device.ComputeNodeTree(instance.primary_node):
8558 self.cfg.SetDiskID(disk, node)
8559 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8561 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8562 " continuing anyway", device_idx, node, msg)
8563 result.append(("disk/%d" % device_idx, "remove"))
8564 elif disk_op == constants.DDM_ADD:
8566 if instance.disk_template == constants.DT_FILE:
8567 file_driver, file_path = instance.disks[0].logical_id
8568 file_path = os.path.dirname(file_path)
8570 file_driver = file_path = None
8571 disk_idx_base = len(instance.disks)
8572 new_disk = _GenerateDiskTemplate(self,
8573 instance.disk_template,
8574 instance.name, instance.primary_node,
8575 instance.secondary_nodes,
8580 instance.disks.append(new_disk)
8581 info = _GetInstanceInfoText(instance)
8583 logging.info("Creating volume %s for instance %s",
8584 new_disk.iv_name, instance.name)
8585 # Note: this needs to be kept in sync with _CreateDisks
8587 for node in instance.all_nodes:
8588 f_create = node == instance.primary_node
8590 _CreateBlockDev(self, node, instance, new_disk,
8591 f_create, info, f_create)
8592 except errors.OpExecError, err:
8593 self.LogWarning("Failed to create volume %s (%s) on"
8595 new_disk.iv_name, new_disk, node, err)
8596 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8597 (new_disk.size, new_disk.mode)))
8599 # change a given disk
8600 instance.disks[disk_op].mode = disk_dict['mode']
8601 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8603 if self.op.disk_template:
8604 r_shut = _ShutdownInstanceDisks(self, instance)
8606 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8607 " proceed with disk template conversion")
8608 mode = (instance.disk_template, self.op.disk_template)
8610 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8612 self.cfg.ReleaseDRBDMinors(instance.name)
8614 result.append(("disk_template", self.op.disk_template))
8617 for nic_op, nic_dict in self.op.nics:
8618 if nic_op == constants.DDM_REMOVE:
8619 # remove the last nic
8620 del instance.nics[-1]
8621 result.append(("nic.%d" % len(instance.nics), "remove"))
8622 elif nic_op == constants.DDM_ADD:
8623 # mac and bridge should be set, by now
8624 mac = nic_dict['mac']
8625 ip = nic_dict.get('ip', None)
8626 nicparams = self.nic_pinst[constants.DDM_ADD]
8627 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8628 instance.nics.append(new_nic)
8629 result.append(("nic.%d" % (len(instance.nics) - 1),
8630 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8631 (new_nic.mac, new_nic.ip,
8632 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8633 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8636 for key in 'mac', 'ip':
8638 setattr(instance.nics[nic_op], key, nic_dict[key])
8639 if nic_op in self.nic_pinst:
8640 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8641 for key, val in nic_dict.iteritems():
8642 result.append(("nic.%s/%d" % (key, nic_op), val))
8645 if self.op.hvparams:
8646 instance.hvparams = self.hv_inst
8647 for key, val in self.op.hvparams.iteritems():
8648 result.append(("hv/%s" % key, val))
8651 if self.op.beparams:
8652 instance.beparams = self.be_inst
8653 for key, val in self.op.beparams.iteritems():
8654 result.append(("be/%s" % key, val))
8658 instance.os = self.op.os_name
8660 self.cfg.Update(instance, feedback_fn)
# dispatch table mapping (current template, target template) to the
# bound conversion method; CheckPrereq validates the requested pair
# against these keys before Exec dereferences it
_DISK_CONVERSIONS = {
  (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
  (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
  }
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      # no nodes given: query all of them
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; False marks a node that could not be queried

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload
    return result
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]

  def CheckArguments(self):
    """Check the arguments.

    """
    # optional opcode parameter, defaulted here
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have do lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @return: tuple of (finalize-success, per-disk export results)

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    # the finally below guarantees the instance is restarted even if a
    # snapshot fails
    try:
      for idx, disk in enumerate(instance.disks):
        feedback_fn("Creating a snapshot of disk/%s on node %s" %
                    (idx, src_node))

        # result.payload will be a snapshot of an lvm leaf of the one we
        # passed
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
        msg = result.fail_msg
        if msg:
          # a False placeholder keeps the snapshot list index-aligned
          # with the instance disks
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                          idx, src_node, msg)
          snap_disks.append(False)
        else:
          disk_id = (vgname, result.payload)
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=disk_id, physical_id=disk_id,
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      if self.op.shutdown and instance.admin_up:
        feedback_fn("Starting instance %s" % instance.name)
        result = self.rpc.call_instance_start(src_node, instance, None, None)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    dresults = []
    for idx, dev in enumerate(snap_disks):
      feedback_fn("Exporting snapshot %s from %s to %s" %
                  (idx, src_node, dst_node.name))
      if dev:
        # FIXME: pass debug from opcode to backend
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name,
                                               idx, self.op.debug_level)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, src_node, dst_node.name, msg)
          dresults.append(False)
        else:
          dresults.append(True)
        # snapshots are one-shot: always try to remove them afterwards
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
        if msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, src_node, msg)
      else:
        # the snapshot itself failed earlier
        dresults.append(False)

    feedback_fn("Finalizing export on %s" % dst_node.name)
    result = self.rpc.call_finalize_export(dst_node.name, instance,
                                           snap_disks)
    fin_resu = True
    msg = result.fail_msg
    if msg:
      self.LogWarning("Could not finalize export for instance %s"
                      " on node %s: %s", instance.name, dst_node.name, msg)
      fin_resu = False

    if activate_disks:
      feedback_fn("Deactivating disks for %s" % instance.name)
      _ShutdownInstanceDisks(self, instance)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # nothing to check: removal works for already-deleted instances too

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Name.")
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    # cluster tags need no locks beyond the BGL

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # resolve the tag target object; subclasses operate on self.target
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    # self.target was resolved by TagsLU.CheckPrereq
    return list(self.target.GetTags())
9007 class LUSearchTags(NoHooksLU):
9008 """Searches the tags for a given pattern.
9011 _OP_REQP = ["pattern"]
9014 def ExpandNames(self):
9015 self.needed_locks = {}
9017 def CheckPrereq(self):
9018 """Check prerequisites.
9020 This checks the pattern passed for validity by compiling it.
9024 self.re = re.compile(self.op.pattern)
9025 except re.error, err:
9026 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9027 (self.op.pattern, err), errors.ECODE_INVAL)
9029 def Exec(self, feedback_fn):
9030 """Returns the tag list.
9034 tgts = [("/cluster", cfg.GetClusterInfo())]
9035 ilist = cfg.GetAllInstancesInfo().values()
9036 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9037 nlist = cfg.GetAllNodesInfo().values()
9038 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9040 for path, target in tgts:
9041 for tag in target.GetTags():
9042 if self.re.search(tag):
9043 results.append((path, tag))
9047 class LUAddTags(TagsLU):
9048 """Sets a tag on a given object.
9051 _OP_REQP = ["kind", "name", "tags"]
9054 def CheckPrereq(self):
9055 """Check prerequisites.
9057 This checks the type and length of the tag name and value.
9060 TagsLU.CheckPrereq(self)
9061 for tag in self.op.tags:
9062 objects.TaggableObject.ValidateTag(tag)
9064 def Exec(self, feedback_fn):
9069 for tag in self.op.tags:
9070 self.target.AddTag(tag)
9071 except errors.TagError, err:
9072 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9073 self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      # sorted names make the error message deterministic
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # nothing to check for a delay test

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
9152 class IAllocator(object):
9153 """IAllocator framework.
9155 An IAllocator instance has three sets of attributes:
9156 - cfg that is needed to query the cluster
9157 - input data (all members of the _KEYS class attribute are required)
9158 - four buffer attributes (in|out_data|text), that represent the
9159 input (to the external script) in text and data structure format,
9160 and the output from it, again in two formats
9161 - the result variables from the script (success, info, nodes) for
9165 # pylint: disable-msg=R0902
9166 # lots of instance attributes
9168 "name", "mem_size", "disks", "disk_template",
9169 "os", "tags", "nics", "vcpus", "hypervisor",
9172 "name", "relocate_from",
9178 def __init__(self, cfg, rpc, mode, **kwargs):
9181 # init buffer variables
9182 self.in_text = self.out_text = self.in_data = self.out_data = None
9183 # init all input fields so that pylint is happy
9185 self.mem_size = self.disks = self.disk_template = None
9186 self.os = self.tags = self.nics = self.vcpus = None
9187 self.hypervisor = None
9188 self.relocate_from = None
9190 self.evac_nodes = None
9192 self.required_nodes = None
9193 # init result fields
9194 self.success = self.info = self.result = None
9195 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9196 keyset = self._ALLO_KEYS
9197 fn = self._AddNewInstance
9198 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9199 keyset = self._RELO_KEYS
9200 fn = self._AddRelocateInstance
9201 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9202 keyset = self._EVAC_KEYS
9203 fn = self._AddEvacuateNodes
9205 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9206 " IAllocator" % self.mode)
9208 if key not in keyset:
9209 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9210 " IAllocator" % key)
9211 setattr(self, key, kwargs[key])
9214 if key not in kwargs:
9215 raise errors.ProgrammerError("Missing input parameter '%s' to"
9216 " IAllocator" % key)
9217 self._BuildInputData(fn)
9219 def _ComputeClusterData(self):
9220 """Compute the generic allocator input data.
9222 This is the data that is independent of the actual operation.
9226 cluster_info = cfg.GetClusterInfo()
9229 "version": constants.IALLOCATOR_VERSION,
9230 "cluster_name": cfg.GetClusterName(),
9231 "cluster_tags": list(cluster_info.GetTags()),
9232 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9233 # we don't have job IDs
9235 iinfo = cfg.GetAllInstancesInfo().values()
9236 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9240 node_list = cfg.GetNodeList()
9242 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9243 hypervisor_name = self.hypervisor
9244 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9245 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9246 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9247 hypervisor_name = cluster_info.enabled_hypervisors[0]
9249 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9252 self.rpc.call_all_instances_info(node_list,
9253 cluster_info.enabled_hypervisors)
9254 for nname, nresult in node_data.items():
9255 # first fill in static (config-based) values
9256 ninfo = cfg.GetNodeInfo(nname)
9258 "tags": list(ninfo.GetTags()),
9259 "primary_ip": ninfo.primary_ip,
9260 "secondary_ip": ninfo.secondary_ip,
9261 "offline": ninfo.offline,
9262 "drained": ninfo.drained,
9263 "master_candidate": ninfo.master_candidate,
9266 if not (ninfo.offline or ninfo.drained):
9267 nresult.Raise("Can't get data for node %s" % nname)
9268 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9270 remote_info = nresult.payload
9272 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9273 'vg_size', 'vg_free', 'cpu_total']:
9274 if attr not in remote_info:
9275 raise errors.OpExecError("Node '%s' didn't return attribute"
9276 " '%s'" % (nname, attr))
9277 if not isinstance(remote_info[attr], int):
9278 raise errors.OpExecError("Node '%s' returned invalid value"
9280 (nname, attr, remote_info[attr]))
9281 # compute memory used by primary instances
9282 i_p_mem = i_p_up_mem = 0
9283 for iinfo, beinfo in i_list:
9284 if iinfo.primary_node == nname:
9285 i_p_mem += beinfo[constants.BE_MEMORY]
9286 if iinfo.name not in node_iinfo[nname].payload:
9289 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9290 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9291 remote_info['memory_free'] -= max(0, i_mem_diff)
9294 i_p_up_mem += beinfo[constants.BE_MEMORY]
9296 # compute memory used by instances
9298 "total_memory": remote_info['memory_total'],
9299 "reserved_memory": remote_info['memory_dom0'],
9300 "free_memory": remote_info['memory_free'],
9301 "total_disk": remote_info['vg_size'],
9302 "free_disk": remote_info['vg_free'],
9303 "total_cpus": remote_info['cpu_total'],
9304 "i_pri_memory": i_p_mem,
9305 "i_pri_up_memory": i_p_up_mem,
9309 node_results[nname] = pnr
9310 data["nodes"] = node_results
9314 for iinfo, beinfo in i_list:
9316 for nic in iinfo.nics:
9317 filled_params = objects.FillDict(
9318 cluster_info.nicparams[constants.PP_DEFAULT],
9320 nic_dict = {"mac": nic.mac,
9322 "mode": filled_params[constants.NIC_MODE],
9323 "link": filled_params[constants.NIC_LINK],
9325 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9326 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9327 nic_data.append(nic_dict)
9329 "tags": list(iinfo.GetTags()),
9330 "admin_up": iinfo.admin_up,
9331 "vcpus": beinfo[constants.BE_VCPUS],
9332 "memory": beinfo[constants.BE_MEMORY],
9334 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9336 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9337 "disk_template": iinfo.disk_template,
9338 "hypervisor": iinfo.hypervisor,
9340 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9342 instance_data[iinfo.name] = pir
9344 data["instances"] = instance_data
9348 def _AddNewInstance(self):
9349 """Add new instance data to allocator structure.
9351 This in combination with _AllocatorGetClusterData will create the
9352 correct structure needed as input for the allocator.
9354 The checks for the completeness of the opcode must have already been
9358 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9360 if self.disk_template in constants.DTS_NET_MIRROR:
9361 self.required_nodes = 2
9363 self.required_nodes = 1
9366 "disk_template": self.disk_template,
9369 "vcpus": self.vcpus,
9370 "memory": self.mem_size,
9371 "disks": self.disks,
9372 "disk_space_total": disk_space,
9374 "required_nodes": self.required_nodes,
9378 def _AddRelocateInstance(self):
9379 """Add relocate instance data to allocator structure.
9381 This in combination with _IAllocatorGetClusterData will create the
9382 correct structure needed as input for the allocator.
9384 The checks for the completeness of the opcode must have already been
9388 instance = self.cfg.GetInstanceInfo(self.name)
9389 if instance is None:
9390 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9391 " IAllocator" % self.name)
9393 if instance.disk_template not in constants.DTS_NET_MIRROR:
9394 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9397 if len(instance.secondary_nodes) != 1:
9398 raise errors.OpPrereqError("Instance has not exactly one secondary node",
9401 self.required_nodes = 1
9402 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9403 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9407 "disk_space_total": disk_space,
9408 "required_nodes": self.required_nodes,
9409 "relocate_from": self.relocate_from,
9413 def _AddEvacuateNodes(self):
9414 """Add evacuate nodes data to allocator structure.
9418 "evac_nodes": self.evac_nodes
9422 def _BuildInputData(self, fn):
9423 """Build input data structures.
9426 self._ComputeClusterData()
9429 request["type"] = self.mode
9430 self.in_data["request"] = request
9432 self.in_text = serializer.Dump(self.in_data)
9434 def Run(self, name, validate=True, call_fn=None):
9435 """Run an instance allocator and return the results.
9439 call_fn = self.rpc.call_iallocator_runner
9441 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9442 result.Raise("Failure while running the iallocator script")
9444 self.out_text = result.payload
9446 self._ValidateResult()
9448 def _ValidateResult(self):
9449 """Process the allocator results.
9451 This will process and if successful save the result in
9452 self.out_data and the other parameters.
9456 rdict = serializer.Load(self.out_text)
9457 except Exception, err:
9458 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9460 if not isinstance(rdict, dict):
9461 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9463 # TODO: remove backwards compatiblity in later versions
9464 if "nodes" in rdict and "result" not in rdict:
9465 rdict["result"] = rdict["nodes"]
9468 for key in "success", "info", "result":
9469 if key not in rdict:
9470 raise errors.OpExecError("Can't parse iallocator results:"
9471 " missing key '%s'" % key)
9472 setattr(self, key, rdict[key])
9474 if not isinstance(rdict["result"], list):
9475 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9477 self.out_data = rdict
9480 class LUTestAllocator(NoHooksLU):
9481 """Run allocator tests.
9483 This LU runs the allocator tests
9486 _OP_REQP = ["direction", "mode", "name"]
9488 def CheckPrereq(self):
9489 """Check prerequisites.
9491 This checks the opcode parameters depending on the director and mode test.
9494 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9495 for attr in ["name", "mem_size", "disks", "disk_template",
9496 "os", "tags", "nics", "vcpus"]:
9497 if not hasattr(self.op, attr):
9498 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9499 attr, errors.ECODE_INVAL)
9500 iname = self.cfg.ExpandInstanceName(self.op.name)
9501 if iname is not None:
9502 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9503 iname, errors.ECODE_EXISTS)
9504 if not isinstance(self.op.nics, list):
9505 raise errors.OpPrereqError("Invalid parameter 'nics'",
9507 for row in self.op.nics:
9508 if (not isinstance(row, dict) or
9511 "bridge" not in row):
9512 raise errors.OpPrereqError("Invalid contents of the 'nics'"
9513 " parameter", errors.ECODE_INVAL)
9514 if not isinstance(self.op.disks, list):
9515 raise errors.OpPrereqError("Invalid parameter 'disks'",
9517 for row in self.op.disks:
9518 if (not isinstance(row, dict) or
9519 "size" not in row or
9520 not isinstance(row["size"], int) or
9521 "mode" not in row or
9522 row["mode"] not in ['r', 'w']):
9523 raise errors.OpPrereqError("Invalid contents of the 'disks'"
9524 " parameter", errors.ECODE_INVAL)
9525 if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9526 self.op.hypervisor = self.cfg.GetHypervisorType()
9527 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9528 if not hasattr(self.op, "name"):
9529 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9531 fname = _ExpandInstanceName(self.cfg, self.op.name)
9532 self.op.name = fname
9533 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9534 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9535 if not hasattr(self.op, "evac_nodes"):
9536 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9537 " opcode input", errors.ECODE_INVAL)
9539 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9540 self.op.mode, errors.ECODE_INVAL)
9542 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9543 if not hasattr(self.op, "allocator") or self.op.allocator is None:
9544 raise errors.OpPrereqError("Missing allocator name",
9546 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9547 raise errors.OpPrereqError("Wrong allocator test '%s'" %
9548 self.op.direction, errors.ECODE_INVAL)
9550 def Exec(self, feedback_fn):
9551 """Run the allocator test.
9554 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9555 ial = IAllocator(self.cfg, self.rpc,
9558 mem_size=self.op.mem_size,
9559 disks=self.op.disks,
9560 disk_template=self.op.disk_template,
9564 vcpus=self.op.vcpus,
9565 hypervisor=self.op.hypervisor,
9567 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9568 ial = IAllocator(self.cfg, self.rpc,
9571 relocate_from=list(self.relocate_from),
9573 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9574 ial = IAllocator(self.cfg, self.rpc,
9576 evac_nodes=self.op.evac_nodes)
9578 raise errors.ProgrammerError("Uncatched mode %s in"
9579 " LUTestAllocator.Exec", self.op.mode)
9581 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9582 result = ial.in_text
9584 ial.Run(self.op.allocator, validate=False)
9585 result = ial.out_text