4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode result.
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
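# Illustrative sketch (not part of the original source): an LU's Exec method
# can hand follow-up work to the processor by returning ResultWithJobs. The
# instance names below are hypothetical.
#
#   def Exec(self, feedback_fn):
#     ...
#     jobs = [
#       [opcodes.OpInstanceStartup(instance_name="inst1.example.com")],
#       [opcodes.OpInstanceStartup(instance_name="inst2.example.com")],
#     ]
#     return ResultWithJobs(jobs)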
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op validity.
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
177 The function is allowed to change the self.op attribute so that
178 later methods need no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same time.
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are allowed.
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. If there are no nodes, an empty
309 list should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged, but any LU can override it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # API must be kept, thus we ignore the unused-argument and
336 # could-be-a-function pylint warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done before.
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 It should be called from DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
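# Illustrative sketch (assumption, not from the original source): a typical
# instance-level LU combines the helpers above roughly as follows; the class
# name is hypothetical.
#
#   class LUInstanceExample(LogicalUnit):
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()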
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLU.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU;
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklet.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
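# Illustrative use (see e.g. LUClusterVerifyGroup.ExpandNames below): LUs that
# can run with all their locks shared simply do
#
#   self.share_locks = _ShareAll()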
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @type use_default: boolean
702 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @type use_none: boolean
705 @param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
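# Illustrative example (assumption; parameter names are made up) of the merge
# semantics described in the docstring above:
#
#   _GetUpdatedParams({"acpi": True, "kernel_path": "/vmlinuz"},
#                     {"acpi": constants.VALUE_DEFAULT, "serial_console": True})
#   # -> {"kernel_path": "/vmlinuz", "serial_console": True}
#   # "acpi" is removed because VALUE_DEFAULT marks it for deletion when
#   # use_default=True (the default).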
724 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
725 """Return the new version of an instance policy.
727 @param group_policy: whether this policy applies to a group and thus
728 we should support removal of policy entries
731 use_none = use_default = group_policy
732 ipolicy = copy.deepcopy(old_ipolicy)
733 for key, value in new_ipolicy.items():
734 if key not in constants.IPOLICY_ALL_KEYS:
735 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
737 if key in constants.IPOLICY_ISPECS:
738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
739 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
741 use_default=use_default)
743 if not value or value == [constants.VALUE_DEFAULT]:
747 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
748 " on the cluster" % key,
751 if key in constants.IPOLICY_PARAMETERS:
752 # FIXME: we assume all such values are float
754 ipolicy[key] = float(value)
755 except (TypeError, ValueError), err:
756 raise errors.OpPrereqError("Invalid value for attribute"
757 " '%s': '%s', error: %s" %
758 (key, value, err), errors.ECODE_INVAL)
760 # FIXME: we assume all others are lists; this should be redone
762 ipolicy[key] = list(value)
764 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
765 except errors.ConfigurationError, err:
766 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
771 def _UpdateAndVerifySubDict(base, updates, type_check):
772 """Updates and verifies a dict with sub dicts of the same type.
774 @param base: The dict with the old data
775 @param updates: The dict with the new data
776 @param type_check: Dict suitable to ForceDictType to verify correct types
777 @returns: A new dict with updated and verified values
781 new = _GetUpdatedParams(old, value)
782 utils.ForceDictType(new, type_check)
785 ret = copy.deepcopy(base)
786 ret.update(dict((key, fn(base.get(key, {}), value))
787 for key, value in updates.items()))
791 def _MergeAndVerifyHvState(op_input, obj_input):
792 """Combines the hv state from an opcode with that of the object.
794 @param op_input: The input dict from the opcode
795 @param obj_input: The input dict from the objects
796 @return: The verified and updated dict
800 invalid_hvs = set(op_input) - constants.HYPER_TYPES
802 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
803 " %s" % utils.CommaJoin(invalid_hvs),
805 if obj_input is None:
807 type_check = constants.HVSTS_PARAMETER_TYPES
808 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
813 def _MergeAndVerifyDiskState(op_input, obj_input):
814 """Combines the disk state from an opcode with that of the object.
816 @param op_input: The input dict from the opcode
817 @param obj_input: The input dict from the objects
818 @return: The verified and updated dict
821 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
823 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
824 utils.CommaJoin(invalid_dst),
826 type_check = constants.DSS_PARAMETER_TYPES
827 if obj_input is None:
829 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
831 for key, value in op_input.items())
836 def _ReleaseLocks(lu, level, names=None, keep=None):
837 """Releases locks owned by an LU.
839 @type lu: L{LogicalUnit}
840 @param level: Lock level
841 @type names: list or None
842 @param names: Names of locks to release
843 @type keep: list or None
844 @param keep: Names of locks to retain
847 assert not (keep is not None and names is not None), \
848 "Only one of the 'names' and the 'keep' parameters can be given"
850 if names is not None:
851 should_release = names.__contains__
853 should_release = lambda name: name not in keep
855 should_release = None
857 owned = lu.owned_locks(level)
859 # Not owning any lock at this level, do nothing
866 # Determine which locks to release
868 if should_release(name):
873 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
875 # Release just some locks
876 lu.glm.release(level, names=release)
878 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
881 lu.glm.release(level)
883 assert not lu.glm.is_owned(level), "No locks should be owned"
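# Illustrative calls (assumption; the variables are hypothetical):
#
#   # keep only the locks still needed for the instance's nodes
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=instance.all_nodes)
#   # release a specific set of node locks
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unused_nodes)
#   # release everything held at this level
#   _ReleaseLocks(self, locking.LEVEL_NODE)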
886 def _MapInstanceDisksToNodes(instances):
887 """Creates a map from (node, volume) to instance name.
889 @type instances: list of L{objects.Instance}
890 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
893 return dict(((node, vol), inst.name)
894 for inst in instances
895 for (node, vols) in inst.MapLVsByNode().items()
899 def _RunPostHook(lu, node_name):
900 """Runs the post-hook for an opcode on a single node.
903 hm = lu.proc.BuildHooksManager(lu)
905 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
907 # pylint: disable=W0702
908 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
911 def _CheckOutputFields(static, dynamic, selected):
912 """Checks whether all selected fields are valid.
914 @type static: L{utils.FieldSet}
915 @param static: static fields set
916 @type dynamic: L{utils.FieldSet}
917 @param dynamic: dynamic fields set
924 delta = f.NonMatching(selected)
926 raise errors.OpPrereqError("Unknown output fields selected: %s"
927 % ",".join(delta), errors.ECODE_INVAL)
930 def _CheckGlobalHvParams(params):
931 """Validates that given hypervisor params are not global ones.
933 This will ensure that instances don't get customised versions of global parameters.
937 used_globals = constants.HVC_GLOBALS.intersection(params)
939 msg = ("The following hypervisor parameters are global and cannot"
940 " be customized at instance level, please modify them at"
941 " cluster level: %s" % utils.CommaJoin(used_globals))
942 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
945 def _CheckNodeOnline(lu, node, msg=None):
946 """Ensure that a given node is online.
948 @param lu: the LU on behalf of which we make the check
949 @param node: the node to check
950 @param msg: if passed, should be a message to replace the default one
951 @raise errors.OpPrereqError: if the node is offline
955 msg = "Can't use offline node"
956 if lu.cfg.GetNodeInfo(node).offline:
957 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
960 def _CheckNodeNotDrained(lu, node):
961 """Ensure that a given node is not drained.
963 @param lu: the LU on behalf of which we make the check
964 @param node: the node to check
965 @raise errors.OpPrereqError: if the node is drained
968 if lu.cfg.GetNodeInfo(node).drained:
969 raise errors.OpPrereqError("Can't use drained node %s" % node,
973 def _CheckNodeVmCapable(lu, node):
974 """Ensure that a given node is vm capable.
976 @param lu: the LU on behalf of which we make the check
977 @param node: the node to check
978 @raise errors.OpPrereqError: if the node is not vm capable
981 if not lu.cfg.GetNodeInfo(node).vm_capable:
982 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
986 def _CheckNodeHasOS(lu, node, os_name, force_variant):
987 """Ensure that a node supports a given OS.
989 @param lu: the LU on behalf of which we make the check
990 @param node: the node to check
991 @param os_name: the OS to query about
992 @param force_variant: whether to ignore variant errors
993 @raise errors.OpPrereqError: if the node is not supporting the OS
996 result = lu.rpc.call_os_get(node, os_name)
997 result.Raise("OS '%s' not in supported OS list for node %s" %
999 prereq=True, ecode=errors.ECODE_INVAL)
1000 if not force_variant:
1001 _CheckOSVariant(result.payload, os_name)
1004 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1005 """Ensure that a node has the given secondary ip.
1007 @type lu: L{LogicalUnit}
1008 @param lu: the LU on behalf of which we make the check
1010 @param node: the node to check
1011 @type secondary_ip: string
1012 @param secondary_ip: the ip to check
1013 @type prereq: boolean
1014 @param prereq: whether to throw a prerequisite or an execute error
1015 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1016 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1019 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1020 result.Raise("Failure checking secondary ip on node %s" % node,
1021 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1022 if not result.payload:
1023 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1024 " please fix and re-run this command" % secondary_ip)
1026 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1028 raise errors.OpExecError(msg)
1031 def _GetClusterDomainSecret():
1032 """Reads the cluster domain secret.
1035 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1039 def _CheckInstanceState(lu, instance, req_states, msg=None):
1040 """Ensure that an instance is in one of the required states.
1042 @param lu: the LU on behalf of which we make the check
1043 @param instance: the instance to check
1044 @param msg: if passed, should be a message to replace the default one
1045 @raise errors.OpPrereqError: if the instance is not in the required state
1049 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1050 if instance.admin_state not in req_states:
1051 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1052 (instance.name, instance.admin_state, msg),
1055 if constants.ADMINST_UP not in req_states:
1056 pnode = instance.primary_node
1057 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1058 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1059 prereq=True, ecode=errors.ECODE_ENVIRON)
1061 if instance.name in ins_l.payload:
1062 raise errors.OpPrereqError("Instance %s is running, %s" %
1063 (instance.name, msg), errors.ECODE_STATE)
1066 def _ComputeMinMaxSpec(name, ipolicy, value):
1067 """Computes if value is in the desired range.
1069 @param name: name of the parameter for which we perform the check
1070 @param ipolicy: dictionary containing min, max and std values
1071 @param value: actual value that we want to use
1072 @return: None or element not meeting the criteria
1076 if value in [None, constants.VALUE_AUTO]:
1078 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1079 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1080 if value > max_v or min_v > value:
1081 return ("%s value %s is not in range [%s, %s]" %
1082 (name, value, min_v, max_v))
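# Illustrative shape of the ipolicy argument (assumption; only the keys used
# above are shown, values are made up):
#
#   ipolicy = {
#     constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#     constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 32768},
#   }
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 64)
#   # -> error string ("... value 64 is not in range [128, 32768]")
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 512)
#   # -> None (value within range)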
1086 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1087 nic_count, disk_sizes,
1088 _compute_fn=_ComputeMinMaxSpec):
1089 """Verifies ipolicy against provided specs.
1092 @param ipolicy: The ipolicy
1094 @param mem_size: The memory size
1095 @type cpu_count: int
1096 @param cpu_count: Used cpu cores
1097 @type disk_count: int
1098 @param disk_count: Number of disks used
1099 @type nic_count: int
1100 @param nic_count: Number of nics used
1101 @type disk_sizes: list of ints
1102 @param disk_sizes: Disk sizes of used disks (len must match C{disk_count})
1103 @param _compute_fn: The compute function (unittest only)
1104 @return: A list of violations, or an empty list if no violations are found
1107 assert disk_count == len(disk_sizes)
1110 (constants.ISPEC_MEM_SIZE, mem_size),
1111 (constants.ISPEC_CPU_COUNT, cpu_count),
1112 (constants.ISPEC_DISK_COUNT, disk_count),
1113 (constants.ISPEC_NIC_COUNT, nic_count),
1114 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1117 (_compute_fn(name, ipolicy, value)
1118 for (name, value) in test_settings))
1121 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1122 _compute_fn=_ComputeIPolicySpecViolation):
1123 """Compute if instance meets the specs of ipolicy.
1126 @param ipolicy: The ipolicy to verify against
1127 @type instance: L{objects.Instance}
1128 @param instance: The instance to verify
1129 @param _compute_fn: The function to verify ipolicy (unittest only)
1130 @see: L{_ComputeIPolicySpecViolation}
1133 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1134 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1135 disk_count = len(instance.disks)
1136 disk_sizes = [disk.size for disk in instance.disks]
1137 nic_count = len(instance.nics)
1139 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1143 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1144 _compute_fn=_ComputeIPolicySpecViolation):
1145 """Compute if instance specs meet the specs of ipolicy.
1148 @param ipolicy: The ipolicy to verify against
1149 @type instance_spec: dict
1150 @param instance_spec: The instance spec to verify
1151 @param _compute_fn: The function to verify ipolicy (unittest only)
1152 @see: L{_ComputeIPolicySpecViolation}
1155 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1156 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1157 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1158 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1159 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1161 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1165 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1167 _compute_fn=_ComputeIPolicyInstanceViolation):
1168 """Compute if instance meets the specs of the new target group.
1170 @param ipolicy: The ipolicy to verify
1171 @param instance: The instance object to verify
1172 @param current_group: The current group of the instance
1173 @param target_group: The new group of the instance
1174 @param _compute_fn: The function to verify ipolicy (unittest only)
1175 @see: L{_ComputeIPolicySpecViolation}
1178 if current_group == target_group:
1181 return _compute_fn(ipolicy, instance)
1184 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1185 _compute_fn=_ComputeIPolicyNodeViolation):
1186 """Checks that the target node is correct in terms of instance policy.
1188 @param ipolicy: The ipolicy to verify
1189 @param instance: The instance object to verify
1190 @param node: The new node to relocate
1191 @param ignore: Ignore violations of the ipolicy
1192 @param _compute_fn: The function to verify ipolicy (unittest only)
1193 @see: L{_ComputeIPolicySpecViolation}
1196 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1197 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1200 msg = ("Instance does not meet target node group's (%s) instance"
1201 " policy: %s") % (node.group, utils.CommaJoin(res))
1205 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1208 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1209 """Computes the set of instances that would violate the new ipolicy.
1211 @param old_ipolicy: The current (still in-place) ipolicy
1212 @param new_ipolicy: The new (to become) ipolicy
1213 @param instances: List of instances to verify
1214 @return: A set of instances which violate the new ipolicy but did not before
1217 return (_ComputeViolatingInstances(old_ipolicy, instances) -
1218 _ComputeViolatingInstances(new_ipolicy, instances))
1221 def _ExpandItemName(fn, name, kind):
1222 """Expand an item name.
1224 @param fn: the function to use for expansion
1225 @param name: requested item name
1226 @param kind: text description ('Node' or 'Instance')
1227 @return: the resolved (full) name
1228 @raise errors.OpPrereqError: if the item is not found
1231 full_name = fn(name)
1232 if full_name is None:
1233 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1238 def _ExpandNodeName(cfg, name):
1239 """Wrapper over L{_ExpandItemName} for nodes."""
1240 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1243 def _ExpandInstanceName(cfg, name):
1244 """Wrapper over L{_ExpandItemName} for instance."""
1245 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1248 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1249 minmem, maxmem, vcpus, nics, disk_template, disks,
1250 bep, hvp, hypervisor_name, tags):
1251 """Builds instance related env variables for hooks
1253 This builds the hook environment from individual variables.
1256 @param name: the name of the instance
1257 @type primary_node: string
1258 @param primary_node: the name of the instance's primary node
1259 @type secondary_nodes: list
1260 @param secondary_nodes: list of secondary nodes as strings
1261 @type os_type: string
1262 @param os_type: the name of the instance's OS
1263 @type status: string
1264 @param status: the desired status of the instance
1265 @type minmem: string
1266 @param minmem: the minimum memory size of the instance
1267 @type maxmem: string
1268 @param maxmem: the maximum memory size of the instance
1270 @param vcpus: the count of VCPUs the instance has
1272 @param nics: list of tuples (ip, mac, mode, link) representing
1273 the NICs the instance has
1274 @type disk_template: string
1275 @param disk_template: the disk template of the instance
1277 @param disks: the list of (size, mode) pairs
1279 @param bep: the backend parameters for the instance
1281 @param hvp: the hypervisor parameters for the instance
1282 @type hypervisor_name: string
1283 @param hypervisor_name: the hypervisor for the instance
1285 @param tags: list of instance tags as strings
1287 @return: the hook environment for this instance
1292 "INSTANCE_NAME": name,
1293 "INSTANCE_PRIMARY": primary_node,
1294 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1295 "INSTANCE_OS_TYPE": os_type,
1296 "INSTANCE_STATUS": status,
1297 "INSTANCE_MINMEM": minmem,
1298 "INSTANCE_MAXMEM": maxmem,
1299 # TODO(2.7) remove deprecated "memory" value
1300 "INSTANCE_MEMORY": maxmem,
1301 "INSTANCE_VCPUS": vcpus,
1302 "INSTANCE_DISK_TEMPLATE": disk_template,
1303 "INSTANCE_HYPERVISOR": hypervisor_name,
1306 nic_count = len(nics)
1307 for idx, (ip, mac, mode, link) in enumerate(nics):
1310 env["INSTANCE_NIC%d_IP" % idx] = ip
1311 env["INSTANCE_NIC%d_MAC" % idx] = mac
1312 env["INSTANCE_NIC%d_MODE" % idx] = mode
1313 env["INSTANCE_NIC%d_LINK" % idx] = link
1314 if mode == constants.NIC_MODE_BRIDGED:
1315 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1319 env["INSTANCE_NIC_COUNT"] = nic_count
1322 disk_count = len(disks)
1323 for idx, (size, mode) in enumerate(disks):
1324 env["INSTANCE_DISK%d_SIZE" % idx] = size
1325 env["INSTANCE_DISK%d_MODE" % idx] = mode
1329 env["INSTANCE_DISK_COUNT"] = disk_count
1334 env["INSTANCE_TAGS"] = " ".join(tags)
1336 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1337 for key, value in source.items():
1338 env["INSTANCE_%s_%s" % (kind, key)] = value
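# Illustrative excerpt (assumption; values are made up) of the environment
# built above for an instance with one NIC and one disk:
#
#   INSTANCE_NAME=inst1.example.com
#   INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MAC=aa:00:00:12:34:56
#   INSTANCE_DISK_COUNT=1
#   INSTANCE_DISK0_SIZE=10240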
1343 def _NICListToTuple(lu, nics):
1344 """Build a list of nic information tuples.
1346 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1347 value in LUInstanceQueryData.
1349 @type lu: L{LogicalUnit}
1350 @param lu: the logical unit on whose behalf we execute
1351 @type nics: list of L{objects.NIC}
1352 @param nics: list of nics to convert to hooks tuples
1356 cluster = lu.cfg.GetClusterInfo()
1360 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1361 mode = filled_params[constants.NIC_MODE]
1362 link = filled_params[constants.NIC_LINK]
1363 hooks_nics.append((ip, mac, mode, link))
1367 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1368 """Builds instance related env variables for hooks from an object.
1370 @type lu: L{LogicalUnit}
1371 @param lu: the logical unit on whose behalf we execute
1372 @type instance: L{objects.Instance}
1373 @param instance: the instance for which we should build the
1375 @type override: dict
1376 @param override: dictionary with key/values that will override
1379 @return: the hook environment dictionary
1382 cluster = lu.cfg.GetClusterInfo()
1383 bep = cluster.FillBE(instance)
1384 hvp = cluster.FillHV(instance)
1386 "name": instance.name,
1387 "primary_node": instance.primary_node,
1388 "secondary_nodes": instance.secondary_nodes,
1389 "os_type": instance.os,
1390 "status": instance.admin_state,
1391 "maxmem": bep[constants.BE_MAXMEM],
1392 "minmem": bep[constants.BE_MINMEM],
1393 "vcpus": bep[constants.BE_VCPUS],
1394 "nics": _NICListToTuple(lu, instance.nics),
1395 "disk_template": instance.disk_template,
1396 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1399 "hypervisor_name": instance.hypervisor,
1400 "tags": instance.tags,
1403 args.update(override)
1404 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1407 def _AdjustCandidatePool(lu, exceptions):
1408 """Adjust the candidate pool after node operations.
1411 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1413 lu.LogInfo("Promoted nodes to master candidate role: %s",
1414 utils.CommaJoin(node.name for node in mod_list))
1415 for name in mod_list:
1416 lu.context.ReaddNode(name)
1417 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1419 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1423 def _DecideSelfPromotion(lu, exceptions=None):
1424 """Decide whether I should promote myself as a master candidate.
1427 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1428 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1429 # the new node will increase mc_max by one, so:
1430 mc_should = min(mc_should + 1, cp_size)
1431 return mc_now < mc_should
1434 def _CalculateGroupIPolicy(cluster, group):
1435 """Calculate instance policy for group.
1438 return cluster.SimpleFillIPolicy(group.ipolicy)
1441 def _ComputeViolatingInstances(ipolicy, instances):
1442 """Computes the set of instances that violate the given ipolicy.
1444 @param ipolicy: The ipolicy to verify
1445 @type instances: list of L{objects.Instance}
1446 @param instances: List of instances to verify
1447 @return: A frozenset of instance names violating the ipolicy
1450 return frozenset([inst.name for inst in instances
1451 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1454 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1455 """Check that the bridges needed by a list of nics exist.
1458 cluster = lu.cfg.GetClusterInfo()
1459 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1460 brlist = [params[constants.NIC_LINK] for params in paramslist
1461 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1463 result = lu.rpc.call_bridges_exist(target_node, brlist)
1464 result.Raise("Error checking bridges on destination node '%s'" %
1465 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1468 def _CheckInstanceBridgesExist(lu, instance, node=None):
1469 """Check that the bridges needed by an instance exist.
1473 node = instance.primary_node
1474 _CheckNicsBridgesExist(lu, instance.nics, node)
1477 def _CheckOSVariant(os_obj, name):
1478 """Check whether an OS name conforms to the os variants specification.
1480 @type os_obj: L{objects.OS}
1481 @param os_obj: OS object to check
1483 @param name: OS name passed by the user, to check for validity
1486 variant = objects.OS.GetVariant(name)
1487 if not os_obj.supported_variants:
1489 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1490 " passed)" % (os_obj.name, variant),
1494 raise errors.OpPrereqError("OS name must include a variant",
1497 if variant not in os_obj.supported_variants:
1498 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1501 def _GetNodeInstancesInner(cfg, fn):
1502 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1505 def _GetNodeInstances(cfg, node_name):
1506 """Returns a list of all primary and secondary instances on a node.
1510 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1513 def _GetNodePrimaryInstances(cfg, node_name):
1514 """Returns primary instances on a node.
1517 return _GetNodeInstancesInner(cfg,
1518 lambda inst: node_name == inst.primary_node)
1521 def _GetNodeSecondaryInstances(cfg, node_name):
1522 """Returns secondary instances on a node.
1525 return _GetNodeInstancesInner(cfg,
1526 lambda inst: node_name in inst.secondary_nodes)
1529 def _GetStorageTypeArgs(cfg, storage_type):
1530 """Returns the arguments for a storage type.
1533 # Special case for file storage
1534 if storage_type == constants.ST_FILE:
1535 # storage.FileStorage wants a list of storage directories
1536 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1541 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1544 for dev in instance.disks:
1545 cfg.SetDiskID(dev, node_name)
1547 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1548 result.Raise("Failed to get disk status from node %s" % node_name,
1549 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1551 for idx, bdev_status in enumerate(result.payload):
1552 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1558 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1559 """Check the sanity of iallocator and node arguments and use the
1560 cluster-wide iallocator if appropriate.
1562 Check that at most one of (iallocator, node) is specified. If none is
1563 specified, then the LU's opcode's iallocator slot is filled with the
1564 cluster-wide default iallocator.
1566 @type iallocator_slot: string
1567 @param iallocator_slot: the name of the opcode iallocator slot
1568 @type node_slot: string
1569 @param node_slot: the name of the opcode target node slot
1572 node = getattr(lu.op, node_slot, None)
1573 iallocator = getattr(lu.op, iallocator_slot, None)
1575 if node is not None and iallocator is not None:
1576 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1578 elif node is None and iallocator is None:
1579 default_iallocator = lu.cfg.GetDefaultIAllocator()
1580 if default_iallocator:
1581 setattr(lu.op, iallocator_slot, default_iallocator)
1583 raise errors.OpPrereqError("No iallocator or node given and no"
1584 " cluster-wide default iallocator found;"
1585 " please specify either an iallocator or a"
1586 " node, or set a cluster-wide default"
1590 def _GetDefaultIAllocator(cfg, iallocator):
1591 """Decides on which iallocator to use.
1593 @type cfg: L{config.ConfigWriter}
1594 @param cfg: Cluster configuration object
1595 @type iallocator: string or None
1596 @param iallocator: Iallocator specified in opcode
1598 @return: Iallocator name
1602 # Use default iallocator
1603 iallocator = cfg.GetDefaultIAllocator()
1606 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1607 " opcode nor as a cluster-wide default",
1613 class LUClusterPostInit(LogicalUnit):
1614 """Logical unit for running hooks after cluster initialization.
1617 HPATH = "cluster-init"
1618 HTYPE = constants.HTYPE_CLUSTER
1620 def BuildHooksEnv(self):
1625 "OP_TARGET": self.cfg.GetClusterName(),
1628 def BuildHooksNodes(self):
1629 """Build hooks nodes.
1632 return ([], [self.cfg.GetMasterNode()])
1634 def Exec(self, feedback_fn):
1641 class LUClusterDestroy(LogicalUnit):
1642 """Logical unit for destroying the cluster.
1645 HPATH = "cluster-destroy"
1646 HTYPE = constants.HTYPE_CLUSTER
1648 def BuildHooksEnv(self):
1653 "OP_TARGET": self.cfg.GetClusterName(),
1656 def BuildHooksNodes(self):
1657 """Build hooks nodes.
1662 def CheckPrereq(self):
1663 """Check prerequisites.
1665 This checks whether the cluster is empty.
1667 Any errors are signaled by raising errors.OpPrereqError.
1670 master = self.cfg.GetMasterNode()
1672 nodelist = self.cfg.GetNodeList()
1673 if len(nodelist) != 1 or nodelist[0] != master:
1674 raise errors.OpPrereqError("There are still %d node(s) in"
1675 " this cluster." % (len(nodelist) - 1),
1677 instancelist = self.cfg.GetInstanceList()
1679 raise errors.OpPrereqError("There are still %d instance(s) in"
1680 " this cluster." % len(instancelist),
1683 def Exec(self, feedback_fn):
1684 """Destroys the cluster.
1687 master_params = self.cfg.GetMasterNetworkParameters()
1689 # Run post hooks on master node before it's removed
1690 _RunPostHook(self, master_params.name)
1692 ems = self.cfg.GetUseExternalMipScript()
1693 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1696 self.LogWarning("Error disabling the master IP address: %s",
1699 return master_params.name
1702 def _VerifyCertificate(filename):
1703 """Verifies a certificate for L{LUClusterVerifyConfig}.
1705 @type filename: string
1706 @param filename: Path to PEM file
1710 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1711 utils.ReadFile(filename))
1712 except Exception, err: # pylint: disable=W0703
1713 return (LUClusterVerifyConfig.ETYPE_ERROR,
1714 "Failed to load X509 certificate %s: %s" % (filename, err))
1717 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1718 constants.SSL_CERT_EXPIRATION_ERROR)
1721 fnamemsg = "While verifying %s: %s" % (filename, msg)
1726 return (None, fnamemsg)
1727 elif errcode == utils.CERT_WARNING:
1728 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1729 elif errcode == utils.CERT_ERROR:
1730 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1732 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1735 def _GetAllHypervisorParameters(cluster, instances):
1736 """Compute the set of all hypervisor parameters.
1738 @type cluster: L{objects.Cluster}
1739 @param cluster: the cluster object
1740 @param instances: list of L{objects.Instance}
1741 @param instances: additional instances from which to obtain parameters
1742 @rtype: list of (origin, hypervisor, parameters)
1743 @return: a list with all parameters found, indicating the hypervisor they
1744 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1749 for hv_name in cluster.enabled_hypervisors:
1750 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1752 for os_name, os_hvp in cluster.os_hvp.items():
1753 for hv_name, hv_params in os_hvp.items():
1755 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1756 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1758 # TODO: collapse identical parameter values in a single one
1759 for instance in instances:
1760 if instance.hvparams:
1761 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1762 cluster.FillHV(instance)))
1767 class _VerifyErrors(object):
1768 """Mix-in for cluster/group verify LUs.
1770 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1771 self.op and self._feedback_fn to be available.)
1775 ETYPE_FIELD = "code"
1776 ETYPE_ERROR = "ERROR"
1777 ETYPE_WARNING = "WARNING"
1779 def _Error(self, ecode, item, msg, *args, **kwargs):
1780 """Format an error message.
1782 Based on the opcode's error_codes parameter, either format a
1783 parseable error code, or a simpler error string.
1785 This must be called only from Exec and functions called from Exec.
1788 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1789 itype, etxt, _ = ecode
1790 # first complete the msg
1793 # then format the whole message
1794 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1795 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1801 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1802 # and finally report it via the feedback_fn
1803 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1805 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1806 """Log an error message if the passed condition is True.
1810 or self.op.debug_simulate_errors) # pylint: disable=E1101
1812 # If the error code is in the list of ignored errors, demote the error to a warning.
1814 (_, etxt, _) = ecode
1815 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1816 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1819 self._Error(ecode, *args, **kwargs)
1821 # warnings do not mark the operation as failed, only errors do
1822 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1823 self.bad = self.bad or cond
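# Illustrative use from a verify LU (assumption; condition and message are
# hypothetical, the error code is one of the constants.CV_* tuples):
#
#   self._ErrorIf(bool(problem), constants.CV_ECLUSTERCFG, None,
#                 "cluster configuration problem: %s", problem)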
1826 class LUClusterVerify(NoHooksLU):
1827 """Submits all jobs necessary to verify the cluster.
1832 def ExpandNames(self):
1833 self.needed_locks = {}
1835 def Exec(self, feedback_fn):
1838 if self.op.group_name:
1839 groups = [self.op.group_name]
1840 depends_fn = lambda: None
1842 groups = self.cfg.GetNodeGroupList()
1844 # Verify global configuration
1846 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1849 # Always depend on global verification
1850 depends_fn = lambda: [(-len(jobs), [])]
1852 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1853 ignore_errors=self.op.ignore_errors,
1854 depends=depends_fn())]
1855 for group in groups)
1857 # Fix up all parameters
1858 for op in itertools.chain(*jobs): # pylint: disable=W0142
1859 op.debug_simulate_errors = self.op.debug_simulate_errors
1860 op.verbose = self.op.verbose
1861 op.error_codes = self.op.error_codes
1863 op.skip_checks = self.op.skip_checks
1864 except AttributeError:
1865 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1867 return ResultWithJobs(jobs)
1870 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1871 """Verifies the cluster config.
1876 def _VerifyHVP(self, hvp_data):
1877 """Verifies locally the syntax of the hypervisor parameters.
1880 for item, hv_name, hv_params in hvp_data:
1881 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1884 hv_class = hypervisor.GetHypervisor(hv_name)
1885 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1886 hv_class.CheckParameterSyntax(hv_params)
1887 except errors.GenericError, err:
1888 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1890 def ExpandNames(self):
1891 # Information can be safely retrieved as the BGL is acquired in exclusive
1893 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1894 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1895 self.all_node_info = self.cfg.GetAllNodesInfo()
1896 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1897 self.needed_locks = {}
1899 def Exec(self, feedback_fn):
1900 """Verify integrity of cluster, performing various tests on nodes.
1904 self._feedback_fn = feedback_fn
1906 feedback_fn("* Verifying cluster config")
1908 for msg in self.cfg.VerifyConfig():
1909 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1911 feedback_fn("* Verifying cluster certificate files")
1913 for cert_filename in constants.ALL_CERT_FILES:
1914 (errcode, msg) = _VerifyCertificate(cert_filename)
1915 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1917 feedback_fn("* Verifying hypervisor parameters")
1919 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1920 self.all_inst_info.values()))
1922 feedback_fn("* Verifying all nodes belong to an existing group")
1924 # We do this verification here because, should this bogus circumstance
1925 # occur, it would never be caught by VerifyGroup, which only acts on
1926 # nodes/instances reachable from existing node groups.
1928 dangling_nodes = set(node.name for node in self.all_node_info.values()
1929 if node.group not in self.all_group_info)
1931 dangling_instances = {}
1932 no_node_instances = []
1934 for inst in self.all_inst_info.values():
1935 if inst.primary_node in dangling_nodes:
1936 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1937 elif inst.primary_node not in self.all_node_info:
1938 no_node_instances.append(inst.name)
1943 utils.CommaJoin(dangling_instances.get(node.name,
1945 for node in dangling_nodes]
1947 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1949 "the following nodes (and their instances) belong to a non"
1950 " existing group: %s", utils.CommaJoin(pretty_dangling))
1952 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1954 "the following instances have a non-existing primary-node:"
1955 " %s", utils.CommaJoin(no_node_instances))
1960 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1961 """Verifies the status of a node group.
1964 HPATH = "cluster-verify"
1965 HTYPE = constants.HTYPE_CLUSTER
1968 _HOOKS_INDENT_RE = re.compile("^", re.M)
1970 class NodeImage(object):
1971 """A class representing the logical and physical status of a node.
1974 @ivar name: the node name to which this object refers
1975 @ivar volumes: a structure as returned from
1976 L{ganeti.backend.GetVolumeList} (runtime)
1977 @ivar instances: a list of running instances (runtime)
1978 @ivar pinst: list of configured primary instances (config)
1979 @ivar sinst: list of configured secondary instances (config)
1980 @ivar sbp: dictionary of {primary-node: list of instances} for all
1981 instances for which this node is secondary (config)
1982 @ivar mfree: free memory, as reported by hypervisor (runtime)
1983 @ivar dfree: free disk, as reported by the node (runtime)
1984 @ivar offline: the offline status (config)
1985 @type rpc_fail: boolean
1986 @ivar rpc_fail: whether the RPC verify call failed (overall,
1987 not whether the individual keys were correct) (runtime)
1988 @type lvm_fail: boolean
1989 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1990 @type hyp_fail: boolean
1991 @ivar hyp_fail: whether the RPC call didn't return the instance list
1992 @type ghost: boolean
1993 @ivar ghost: whether this is an unknown ("ghost") node, i.e. not in the config
1994 @type os_fail: boolean
1995 @ivar os_fail: whether the RPC call didn't return valid OS data
1997 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1998 @type vm_capable: boolean
1999 @ivar vm_capable: whether the node can host instances
2002 def __init__(self, offline=False, name=None, vm_capable=True):
2011 self.offline = offline
2012 self.vm_capable = vm_capable
2013 self.rpc_fail = False
2014 self.lvm_fail = False
2015 self.hyp_fail = False
2017 self.os_fail = False
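# Illustrative sketch (assumed usage, names and values made up): the
# verification code below builds one NodeImage per node, fills the config
# fields (pinst/sinst/sbp) from the cluster configuration and the runtime
# fields from the node_verify RPC results, e.g.
#   nimg = self.NodeImage(offline=False, name="node1", vm_capable=True)
#   nimg.pinst = ["inst1"]   # configured primary instances
#   nimg.mfree = 2048        # free memory in MiB, as reported by the hypervisor
# rpc_fail/lvm_fail/hyp_fail/os_fail are flipped to True when the
# corresponding RPC answer could not be used.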
2020 def ExpandNames(self):
2021 # This raises errors.OpPrereqError on its own:
2022 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2024 # Get instances in node group; this is unsafe and needs verification later
2025 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2027 self.needed_locks = {
2028 locking.LEVEL_INSTANCE: inst_names,
2029 locking.LEVEL_NODEGROUP: [self.group_uuid],
2030 locking.LEVEL_NODE: [],
2033 self.share_locks = _ShareAll()
2035 def DeclareLocks(self, level):
2036 if level == locking.LEVEL_NODE:
2037 # Get members of node group; this is unsafe and needs verification later
2038 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2040 all_inst_info = self.cfg.GetAllInstancesInfo()
2042 # In Exec(), we warn about mirrored instances that have primary and
2043 # secondary living in separate node groups. To fully verify that
2044 # volumes for these instances are healthy, we will need to do an
2045 # extra call to their secondaries. We ensure here those nodes will
2047 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2048 # Important: access only the instances whose lock is owned
2049 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2050 nodes.update(all_inst_info[inst].secondary_nodes)
2052 self.needed_locks[locking.LEVEL_NODE] = nodes
2054 def CheckPrereq(self):
2055 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2056 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2058 group_nodes = set(self.group_info.members)
2059 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2062 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2064 unlocked_instances = \
2065 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2068 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2069 utils.CommaJoin(unlocked_nodes))
2071 if unlocked_instances:
2072 raise errors.OpPrereqError("Missing lock for instances: %s" %
2073 utils.CommaJoin(unlocked_instances))
2075 self.all_node_info = self.cfg.GetAllNodesInfo()
2076 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2078 self.my_node_names = utils.NiceSort(group_nodes)
2079 self.my_inst_names = utils.NiceSort(group_instances)
2081 self.my_node_info = dict((name, self.all_node_info[name])
2082 for name in self.my_node_names)
2084 self.my_inst_info = dict((name, self.all_inst_info[name])
2085 for name in self.my_inst_names)
2087 # We detect here the nodes that will need the extra RPC calls for verifying
2088 # split LV volumes; they should be locked.
2089 extra_lv_nodes = set()
2091 for inst in self.my_inst_info.values():
2092 if inst.disk_template in constants.DTS_INT_MIRROR:
2093 group = self.my_node_info[inst.primary_node].group
2094 for nname in inst.secondary_nodes:
2095 if self.all_node_info[nname].group != group:
2096 extra_lv_nodes.add(nname)
2098 unlocked_lv_nodes = \
2099 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2101 if unlocked_lv_nodes:
2102 raise errors.OpPrereqError("these nodes could be locked: %s" %
2103 utils.CommaJoin(unlocked_lv_nodes))
2104 self.extra_lv_nodes = list(extra_lv_nodes)
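# Illustrative example (hypothetical names): if instance "inst1" uses DRBD
# with its primary node in this group and its secondary node "nodeX" in a
# different group, "nodeX" ends up in self.extra_lv_nodes so that an extra
# LV-list RPC can be sent to it later in Exec(); if its node lock is not
# held, the OpPrereqError above is raised instead.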
2106 def _VerifyNode(self, ninfo, nresult):
2107 """Perform some basic validation on data returned from a node.
2109 - check the result data structure is well formed and has all the
2111 - check ganeti version
2113 @type ninfo: L{objects.Node}
2114 @param ninfo: the node to check
2115 @param nresult: the results from the node
2117 @return: whether overall this call was successful (and we can expect
2118 reasonable values in the response)
2122 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2124 # main result, nresult should be a non-empty dict
2125 test = not nresult or not isinstance(nresult, dict)
2126 _ErrorIf(test, constants.CV_ENODERPC, node,
2127 "unable to verify node: no data returned")
2131 # compares ganeti version
2132 local_version = constants.PROTOCOL_VERSION
2133 remote_version = nresult.get("version", None)
2134 test = not (remote_version and
2135 isinstance(remote_version, (list, tuple)) and
2136 len(remote_version) == 2)
2137 _ErrorIf(test, constants.CV_ENODERPC, node,
2138 "connection to node returned invalid data")
2142 test = local_version != remote_version[0]
2143 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2144 "incompatible protocol versions: master %s,"
2145 " node %s", local_version, remote_version[0])
2149 # node seems compatible, we can actually try to look into its results
2151 # full package version
2152 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2153 constants.CV_ENODEVERSION, node,
2154 "software version mismatch: master %s, node %s",
2155 constants.RELEASE_VERSION, remote_version[1],
2156 code=self.ETYPE_WARNING)
2158 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2159 if ninfo.vm_capable and isinstance(hyp_result, dict):
2160 for hv_name, hv_result in hyp_result.iteritems():
2161 test = hv_result is not None
2162 _ErrorIf(test, constants.CV_ENODEHV, node,
2163 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2165 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2166 if ninfo.vm_capable and isinstance(hvp_result, list):
2167 for item, hv_name, hv_result in hvp_result:
2168 _ErrorIf(True, constants.CV_ENODEHV, node,
2169 "hypervisor %s parameter verify failure (source %s): %s",
2170 hv_name, item, hv_result)
2172 test = nresult.get(constants.NV_NODESETUP,
2173 ["Missing NODESETUP results"])
2174 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2179 def _VerifyNodeTime(self, ninfo, nresult,
2180 nvinfo_starttime, nvinfo_endtime):
2181 """Check the node time.
2183 @type ninfo: L{objects.Node}
2184 @param ninfo: the node to check
2185 @param nresult: the remote results for the node
2186 @param nvinfo_starttime: the start time of the RPC call
2187 @param nvinfo_endtime: the end time of the RPC call
2191 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2193 ntime = nresult.get(constants.NV_TIME, None)
2195 ntime_merged = utils.MergeTime(ntime)
2196 except (ValueError, TypeError):
2197 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2200 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2201 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2202 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2203 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2207 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2208 "Node time diverges by at least %s from master node time",
2211 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2212 """Check the node LVM results.
2214 @type ninfo: L{objects.Node}
2215 @param ninfo: the node to check
2216 @param nresult: the remote results for the node
2217 @param vg_name: the configured VG name
2224 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2226 # checks vg existence and size > 20G
2227 vglist = nresult.get(constants.NV_VGLIST, None)
2229 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2231 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2232 constants.MIN_VG_SIZE)
2233 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2236 pvlist = nresult.get(constants.NV_PVLIST, None)
2237 test = pvlist is None
2238 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2240 # check that ':' is not present in PV names, since it's a
2241 # special character for lvcreate (denotes the range of PEs to
2243 for _, pvname, owner_vg in pvlist:
2244 test = ":" in pvname
2245 _ErrorIf(test, constants.CV_ENODELVM, node,
2246 "Invalid character ':' in PV '%s' of VG '%s'",
2249 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2250 """Check the node bridges.
2252 @type ninfo: L{objects.Node}
2253 @param ninfo: the node to check
2254 @param nresult: the remote results for the node
2255 @param bridges: the expected list of bridges
2262 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2264 missing = nresult.get(constants.NV_BRIDGES, None)
2265 test = not isinstance(missing, list)
2266 _ErrorIf(test, constants.CV_ENODENET, node,
2267 "did not return valid bridge information")
2269 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2270 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2272 def _VerifyNodeUserScripts(self, ninfo, nresult):
2273 """Check the results of user scripts presence and executability on the node
2275 @type ninfo: L{objects.Node}
2276 @param ninfo: the node to check
2277 @param nresult: the remote results for the node
2282 test = not constants.NV_USERSCRIPTS in nresult
2283 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2284 "did not return user scripts information")
2286 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2288 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2289 "user scripts not present or not executable: %s" %
2290 utils.CommaJoin(sorted(broken_scripts)))
2292 def _VerifyNodeNetwork(self, ninfo, nresult):
2293 """Check the node network connectivity results.
2295 @type ninfo: L{objects.Node}
2296 @param ninfo: the node to check
2297 @param nresult: the remote results for the node
2301 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2303 test = constants.NV_NODELIST not in nresult
2304 _ErrorIf(test, constants.CV_ENODESSH, node,
2305 "node hasn't returned node ssh connectivity data")
2307 if nresult[constants.NV_NODELIST]:
2308 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2309 _ErrorIf(True, constants.CV_ENODESSH, node,
2310 "ssh communication with node '%s': %s", a_node, a_msg)
2312 test = constants.NV_NODENETTEST not in nresult
2313 _ErrorIf(test, constants.CV_ENODENET, node,
2314 "node hasn't returned node tcp connectivity data")
2316 if nresult[constants.NV_NODENETTEST]:
2317 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2319 _ErrorIf(True, constants.CV_ENODENET, node,
2320 "tcp communication with node '%s': %s",
2321 anode, nresult[constants.NV_NODENETTEST][anode])
2323 test = constants.NV_MASTERIP not in nresult
2324 _ErrorIf(test, constants.CV_ENODENET, node,
2325 "node hasn't returned node master IP reachability data")
2327 if not nresult[constants.NV_MASTERIP]:
2328 if node == self.master_node:
2329 msg = "the master node cannot reach the master IP (not configured?)"
2331 msg = "cannot reach the master IP"
2332 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2334 def _VerifyInstance(self, instance, instanceconfig, node_image,
2336 """Verify an instance.
2338 This function checks to see if the required block devices are
2339 available on the instance's node.
2342 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2343 node_current = instanceconfig.primary_node
2345 node_vol_should = {}
2346 instanceconfig.MapLVsByNode(node_vol_should)
2348 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2349 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2350 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2352 for node in node_vol_should:
2353 n_img = node_image[node]
2354 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2355 # ignore missing volumes on offline or broken nodes
2357 for volume in node_vol_should[node]:
2358 test = volume not in n_img.volumes
2359 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2360 "volume %s missing on node %s", volume, node)
2362 if instanceconfig.admin_state == constants.ADMINST_UP:
2363 pri_img = node_image[node_current]
2364 test = instance not in pri_img.instances and not pri_img.offline
2365 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2366 "instance not running on its primary node %s",
2369 diskdata = [(nname, success, status, idx)
2370 for (nname, disks) in diskstatus.items()
2371 for idx, (success, status) in enumerate(disks)]
2373 for nname, success, bdev_status, idx in diskdata:
2374 # the 'ghost node' construction in Exec() ensures that we have a
2376 snode = node_image[nname]
2377 bad_snode = snode.ghost or snode.offline
2378 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2379 not success and not bad_snode,
2380 constants.CV_EINSTANCEFAULTYDISK, instance,
2381 "couldn't retrieve status for disk/%s on %s: %s",
2382 idx, nname, bdev_status)
2383 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2384 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2385 constants.CV_EINSTANCEFAULTYDISK, instance,
2386 "disk/%s on %s is faulty", idx, nname)
2388 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2389 """Verify if there are any unknown volumes in the cluster.
2391 The .os, .swap and backup volumes are ignored. All other volumes are
2392 reported as unknown.
2394 @type reserved: L{ganeti.utils.FieldSet}
2395 @param reserved: a FieldSet of reserved volume names
2398 for node, n_img in node_image.items():
2399 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2400 # skip non-healthy nodes
2402 for volume in n_img.volumes:
2403 test = ((node not in node_vol_should or
2404 volume not in node_vol_should[node]) and
2405 not reserved.Matches(volume))
2406 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2407 "volume %s is unknown", volume)
2409 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2410 """Verify N+1 Memory Resilience.
2412 Check that if one single node dies we can still start all the
2413 instances it was primary for.
2416 cluster_info = self.cfg.GetClusterInfo()
2417 for node, n_img in node_image.items():
2418 # This code checks that every node which is now listed as
2419 # secondary has enough memory to host all instances it would have to
2420 # take over, should a single other node in the cluster fail.
2421 # FIXME: not ready for failover to an arbitrary node
2422 # FIXME: does not support file-backed instances
2423 # WARNING: we currently take into account down instances as well
2424 # as up ones, considering that even if they're down someone
2425 # might want to start them even in the event of a node failure.
2427 # we're skipping offline nodes from the N+1 warning, since
2428 # most likely we don't have good memory information from them;
2429 # we already list instances living on such nodes, and that's
2432 #TODO(dynmem): use MINMEM for checking
2433 #TODO(dynmem): also consider ballooning out other instances
2434 for prinode, instances in n_img.sbp.items():
2436 for instance in instances:
2437 bep = cluster_info.FillBE(instance_cfg[instance])
2438 if bep[constants.BE_AUTO_BALANCE]:
2439 needed_mem += bep[constants.BE_MAXMEM]
2440 test = n_img.mfree < needed_mem
2441 self._ErrorIf(test, constants.CV_ENODEN1, node,
2442 "not enough memory to accomodate instance failovers"
2443 " should node %s fail (%dMiB needed, %dMiB available)",
2444 prinode, needed_mem, n_img.mfree)
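# Worked example (numbers made up): if node "nodeB" is secondary for
# instances whose primary is "nodeA" and their auto-balanced BE_MAXMEM values
# sum to 6144 MiB, while nodeB currently reports mfree = 4096 MiB, the check
# above reports CV_ENODEN1 because nodeB could not absorb a failover from
# nodeA.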
2447 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2448 (files_all, files_opt, files_mc, files_vm)):
2449 """Verifies file checksums collected from all nodes.
2451 @param errorif: Callback for reporting errors
2452 @param nodeinfo: List of L{objects.Node} objects
2453 @param master_node: Name of master node
2454 @param all_nvinfo: RPC results
2457 # Define functions determining which nodes to consider for a file
2460 (files_mc, lambda node: (node.master_candidate or
2461 node.name == master_node)),
2462 (files_vm, lambda node: node.vm_capable),
2465 # Build mapping from filename to list of nodes which should have the file
2467 for (files, fn) in files2nodefn:
2469 filenodes = nodeinfo
2471 filenodes = filter(fn, nodeinfo)
2472 nodefiles.update((filename,
2473 frozenset(map(operator.attrgetter("name"), filenodes)))
2474 for filename in files)
2476 assert set(nodefiles) == (files_all | files_mc | files_vm)
2478 fileinfo = dict((filename, {}) for filename in nodefiles)
2479 ignore_nodes = set()
2481 for node in nodeinfo:
2483 ignore_nodes.add(node.name)
2486 nresult = all_nvinfo[node.name]
2488 if nresult.fail_msg or not nresult.payload:
2491 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2493 test = not (node_files and isinstance(node_files, dict))
2494 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2495 "Node did not return file checksum data")
2497 ignore_nodes.add(node.name)
2500 # Build per-checksum mapping from filename to nodes having it
2501 for (filename, checksum) in node_files.items():
2502 assert filename in nodefiles
2503 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2505 for (filename, checksums) in fileinfo.items():
2506 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2508 # Nodes having the file
2509 with_file = frozenset(node_name
2510 for nodes in fileinfo[filename].values()
2511 for node_name in nodes) - ignore_nodes
2513 expected_nodes = nodefiles[filename] - ignore_nodes
2515 # Nodes missing file
2516 missing_file = expected_nodes - with_file
2518 if filename in files_opt:
2520 errorif(missing_file and missing_file != expected_nodes,
2521 constants.CV_ECLUSTERFILECHECK, None,
2522 "File %s is optional, but it must exist on all or no"
2523 " nodes (not found on %s)",
2524 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2526 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2527 "File %s is missing from node(s) %s", filename,
2528 utils.CommaJoin(utils.NiceSort(missing_file)))
2530 # Warn if a node has a file it shouldn't
2531 unexpected = with_file - expected_nodes
2533 constants.CV_ECLUSTERFILECHECK, None,
2534 "File %s should not exist on node(s) %s",
2535 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2537 # See if there are multiple versions of the file
2538 test = len(checksums) > 1
2540 variants = ["variant %s on %s" %
2541 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2542 for (idx, (checksum, nodes)) in
2543 enumerate(sorted(checksums.items()))]
2547 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2548 "File %s found with %s different checksums (%s)",
2549 filename, len(checksums), "; ".join(variants))
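# Illustrative shape of the data built above (paths and checksums made up):
#   nodefiles = {"/var/lib/ganeti/ssconf_x": frozenset(["node1", "node2"])}
#   fileinfo  = {"/var/lib/ganeti/ssconf_x":
#                  {"abc123...": set(["node1"]), "def456...": set(["node2"])}}
# Two different checksums for the same file therefore trigger the
# "found with N different checksums" error, listing one variant per checksum.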
2551 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2553 """Verifies and the node DRBD status.
2555 @type ninfo: L{objects.Node}
2556 @param ninfo: the node to check
2557 @param nresult: the remote results for the node
2558 @param instanceinfo: the dict of instances
2559 @param drbd_helper: the configured DRBD usermode helper
2560 @param drbd_map: the DRBD map as returned by
2561 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2565 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2568 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2569 test = (helper_result is None)
2570 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2571 "no drbd usermode helper returned")
2573 status, payload = helper_result
2575 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2576 "drbd usermode helper check unsuccessful: %s", payload)
2577 test = status and (payload != drbd_helper)
2578 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2579 "wrong drbd usermode helper: %s", payload)
2581 # compute the DRBD minors
2583 for minor, instance in drbd_map[node].items():
2584 test = instance not in instanceinfo
2585 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2586 "ghost instance '%s' in temporary DRBD map", instance)
2587 # ghost instance should not be running, but otherwise we
2588 # don't give double warnings (both ghost instance and
2589 # unallocated minor in use)
2591 node_drbd[minor] = (instance, False)
2593 instance = instanceinfo[instance]
2594 node_drbd[minor] = (instance.name,
2595 instance.admin_state == constants.ADMINST_UP)
2597 # and now check them
2598 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2599 test = not isinstance(used_minors, (tuple, list))
2600 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2601 "cannot parse drbd status file: %s", str(used_minors))
2603 # we cannot check drbd status
2606 for minor, (iname, must_exist) in node_drbd.items():
2607 test = minor not in used_minors and must_exist
2608 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2609 "drbd minor %d of instance %s is not active", minor, iname)
2610 for minor in used_minors:
2611 test = minor not in node_drbd
2612 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2613 "unallocated drbd minor %d is in use", minor)
2615 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2616 """Builds the node OS structures.
2618 @type ninfo: L{objects.Node}
2619 @param ninfo: the node to check
2620 @param nresult: the remote results for the node
2621 @param nimg: the node image object
2625 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2627 remote_os = nresult.get(constants.NV_OSLIST, None)
2628 test = (not isinstance(remote_os, list) or
2629 not compat.all(isinstance(v, list) and len(v) == 7
2630 for v in remote_os))
2632 _ErrorIf(test, constants.CV_ENODEOS, node,
2633 "node hasn't returned valid OS data")
2642 for (name, os_path, status, diagnose,
2643 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2645 if name not in os_dict:
2648 # parameters is a list of lists instead of list of tuples due to
2649 # JSON lacking a real tuple type, fix it:
2650 parameters = [tuple(v) for v in parameters]
2651 os_dict[name].append((os_path, status, diagnose,
2652 set(variants), set(parameters), set(api_ver)))
2654 nimg.oslist = os_dict
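# Illustrative shape of nimg.oslist (made-up values): one entry per OS name,
# typically with a single element, e.g.
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# i.e. (path, status, diagnose message, variants, parameters, API versions);
# multiple elements mean the same OS was found in several search paths.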
2656 def _VerifyNodeOS(self, ninfo, nimg, base):
2657 """Verifies the node OS list.
2659 @type ninfo: L{objects.Node}
2660 @param ninfo: the node to check
2661 @param nimg: the node image object
2662 @param base: the 'template' node we match against (e.g. from the master)
2666 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2668 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2670 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2671 for os_name, os_data in nimg.oslist.items():
2672 assert os_data, "Empty OS status for OS %s?!" % os_name
2673 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2674 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2675 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2676 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2677 "OS '%s' has multiple entries (first one shadows the rest): %s",
2678 os_name, utils.CommaJoin([v[0] for v in os_data]))
2679 # comparisons with the 'base' image
2680 test = os_name not in base.oslist
2681 _ErrorIf(test, constants.CV_ENODEOS, node,
2682 "Extra OS %s not present on reference node (%s)",
2686 assert base.oslist[os_name], "Base node has empty OS status?"
2687 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2689 # base OS is invalid, skipping
2691 for kind, a, b in [("API version", f_api, b_api),
2692 ("variants list", f_var, b_var),
2693 ("parameters", beautify_params(f_param),
2694 beautify_params(b_param))]:
2695 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2696 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2697 kind, os_name, base.name,
2698 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2700 # check any missing OSes
2701 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2702 _ErrorIf(missing, constants.CV_ENODEOS, node,
2703 "OSes present on reference node %s but missing on this node: %s",
2704 base.name, utils.CommaJoin(missing))
2706 def _VerifyOob(self, ninfo, nresult):
2707 """Verifies out of band functionality of a node.
2709 @type ninfo: L{objects.Node}
2710 @param ninfo: the node to check
2711 @param nresult: the remote results for the node
2715 # We just have to verify the paths on master and/or master candidates
2716 # as the oob helper is invoked on the master
2717 if ((ninfo.master_candidate or ninfo.master_capable) and
2718 constants.NV_OOB_PATHS in nresult):
2719 for path_result in nresult[constants.NV_OOB_PATHS]:
2720 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2722 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2723 """Verifies and updates the node volume data.
2725 This function will update a L{NodeImage}'s internal structures
2726 with data from the remote call.
2728 @type ninfo: L{objects.Node}
2729 @param ninfo: the node to check
2730 @param nresult: the remote results for the node
2731 @param nimg: the node image object
2732 @param vg_name: the configured VG name
2736 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2738 nimg.lvm_fail = True
2739 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2742 elif isinstance(lvdata, basestring):
2743 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2744 utils.SafeEncode(lvdata))
2745 elif not isinstance(lvdata, dict):
2746 _ErrorIf(True, constants.CV_ENODELVM, node,
2747 "rpc call to node failed (lvlist)")
2749 nimg.volumes = lvdata
2750 nimg.lvm_fail = False
2752 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2753 """Verifies and updates the node instance list.
2755 If the listing was successful, then updates this node's instance
2756 list. Otherwise, it marks the RPC call as failed for the instance
2759 @type ninfo: L{objects.Node}
2760 @param ninfo: the node to check
2761 @param nresult: the remote results for the node
2762 @param nimg: the node image object
2765 idata = nresult.get(constants.NV_INSTANCELIST, None)
2766 test = not isinstance(idata, list)
2767 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2768 "rpc call to node failed (instancelist): %s",
2769 utils.SafeEncode(str(idata)))
2771 nimg.hyp_fail = True
2773 nimg.instances = idata
2775 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2776 """Verifies and computes a node information map
2778 @type ninfo: L{objects.Node}
2779 @param ninfo: the node to check
2780 @param nresult: the remote results for the node
2781 @param nimg: the node image object
2782 @param vg_name: the configured VG name
2786 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2788 # try to read free memory (from the hypervisor)
2789 hv_info = nresult.get(constants.NV_HVINFO, None)
2790 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2791 _ErrorIf(test, constants.CV_ENODEHV, node,
2792 "rpc call to node failed (hvinfo)")
2795 nimg.mfree = int(hv_info["memory_free"])
2796 except (ValueError, TypeError):
2797 _ErrorIf(True, constants.CV_ENODERPC, node,
2798 "node returned invalid nodeinfo, check hypervisor")
2800 # FIXME: devise a free space model for file based instances as well
2801 if vg_name is not None:
2802 test = (constants.NV_VGLIST not in nresult or
2803 vg_name not in nresult[constants.NV_VGLIST])
2804 _ErrorIf(test, constants.CV_ENODELVM, node,
2805 "node didn't return data for the volume group '%s'"
2806 " - it is either missing or broken", vg_name)
2809 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2810 except (ValueError, TypeError):
2811 _ErrorIf(True, constants.CV_ENODERPC, node,
2812 "node returned invalid LVM info, check LVM status")
2814 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2815 """Gets per-disk status information for all instances.
2817 @type nodelist: list of strings
2818 @param nodelist: Node names
2819 @type node_image: dict of (name, L{objects.Node})
2820 @param node_image: Node objects
2821 @type instanceinfo: dict of (name, L{objects.Instance})
2822 @param instanceinfo: Instance objects
2823 @rtype: {instance: {node: [(success, payload)]}}
2824 @return: a dictionary of per-instance dictionaries with nodes as
2825 keys and disk information as values; the disk information is a
2826 list of tuples (success, payload)
2829 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2832 node_disks_devonly = {}
2833 diskless_instances = set()
2834 diskless = constants.DT_DISKLESS
2836 for nname in nodelist:
2837 node_instances = list(itertools.chain(node_image[nname].pinst,
2838 node_image[nname].sinst))
2839 diskless_instances.update(inst for inst in node_instances
2840 if instanceinfo[inst].disk_template == diskless)
2841 disks = [(inst, disk)
2842 for inst in node_instances
2843 for disk in instanceinfo[inst].disks]
2846 # No need to collect data
2849 node_disks[nname] = disks
2851 # Creating copies as SetDiskID below will modify the objects and that can
2852 # lead to incorrect data returned from nodes
2853 devonly = [dev.Copy() for (_, dev) in disks]
2856 self.cfg.SetDiskID(dev, nname)
2858 node_disks_devonly[nname] = devonly
2860 assert len(node_disks) == len(node_disks_devonly)
2862 # Collect data from all nodes with disks
2863 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2866 assert len(result) == len(node_disks)
2870 for (nname, nres) in result.items():
2871 disks = node_disks[nname]
2874 # No data from this node
2875 data = len(disks) * [(False, "node offline")]
2878 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2879 "while getting disk information: %s", msg)
2881 # No data from this node
2882 data = len(disks) * [(False, msg)]
2885 for idx, i in enumerate(nres.payload):
2886 if isinstance(i, (tuple, list)) and len(i) == 2:
2889 logging.warning("Invalid result from node %s, entry %d: %s",
2891 data.append((False, "Invalid result from the remote node"))
2893 for ((inst, _), status) in zip(disks, data):
2894 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2896 # Add empty entries for diskless instances.
2897 for inst in diskless_instances:
2898 assert inst not in instdisk
2901 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2902 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2903 compat.all(isinstance(s, (tuple, list)) and
2904 len(s) == 2 for s in statuses)
2905 for inst, nnames in instdisk.items()
2906 for nname, statuses in nnames.items())
2907 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
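# Illustrative shape of the resulting instdisk mapping (made-up data):
#   {"inst1": {"node1": [(True, status0), (True, status1)],
#              "node2": [(True, status0), (True, status1)]}}
# i.e. one (success, payload) pair per disk, per node the instance uses;
# diskless instances get an empty inner dict.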
2912 def _SshNodeSelector(group_uuid, all_nodes):
2913 """Create endless iterators for all potential SSH check hosts.
2916 nodes = [node for node in all_nodes
2917 if (node.group != group_uuid and
2919 keyfunc = operator.attrgetter("group")
2921 return map(itertools.cycle,
2922 [sorted(map(operator.attrgetter("name"), names))
2923 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2927 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2928 """Choose which nodes should talk to which other nodes.
2930 We will make nodes contact all nodes in their group, and one node from
2933 @warning: This algorithm has a known issue if one node group is much
2934 smaller than others (e.g. just one node). In such a case all other
2935 nodes will talk to the single node.
2938 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2939 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2941 return (online_nodes,
2942 dict((name, sorted([i.next() for i in sel]))
2943 for name in online_nodes))
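# Illustrative result (hypothetical nodes/groups): for a group whose online
# nodes are ["n1", "n2"] and two other groups containing nodes "a1" and "b1",
# the return value would look like
#   (["n1", "n2"], {"n1": ["a1", "b1"], "n2": ["a1", "b1"]})
# i.e. the group's own online nodes plus, per node, one node picked from each
# other group (cycling through that group's sorted members).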
2945 def BuildHooksEnv(self):
2948 Cluster-Verify hooks are run only in the post phase; their failure causes
2949 the output to be logged in the verify output and the verification to fail.
2953 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2956 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2957 for node in self.my_node_info.values())
2961 def BuildHooksNodes(self):
2962 """Build hooks nodes.
2965 return ([], self.my_node_names)
2967 def Exec(self, feedback_fn):
2968 """Verify integrity of the node group, performing various test on nodes.
2971 # This method has too many local variables. pylint: disable=R0914
2972 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2974 if not self.my_node_names:
2976 feedback_fn("* Empty node group, skipping verification")
2980 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2981 verbose = self.op.verbose
2982 self._feedback_fn = feedback_fn
2984 vg_name = self.cfg.GetVGName()
2985 drbd_helper = self.cfg.GetDRBDHelper()
2986 cluster = self.cfg.GetClusterInfo()
2987 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2988 hypervisors = cluster.enabled_hypervisors
2989 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2991 i_non_redundant = [] # Non redundant instances
2992 i_non_a_balanced = [] # Non auto-balanced instances
2993 i_offline = 0 # Count of offline instances
2994 n_offline = 0 # Count of offline nodes
2995 n_drained = 0 # Count of nodes being drained
2996 node_vol_should = {}
2998 # FIXME: verify OS list
3001 filemap = _ComputeAncillaryFiles(cluster, False)
3003 # do local checksums
3004 master_node = self.master_node = self.cfg.GetMasterNode()
3005 master_ip = self.cfg.GetMasterIP()
3007 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3010 if self.cfg.GetUseExternalMipScript():
3011 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3013 node_verify_param = {
3014 constants.NV_FILELIST:
3015 utils.UniqueSequence(filename
3016 for files in filemap
3017 for filename in files),
3018 constants.NV_NODELIST:
3019 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3020 self.all_node_info.values()),
3021 constants.NV_HYPERVISOR: hypervisors,
3022 constants.NV_HVPARAMS:
3023 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3024 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3025 for node in node_data_list
3026 if not node.offline],
3027 constants.NV_INSTANCELIST: hypervisors,
3028 constants.NV_VERSION: None,
3029 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3030 constants.NV_NODESETUP: None,
3031 constants.NV_TIME: None,
3032 constants.NV_MASTERIP: (master_node, master_ip),
3033 constants.NV_OSLIST: None,
3034 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3035 constants.NV_USERSCRIPTS: user_scripts,
3038 if vg_name is not None:
3039 node_verify_param[constants.NV_VGLIST] = None
3040 node_verify_param[constants.NV_LVLIST] = vg_name
3041 node_verify_param[constants.NV_PVLIST] = [vg_name]
3042 node_verify_param[constants.NV_DRBDLIST] = None
3045 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3048 # FIXME: this needs to be changed per node-group, not cluster-wide
3050 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3051 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3052 bridges.add(default_nicpp[constants.NIC_LINK])
3053 for instance in self.my_inst_info.values():
3054 for nic in instance.nics:
3055 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3056 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3057 bridges.add(full_nic[constants.NIC_LINK])
3060 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3062 # Build our expected cluster state
3063 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3065 vm_capable=node.vm_capable))
3066 for node in node_data_list)
3070 for node in self.all_node_info.values():
3071 path = _SupportsOob(self.cfg, node)
3072 if path and path not in oob_paths:
3073 oob_paths.append(path)
3076 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3078 for instance in self.my_inst_names:
3079 inst_config = self.my_inst_info[instance]
3081 for nname in inst_config.all_nodes:
3082 if nname not in node_image:
3083 gnode = self.NodeImage(name=nname)
3084 gnode.ghost = (nname not in self.all_node_info)
3085 node_image[nname] = gnode
3087 inst_config.MapLVsByNode(node_vol_should)
3089 pnode = inst_config.primary_node
3090 node_image[pnode].pinst.append(instance)
3092 for snode in inst_config.secondary_nodes:
3093 nimg = node_image[snode]
3094 nimg.sinst.append(instance)
3095 if pnode not in nimg.sbp:
3096 nimg.sbp[pnode] = []
3097 nimg.sbp[pnode].append(instance)
3099 # At this point, we have the in-memory data structures complete,
3100 # except for the runtime information, which we'll gather next
3102 # Due to the way our RPC system works, exact response times cannot be
3103 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3104 # time before and after executing the request, we can at least have a time
3106 nvinfo_starttime = time.time()
3107 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3109 self.cfg.GetClusterName())
3110 nvinfo_endtime = time.time()
3112 if self.extra_lv_nodes and vg_name is not None:
3114 self.rpc.call_node_verify(self.extra_lv_nodes,
3115 {constants.NV_LVLIST: vg_name},
3116 self.cfg.GetClusterName())
3118 extra_lv_nvinfo = {}
3120 all_drbd_map = self.cfg.ComputeDRBDMap()
3122 feedback_fn("* Gathering disk information (%s nodes)" %
3123 len(self.my_node_names))
3124 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3127 feedback_fn("* Verifying configuration file consistency")
3129 # If not all nodes are being checked, we need to make sure the master node
3130 # and a non-checked vm_capable node are in the list.
3131 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3133 vf_nvinfo = all_nvinfo.copy()
3134 vf_node_info = list(self.my_node_info.values())
3135 additional_nodes = []
3136 if master_node not in self.my_node_info:
3137 additional_nodes.append(master_node)
3138 vf_node_info.append(self.all_node_info[master_node])
3139 # Add the first vm_capable node we find which is not included
3140 for node in absent_nodes:
3141 nodeinfo = self.all_node_info[node]
3142 if nodeinfo.vm_capable and not nodeinfo.offline:
3143 additional_nodes.append(node)
3144 vf_node_info.append(self.all_node_info[node])
3146 key = constants.NV_FILELIST
3147 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3148 {key: node_verify_param[key]},
3149 self.cfg.GetClusterName()))
3151 vf_nvinfo = all_nvinfo
3152 vf_node_info = self.my_node_info.values()
3154 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3156 feedback_fn("* Verifying node status")
3160 for node_i in node_data_list:
3162 nimg = node_image[node]
3166 feedback_fn("* Skipping offline node %s" % (node,))
3170 if node == master_node:
3172 elif node_i.master_candidate:
3173 ntype = "master candidate"
3174 elif node_i.drained:
3180 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3182 msg = all_nvinfo[node].fail_msg
3183 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3186 nimg.rpc_fail = True
3189 nresult = all_nvinfo[node].payload
3191 nimg.call_ok = self._VerifyNode(node_i, nresult)
3192 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3193 self._VerifyNodeNetwork(node_i, nresult)
3194 self._VerifyNodeUserScripts(node_i, nresult)
3195 self._VerifyOob(node_i, nresult)
3198 self._VerifyNodeLVM(node_i, nresult, vg_name)
3199 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3202 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3203 self._UpdateNodeInstances(node_i, nresult, nimg)
3204 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3205 self._UpdateNodeOS(node_i, nresult, nimg)
3207 if not nimg.os_fail:
3208 if refos_img is None:
3210 self._VerifyNodeOS(node_i, nimg, refos_img)
3211 self._VerifyNodeBridges(node_i, nresult, bridges)
3213 # Check that all instances running on the node are its configured primary instances. (This
3214 # can no longer be done from _VerifyInstance below, since some of the
3215 # wrong instances could be from other node groups.)
3216 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3218 for inst in non_primary_inst:
3219 # FIXME: investigate best way to handle offline insts
3220 if inst.admin_state == constants.ADMINST_OFFLINE:
3222 feedback_fn("* Skipping offline instance %s" % inst.name)
3225 test = inst in self.all_inst_info
3226 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3227 "instance should not run on node %s", node_i.name)
3228 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3229 "node is running unknown instance %s", inst)
3231 for node, result in extra_lv_nvinfo.items():
3232 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3233 node_image[node], vg_name)
3235 feedback_fn("* Verifying instance status")
3236 for instance in self.my_inst_names:
3238 feedback_fn("* Verifying instance %s" % instance)
3239 inst_config = self.my_inst_info[instance]
3240 self._VerifyInstance(instance, inst_config, node_image,
3242 inst_nodes_offline = []
3244 pnode = inst_config.primary_node
3245 pnode_img = node_image[pnode]
3246 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3247 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3248 " primary node failed", instance)
3250 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3252 constants.CV_EINSTANCEBADNODE, instance,
3253 "instance is marked as running and lives on offline node %s",
3254 inst_config.primary_node)
3256 # If the instance is non-redundant we cannot survive losing its primary
3257 # node, so we are not N+1 compliant. On the other hand we have no disk
3258 # templates with more than one secondary so that situation is not well
3260 # FIXME: does not support file-backed instances
3261 if not inst_config.secondary_nodes:
3262 i_non_redundant.append(instance)
3264 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3265 constants.CV_EINSTANCELAYOUT,
3266 instance, "instance has multiple secondary nodes: %s",
3267 utils.CommaJoin(inst_config.secondary_nodes),
3268 code=self.ETYPE_WARNING)
3270 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3271 pnode = inst_config.primary_node
3272 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3273 instance_groups = {}
3275 for node in instance_nodes:
3276 instance_groups.setdefault(self.all_node_info[node].group,
3280 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3281 # Sort so that we always list the primary node first.
3282 for group, nodes in sorted(instance_groups.items(),
3283 key=lambda (_, nodes): pnode in nodes,
3286 self._ErrorIf(len(instance_groups) > 1,
3287 constants.CV_EINSTANCESPLITGROUPS,
3288 instance, "instance has primary and secondary nodes in"
3289 " different groups: %s", utils.CommaJoin(pretty_list),
3290 code=self.ETYPE_WARNING)
3292 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3293 i_non_a_balanced.append(instance)
3295 for snode in inst_config.secondary_nodes:
3296 s_img = node_image[snode]
3297 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3298 snode, "instance %s, connection to secondary node failed",
3302 inst_nodes_offline.append(snode)
3304 # warn that the instance lives on offline nodes
3305 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3306 "instance has offline secondary node(s) %s",
3307 utils.CommaJoin(inst_nodes_offline))
3308 # ... or ghost/non-vm_capable nodes
3309 for node in inst_config.all_nodes:
3310 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3311 instance, "instance lives on ghost node %s", node)
3312 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3313 instance, "instance lives on non-vm_capable node %s", node)
3315 feedback_fn("* Verifying orphan volumes")
3316 reserved = utils.FieldSet(*cluster.reserved_lvs)
3318 # We will get spurious "unknown volume" warnings if any node of this group
3319 # is secondary for an instance whose primary is in another group. To avoid
3320 # them, we find these instances and add their volumes to node_vol_should.
3321 for inst in self.all_inst_info.values():
3322 for secondary in inst.secondary_nodes:
3323 if (secondary in self.my_node_info
3324 and inst.name not in self.my_inst_info):
3325 inst.MapLVsByNode(node_vol_should)
3328 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3330 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3331 feedback_fn("* Verifying N+1 Memory redundancy")
3332 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3334 feedback_fn("* Other Notes")
3336 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3337 % len(i_non_redundant))
3339 if i_non_a_balanced:
3340 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3341 % len(i_non_a_balanced))
3344 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3347 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3350 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3354 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3355 """Analyze the post-hooks' result
3357 This method analyses the hook result, handles it, and sends some
3358 nicely-formatted feedback back to the user.
3360 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3361 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3362 @param hooks_results: the results of the multi-node hooks rpc call
3363 @param feedback_fn: function used to send feedback back to the caller
3364 @param lu_result: previous Exec result
3365 @return: the new Exec result, based on the previous result
3369 # We only really run POST phase hooks, only for non-empty groups,
3370 # and are only interested in their results
3371 if not self.my_node_names:
3374 elif phase == constants.HOOKS_PHASE_POST:
3375 # Used to change hooks' output to proper indentation
3376 feedback_fn("* Hooks Results")
3377 assert hooks_results, "invalid result from hooks"
3379 for node_name in hooks_results:
3380 res = hooks_results[node_name]
3382 test = msg and not res.offline
3383 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3384 "Communication failure in hooks execution: %s", msg)
3385 if res.offline or msg:
3386 # No need to investigate payload if node is offline or gave
3389 for script, hkr, output in res.payload:
3390 test = hkr == constants.HKR_FAIL
3391 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3392 "Script %s failed, output:", script)
3394 output = self._HOOKS_INDENT_RE.sub(" ", output)
3395 feedback_fn("%s" % output)
3401 class LUClusterVerifyDisks(NoHooksLU):
3402 """Verifies the cluster disks status.
3407 def ExpandNames(self):
3408 self.share_locks = _ShareAll()
3409 self.needed_locks = {
3410 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3413 def Exec(self, feedback_fn):
3414 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3416 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3417 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3418 for group in group_names])
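# Illustrative note (group names made up): with node groups "default" and
# "storage" this returns
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])
# i.e. one single-opcode job per locked node group.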
3421 class LUGroupVerifyDisks(NoHooksLU):
3422 """Verifies the status of all disks in a node group.
3427 def ExpandNames(self):
3428 # Raises errors.OpPrereqError on its own if group can't be found
3429 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3431 self.share_locks = _ShareAll()
3432 self.needed_locks = {
3433 locking.LEVEL_INSTANCE: [],
3434 locking.LEVEL_NODEGROUP: [],
3435 locking.LEVEL_NODE: [],
3438 def DeclareLocks(self, level):
3439 if level == locking.LEVEL_INSTANCE:
3440 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3442 # Lock instances optimistically, needs verification once node and group
3443 # locks have been acquired
3444 self.needed_locks[locking.LEVEL_INSTANCE] = \
3445 self.cfg.GetNodeGroupInstances(self.group_uuid)
3447 elif level == locking.LEVEL_NODEGROUP:
3448 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3450 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3451 set([self.group_uuid] +
3452 # Lock all groups used by instances optimistically; this requires
3453 # going via the node before it's locked, requiring verification
3456 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3457 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3459 elif level == locking.LEVEL_NODE:
3460 # This will only lock the nodes in the group to be verified which contain
3462 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3463 self._LockInstancesNodes()
3465 # Lock all nodes in group to be verified
3466 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3467 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3468 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3470 def CheckPrereq(self):
3471 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3472 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3473 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3475 assert self.group_uuid in owned_groups
3477 # Check if locked instances are still correct
3478 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3480 # Get instance information
3481 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3483 # Check if node groups for locked instances are still correct
3484 for (instance_name, inst) in self.instances.items():
3485 assert owned_nodes.issuperset(inst.all_nodes), \
3486 "Instance %s's nodes changed while we kept the lock" % instance_name
3488 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3491 assert self.group_uuid in inst_groups, \
3492 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3494 def Exec(self, feedback_fn):
3495 """Verify integrity of cluster disks.
3497 @rtype: tuple of three items
3498 @return: a tuple of (dict of node-to-node_error, list of instances
3499 which need activate-disks, dict of instance: (node, volume) for
3504 res_instances = set()
3507 nv_dict = _MapInstanceDisksToNodes([inst
3508 for inst in self.instances.values()
3509 if inst.admin_state == constants.ADMINST_UP])
3512 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3513 set(self.cfg.GetVmCapableNodeList()))
3515 node_lvs = self.rpc.call_lv_list(nodes, [])
3517 for (node, node_res) in node_lvs.items():
3518 if node_res.offline:
3521 msg = node_res.fail_msg
3523 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3524 res_nodes[node] = msg
3527 for lv_name, (_, _, lv_online) in node_res.payload.items():
3528 inst = nv_dict.pop((node, lv_name), None)
3529 if not (lv_online or inst is None):
3530 res_instances.add(inst)
3532 # any leftover items in nv_dict are missing LVs, let's arrange the data
3534 for key, inst in nv_dict.iteritems():
3535 res_missing.setdefault(inst, []).append(list(key))
3537 return (res_nodes, list(res_instances), res_missing)
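# Illustrative return value (all names made up):
#   ({"node3": "Error while ..."},                 # per-node LV listing errors
#    ["inst2"],                                    # instances needing activate-disks
#    {"inst5": [["node1", "xenvg/disk0_data"]]})   # instances with missing LVs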
3540 class LUClusterRepairDiskSizes(NoHooksLU):
3541 """Verifies the cluster disks sizes.
3546 def ExpandNames(self):
3547 if self.op.instances:
3548 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3549 self.needed_locks = {
3550 locking.LEVEL_NODE_RES: [],
3551 locking.LEVEL_INSTANCE: self.wanted_names,
3553 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3555 self.wanted_names = None
3556 self.needed_locks = {
3557 locking.LEVEL_NODE_RES: locking.ALL_SET,
3558 locking.LEVEL_INSTANCE: locking.ALL_SET,
3560 self.share_locks = {
3561 locking.LEVEL_NODE_RES: 1,
3562 locking.LEVEL_INSTANCE: 0,
3565 def DeclareLocks(self, level):
3566 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3567 self._LockInstancesNodes(primary_only=True, level=level)
3569 def CheckPrereq(self):
3570 """Check prerequisites.
3572 This only checks the optional instance list against the existing names.
3575 if self.wanted_names is None:
3576 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3578 self.wanted_instances = \
3579 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3581 def _EnsureChildSizes(self, disk):
3582 """Ensure children of the disk have the needed disk size.
3584 This is valid mainly for DRBD8 and fixes an issue where the
3585 children have a smaller disk size.
3587 @param disk: an L{ganeti.objects.Disk} object
3590 if disk.dev_type == constants.LD_DRBD8:
3591 assert disk.children, "Empty children for DRBD8?"
3592 fchild = disk.children[0]
3593 mismatch = fchild.size < disk.size
3595 self.LogInfo("Child disk has size %d, parent %d, fixing",
3596 fchild.size, disk.size)
3597 fchild.size = disk.size
3599 # and we recurse on this child only, not on the metadev
3600 return self._EnsureChildSizes(fchild) or mismatch
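# Worked example (made-up sizes): for a 10240 MiB DRBD8 disk whose first
# child (the data LV) is recorded as only 10236 MiB, the code above grows the
# child to 10240 MiB, returns True so the caller knows the configuration
# changed, and leaves the metadata child untouched.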
3604 def Exec(self, feedback_fn):
3605 """Verify the size of cluster disks.
3608 # TODO: check child disks too
3609 # TODO: check differences in size between primary/secondary nodes
3611 for instance in self.wanted_instances:
3612 pnode = instance.primary_node
3613 if pnode not in per_node_disks:
3614 per_node_disks[pnode] = []
3615 for idx, disk in enumerate(instance.disks):
3616 per_node_disks[pnode].append((instance, idx, disk))
3618 assert not (frozenset(per_node_disks.keys()) -
3619 self.owned_locks(locking.LEVEL_NODE_RES)), \
3620 "Not owning correct locks"
3621 assert not self.owned_locks(locking.LEVEL_NODE)
3624 for node, dskl in per_node_disks.items():
3625 newl = [v[2].Copy() for v in dskl]
3627 self.cfg.SetDiskID(dsk, node)
3628 result = self.rpc.call_blockdev_getsize(node, newl)
3630 self.LogWarning("Failure in blockdev_getsize call to node"
3631 " %s, ignoring", node)
3633 if len(result.payload) != len(dskl):
3634 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3635 " result.payload=%s", node, len(dskl), result.payload)
3636 self.LogWarning("Invalid result from node %s, ignoring node results",
3639 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3641 self.LogWarning("Disk %d of instance %s did not return size"
3642 " information, ignoring", idx, instance.name)
3644 if not isinstance(size, (int, long)):
3645 self.LogWarning("Disk %d of instance %s did not return valid"
3646 " size information, ignoring", idx, instance.name)
3649 if size != disk.size:
3650 self.LogInfo("Disk %d of instance %s has mismatched size,"
3651 " correcting: recorded %d, actual %d", idx,
3652 instance.name, disk.size, size)
3654 self.cfg.Update(instance, feedback_fn)
3655 changed.append((instance.name, idx, size))
3656 if self._EnsureChildSizes(disk):
3657 self.cfg.Update(instance, feedback_fn)
3658 changed.append((instance.name, idx, disk.size))
3662 class LUClusterRename(LogicalUnit):
3663 """Rename the cluster.
3666 HPATH = "cluster-rename"
3667 HTYPE = constants.HTYPE_CLUSTER
3669 def BuildHooksEnv(self):
3674 "OP_TARGET": self.cfg.GetClusterName(),
3675 "NEW_NAME": self.op.name,
3678 def BuildHooksNodes(self):
3679 """Build hooks nodes.
3682 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3684 def CheckPrereq(self):
3685 """Verify that the passed name is a valid one.
3688 hostname = netutils.GetHostname(name=self.op.name,
3689 family=self.cfg.GetPrimaryIPFamily())
3691 new_name = hostname.name
3692 self.ip = new_ip = hostname.ip
3693 old_name = self.cfg.GetClusterName()
3694 old_ip = self.cfg.GetMasterIP()
3695 if new_name == old_name and new_ip == old_ip:
3696 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3697 " cluster has changed",
3699 if new_ip != old_ip:
3700 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3701 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3702 " reachable on the network" %
3703 new_ip, errors.ECODE_NOTUNIQUE)
3705 self.op.name = new_name
3707 def Exec(self, feedback_fn):
3708 """Rename the cluster.
3711 clustername = self.op.name
3714 # shutdown the master IP
3715 master_params = self.cfg.GetMasterNetworkParameters()
3716 ems = self.cfg.GetUseExternalMipScript()
3717 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3719 result.Raise("Could not disable the master role")
3722 cluster = self.cfg.GetClusterInfo()
3723 cluster.cluster_name = clustername
3724 cluster.master_ip = new_ip
3725 self.cfg.Update(cluster, feedback_fn)
3727 # update the known hosts file
3728 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3729 node_list = self.cfg.GetOnlineNodeList()
3731 node_list.remove(master_params.name)
3734 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3736 master_params.ip = new_ip
3737 result = self.rpc.call_node_activate_master_ip(master_params.name,
3739 msg = result.fail_msg
3741 self.LogWarning("Could not re-enable the master role on"
3742 " the master, please restart manually: %s", msg)
3747 def _ValidateNetmask(cfg, netmask):
3748 """Checks if a netmask is valid.
3750 @type cfg: L{config.ConfigWriter}
3751 @param cfg: The cluster configuration
3753 @param netmask: the netmask to be verified
3754 @raise errors.OpPrereqError: if the validation fails
3757 ip_family = cfg.GetPrimaryIPFamily()
3759 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3760 except errors.ProgrammerError:
3761 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3763 if not ipcls.ValidateNetmask(netmask):
3764 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
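# Minimal usage sketch (hypothetical values; assumes the netmask is passed as
# a CIDR prefix length, as the ValidateNetmask check above suggests):
#   _ValidateNetmask(self.cfg, 24)   # accepted on an IPv4 cluster
#   _ValidateNetmask(self.cfg, 99)   # raises errors.OpPrereqError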
3768 class LUClusterSetParams(LogicalUnit):
3769 """Change the parameters of the cluster.
3772 HPATH = "cluster-modify"
3773 HTYPE = constants.HTYPE_CLUSTER
3776 def CheckArguments(self):
3780 if self.op.uid_pool:
3781 uidpool.CheckUidPool(self.op.uid_pool)
3783 if self.op.add_uids:
3784 uidpool.CheckUidPool(self.op.add_uids)
3786 if self.op.remove_uids:
3787 uidpool.CheckUidPool(self.op.remove_uids)
3789 if self.op.master_netmask is not None:
3790 _ValidateNetmask(self.cfg, self.op.master_netmask)
3792 if self.op.diskparams:
3793 for dt_params in self.op.diskparams.values():
3794 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
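# Example shape of self.op.diskparams (illustrative names and values only):
# one parameter dict per disk template, each checked against DISK_DT_TYPES
# above, e.g. {"drbd": {"resync-rate": 4096}}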
3796 def ExpandNames(self):
3797 # FIXME: in the future maybe other cluster params won't require checking on
3798 # all nodes to be modified.
3799 self.needed_locks = {
3800 locking.LEVEL_NODE: locking.ALL_SET,
3801 locking.LEVEL_INSTANCE: locking.ALL_SET,
3802 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3804 self.share_locks = {
3805 locking.LEVEL_NODE: 1,
3806 locking.LEVEL_INSTANCE: 1,
3807 locking.LEVEL_NODEGROUP: 1,
3810 def BuildHooksEnv(self):
3815 "OP_TARGET": self.cfg.GetClusterName(),
3816 "NEW_VG_NAME": self.op.vg_name,
3819 def BuildHooksNodes(self):
3820 """Build hooks nodes.
3823 mn = self.cfg.GetMasterNode()
3826 def CheckPrereq(self):
3827 """Check prerequisites.
3829 This checks that the given parameters do not conflict and
3830 that the given volume group is valid.
3833 if self.op.vg_name is not None and not self.op.vg_name:
3834 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3835 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3836 " instances exist", errors.ECODE_INVAL)
3838 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3839 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3840 raise errors.OpPrereqError("Cannot disable drbd helper while"
3841 " drbd-based instances exist",
3844 node_list = self.owned_locks(locking.LEVEL_NODE)
3846 # if vg_name not None, checks given volume group on all nodes
3848 vglist = self.rpc.call_vg_list(node_list)
3849 for node in node_list:
3850 msg = vglist[node].fail_msg
3852 # ignoring down node
3853 self.LogWarning("Error while gathering data on node %s"
3854 " (ignoring node): %s", node, msg)
3856 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3858 constants.MIN_VG_SIZE)
3860 raise errors.OpPrereqError("Error on node '%s': %s" %
3861 (node, vgstatus), errors.ECODE_ENVIRON)
3863 if self.op.drbd_helper:
3864 # checks given drbd helper on all nodes
3865 helpers = self.rpc.call_drbd_helper(node_list)
3866 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3868 self.LogInfo("Not checking drbd helper on offline node %s", node)
3870 msg = helpers[node].fail_msg
3872 raise errors.OpPrereqError("Error checking drbd helper on node"
3873 " '%s': %s" % (node, msg),
3874 errors.ECODE_ENVIRON)
3875 node_helper = helpers[node].payload
3876 if node_helper != self.op.drbd_helper:
3877 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3878 (node, node_helper), errors.ECODE_ENVIRON)
3880 self.cluster = cluster = self.cfg.GetClusterInfo()
3881 # validate params changes
3882 if self.op.beparams:
3883 objects.UpgradeBeParams(self.op.beparams)
3884 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3885 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3887 if self.op.ndparams:
3888 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3889 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3891 # TODO: we need a more general way to handle resetting
3892 # cluster-level parameters to default values
3893 if self.new_ndparams["oob_program"] == "":
3894 self.new_ndparams["oob_program"] = \
3895 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3897 if self.op.hv_state:
3898 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3899 self.cluster.hv_state_static)
3900 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3901 for hv, values in new_hv_state.items())
3903 if self.op.disk_state:
3904 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3905 self.cluster.disk_state_static)
3906 self.new_disk_state = \
3907 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3908 for name, values in svalues.items()))
3909 for storage, svalues in new_disk_state.items())
3912 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3915 all_instances = self.cfg.GetAllInstancesInfo().values()
3917 for group in self.cfg.GetAllNodeGroupsInfo().values():
3918 instances = frozenset([inst for inst in all_instances
3919 if compat.any(node in group.members
3920 for node in inst.all_nodes)])
3921 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3922 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3924 new_ipolicy, instances)
3926 violations.update(new)
3929 self.LogWarning("After the ipolicy change the following instances"
3930 " violate it: %s",
3931 utils.CommaJoin(violations))
3933 if self.op.nicparams:
3934 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3935 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3936 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3939 # check all instances for consistency
3940 for instance in self.cfg.GetAllInstancesInfo().values():
3941 for nic_idx, nic in enumerate(instance.nics):
3942 params_copy = copy.deepcopy(nic.nicparams)
3943 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3945 # check parameter syntax
3947 objects.NIC.CheckParameterSyntax(params_filled)
3948 except errors.ConfigurationError, err:
3949 nic_errors.append("Instance %s, nic/%d: %s" %
3950 (instance.name, nic_idx, err))
3952 # if we're moving instances to routed, check that they have an ip
3953 target_mode = params_filled[constants.NIC_MODE]
3954 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3955 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3956 " address" % (instance.name, nic_idx))
3958 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3959 "\n".join(nic_errors))
3961 # hypervisor list/parameters
3962 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3963 if self.op.hvparams:
3964 for hv_name, hv_dict in self.op.hvparams.items():
3965 if hv_name not in self.new_hvparams:
3966 self.new_hvparams[hv_name] = hv_dict
3968 self.new_hvparams[hv_name].update(hv_dict)
3970 # disk template parameters
3971 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3972 if self.op.diskparams:
3973 for dt_name, dt_params in self.op.diskparams.items():
3974 if dt_name not in self.new_diskparams:
3975 self.new_diskparams[dt_name] = dt_params
3977 self.new_diskparams[dt_name].update(dt_params)
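# (Sketch of the intended merge, mirroring the hvparams handling above:
# templates not yet present in the merged dict are added wholesale, existing
# ones are updated key by key on top of the cluster defaults.)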
3979 # os hypervisor parameters
3980 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3982 for os_name, hvs in self.op.os_hvp.items():
3983 if os_name not in self.new_os_hvp:
3984 self.new_os_hvp[os_name] = hvs
3986 for hv_name, hv_dict in hvs.items():
3987 if hv_name not in self.new_os_hvp[os_name]:
3988 self.new_os_hvp[os_name][hv_name] = hv_dict
3990 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3993 self.new_osp = objects.FillDict(cluster.osparams, {})
3994 if self.op.osparams:
3995 for os_name, osp in self.op.osparams.items():
3996 if os_name not in self.new_osp:
3997 self.new_osp[os_name] = {}
3999 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4002 if not self.new_osp[os_name]:
4003 # we removed all parameters
4004 del self.new_osp[os_name]
4006 # check the parameter validity (remote check)
4007 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4008 os_name, self.new_osp[os_name])
4010 # changes to the hypervisor list
4011 if self.op.enabled_hypervisors is not None:
4012 self.hv_list = self.op.enabled_hypervisors
4013 for hv in self.hv_list:
4014 # if the hypervisor doesn't already exist in the cluster
4015 # hvparams, we initialize it to empty, and then (in both
4016 # cases) we make sure to fill the defaults, as we might not
4017 # have a complete defaults list if the hypervisor wasn't enabled before
4019 if hv not in new_hvp:
4021 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4022 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4024 self.hv_list = cluster.enabled_hypervisors
4026 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4027 # either the enabled list has changed, or the parameters have, validate
4028 for hv_name, hv_params in self.new_hvparams.items():
4029 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4030 (self.op.enabled_hypervisors and
4031 hv_name in self.op.enabled_hypervisors)):
4032 # either this is a new hypervisor, or its parameters have changed
4033 hv_class = hypervisor.GetHypervisor(hv_name)
4034 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4035 hv_class.CheckParameterSyntax(hv_params)
4036 _CheckHVParams(self, node_list, hv_name, hv_params)
4039 # no need to check any newly-enabled hypervisors, since the
4040 # defaults have already been checked in the above code-block
4041 for os_name, os_hvp in self.new_os_hvp.items():
4042 for hv_name, hv_params in os_hvp.items():
4043 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4044 # we need to fill in the new os_hvp on top of the actual hv_p
4045 cluster_defaults = self.new_hvparams.get(hv_name, {})
4046 new_osp = objects.FillDict(cluster_defaults, hv_params)
4047 hv_class = hypervisor.GetHypervisor(hv_name)
4048 hv_class.CheckParameterSyntax(new_osp)
4049 _CheckHVParams(self, node_list, hv_name, new_osp)
4051 if self.op.default_iallocator:
4052 alloc_script = utils.FindFile(self.op.default_iallocator,
4053 constants.IALLOCATOR_SEARCH_PATH,
4055 if alloc_script is None:
4056 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4057 " specified" % self.op.default_iallocator,
4060 def Exec(self, feedback_fn):
4061 """Change the parameters of the cluster.
4064 if self.op.vg_name is not None:
4065 new_volume = self.op.vg_name
4068 if new_volume != self.cfg.GetVGName():
4069 self.cfg.SetVGName(new_volume)
4071 feedback_fn("Cluster LVM configuration already in desired"
4072 " state, not changing")
4073 if self.op.drbd_helper is not None:
4074 new_helper = self.op.drbd_helper
4077 if new_helper != self.cfg.GetDRBDHelper():
4078 self.cfg.SetDRBDHelper(new_helper)
4080 feedback_fn("Cluster DRBD helper already in desired state,"
4082 if self.op.hvparams:
4083 self.cluster.hvparams = self.new_hvparams
4085 self.cluster.os_hvp = self.new_os_hvp
4086 if self.op.enabled_hypervisors is not None:
4087 self.cluster.hvparams = self.new_hvparams
4088 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4089 if self.op.beparams:
4090 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4091 if self.op.nicparams:
4092 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4094 self.cluster.ipolicy = self.new_ipolicy
4095 if self.op.osparams:
4096 self.cluster.osparams = self.new_osp
4097 if self.op.ndparams:
4098 self.cluster.ndparams = self.new_ndparams
4099 if self.op.diskparams:
4100 self.cluster.diskparams = self.new_diskparams
4101 if self.op.hv_state:
4102 self.cluster.hv_state_static = self.new_hv_state
4103 if self.op.disk_state:
4104 self.cluster.disk_state_static = self.new_disk_state
4106 if self.op.candidate_pool_size is not None:
4107 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4108 # we need to update the pool size here, otherwise the save will fail
4109 _AdjustCandidatePool(self, [])
4111 if self.op.maintain_node_health is not None:
4112 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4113 feedback_fn("Note: CONFD was disabled at build time, node health"
4114 " maintenance is not useful (still enabling it)")
4115 self.cluster.maintain_node_health = self.op.maintain_node_health
4117 if self.op.prealloc_wipe_disks is not None:
4118 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4120 if self.op.add_uids is not None:
4121 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4123 if self.op.remove_uids is not None:
4124 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4126 if self.op.uid_pool is not None:
4127 self.cluster.uid_pool = self.op.uid_pool
4129 if self.op.default_iallocator is not None:
4130 self.cluster.default_iallocator = self.op.default_iallocator
4132 if self.op.reserved_lvs is not None:
4133 self.cluster.reserved_lvs = self.op.reserved_lvs
4135 if self.op.use_external_mip_script is not None:
4136 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4138 def helper_os(aname, mods, desc):
4140 lst = getattr(self.cluster, aname)
4141 for key, val in mods:
4142 if key == constants.DDM_ADD:
4144 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4147 elif key == constants.DDM_REMOVE:
4151 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4153 raise errors.ProgrammerError("Invalid modification '%s'" % key)
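# Usage sketch (hypothetical OS names): self.op.hidden_os/blacklisted_os are
# lists of (action, os_name) pairs, e.g.
#   [(constants.DDM_ADD, "debian-image"), (constants.DDM_REMOVE, "old-os")]
# which helper_os applies to the corresponding cluster attribute below.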
4155 if self.op.hidden_os:
4156 helper_os("hidden_os", self.op.hidden_os, "hidden")
4158 if self.op.blacklisted_os:
4159 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4161 if self.op.master_netdev:
4162 master_params = self.cfg.GetMasterNetworkParameters()
4163 ems = self.cfg.GetUseExternalMipScript()
4164 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4165 self.cluster.master_netdev)
4166 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4168 result.Raise("Could not disable the master ip")
4169 feedback_fn("Changing master_netdev from %s to %s" %
4170 (master_params.netdev, self.op.master_netdev))
4171 self.cluster.master_netdev = self.op.master_netdev
4173 if self.op.master_netmask:
4174 master_params = self.cfg.GetMasterNetworkParameters()
4175 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4176 result = self.rpc.call_node_change_master_netmask(master_params.name,
4177 master_params.netmask,
4178 self.op.master_netmask,
4180 master_params.netdev)
4182 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4185 self.cluster.master_netmask = self.op.master_netmask
4187 self.cfg.Update(self.cluster, feedback_fn)
4189 if self.op.master_netdev:
4190 master_params = self.cfg.GetMasterNetworkParameters()
4191 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4192 self.op.master_netdev)
4193 ems = self.cfg.GetUseExternalMipScript()
4194 result = self.rpc.call_node_activate_master_ip(master_params.name,
4197 self.LogWarning("Could not re-enable the master ip on"
4198 " the master, please restart manually: %s",
4202 def _UploadHelper(lu, nodes, fname):
4203 """Helper for uploading a file and showing warnings.
4206 if os.path.exists(fname):
4207 result = lu.rpc.call_upload_file(nodes, fname)
4208 for to_node, to_result in result.items():
4209 msg = to_result.fail_msg
4211 msg = ("Copy of file %s to node %s failed: %s" %
4212 (fname, to_node, msg))
4213 lu.proc.LogWarning(msg)
4216 def _ComputeAncillaryFiles(cluster, redist):
4217 """Compute files external to Ganeti which need to be consistent.
4219 @type redist: boolean
4220 @param redist: Whether to include files which need to be redistributed
4223 # Compute files for all nodes
4225 constants.SSH_KNOWN_HOSTS_FILE,
4226 constants.CONFD_HMAC_KEY,
4227 constants.CLUSTER_DOMAIN_SECRET_FILE,
4228 constants.SPICE_CERT_FILE,
4229 constants.SPICE_CACERT_FILE,
4230 constants.RAPI_USERS_FILE,
4234 files_all.update(constants.ALL_CERT_FILES)
4235 files_all.update(ssconf.SimpleStore().GetFileList())
4237 # we need to ship at least the RAPI certificate
4238 files_all.add(constants.RAPI_CERT_FILE)
4240 if cluster.modify_etc_hosts:
4241 files_all.add(constants.ETC_HOSTS)
4243 # Files which are optional; these must:
4244 # - be present in one other category as well
4245 # - either exist or not exist on all nodes of that category (mc, vm all)
4247 constants.RAPI_USERS_FILE,
4250 # Files which should only be on master candidates
4254 files_mc.add(constants.CLUSTER_CONF_FILE)
4256 # FIXME: this should also be replicated, but Ganeti doesn't support files_mc replication
4258 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4260 # Files which should only be on VM-capable nodes
4261 files_vm = set(filename
4262 for hv_name in cluster.enabled_hypervisors
4263 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4265 files_opt |= set(filename
4266 for hv_name in cluster.enabled_hypervisors
4267 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4269 # Filenames in each category must be unique
4270 all_files_set = files_all | files_mc | files_vm
4271 assert (len(all_files_set) ==
4272 sum(map(len, [files_all, files_mc, files_vm]))), \
4273 "Found file listed in more than one file list"
4275 # Optional files must be present in one other category
4276 assert all_files_set.issuperset(files_opt), \
4277 "Optional file not in a different required list"
4279 return (files_all, files_opt, files_mc, files_vm)
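# Return value sketch (summary; file examples illustrative): a 4-tuple of sets
#   files_all - distributed to every node (certs, known_hosts, ...)
#   files_opt - optional; may legitimately be absent on some nodes
#   files_mc  - master candidates only (e.g. the cluster config when not
#               redistributing)
#   files_vm  - VM-capable nodes only (hypervisor ancillary files)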
4282 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4283 """Distribute additional files which are part of the cluster configuration.
4285 ConfigWriter takes care of distributing the config and ssconf files, but
4286 there are more files which should be distributed to all nodes. This function
4287 makes sure those are copied.
4289 @param lu: calling logical unit
4290 @param additional_nodes: list of nodes not in the config to distribute to
4291 @type additional_vm: boolean
4292 @param additional_vm: whether the additional nodes are vm-capable or not
4295 # Gather target nodes
4296 cluster = lu.cfg.GetClusterInfo()
4297 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4299 online_nodes = lu.cfg.GetOnlineNodeList()
4300 vm_nodes = lu.cfg.GetVmCapableNodeList()
4302 if additional_nodes is not None:
4303 online_nodes.extend(additional_nodes)
4305 vm_nodes.extend(additional_nodes)
4307 # Never distribute to master node
4308 for nodelist in [online_nodes, vm_nodes]:
4309 if master_info.name in nodelist:
4310 nodelist.remove(master_info.name)
4313 (files_all, _, files_mc, files_vm) = \
4314 _ComputeAncillaryFiles(cluster, True)
4316 # Never re-distribute configuration file from here
4317 assert not (constants.CLUSTER_CONF_FILE in files_all or
4318 constants.CLUSTER_CONF_FILE in files_vm)
4319 assert not files_mc, "Master candidates not handled in this function"
4322 (online_nodes, files_all),
4323 (vm_nodes, files_vm),
4327 for (node_list, files) in filemap:
4329 _UploadHelper(lu, node_list, fname)
4332 class LUClusterRedistConf(NoHooksLU):
4333 """Force the redistribution of cluster configuration.
4335 This is a very simple LU.
4340 def ExpandNames(self):
4341 self.needed_locks = {
4342 locking.LEVEL_NODE: locking.ALL_SET,
4344 self.share_locks[locking.LEVEL_NODE] = 1
4346 def Exec(self, feedback_fn):
4347 """Redistribute the configuration.
4350 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4351 _RedistributeAncillaryFiles(self)
4354 class LUClusterActivateMasterIp(NoHooksLU):
4355 """Activate the master IP on the master node.
4358 def Exec(self, feedback_fn):
4359 """Activate the master IP.
4362 master_params = self.cfg.GetMasterNetworkParameters()
4363 ems = self.cfg.GetUseExternalMipScript()
4364 result = self.rpc.call_node_activate_master_ip(master_params.name,
4366 result.Raise("Could not activate the master IP")
4369 class LUClusterDeactivateMasterIp(NoHooksLU):
4370 """Deactivate the master IP on the master node.
4373 def Exec(self, feedback_fn):
4374 """Deactivate the master IP.
4377 master_params = self.cfg.GetMasterNetworkParameters()
4378 ems = self.cfg.GetUseExternalMipScript()
4379 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4381 result.Raise("Could not deactivate the master IP")
4384 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4385 """Sleep and poll for an instance's disk to sync.
4388 if not instance.disks or disks is not None and not disks:
4391 disks = _ExpandCheckDisks(instance, disks)
4394 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4396 node = instance.primary_node
4399 lu.cfg.SetDiskID(dev, node)
4401 # TODO: Convert to utils.Retry
4404 degr_retries = 10 # in seconds, as we sleep 1 second each time
4408 cumul_degraded = False
4409 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4410 msg = rstats.fail_msg
4412 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4415 raise errors.RemoteError("Can't contact node %s for mirror data,"
4416 " aborting." % node)
4419 rstats = rstats.payload
4421 for i, mstat in enumerate(rstats):
4423 lu.LogWarning("Can't compute data for node %s/%s",
4424 node, disks[i].iv_name)
4427 cumul_degraded = (cumul_degraded or
4428 (mstat.is_degraded and mstat.sync_percent is None))
4429 if mstat.sync_percent is not None:
4431 if mstat.estimated_time is not None:
4432 rem_time = ("%s remaining (estimated)" %
4433 utils.FormatSeconds(mstat.estimated_time))
4434 max_time = mstat.estimated_time
4436 rem_time = "no time estimate"
4437 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4438 (disks[i].iv_name, mstat.sync_percent, rem_time))
4440 # if we're done but degraded, let's do a few small retries, to
4441 # make sure we see a stable and not transient situation; therefore
4442 # we force a restart of the loop
4443 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4444 logging.info("Degraded disks found, %d retries left", degr_retries)
4452 time.sleep(min(60, max_time))
4455 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4456 return not cumul_degraded
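# Note (a reading of the code above, not original documentation): _WaitForSync
# returns True when all requested disks finished syncing without remaining
# degradation, and False if some disk is still degraded after the retry window.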
4459 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4460 """Check that mirrors are not degraded.
4462 The ldisk parameter, if True, will change the test from the
4463 is_degraded attribute (which represents overall non-ok status for
4464 the device(s)) to the ldisk (representing the local storage status).
4467 lu.cfg.SetDiskID(dev, node)
4471 if on_primary or dev.AssembleOnSecondary():
4472 rstats = lu.rpc.call_blockdev_find(node, dev)
4473 msg = rstats.fail_msg
4475 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4477 elif not rstats.payload:
4478 lu.LogWarning("Can't find disk on node %s", node)
4482 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4484 result = result and not rstats.payload.is_degraded
4487 for child in dev.children:
4488 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
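# Usage sketch (hypothetical call): checking only the local-storage status of
# an instance's first disk on its primary node:
#   ok = _CheckDiskConsistency(self, instance.disks[0],
#                              instance.primary_node, True, ldisk=True)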
4493 class LUOobCommand(NoHooksLU):
4494 """Logical unit for OOB handling.
4498 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4500 def ExpandNames(self):
4501 """Gather locks we need.
4504 if self.op.node_names:
4505 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4506 lock_names = self.op.node_names
4508 lock_names = locking.ALL_SET
4510 self.needed_locks = {
4511 locking.LEVEL_NODE: lock_names,
4514 def CheckPrereq(self):
4515 """Check prerequisites.
4518 - the node exists in the configuration
4521 Any errors are signaled by raising errors.OpPrereqError.
4525 self.master_node = self.cfg.GetMasterNode()
4527 assert self.op.power_delay >= 0.0
4529 if self.op.node_names:
4530 if (self.op.command in self._SKIP_MASTER and
4531 self.master_node in self.op.node_names):
4532 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4533 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4535 if master_oob_handler:
4536 additional_text = ("run '%s %s %s' if you want to operate on the"
4537 " master regardless") % (master_oob_handler,
4541 additional_text = "it does not support out-of-band operations"
4543 raise errors.OpPrereqError(("Operating on the master node %s is not"
4544 " allowed for %s; %s") %
4545 (self.master_node, self.op.command,
4546 additional_text), errors.ECODE_INVAL)
4548 self.op.node_names = self.cfg.GetNodeList()
4549 if self.op.command in self._SKIP_MASTER:
4550 self.op.node_names.remove(self.master_node)
4552 if self.op.command in self._SKIP_MASTER:
4553 assert self.master_node not in self.op.node_names
4555 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4557 raise errors.OpPrereqError("Node %s not found" % node_name,
4560 self.nodes.append(node)
4562 if (not self.op.ignore_status and
4563 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4564 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4565 " not marked offline") % node_name,
4568 def Exec(self, feedback_fn):
4569 """Execute OOB and return result if we expect any.
4572 master_node = self.master_node
4575 for idx, node in enumerate(utils.NiceSort(self.nodes,
4576 key=lambda node: node.name)):
4577 node_entry = [(constants.RS_NORMAL, node.name)]
4578 ret.append(node_entry)
4580 oob_program = _SupportsOob(self.cfg, node)
4583 node_entry.append((constants.RS_UNAVAIL, None))
4586 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4587 self.op.command, oob_program, node.name)
4588 result = self.rpc.call_run_oob(master_node, oob_program,
4589 self.op.command, node.name,
4593 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4594 node.name, result.fail_msg)
4595 node_entry.append((constants.RS_NODATA, None))
4598 self._CheckPayload(result)
4599 except errors.OpExecError, err:
4600 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4602 node_entry.append((constants.RS_NODATA, None))
4604 if self.op.command == constants.OOB_HEALTH:
4605 # For health we should log important events
4606 for item, status in result.payload:
4607 if status in [constants.OOB_STATUS_WARNING,
4608 constants.OOB_STATUS_CRITICAL]:
4609 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4610 item, node.name, status)
4612 if self.op.command == constants.OOB_POWER_ON:
4614 elif self.op.command == constants.OOB_POWER_OFF:
4615 node.powered = False
4616 elif self.op.command == constants.OOB_POWER_STATUS:
4617 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4618 if powered != node.powered:
4619 logging.warning(("Recorded power state (%s) of node '%s' does not"
4620 " match actual power state (%s)"), node.powered,
4623 # For configuration changing commands we should update the node
4624 if self.op.command in (constants.OOB_POWER_ON,
4625 constants.OOB_POWER_OFF):
4626 self.cfg.Update(node, feedback_fn)
4628 node_entry.append((constants.RS_NORMAL, result.payload))
4630 if (self.op.command == constants.OOB_POWER_ON and
4631 idx < len(self.nodes) - 1):
4632 time.sleep(self.op.power_delay)
4636 def _CheckPayload(self, result):
4637 """Checks if the payload is valid.
4639 @param result: RPC result
4640 @raises errors.OpExecError: If payload is not valid
4644 if self.op.command == constants.OOB_HEALTH:
4645 if not isinstance(result.payload, list):
4646 errs.append("command 'health' is expected to return a list but got %s" %
4647 type(result.payload))
4649 for item, status in result.payload:
4650 if status not in constants.OOB_STATUSES:
4651 errs.append("health item '%s' has invalid status '%s'" %
4654 if self.op.command == constants.OOB_POWER_STATUS:
4655 if not isinstance(result.payload, dict):
4656 errs.append("power-status is expected to return a dict but got %s" %
4657 type(result.payload))
4659 if self.op.command in [
4660 constants.OOB_POWER_ON,
4661 constants.OOB_POWER_OFF,
4662 constants.OOB_POWER_CYCLE,
4664 if result.payload is not None:
4665 errs.append("%s is expected not to return a payload but got '%s'" %
4666 (self.op.command, result.payload))
4669 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4670 utils.CommaJoin(errs))
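# Payload shapes enforced above (summary): OOB_HEALTH -> list of (item, status)
# pairs; OOB_POWER_STATUS -> dict containing the powered flag; power
# on/off/cycle -> no payload at all.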
4673 class _OsQuery(_QueryBase):
4674 FIELDS = query.OS_FIELDS
4676 def ExpandNames(self, lu):
4677 # Lock all nodes in shared mode
4678 # Temporary removal of locks, should be reverted later
4679 # TODO: reintroduce locks when they are lighter-weight
4680 lu.needed_locks = {}
4681 #self.share_locks[locking.LEVEL_NODE] = 1
4682 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4684 # The following variables interact with _QueryBase._GetNames
4686 self.wanted = self.names
4688 self.wanted = locking.ALL_SET
4690 self.do_locking = self.use_locking
4692 def DeclareLocks(self, lu, level):
4696 def _DiagnoseByOS(rlist):
4697 """Remaps a per-node return list into a per-os per-node dictionary
4699 @param rlist: a map with node names as keys and OS objects as values
4702 @return: a dictionary with osnames as keys and as value another
4703 map, with nodes as keys and tuples of (path, status, diagnose,
4704 variants, parameters, api_versions) as values, eg::
4706 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4707 (/srv/..., False, "invalid api")],
4708 "node2": [(/srv/..., True, "", [], [])]}
4713 # we build here the list of nodes that didn't fail the RPC (at RPC
4714 # level), so that nodes with a non-responding node daemon don't
4715 # make all OSes invalid
4716 good_nodes = [node_name for node_name in rlist
4717 if not rlist[node_name].fail_msg]
4718 for node_name, nr in rlist.items():
4719 if nr.fail_msg or not nr.payload:
4721 for (name, path, status, diagnose, variants,
4722 params, api_versions) in nr.payload:
4723 if name not in all_os:
4724 # build a list of nodes for this os containing empty lists
4725 # for each node in node_list
4727 for nname in good_nodes:
4728 all_os[name][nname] = []
4729 # convert params from [name, help] to (name, help)
4730 params = [tuple(v) for v in params]
4731 all_os[name][node_name].append((path, status, diagnose,
4732 variants, params, api_versions))
4735 def _GetQueryData(self, lu):
4736 """Computes the list of nodes and their attributes.
4739 # Locking is not used
4740 assert not (compat.any(lu.glm.is_owned(level)
4741 for level in locking.LEVELS
4742 if level != locking.LEVEL_CLUSTER) or
4743 self.do_locking or self.use_locking)
4745 valid_nodes = [node.name
4746 for node in lu.cfg.GetAllNodesInfo().values()
4747 if not node.offline and node.vm_capable]
4748 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4749 cluster = lu.cfg.GetClusterInfo()
4753 for (os_name, os_data) in pol.items():
4754 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4755 hidden=(os_name in cluster.hidden_os),
4756 blacklisted=(os_name in cluster.blacklisted_os))
4760 api_versions = set()
4762 for idx, osl in enumerate(os_data.values()):
4763 info.valid = bool(info.valid and osl and osl[0][1])
4767 (node_variants, node_params, node_api) = osl[0][3:6]
4770 variants.update(node_variants)
4771 parameters.update(node_params)
4772 api_versions.update(node_api)
4774 # Filter out inconsistent values
4775 variants.intersection_update(node_variants)
4776 parameters.intersection_update(node_params)
4777 api_versions.intersection_update(node_api)
4779 info.variants = list(variants)
4780 info.parameters = list(parameters)
4781 info.api_versions = list(api_versions)
4783 data[os_name] = info
4785 # Prepare data in requested order
4786 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4790 class LUOsDiagnose(NoHooksLU):
4791 """Logical unit for OS diagnose/query.
4797 def _BuildFilter(fields, names):
4798 """Builds a filter for querying OSes.
4801 name_filter = qlang.MakeSimpleFilter("name", names)
4803 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4804 # respective field is not requested
4805 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4806 for fname in ["hidden", "blacklisted"]
4807 if fname not in fields]
4808 if "valid" not in fields:
4809 status_filter.append([qlang.OP_TRUE, "valid"])
4812 status_filter.insert(0, qlang.OP_AND)
4814 status_filter = None
4816 if name_filter and status_filter:
4817 return [qlang.OP_AND, name_filter, status_filter]
4821 return status_filter
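# Example of the generated filter (a sketch, assuming the usual qlang
# operators): asking for fields=["name"] and names=["foo"] yields roughly
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "foo"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]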
4823 def CheckArguments(self):
4824 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4825 self.op.output_fields, False)
4827 def ExpandNames(self):
4828 self.oq.ExpandNames(self)
4830 def Exec(self, feedback_fn):
4831 return self.oq.OldStyleQuery(self)
4834 class LUNodeRemove(LogicalUnit):
4835 """Logical unit for removing a node.
4838 HPATH = "node-remove"
4839 HTYPE = constants.HTYPE_NODE
4841 def BuildHooksEnv(self):
4844 This doesn't run on the target node in the pre phase as a failed
4845 node would then be impossible to remove.
4849 "OP_TARGET": self.op.node_name,
4850 "NODE_NAME": self.op.node_name,
4853 def BuildHooksNodes(self):
4854 """Build hooks nodes.
4857 all_nodes = self.cfg.GetNodeList()
4859 all_nodes.remove(self.op.node_name)
4861 logging.warning("Node '%s', which is about to be removed, was not found"
4862 " in the list of all nodes", self.op.node_name)
4863 return (all_nodes, all_nodes)
4865 def CheckPrereq(self):
4866 """Check prerequisites.
4869 - the node exists in the configuration
4870 - it does not have primary or secondary instances
4871 - it's not the master
4873 Any errors are signaled by raising errors.OpPrereqError.
4876 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4877 node = self.cfg.GetNodeInfo(self.op.node_name)
4878 assert node is not None
4880 masternode = self.cfg.GetMasterNode()
4881 if node.name == masternode:
4882 raise errors.OpPrereqError("Node is the master node, failover to another"
4883 " node is required", errors.ECODE_INVAL)
4885 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4886 if node.name in instance.all_nodes:
4887 raise errors.OpPrereqError("Instance %s is still running on the node,"
4888 " please remove it first" % instance_name,
4890 self.op.node_name = node.name
4893 def Exec(self, feedback_fn):
4894 """Removes the node from the cluster.
4898 logging.info("Stopping the node daemon and removing configs from node %s",
4901 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4903 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4906 # Promote nodes to master candidate as needed
4907 _AdjustCandidatePool(self, exceptions=[node.name])
4908 self.context.RemoveNode(node.name)
4910 # Run post hooks on the node before it's removed
4911 _RunPostHook(self, node.name)
4913 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4914 msg = result.fail_msg
4916 self.LogWarning("Errors encountered on the remote node while leaving"
4917 " the cluster: %s", msg)
4919 # Remove node from our /etc/hosts
4920 if self.cfg.GetClusterInfo().modify_etc_hosts:
4921 master_node = self.cfg.GetMasterNode()
4922 result = self.rpc.call_etc_hosts_modify(master_node,
4923 constants.ETC_HOSTS_REMOVE,
4925 result.Raise("Can't update hosts file with new host data")
4926 _RedistributeAncillaryFiles(self)
4929 class _NodeQuery(_QueryBase):
4930 FIELDS = query.NODE_FIELDS
4932 def ExpandNames(self, lu):
4933 lu.needed_locks = {}
4934 lu.share_locks = _ShareAll()
4937 self.wanted = _GetWantedNodes(lu, self.names)
4939 self.wanted = locking.ALL_SET
4941 self.do_locking = (self.use_locking and
4942 query.NQ_LIVE in self.requested_data)
4945 # If any non-static field is requested we need to lock the nodes
4946 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4948 def DeclareLocks(self, lu, level):
4951 def _GetQueryData(self, lu):
4952 """Computes the list of nodes and their attributes.
4955 all_info = lu.cfg.GetAllNodesInfo()
4957 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4959 # Gather data as requested
4960 if query.NQ_LIVE in self.requested_data:
4961 # filter out non-vm_capable nodes
4962 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4964 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4965 [lu.cfg.GetHypervisorType()])
4966 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4967 for (name, nresult) in node_data.items()
4968 if not nresult.fail_msg and nresult.payload)
4972 if query.NQ_INST in self.requested_data:
4973 node_to_primary = dict([(name, set()) for name in nodenames])
4974 node_to_secondary = dict([(name, set()) for name in nodenames])
4976 inst_data = lu.cfg.GetAllInstancesInfo()
4978 for inst in inst_data.values():
4979 if inst.primary_node in node_to_primary:
4980 node_to_primary[inst.primary_node].add(inst.name)
4981 for secnode in inst.secondary_nodes:
4982 if secnode in node_to_secondary:
4983 node_to_secondary[secnode].add(inst.name)
4985 node_to_primary = None
4986 node_to_secondary = None
4988 if query.NQ_OOB in self.requested_data:
4989 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4990 for name, node in all_info.iteritems())
4994 if query.NQ_GROUP in self.requested_data:
4995 groups = lu.cfg.GetAllNodeGroupsInfo()
4999 return query.NodeQueryData([all_info[name] for name in nodenames],
5000 live_data, lu.cfg.GetMasterNode(),
5001 node_to_primary, node_to_secondary, groups,
5002 oob_support, lu.cfg.GetClusterInfo())
5005 class LUNodeQuery(NoHooksLU):
5006 """Logical unit for querying nodes.
5009 # pylint: disable=W0142
5012 def CheckArguments(self):
5013 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5014 self.op.output_fields, self.op.use_locking)
5016 def ExpandNames(self):
5017 self.nq.ExpandNames(self)
5019 def DeclareLocks(self, level):
5020 self.nq.DeclareLocks(self, level)
5022 def Exec(self, feedback_fn):
5023 return self.nq.OldStyleQuery(self)
5026 class LUNodeQueryvols(NoHooksLU):
5027 """Logical unit for getting volumes on node(s).
5031 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5032 _FIELDS_STATIC = utils.FieldSet("node")
5034 def CheckArguments(self):
5035 _CheckOutputFields(static=self._FIELDS_STATIC,
5036 dynamic=self._FIELDS_DYNAMIC,
5037 selected=self.op.output_fields)
5039 def ExpandNames(self):
5040 self.share_locks = _ShareAll()
5041 self.needed_locks = {}
5043 if not self.op.nodes:
5044 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5046 self.needed_locks[locking.LEVEL_NODE] = \
5047 _GetWantedNodes(self, self.op.nodes)
5049 def Exec(self, feedback_fn):
5050 """Computes the list of nodes and their attributes.
5053 nodenames = self.owned_locks(locking.LEVEL_NODE)
5054 volumes = self.rpc.call_node_volumes(nodenames)
5056 ilist = self.cfg.GetAllInstancesInfo()
5057 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5060 for node in nodenames:
5061 nresult = volumes[node]
5064 msg = nresult.fail_msg
5066 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5069 node_vols = sorted(nresult.payload,
5070 key=operator.itemgetter("dev"))
5072 for vol in node_vols:
5074 for field in self.op.output_fields:
5077 elif field == "phys":
5081 elif field == "name":
5083 elif field == "size":
5084 val = int(float(vol["size"]))
5085 elif field == "instance":
5086 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5088 raise errors.ParameterError(field)
5089 node_output.append(str(val))
5091 output.append(node_output)
5096 class LUNodeQueryStorage(NoHooksLU):
5097 """Logical unit for getting information on storage units on node(s).
5100 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5103 def CheckArguments(self):
5104 _CheckOutputFields(static=self._FIELDS_STATIC,
5105 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5106 selected=self.op.output_fields)
5108 def ExpandNames(self):
5109 self.share_locks = _ShareAll()
5110 self.needed_locks = {}
5113 self.needed_locks[locking.LEVEL_NODE] = \
5114 _GetWantedNodes(self, self.op.nodes)
5116 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5118 def Exec(self, feedback_fn):
5119 """Computes the list of nodes and their attributes.
5122 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5124 # Always get name to sort by
5125 if constants.SF_NAME in self.op.output_fields:
5126 fields = self.op.output_fields[:]
5128 fields = [constants.SF_NAME] + self.op.output_fields
5130 # Never ask for node or type as it's only known to the LU
5131 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5132 while extra in fields:
5133 fields.remove(extra)
5135 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5136 name_idx = field_idx[constants.SF_NAME]
5138 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5139 data = self.rpc.call_storage_list(self.nodes,
5140 self.op.storage_type, st_args,
5141 self.op.name, fields)
5145 for node in utils.NiceSort(self.nodes):
5146 nresult = data[node]
5150 msg = nresult.fail_msg
5152 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5155 rows = dict([(row[name_idx], row) for row in nresult.payload])
5157 for name in utils.NiceSort(rows.keys()):
5162 for field in self.op.output_fields:
5163 if field == constants.SF_NODE:
5165 elif field == constants.SF_TYPE:
5166 val = self.op.storage_type
5167 elif field in field_idx:
5168 val = row[field_idx[field]]
5170 raise errors.ParameterError(field)
5179 class _InstanceQuery(_QueryBase):
5180 FIELDS = query.INSTANCE_FIELDS
5182 def ExpandNames(self, lu):
5183 lu.needed_locks = {}
5184 lu.share_locks = _ShareAll()
5187 self.wanted = _GetWantedInstances(lu, self.names)
5189 self.wanted = locking.ALL_SET
5191 self.do_locking = (self.use_locking and
5192 query.IQ_LIVE in self.requested_data)
5194 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5195 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5196 lu.needed_locks[locking.LEVEL_NODE] = []
5197 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5199 self.do_grouplocks = (self.do_locking and
5200 query.IQ_NODES in self.requested_data)
5202 def DeclareLocks(self, lu, level):
5204 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5205 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5207 # Lock all groups used by instances optimistically; this requires going
5208 # via the node before it's locked, requiring verification later on
5209 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5211 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5212 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5213 elif level == locking.LEVEL_NODE:
5214 lu._LockInstancesNodes() # pylint: disable=W0212
5217 def _CheckGroupLocks(lu):
5218 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5219 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5221 # Check if node groups for locked instances are still correct
5222 for instance_name in owned_instances:
5223 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5225 def _GetQueryData(self, lu):
5226 """Computes the list of instances and their attributes.
5229 if self.do_grouplocks:
5230 self._CheckGroupLocks(lu)
5232 cluster = lu.cfg.GetClusterInfo()
5233 all_info = lu.cfg.GetAllInstancesInfo()
5235 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5237 instance_list = [all_info[name] for name in instance_names]
5238 nodes = frozenset(itertools.chain(*(inst.all_nodes
5239 for inst in instance_list)))
5240 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5243 wrongnode_inst = set()
5245 # Gather data as requested
5246 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5248 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5250 result = node_data[name]
5252 # offline nodes will be in both lists
5253 assert result.fail_msg
5254 offline_nodes.append(name)
5256 bad_nodes.append(name)
5257 elif result.payload:
5258 for inst in result.payload:
5259 if inst in all_info:
5260 if all_info[inst].primary_node == name:
5261 live_data.update(result.payload)
5263 wrongnode_inst.add(inst)
5265 # orphan instance; we don't list it here as we don't
5266 # handle this case yet in the output of instance listing
5267 logging.warning("Orphan instance '%s' found on node %s",
5269 # else no instance is alive
5273 if query.IQ_DISKUSAGE in self.requested_data:
5274 disk_usage = dict((inst.name,
5275 _ComputeDiskSize(inst.disk_template,
5276 [{constants.IDISK_SIZE: disk.size}
5277 for disk in inst.disks]))
5278 for inst in instance_list)
5282 if query.IQ_CONSOLE in self.requested_data:
5284 for inst in instance_list:
5285 if inst.name in live_data:
5286 # Instance is running
5287 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5289 consinfo[inst.name] = None
5290 assert set(consinfo.keys()) == set(instance_names)
5294 if query.IQ_NODES in self.requested_data:
5295 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5297 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5298 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5299 for uuid in set(map(operator.attrgetter("group"),
5305 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5306 disk_usage, offline_nodes, bad_nodes,
5307 live_data, wrongnode_inst, consinfo,
5311 class LUQuery(NoHooksLU):
5312 """Query for resources/items of a certain kind.
5315 # pylint: disable=W0142
5318 def CheckArguments(self):
5319 qcls = _GetQueryImplementation(self.op.what)
5321 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5323 def ExpandNames(self):
5324 self.impl.ExpandNames(self)
5326 def DeclareLocks(self, level):
5327 self.impl.DeclareLocks(self, level)
5329 def Exec(self, feedback_fn):
5330 return self.impl.NewStyleQuery(self)
5333 class LUQueryFields(NoHooksLU):
5334 """Query for resources/items of a certain kind.
5337 # pylint: disable=W0142
5340 def CheckArguments(self):
5341 self.qcls = _GetQueryImplementation(self.op.what)
5343 def ExpandNames(self):
5344 self.needed_locks = {}
5346 def Exec(self, feedback_fn):
5347 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5350 class LUNodeModifyStorage(NoHooksLU):
5351 """Logical unit for modifying a storage volume on a node.
5356 def CheckArguments(self):
5357 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5359 storage_type = self.op.storage_type
5362 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5364 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5365 " modified" % storage_type,
5368 diff = set(self.op.changes.keys()) - modifiable
5370 raise errors.OpPrereqError("The following fields can not be modified for"
5371 " storage units of type '%s': %r" %
5372 (storage_type, list(diff)),
5375 def ExpandNames(self):
5376 self.needed_locks = {
5377 locking.LEVEL_NODE: self.op.node_name,
5380 def Exec(self, feedback_fn):
5381 """Computes the list of nodes and their attributes.
5384 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5385 result = self.rpc.call_storage_modify(self.op.node_name,
5386 self.op.storage_type, st_args,
5387 self.op.name, self.op.changes)
5388 result.Raise("Failed to modify storage unit '%s' on %s" %
5389 (self.op.name, self.op.node_name))
5392 class LUNodeAdd(LogicalUnit):
5393 """Logical unit for adding node to the cluster.
5397 HTYPE = constants.HTYPE_NODE
5398 _NFLAGS = ["master_capable", "vm_capable"]
5400 def CheckArguments(self):
5401 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5402 # validate/normalize the node name
5403 self.hostname = netutils.GetHostname(name=self.op.node_name,
5404 family=self.primary_ip_family)
5405 self.op.node_name = self.hostname.name
5407 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5408 raise errors.OpPrereqError("Cannot readd the master node",
5411 if self.op.readd and self.op.group:
5412 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5413 " being readded", errors.ECODE_INVAL)
5415 def BuildHooksEnv(self):
5418 This will run on all nodes before, and on all nodes + the new node after.
5422 "OP_TARGET": self.op.node_name,
5423 "NODE_NAME": self.op.node_name,
5424 "NODE_PIP": self.op.primary_ip,
5425 "NODE_SIP": self.op.secondary_ip,
5426 "MASTER_CAPABLE": str(self.op.master_capable),
5427 "VM_CAPABLE": str(self.op.vm_capable),
5430 def BuildHooksNodes(self):
5431 """Build hooks nodes.
5434 # Exclude added node
5435 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5436 post_nodes = pre_nodes + [self.op.node_name, ]
5438 return (pre_nodes, post_nodes)
5440 def CheckPrereq(self):
5441 """Check prerequisites.
5444 - the new node is not already in the config
5446 - its parameters (single/dual homed) matches the cluster
5448 Any errors are signaled by raising errors.OpPrereqError.
5452 hostname = self.hostname
5453 node = hostname.name
5454 primary_ip = self.op.primary_ip = hostname.ip
5455 if self.op.secondary_ip is None:
5456 if self.primary_ip_family == netutils.IP6Address.family:
5457 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5458 " IPv4 address must be given as secondary",
5460 self.op.secondary_ip = primary_ip
5462 secondary_ip = self.op.secondary_ip
5463 if not netutils.IP4Address.IsValid(secondary_ip):
5464 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5465 " address" % secondary_ip, errors.ECODE_INVAL)
5467 node_list = cfg.GetNodeList()
5468 if not self.op.readd and node in node_list:
5469 raise errors.OpPrereqError("Node %s is already in the configuration" %
5470 node, errors.ECODE_EXISTS)
5471 elif self.op.readd and node not in node_list:
5472 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5475 self.changed_primary_ip = False
5477 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5478 if self.op.readd and node == existing_node_name:
5479 if existing_node.secondary_ip != secondary_ip:
5480 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5481 " address configuration as before",
5483 if existing_node.primary_ip != primary_ip:
5484 self.changed_primary_ip = True
5488 if (existing_node.primary_ip == primary_ip or
5489 existing_node.secondary_ip == primary_ip or
5490 existing_node.primary_ip == secondary_ip or
5491 existing_node.secondary_ip == secondary_ip):
5492 raise errors.OpPrereqError("New node ip address(es) conflict with"
5493 " existing node %s" % existing_node.name,
5494 errors.ECODE_NOTUNIQUE)
5496 # After this 'if' block, None is no longer a valid value for the
5497 # _capable op attributes
5499 old_node = self.cfg.GetNodeInfo(node)
5500 assert old_node is not None, "Can't retrieve locked node %s" % node
5501 for attr in self._NFLAGS:
5502 if getattr(self.op, attr) is None:
5503 setattr(self.op, attr, getattr(old_node, attr))
5505 for attr in self._NFLAGS:
5506 if getattr(self.op, attr) is None:
5507 setattr(self.op, attr, True)
5509 if self.op.readd and not self.op.vm_capable:
5510 pri, sec = cfg.GetNodeInstances(node)
5512 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5513 " flag set to false, but it already holds"
5514 " instances" % node,
5517 # check that the type of the node (single versus dual homed) is the
5518 # same as for the master
5519 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5520 master_singlehomed = myself.secondary_ip == myself.primary_ip
5521 newbie_singlehomed = secondary_ip == primary_ip
5522 if master_singlehomed != newbie_singlehomed:
5523 if master_singlehomed:
5524 raise errors.OpPrereqError("The master has no secondary ip but the"
5525 " new node has one",
5528 raise errors.OpPrereqError("The master has a secondary ip but the"
5529 " new node doesn't have one",
5532 # checks reachability
5533 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5534 raise errors.OpPrereqError("Node not reachable by ping",
5535 errors.ECODE_ENVIRON)
5537 if not newbie_singlehomed:
5538 # check reachability from my secondary ip to newbie's secondary ip
5539 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5540 source=myself.secondary_ip):
5541 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5542 " based ping to node daemon port",
5543 errors.ECODE_ENVIRON)
5550 if self.op.master_capable:
5551 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5553 self.master_candidate = False
5556 self.new_node = old_node
5558 node_group = cfg.LookupNodeGroup(self.op.group)
5559 self.new_node = objects.Node(name=node,
5560 primary_ip=primary_ip,
5561 secondary_ip=secondary_ip,
5562 master_candidate=self.master_candidate,
5563 offline=False, drained=False,
5566 if self.op.ndparams:
5567 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5569 if self.op.hv_state:
5570 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5572 if self.op.disk_state:
5573 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5575 def Exec(self, feedback_fn):
5576 """Adds the new node to the cluster.
5579 new_node = self.new_node
5580 node = new_node.name
5582 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5585 # We are adding a new node, so we assume it's powered
5586 new_node.powered = True
5588 # for re-adds, reset the offline/drained/master-candidate flags;
5589 # we need to reset here, otherwise offline would prevent RPC calls
5590 # later in the procedure; this also means that if the re-add
5591 # fails, we are left with a non-offlined, broken node
5593 new_node.drained = new_node.offline = False # pylint: disable=W0201
5594 self.LogInfo("Readding a node, the offline/drained flags were reset")
5595 # if we demote the node, we do cleanup later in the procedure
5596 new_node.master_candidate = self.master_candidate
5597 if self.changed_primary_ip:
5598 new_node.primary_ip = self.op.primary_ip
5600 # copy the master/vm_capable flags
5601 for attr in self._NFLAGS:
5602 setattr(new_node, attr, getattr(self.op, attr))
5604 # notify the user about any possible mc promotion
5605 if new_node.master_candidate:
5606 self.LogInfo("Node will be a master candidate")
5608 if self.op.ndparams:
5609 new_node.ndparams = self.op.ndparams
5611 new_node.ndparams = {}
5613 if self.op.hv_state:
5614 new_node.hv_state_static = self.new_hv_state
5616 if self.op.disk_state:
5617 new_node.disk_state_static = self.new_disk_state
5619 # check connectivity
5620 result = self.rpc.call_version([node])[node]
5621 result.Raise("Can't get version information from node %s" % node)
5622 if constants.PROTOCOL_VERSION == result.payload:
5623 logging.info("Communication to node %s fine, sw version %s match",
5624 node, result.payload)
5626 raise errors.OpExecError("Version mismatch: master version %s,"
5627 " node version %s" %
5628 (constants.PROTOCOL_VERSION, result.payload))
5630 # Add node to our /etc/hosts, and add key to known_hosts
5631 if self.cfg.GetClusterInfo().modify_etc_hosts:
5632 master_node = self.cfg.GetMasterNode()
5633 result = self.rpc.call_etc_hosts_modify(master_node,
5634 constants.ETC_HOSTS_ADD,
5637 result.Raise("Can't update hosts file with new host data")
5639 if new_node.secondary_ip != new_node.primary_ip:
5640 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5643 node_verify_list = [self.cfg.GetMasterNode()]
5644 node_verify_param = {
5645 constants.NV_NODELIST: ([node], {}),
5646 # TODO: do a node-net-test as well?
5649 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5650 self.cfg.GetClusterName())
5651 for verifier in node_verify_list:
5652 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5653 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5655 for failed in nl_payload:
5656 feedback_fn("ssh/hostname verification failed"
5657 " (checking from %s): %s" %
5658 (verifier, nl_payload[failed]))
5659 raise errors.OpExecError("ssh/hostname verification failed")
5662 _RedistributeAncillaryFiles(self)
5663 self.context.ReaddNode(new_node)
5664 # make sure we redistribute the config
5665 self.cfg.Update(new_node, feedback_fn)
5666 # and make sure the new node will not have old files around
5667 if not new_node.master_candidate:
5668 result = self.rpc.call_node_demote_from_mc(new_node.name)
5669 msg = result.fail_msg
5670 if msg:
5671 self.LogWarning("Node failed to demote itself from master"
5672 " candidate status: %s" % msg)
5673 else:
5674 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5675 additional_vm=self.op.vm_capable)
5676 self.context.AddNode(new_node, self.proc.GetECId())
5679 class LUNodeSetParams(LogicalUnit):
5680 """Modifies the parameters of a node.
5682 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5683 to the node role (as _ROLE_*)
5684 @cvar _R2F: a dictionary from node role to tuples of flags
5685 @cvar _FLAGS: a list of attribute names corresponding to the flags
5688 HPATH = "node-modify"
5689 HTYPE = constants.HTYPE_NODE
5691 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5693 (True, False, False): _ROLE_CANDIDATE,
5694 (False, True, False): _ROLE_DRAINED,
5695 (False, False, True): _ROLE_OFFLINE,
5696 (False, False, False): _ROLE_REGULAR,
5698 _R2F = dict((v, k) for k, v in _F2R.items())
5699 _FLAGS = ["master_candidate", "drained", "offline"]
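# For example, a node whose flags are (master_candidate=True, drained=False,
# offline=False) maps to _ROLE_CANDIDATE, and _R2F simply inverts the
# mapping, so _R2F[_ROLE_DRAINED] == (False, True, False).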
5701 def CheckArguments(self):
5702 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5703 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5704 self.op.master_capable, self.op.vm_capable,
5705 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5707 if all_mods.count(None) == len(all_mods):
5708 raise errors.OpPrereqError("Please pass at least one modification",
5710 if all_mods.count(True) > 1:
5711 raise errors.OpPrereqError("Can't set the node into more than one"
5712 " state at the same time",
5715 # Boolean value that tells us whether we might be demoting from MC
5716 self.might_demote = (self.op.master_candidate == False or
5717 self.op.offline == True or
5718 self.op.drained == True or
5719 self.op.master_capable == False)
5721 if self.op.secondary_ip:
5722 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5723 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5724 " address" % self.op.secondary_ip,
5727 self.lock_all = self.op.auto_promote and self.might_demote
5728 self.lock_instances = self.op.secondary_ip is not None
5730 def _InstanceFilter(self, instance):
5731 """Filter for getting affected instances.
5734 return (instance.disk_template in constants.DTS_INT_MIRROR and
5735 self.op.node_name in instance.all_nodes)
5737 def ExpandNames(self):
5738 if self.lock_all:
5739 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5740 else:
5741 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5743 # Since modifying a node can have severe effects on currently running
5744 # operations, the resource lock is at least acquired in shared mode
5745 self.needed_locks[locking.LEVEL_NODE_RES] = \
5746 self.needed_locks[locking.LEVEL_NODE]
5748 # Get node resource and instance locks in shared mode; they are not used
5749 # for anything but read-only access
5750 self.share_locks[locking.LEVEL_NODE_RES] = 1
5751 self.share_locks[locking.LEVEL_INSTANCE] = 1
5753 if self.lock_instances:
5754 self.needed_locks[locking.LEVEL_INSTANCE] = \
5755 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
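# Note that the instance set computed above via _InstanceFilter (internally
# mirrored instances that have this node among their nodes) is only a lock
# declaration; CheckPrereq re-evaluates the same filter and verifies it still
# matches the locks actually owned.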
5757 def BuildHooksEnv(self):
5760 This runs on the master node.
5764 "OP_TARGET": self.op.node_name,
5765 "MASTER_CANDIDATE": str(self.op.master_candidate),
5766 "OFFLINE": str(self.op.offline),
5767 "DRAINED": str(self.op.drained),
5768 "MASTER_CAPABLE": str(self.op.master_capable),
5769 "VM_CAPABLE": str(self.op.vm_capable),
5772 def BuildHooksNodes(self):
5773 """Build hooks nodes.
5776 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5779 def CheckPrereq(self):
5780 """Check prerequisites.
5782 This only checks the instance list against the existing names.
5785 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5787 if self.lock_instances:
5788 affected_instances = \
5789 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5791 # Verify instance locks
5792 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5793 wanted_instances = frozenset(affected_instances.keys())
5794 if wanted_instances - owned_instances:
5795 raise errors.OpPrereqError("Instances affected by changing node %s's"
5796 " secondary IP address have changed since"
5797 " locks were acquired, wanted '%s', have"
5798 " '%s'; retry the operation" %
5800 utils.CommaJoin(wanted_instances),
5801 utils.CommaJoin(owned_instances)),
5803 else:
5804 affected_instances = None
5806 if (self.op.master_candidate is not None or
5807 self.op.drained is not None or
5808 self.op.offline is not None):
5809 # we can't change the master's node flags
5810 if self.op.node_name == self.cfg.GetMasterNode():
5811 raise errors.OpPrereqError("The master role can be changed"
5812 " only via master-failover",
5815 if self.op.master_candidate and not node.master_capable:
5816 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5817 " it a master candidate" % node.name,
5820 if self.op.vm_capable == False:
5821 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5822 if ipri or isec:
5823 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5824 " the vm_capable flag" % node.name,
5827 if node.master_candidate and self.might_demote and not self.lock_all:
5828 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5829 # check if after removing the current node, we're missing master
5830 # candidates
5831 (mc_remaining, mc_should, _) = \
5832 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5833 if mc_remaining < mc_should:
5834 raise errors.OpPrereqError("Not enough master candidates, please"
5835 " pass auto promote option to allow"
5836 " promotion", errors.ECODE_STATE)
5838 self.old_flags = old_flags = (node.master_candidate,
5839 node.drained, node.offline)
5840 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5841 self.old_role = old_role = self._F2R[old_flags]
5843 # Check for ineffective changes
5844 for attr in self._FLAGS:
5845 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5846 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5847 setattr(self.op, attr, None)
5849 # Past this point, any flag change to False means a transition
5850 # away from the respective state, as only real changes are kept
5852 # TODO: We might query the real power state if it supports OOB
5853 if _SupportsOob(self.cfg, node):
5854 if self.op.offline is False and not (node.powered or
5855 self.op.powered == True):
5856 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5857 " offline status can be reset") %
5859 elif self.op.powered is not None:
5860 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5861 " as it does not support out-of-band"
5862 " handling") % self.op.node_name)
5864 # If we're being de-offlined/drained, we'll MC ourselves if needed
5865 if (self.op.drained == False or self.op.offline == False or
5866 (self.op.master_capable and not node.master_capable)):
5867 if _DecideSelfPromotion(self):
5868 self.op.master_candidate = True
5869 self.LogInfo("Auto-promoting node to master candidate")
5871 # If we're no longer master capable, we'll demote ourselves from MC
5872 if self.op.master_capable == False and node.master_candidate:
5873 self.LogInfo("Demoting from master candidate")
5874 self.op.master_candidate = False
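# From here on at most one flag is being set to True (see the assert below);
# the new role is resolved in the fixed order candidate > drained > offline,
# falls back to the regular role when a flag is merely being unset, and keeps
# the old role when no flag changes at all.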
5877 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5878 if self.op.master_candidate:
5879 new_role = self._ROLE_CANDIDATE
5880 elif self.op.drained:
5881 new_role = self._ROLE_DRAINED
5882 elif self.op.offline:
5883 new_role = self._ROLE_OFFLINE
5884 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5885 # False is still in new flags, which means we're un-setting (the
5886 # current) flags
5887 new_role = self._ROLE_REGULAR
5888 else: # no new flags, nothing, keep old role
5889 new_role = old_role
5891 self.new_role = new_role
5893 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5894 # Trying to transition out of offline status
5895 # TODO: Use standard RPC runner, but make sure it works when the node is
5896 # still marked offline
5897 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5899 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5900 " to report its version: %s" %
5901 (node.name, result.fail_msg),
5904 self.LogWarning("Transitioning node from offline to online state"
5905 " without using re-add. Please make sure the node"
5908 if self.op.secondary_ip:
5909 # Ok even without locking, because this can't be changed by any LU
5910 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5911 master_singlehomed = master.secondary_ip == master.primary_ip
5912 if master_singlehomed and self.op.secondary_ip:
5913 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5914 " homed cluster", errors.ECODE_INVAL)
5916 assert not (frozenset(affected_instances) -
5917 self.owned_locks(locking.LEVEL_INSTANCE))
5919 if node.offline:
5920 if affected_instances:
5921 raise errors.OpPrereqError("Cannot change secondary IP address:"
5922 " offline node has instances (%s)"
5923 " configured to use it" %
5924 utils.CommaJoin(affected_instances.keys()))
5925 else:
5926 # On online nodes, check that no instances are running, and that
5927 # the node has the new ip and we can reach it.
5928 for instance in affected_instances.values():
5929 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5930 msg="cannot change secondary ip")
5932 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5933 if master.name != node.name:
5934 # check reachability from master secondary ip to new secondary ip
5935 if not netutils.TcpPing(self.op.secondary_ip,
5936 constants.DEFAULT_NODED_PORT,
5937 source=master.secondary_ip):
5938 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5939 " based ping to node daemon port",
5940 errors.ECODE_ENVIRON)
5942 if self.op.ndparams:
5943 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5944 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5945 self.new_ndparams = new_ndparams
5947 if self.op.hv_state:
5948 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5949 self.node.hv_state_static)
5951 if self.op.disk_state:
5952 self.new_disk_state = \
5953 _MergeAndVerifyDiskState(self.op.disk_state,
5954 self.node.disk_state_static)
5956 def Exec(self, feedback_fn):
5961 old_role = self.old_role
5962 new_role = self.new_role
5966 if self.op.ndparams:
5967 node.ndparams = self.new_ndparams
5969 if self.op.powered is not None:
5970 node.powered = self.op.powered
5972 if self.op.hv_state:
5973 node.hv_state_static = self.new_hv_state
5975 if self.op.disk_state:
5976 node.disk_state_static = self.new_disk_state
5978 for attr in ["master_capable", "vm_capable"]:
5979 val = getattr(self.op, attr)
5980 if val is not None:
5981 setattr(node, attr, val)
5982 result.append((attr, str(val)))
5984 if new_role != old_role:
5985 # Tell the node to demote itself, if no longer MC and not offline
5986 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5987 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5988 if msg:
5989 self.LogWarning("Node failed to demote itself: %s", msg)
5991 new_flags = self._R2F[new_role]
5992 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5993 if of != nf:
5994 result.append((desc, str(nf)))
5995 (node.master_candidate, node.drained, node.offline) = new_flags
5997 # if we locked all nodes, adjust the candidate pool before updating this node
5998 if self.lock_all:
5999 _AdjustCandidatePool(self, [node.name])
6001 if self.op.secondary_ip:
6002 node.secondary_ip = self.op.secondary_ip
6003 result.append(("secondary_ip", self.op.secondary_ip))
6005 # this will trigger configuration file update, if needed
6006 self.cfg.Update(node, feedback_fn)
6008 # this will trigger job queue propagation or cleanup if the mc
6009 # flag changed
6010 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6011 self.context.ReaddNode(node)
6016 class LUNodePowercycle(NoHooksLU):
6017 """Powercycles a node.
6022 def CheckArguments(self):
6023 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6024 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6025 raise errors.OpPrereqError("The node is the master and the force"
6026 " parameter was not set",
6029 def ExpandNames(self):
6030 """Locking for PowercycleNode.
6032 This is a last-resort option and shouldn't block on other
6033 jobs. Therefore, we grab no locks.
6036 self.needed_locks = {}
6038 def Exec(self, feedback_fn):
6042 result = self.rpc.call_node_powercycle(self.op.node_name,
6043 self.cfg.GetHypervisorType())
6044 result.Raise("Failed to schedule the reboot")
6045 return result.payload
6048 class LUClusterQuery(NoHooksLU):
6049 """Query cluster configuration.
6054 def ExpandNames(self):
6055 self.needed_locks = {}
6057 def Exec(self, feedback_fn):
6058 """Return cluster config.
6061 cluster = self.cfg.GetClusterInfo()
6062 os_hvp = {}
6064 # Filter just for enabled hypervisors
6065 for os_name, hv_dict in cluster.os_hvp.items():
6066 os_hvp[os_name] = {}
6067 for hv_name, hv_params in hv_dict.items():
6068 if hv_name in cluster.enabled_hypervisors:
6069 os_hvp[os_name][hv_name] = hv_params
6071 # Convert ip_family to ip_version
6072 primary_ip_version = constants.IP4_VERSION
6073 if cluster.primary_ip_family == netutils.IP6Address.family:
6074 primary_ip_version = constants.IP6_VERSION
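# The configuration stores the address *family*, while clients are shown an
# IP *version*: a cluster whose primary_ip_family is IPv6 therefore reports
# constants.IP6_VERSION in "primary_ip_version" below, and the IPv4 default
# keeps constants.IP4_VERSION.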
6077 "software_version": constants.RELEASE_VERSION,
6078 "protocol_version": constants.PROTOCOL_VERSION,
6079 "config_version": constants.CONFIG_VERSION,
6080 "os_api_version": max(constants.OS_API_VERSIONS),
6081 "export_version": constants.EXPORT_VERSION,
6082 "architecture": (platform.architecture()[0], platform.machine()),
6083 "name": cluster.cluster_name,
6084 "master": cluster.master_node,
6085 "default_hypervisor": cluster.primary_hypervisor,
6086 "enabled_hypervisors": cluster.enabled_hypervisors,
6087 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6088 for hypervisor_name in cluster.enabled_hypervisors]),
6090 "beparams": cluster.beparams,
6091 "osparams": cluster.osparams,
6092 "ipolicy": cluster.ipolicy,
6093 "nicparams": cluster.nicparams,
6094 "ndparams": cluster.ndparams,
6095 "candidate_pool_size": cluster.candidate_pool_size,
6096 "master_netdev": cluster.master_netdev,
6097 "master_netmask": cluster.master_netmask,
6098 "use_external_mip_script": cluster.use_external_mip_script,
6099 "volume_group_name": cluster.volume_group_name,
6100 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6101 "file_storage_dir": cluster.file_storage_dir,
6102 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6103 "maintain_node_health": cluster.maintain_node_health,
6104 "ctime": cluster.ctime,
6105 "mtime": cluster.mtime,
6106 "uuid": cluster.uuid,
6107 "tags": list(cluster.GetTags()),
6108 "uid_pool": cluster.uid_pool,
6109 "default_iallocator": cluster.default_iallocator,
6110 "reserved_lvs": cluster.reserved_lvs,
6111 "primary_ip_version": primary_ip_version,
6112 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6113 "hidden_os": cluster.hidden_os,
6114 "blacklisted_os": cluster.blacklisted_os,
6120 class LUClusterConfigQuery(NoHooksLU):
6121 """Return configuration values.
6125 _FIELDS_DYNAMIC = utils.FieldSet()
6126 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6127 "watcher_pause", "volume_group_name")
6129 def CheckArguments(self):
6130 _CheckOutputFields(static=self._FIELDS_STATIC,
6131 dynamic=self._FIELDS_DYNAMIC,
6132 selected=self.op.output_fields)
6134 def ExpandNames(self):
6135 self.needed_locks = {}
6137 def Exec(self, feedback_fn):
6138 """Dump a representation of the cluster config to the standard output.
6141 values = []
6142 for field in self.op.output_fields:
6143 if field == "cluster_name":
6144 entry = self.cfg.GetClusterName()
6145 elif field == "master_node":
6146 entry = self.cfg.GetMasterNode()
6147 elif field == "drain_flag":
6148 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6149 elif field == "watcher_pause":
6150 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6151 elif field == "volume_group_name":
6152 entry = self.cfg.GetVGName()
6154 raise errors.ParameterError(field)
6155 values.append(entry)
6157 return values
6159 class LUInstanceActivateDisks(NoHooksLU):
6160 """Bring up an instance's disks.
6165 def ExpandNames(self):
6166 self._ExpandAndLockInstance()
6167 self.needed_locks[locking.LEVEL_NODE] = []
6168 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6170 def DeclareLocks(self, level):
6171 if level == locking.LEVEL_NODE:
6172 self._LockInstancesNodes()
6174 def CheckPrereq(self):
6175 """Check prerequisites.
6177 This checks that the instance is in the cluster.
6180 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6181 assert self.instance is not None, \
6182 "Cannot retrieve locked instance %s" % self.op.instance_name
6183 _CheckNodeOnline(self, self.instance.primary_node)
6185 def Exec(self, feedback_fn):
6186 """Activate the disks.
6189 disks_ok, disks_info = \
6190 _AssembleInstanceDisks(self, self.instance,
6191 ignore_size=self.op.ignore_size)
6193 raise errors.OpExecError("Cannot activate block devices")
6198 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6200 """Prepare the block devices for an instance.
6202 This sets up the block devices on all nodes.
6204 @type lu: L{LogicalUnit}
6205 @param lu: the logical unit on whose behalf we execute
6206 @type instance: L{objects.Instance}
6207 @param instance: the instance for whose disks we assemble
6208 @type disks: list of L{objects.Disk} or None
6209 @param disks: which disks to assemble (or all, if None)
6210 @type ignore_secondaries: boolean
6211 @param ignore_secondaries: if true, errors on secondary nodes
6212 won't result in an error return from the function
6213 @type ignore_size: boolean
6214 @param ignore_size: if true, the current known size of the disk
6215 will not be used during the disk activation, useful for cases
6216 when the size is wrong
6217 @return: False if the operation failed, otherwise a list of
6218 (host, instance_visible_name, node_visible_name)
6219 with the mapping from node devices to instance devices
6222 device_info = []
6223 disks_ok = True
6224 iname = instance.name
6225 disks = _ExpandCheckDisks(instance, disks)
6227 # With the two-pass mechanism we try to reduce the window of
6228 # opportunity for the race condition of switching DRBD to primary
6229 # before the handshake has occurred, but we do not eliminate it
6231 # The proper fix would be to wait (with some limits) until the
6232 # connection has been made and drbd transitions from WFConnection
6233 # into any other network-connected state (Connected, SyncTarget,
6236 # 1st pass, assemble on all nodes in secondary mode
6237 for idx, inst_disk in enumerate(disks):
6238 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6240 node_disk = node_disk.Copy()
6241 node_disk.UnsetSize()
6242 lu.cfg.SetDiskID(node_disk, node)
6243 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6244 msg = result.fail_msg
6245 if msg:
6246 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6247 " (is_primary=False, pass=1): %s",
6248 inst_disk.iv_name, node, msg)
6249 if not ignore_secondaries:
6252 # FIXME: race condition on drbd migration to primary
6254 # 2nd pass, do only the primary node
6255 for idx, inst_disk in enumerate(disks):
6258 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6259 if node != instance.primary_node:
6262 node_disk = node_disk.Copy()
6263 node_disk.UnsetSize()
6264 lu.cfg.SetDiskID(node_disk, node)
6265 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6266 msg = result.fail_msg
6267 if msg:
6268 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6269 " (is_primary=True, pass=2): %s",
6270 inst_disk.iv_name, node, msg)
6273 dev_path = result.payload
6275 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6277 # leave the disks configured for the primary node
6278 # this is a workaround that would be fixed better by
6279 # improving the logical/physical id handling
6281 lu.cfg.SetDiskID(disk, instance.primary_node)
6283 return disks_ok, device_info
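# On success this returns something of the form
#   (True, [(instance.primary_node, "disk/0", "/dev/drbd0"), ...])
# where the iv_name and device path shown here are only illustrative; the
# actual path is whatever the blockdev_assemble RPC reports for the primary
# node.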
6286 def _StartInstanceDisks(lu, instance, force):
6287 """Start the disks of an instance.
6290 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6291 ignore_secondaries=force)
6293 _ShutdownInstanceDisks(lu, instance)
6294 if force is not None and not force:
6295 lu.proc.LogWarning("", hint="If the message above refers to a"
6297 " you can retry the operation using '--force'.")
6298 raise errors.OpExecError("Disk consistency error")
6301 class LUInstanceDeactivateDisks(NoHooksLU):
6302 """Shutdown an instance's disks.
6307 def ExpandNames(self):
6308 self._ExpandAndLockInstance()
6309 self.needed_locks[locking.LEVEL_NODE] = []
6310 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6312 def DeclareLocks(self, level):
6313 if level == locking.LEVEL_NODE:
6314 self._LockInstancesNodes()
6316 def CheckPrereq(self):
6317 """Check prerequisites.
6319 This checks that the instance is in the cluster.
6322 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6323 assert self.instance is not None, \
6324 "Cannot retrieve locked instance %s" % self.op.instance_name
6326 def Exec(self, feedback_fn):
6327 """Deactivate the disks
6330 instance = self.instance
6332 _ShutdownInstanceDisks(self, instance)
6334 _SafeShutdownInstanceDisks(self, instance)
6337 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6338 """Shutdown block devices of an instance.
6340 This function checks if an instance is running, before calling
6341 _ShutdownInstanceDisks.
6344 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6345 _ShutdownInstanceDisks(lu, instance, disks=disks)
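# _SafeShutdownInstanceDisks is the variant to use when the instance must not
# be running: it first asserts the INSTANCE_DOWN state and only then calls
# _ShutdownInstanceDisks, which by itself shuts the devices down
# unconditionally.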
6348 def _ExpandCheckDisks(instance, disks):
6349 """Return the instance disks selected by the disks list
6351 @type disks: list of L{objects.Disk} or None
6352 @param disks: selected disks
6353 @rtype: list of L{objects.Disk}
6354 @return: selected instance disks to act on
6357 if disks is None:
6358 return instance.disks
6359 else:
6360 if not set(disks).issubset(instance.disks):
6361 raise errors.ProgrammerError("Can only act on disks belonging to the"
6366 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6367 """Shutdown block devices of an instance.
6369 This does the shutdown on all nodes of the instance.
6371 If ignore_primary is false, errors on the primary node are taken
6372 into account (the shutdown is reported as failed); otherwise they are ignored.
6376 disks = _ExpandCheckDisks(instance, disks)
6378 for disk in disks:
6379 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6380 lu.cfg.SetDiskID(top_disk, node)
6381 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6382 msg = result.fail_msg
6383 if msg:
6384 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6385 disk.iv_name, node, msg)
6386 if ((node == instance.primary_node and not ignore_primary) or
6387 (node != instance.primary_node and not result.offline)):
6392 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6393 """Checks if a node has enough free memory.
6395 This function checks if a given node has the needed amount of free
6396 memory. In case the node has less memory or we cannot get the
6397 information from the node, this function raises an OpPrereqError
6400 @type lu: C{LogicalUnit}
6401 @param lu: a logical unit from which we get configuration data
6403 @param node: the node to check
6404 @type reason: C{str}
6405 @param reason: string to use in the error message
6406 @type requested: C{int}
6407 @param requested: the amount of memory in MiB to check for
6408 @type hypervisor_name: C{str}
6409 @param hypervisor_name: the hypervisor to ask for memory stats
6411 @return: node current free memory
6412 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6413 we cannot check the node
6416 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6417 nodeinfo[node].Raise("Can't get data from node %s" % node,
6418 prereq=True, ecode=errors.ECODE_ENVIRON)
6419 (_, _, (hv_info, )) = nodeinfo[node].payload
6421 free_mem = hv_info.get("memory_free", None)
6422 if not isinstance(free_mem, int):
6423 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6424 " was '%s'" % (node, free_mem),
6425 errors.ECODE_ENVIRON)
6426 if requested > free_mem:
6427 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6428 " needed %s MiB, available %s MiB" %
6429 (node, reason, requested, free_mem),
6434 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6435 """Checks if nodes have enough free disk space in all the VGs.
6437 This function checks if all given nodes have the needed amount of
6438 free disk. In case any node has less disk or we cannot get the
6439 information from the node, this function raises an OpPrereqError
6442 @type lu: C{LogicalUnit}
6443 @param lu: a logical unit from which we get configuration data
6444 @type nodenames: C{list}
6445 @param nodenames: the list of node names to check
6446 @type req_sizes: C{dict}
6447 @param req_sizes: the hash of vg and corresponding amount of disk in
6449 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6450 or we cannot check the node
6453 for vg, req_size in req_sizes.items():
6454 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
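# For example, a req_sizes value of {"xenvg": 10240} (the VG name is purely
# illustrative) requires 10 GiB of free space in that volume group on every
# node in nodenames.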
6457 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6458 """Checks if nodes have enough free disk space in the specified VG.
6460 This function checks if all given nodes have the needed amount of
6461 free disk. In case any node has less disk or we cannot get the
6462 information from the node, this function raises an OpPrereqError
6465 @type lu: C{LogicalUnit}
6466 @param lu: a logical unit from which we get configuration data
6467 @type nodenames: C{list}
6468 @param nodenames: the list of node names to check
6470 @param vg: the volume group to check
6471 @type requested: C{int}
6472 @param requested: the amount of disk in MiB to check for
6473 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6474 or we cannot check the node
6477 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6478 for node in nodenames:
6479 info = nodeinfo[node]
6480 info.Raise("Cannot get current information from node %s" % node,
6481 prereq=True, ecode=errors.ECODE_ENVIRON)
6482 (_, (vg_info, ), _) = info.payload
6483 vg_free = vg_info.get("vg_free", None)
6484 if not isinstance(vg_free, int):
6485 raise errors.OpPrereqError("Can't compute free disk space on node"
6486 " %s for vg %s, result was '%s'" %
6487 (node, vg, vg_free), errors.ECODE_ENVIRON)
6488 if requested > vg_free:
6489 raise errors.OpPrereqError("Not enough disk space on target node %s"
6490 " vg %s: required %d MiB, available %d MiB" %
6491 (node, vg, requested, vg_free),
6495 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6496 """Checks if nodes have enough physical CPUs
6498 This function checks if all given nodes have the needed number of
6499 physical CPUs. In case any node has fewer CPUs or we cannot get the
6500 information from the node, this function raises an OpPrereqError
6503 @type lu: C{LogicalUnit}
6504 @param lu: a logical unit from which we get configuration data
6505 @type nodenames: C{list}
6506 @param nodenames: the list of node names to check
6507 @type requested: C{int}
6508 @param requested: the minimum acceptable number of physical CPUs
6509 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6510 or we cannot check the node
6513 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6514 for node in nodenames:
6515 info = nodeinfo[node]
6516 info.Raise("Cannot get current information from node %s" % node,
6517 prereq=True, ecode=errors.ECODE_ENVIRON)
6518 (_, _, (hv_info, )) = info.payload
6519 num_cpus = hv_info.get("cpu_total", None)
6520 if not isinstance(num_cpus, int):
6521 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6522 " on node %s, result was '%s'" %
6523 (node, num_cpus), errors.ECODE_ENVIRON)
6524 if requested > num_cpus:
6525 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6526 "required" % (node, num_cpus, requested),
6530 class LUInstanceStartup(LogicalUnit):
6531 """Starts an instance.
6534 HPATH = "instance-start"
6535 HTYPE = constants.HTYPE_INSTANCE
6538 def CheckArguments(self):
6540 if self.op.beparams:
6541 # fill the beparams dict
6542 objects.UpgradeBeParams(self.op.beparams)
6543 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6545 def ExpandNames(self):
6546 self._ExpandAndLockInstance()
6547 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6549 def DeclareLocks(self, level):
6550 if level == locking.LEVEL_NODE_RES:
6551 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6553 def BuildHooksEnv(self):
6556 This runs on master, primary and secondary nodes of the instance.
6560 "FORCE": self.op.force,
6563 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6567 def BuildHooksNodes(self):
6568 """Build hooks nodes.
6571 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6574 def CheckPrereq(self):
6575 """Check prerequisites.
6577 This checks that the instance is in the cluster.
6580 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6581 assert self.instance is not None, \
6582 "Cannot retrieve locked instance %s" % self.op.instance_name
6585 if self.op.hvparams:
6586 # check hypervisor parameter syntax (locally)
6587 cluster = self.cfg.GetClusterInfo()
6588 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6589 filled_hvp = cluster.FillHV(instance)
6590 filled_hvp.update(self.op.hvparams)
6591 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6592 hv_type.CheckParameterSyntax(filled_hvp)
6593 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6595 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6597 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6599 if self.primary_offline and self.op.ignore_offline_nodes:
6600 self.proc.LogWarning("Ignoring offline primary node")
6602 if self.op.hvparams or self.op.beparams:
6603 self.proc.LogWarning("Overridden parameters are ignored")
6604 else:
6605 _CheckNodeOnline(self, instance.primary_node)
6607 bep = self.cfg.GetClusterInfo().FillBE(instance)
6608 bep.update(self.op.beparams)
6610 # check that the bridges exist
6611 _CheckInstanceBridgesExist(self, instance)
6613 remote_info = self.rpc.call_instance_info(instance.primary_node,
6615 instance.hypervisor)
6616 remote_info.Raise("Error checking node %s" % instance.primary_node,
6617 prereq=True, ecode=errors.ECODE_ENVIRON)
6618 if not remote_info.payload: # not running already
6619 _CheckNodeFreeMemory(self, instance.primary_node,
6620 "starting instance %s" % instance.name,
6621 bep[constants.BE_MINMEM], instance.hypervisor)
6623 def Exec(self, feedback_fn):
6624 """Start the instance.
6627 instance = self.instance
6628 force = self.op.force
6630 if not self.op.no_remember:
6631 self.cfg.MarkInstanceUp(instance.name)
6633 if self.primary_offline:
6634 assert self.op.ignore_offline_nodes
6635 self.proc.LogInfo("Primary node offline, marked instance as started")
6636 else:
6637 node_current = instance.primary_node
6639 _StartInstanceDisks(self, instance, force)
6642 self.rpc.call_instance_start(node_current,
6643 (instance, self.op.hvparams,
6645 self.op.startup_paused)
6646 msg = result.fail_msg
6647 if msg:
6648 _ShutdownInstanceDisks(self, instance)
6649 raise errors.OpExecError("Could not start instance: %s" % msg)
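# Summary of the startup flow above: the instance is marked up in the
# configuration (unless no_remember is set); with an offline primary node the
# start is only recorded, otherwise the disks are assembled and the
# instance_start RPC is issued, and the disks are shut down again if that
# call fails.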
6652 class LUInstanceReboot(LogicalUnit):
6653 """Reboot an instance.
6656 HPATH = "instance-reboot"
6657 HTYPE = constants.HTYPE_INSTANCE
6660 def ExpandNames(self):
6661 self._ExpandAndLockInstance()
6663 def BuildHooksEnv(self):
6666 This runs on master, primary and secondary nodes of the instance.
6670 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6671 "REBOOT_TYPE": self.op.reboot_type,
6672 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6675 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6679 def BuildHooksNodes(self):
6680 """Build hooks nodes.
6683 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6686 def CheckPrereq(self):
6687 """Check prerequisites.
6689 This checks that the instance is in the cluster.
6692 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6693 assert self.instance is not None, \
6694 "Cannot retrieve locked instance %s" % self.op.instance_name
6695 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6696 _CheckNodeOnline(self, instance.primary_node)
6698 # check that the bridges exist
6699 _CheckInstanceBridgesExist(self, instance)
6701 def Exec(self, feedback_fn):
6702 """Reboot the instance.
6705 instance = self.instance
6706 ignore_secondaries = self.op.ignore_secondaries
6707 reboot_type = self.op.reboot_type
6709 remote_info = self.rpc.call_instance_info(instance.primary_node,
6711 instance.hypervisor)
6712 remote_info.Raise("Error checking node %s" % instance.primary_node)
6713 instance_running = bool(remote_info.payload)
6715 node_current = instance.primary_node
6717 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6718 constants.INSTANCE_REBOOT_HARD]:
6719 for disk in instance.disks:
6720 self.cfg.SetDiskID(disk, node_current)
6721 result = self.rpc.call_instance_reboot(node_current, instance,
6723 self.op.shutdown_timeout)
6724 result.Raise("Could not reboot instance")
6725 else:
6726 if instance_running:
6727 result = self.rpc.call_instance_shutdown(node_current, instance,
6728 self.op.shutdown_timeout)
6729 result.Raise("Could not shutdown instance for full reboot")
6730 _ShutdownInstanceDisks(self, instance)
6731 else:
6732 self.LogInfo("Instance %s was already stopped, starting now",
6733 instance.name)
6734 _StartInstanceDisks(self, instance, ignore_secondaries)
6735 result = self.rpc.call_instance_start(node_current,
6736 (instance, None, None), False)
6737 msg = result.fail_msg
6738 if msg:
6739 _ShutdownInstanceDisks(self, instance)
6740 raise errors.OpExecError("Could not start instance for"
6741 " full reboot: %s" % msg)
6743 self.cfg.MarkInstanceUp(instance.name)
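# Summary of the reboot flow above: soft and hard reboots of a running
# instance are delegated to a single instance_reboot RPC, while a full reboot
# (or a reboot of an instance that turns out not to be running) is emulated
# with an optional shutdown plus disk deactivation, followed by disk
# activation and a fresh instance_start; in all cases the instance ends up
# marked as up in the configuration.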
6746 class LUInstanceShutdown(LogicalUnit):
6747 """Shutdown an instance.
6750 HPATH = "instance-stop"
6751 HTYPE = constants.HTYPE_INSTANCE
6754 def ExpandNames(self):
6755 self._ExpandAndLockInstance()
6757 def BuildHooksEnv(self):
6760 This runs on master, primary and secondary nodes of the instance.
6763 env = _BuildInstanceHookEnvByObject(self, self.instance)
6764 env["TIMEOUT"] = self.op.timeout
6767 def BuildHooksNodes(self):
6768 """Build hooks nodes.
6771 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6774 def CheckPrereq(self):
6775 """Check prerequisites.
6777 This checks that the instance is in the cluster.
6780 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6781 assert self.instance is not None, \
6782 "Cannot retrieve locked instance %s" % self.op.instance_name
6784 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6786 self.primary_offline = \
6787 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6789 if self.primary_offline and self.op.ignore_offline_nodes:
6790 self.proc.LogWarning("Ignoring offline primary node")
6791 else:
6792 _CheckNodeOnline(self, self.instance.primary_node)
6794 def Exec(self, feedback_fn):
6795 """Shutdown the instance.
6798 instance = self.instance
6799 node_current = instance.primary_node
6800 timeout = self.op.timeout
6802 if not self.op.no_remember:
6803 self.cfg.MarkInstanceDown(instance.name)
6805 if self.primary_offline:
6806 assert self.op.ignore_offline_nodes
6807 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6808 else:
6809 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6810 msg = result.fail_msg
6811 if msg:
6812 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6814 _ShutdownInstanceDisks(self, instance)
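# Note that the instance is marked down in the configuration before the
# shutdown RPC is issued (unless no_remember is set), so even a failed
# shutdown leaves the recorded admin state as stopped; with an offline
# primary node only the configuration is updated.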
6817 class LUInstanceReinstall(LogicalUnit):
6818 """Reinstall an instance.
6821 HPATH = "instance-reinstall"
6822 HTYPE = constants.HTYPE_INSTANCE
6825 def ExpandNames(self):
6826 self._ExpandAndLockInstance()
6828 def BuildHooksEnv(self):
6831 This runs on master, primary and secondary nodes of the instance.
6834 return _BuildInstanceHookEnvByObject(self, self.instance)
6836 def BuildHooksNodes(self):
6837 """Build hooks nodes.
6840 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6843 def CheckPrereq(self):
6844 """Check prerequisites.
6846 This checks that the instance is in the cluster and is not running.
6849 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6850 assert instance is not None, \
6851 "Cannot retrieve locked instance %s" % self.op.instance_name
6852 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6853 " offline, cannot reinstall")
6854 for node in instance.secondary_nodes:
6855 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6856 " cannot reinstall")
6858 if instance.disk_template == constants.DT_DISKLESS:
6859 raise errors.OpPrereqError("Instance '%s' has no disks" %
6860 self.op.instance_name,
6862 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6864 if self.op.os_type is not None:
6866 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6867 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6868 instance_os = self.op.os_type
6869 else:
6870 instance_os = instance.os
6872 nodelist = list(instance.all_nodes)
6874 if self.op.osparams:
6875 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6876 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6877 self.os_inst = i_osdict # the new dict (without defaults)
6881 self.instance = instance
6883 def Exec(self, feedback_fn):
6884 """Reinstall the instance.
6887 inst = self.instance
6889 if self.op.os_type is not None:
6890 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6891 inst.os = self.op.os_type
6892 # Write to configuration
6893 self.cfg.Update(inst, feedback_fn)
6895 _StartInstanceDisks(self, inst, None)
6897 feedback_fn("Running the instance OS create scripts...")
6898 # FIXME: pass debug option from opcode to backend
6899 result = self.rpc.call_instance_os_add(inst.primary_node,
6900 (inst, self.os_inst), True,
6901 self.op.debug_level)
6902 result.Raise("Could not install OS for instance %s on node %s" %
6903 (inst.name, inst.primary_node))
6905 _ShutdownInstanceDisks(self, inst)
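# Reinstallation reuses the existing disks: they are activated, the OS create
# scripts are re-run on the primary node via instance_os_add (including any
# updated OS parameters), and the disks are deactivated again; the instance
# itself is never started here.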
6908 class LUInstanceRecreateDisks(LogicalUnit):
6909 """Recreate an instance's missing disks.
6912 HPATH = "instance-recreate-disks"
6913 HTYPE = constants.HTYPE_INSTANCE
6916 _MODIFYABLE = frozenset([
6917 constants.IDISK_SIZE,
6918 constants.IDISK_MODE,
6921 # New or changed disk parameters may have different semantics
6922 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6923 constants.IDISK_ADOPT,
6925 # TODO: Implement support for changing the VG while recreating
6927 constants.IDISK_METAVG,
6930 def CheckArguments(self):
6931 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6932 # Normalize and convert deprecated list of disk indices
6933 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
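# For example, a legacy request of disks=[2, 0] is normalized above to
# [(0, {}), (2, {})], i.e. each index paired with an empty parameter
# override dictionary.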
6935 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6937 raise errors.OpPrereqError("Some disks have been specified more than"
6938 " once: %s" % utils.CommaJoin(duplicates),
6941 for (idx, params) in self.op.disks:
6942 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6943 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6945 raise errors.OpPrereqError("Parameters for disk %s try to change"
6946 " unmodifiable parameter(s): %s" %
6947 (idx, utils.CommaJoin(unsupported)),
6950 def ExpandNames(self):
6951 self._ExpandAndLockInstance()
6952 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6953 if self.op.nodes:
6954 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6955 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6956 else:
6957 self.needed_locks[locking.LEVEL_NODE] = []
6958 self.needed_locks[locking.LEVEL_NODE_RES] = []
6960 def DeclareLocks(self, level):
6961 if level == locking.LEVEL_NODE:
6962 # if we replace the nodes, we only need to lock the old primary,
6963 # otherwise we need to lock all nodes for disk re-creation
6964 primary_only = bool(self.op.nodes)
6965 self._LockInstancesNodes(primary_only=primary_only)
6966 elif level == locking.LEVEL_NODE_RES:
6968 self.needed_locks[locking.LEVEL_NODE_RES] = \
6969 self.needed_locks[locking.LEVEL_NODE][:]
6971 def BuildHooksEnv(self):
6974 This runs on master, primary and secondary nodes of the instance.
6977 return _BuildInstanceHookEnvByObject(self, self.instance)
6979 def BuildHooksNodes(self):
6980 """Build hooks nodes.
6983 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6986 def CheckPrereq(self):
6987 """Check prerequisites.
6989 This checks that the instance is in the cluster and is not running.
6992 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6993 assert instance is not None, \
6994 "Cannot retrieve locked instance %s" % self.op.instance_name
6995 if self.op.nodes:
6996 if len(self.op.nodes) != len(instance.all_nodes):
6997 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6998 " %d replacement nodes were specified" %
6999 (instance.name, len(instance.all_nodes),
7000 len(self.op.nodes)),
7002 assert instance.disk_template != constants.DT_DRBD8 or \
7003 len(self.op.nodes) == 2
7004 assert instance.disk_template != constants.DT_PLAIN or \
7005 len(self.op.nodes) == 1
7006 primary_node = self.op.nodes[0]
7007 else:
7008 primary_node = instance.primary_node
7009 _CheckNodeOnline(self, primary_node)
7011 if instance.disk_template == constants.DT_DISKLESS:
7012 raise errors.OpPrereqError("Instance '%s' has no disks" %
7013 self.op.instance_name, errors.ECODE_INVAL)
7015 # if we replace nodes *and* the old primary is offline, we don't
7017 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7018 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7019 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7020 if not (self.op.nodes and old_pnode.offline):
7021 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7022 msg="cannot recreate disks")
7024 if self.op.disks:
7025 self.disks = dict(self.op.disks)
7026 else:
7027 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7029 maxidx = max(self.disks.keys())
7030 if maxidx >= len(instance.disks):
7031 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7034 if (self.op.nodes and
7035 sorted(self.disks.keys()) != range(len(instance.disks))):
7036 raise errors.OpPrereqError("Can't recreate disks partially and"
7037 " change the nodes at the same time",
7040 self.instance = instance
7042 def Exec(self, feedback_fn):
7043 """Recreate the disks.
7046 instance = self.instance
7048 assert (self.owned_locks(locking.LEVEL_NODE) ==
7049 self.owned_locks(locking.LEVEL_NODE_RES))
7052 mods = [] # keeps track of needed changes
7054 for idx, disk in enumerate(instance.disks):
7056 changes = self.disks[idx]
7058 # Disk should not be recreated
7062 # update secondaries for disks, if needed
7063 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7064 # need to update the nodes and minors
7065 assert len(self.op.nodes) == 2
7066 assert len(disk.logical_id) == 6 # otherwise disk internals
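# A DRBD8 logical_id is the 6-tuple (node_a, node_b, port, minor_a, minor_b,
# secret); below, the old port and secret are kept while the new nodes and
# the freshly allocated minors are swapped in.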
7068 (_, _, old_port, _, _, old_secret) = disk.logical_id
7069 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7070 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7071 new_minors[0], new_minors[1], old_secret)
7072 assert len(disk.logical_id) == len(new_id)
7076 mods.append((idx, new_id, changes))
7078 # now that we have passed all asserts above, we can apply the mods
7079 # in a single run (to avoid partial changes)
7080 for idx, new_id, changes in mods:
7081 disk = instance.disks[idx]
7082 if new_id is not None:
7083 assert disk.dev_type == constants.LD_DRBD8
7084 disk.logical_id = new_id
7086 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7087 mode=changes.get(constants.IDISK_MODE, None))
7089 # change primary node, if needed
7090 if self.op.nodes:
7091 instance.primary_node = self.op.nodes[0]
7092 self.LogWarning("Changing the instance's nodes, you will have to"
7093 " remove any disks left on the older nodes manually")
7096 self.cfg.Update(instance, feedback_fn)
7098 _CreateDisks(self, instance, to_skip=to_skip)
7101 class LUInstanceRename(LogicalUnit):
7102 """Rename an instance.
7105 HPATH = "instance-rename"
7106 HTYPE = constants.HTYPE_INSTANCE
7108 def CheckArguments(self):
7112 if self.op.ip_check and not self.op.name_check:
7113 # TODO: make the ip check more flexible and not depend on the name check
7114 raise errors.OpPrereqError("IP address check requires a name check",
7117 def BuildHooksEnv(self):
7120 This runs on master, primary and secondary nodes of the instance.
7123 env = _BuildInstanceHookEnvByObject(self, self.instance)
7124 env["INSTANCE_NEW_NAME"] = self.op.new_name
7127 def BuildHooksNodes(self):
7128 """Build hooks nodes.
7131 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7134 def CheckPrereq(self):
7135 """Check prerequisites.
7137 This checks that the instance is in the cluster and is not running.
7140 self.op.instance_name = _ExpandInstanceName(self.cfg,
7141 self.op.instance_name)
7142 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7143 assert instance is not None
7144 _CheckNodeOnline(self, instance.primary_node)
7145 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7146 msg="cannot rename")
7147 self.instance = instance
7149 new_name = self.op.new_name
7150 if self.op.name_check:
7151 hostname = netutils.GetHostname(name=new_name)
7152 if hostname.name != new_name:
7153 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7155 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7156 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7157 " same as given hostname '%s'") %
7158 (hostname.name, self.op.new_name),
7160 new_name = self.op.new_name = hostname.name
7161 if (self.op.ip_check and
7162 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7163 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7164 (hostname.ip, new_name),
7165 errors.ECODE_NOTUNIQUE)
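# In other words, with name_check the new name is first resolved (e.g. a
# short name such as "inst1" may come back as "inst1.example.com"; the domain
# here is purely illustrative) and, if ip_check is also set, the resolved IP
# must not answer on the node daemon port before the rename is allowed.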
7167 instance_list = self.cfg.GetInstanceList()
7168 if new_name in instance_list and new_name != instance.name:
7169 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7170 new_name, errors.ECODE_EXISTS)
7172 def Exec(self, feedback_fn):
7173 """Rename the instance.
7176 inst = self.instance
7177 old_name = inst.name
7179 rename_file_storage = False
7180 if (inst.disk_template in constants.DTS_FILEBASED and
7181 self.op.new_name != inst.name):
7182 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7183 rename_file_storage = True
7185 self.cfg.RenameInstance(inst.name, self.op.new_name)
7186 # Change the instance lock. This is definitely safe while we hold the BGL.
7187 # Otherwise the new lock would have to be added in acquired mode.
7189 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7190 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7192 # re-read the instance from the configuration after rename
7193 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7195 if rename_file_storage:
7196 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7197 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7198 old_file_storage_dir,
7199 new_file_storage_dir)
7200 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7201 " (but the instance has been renamed in Ganeti)" %
7202 (inst.primary_node, old_file_storage_dir,
7203 new_file_storage_dir))
7205 _StartInstanceDisks(self, inst, None)
7207 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7208 old_name, self.op.debug_level)
7209 msg = result.fail_msg
7210 if msg:
7211 msg = ("Could not run OS rename script for instance %s on node %s"
7212 " (but the instance has been renamed in Ganeti): %s" %
7213 (inst.name, inst.primary_node, msg))
7214 self.proc.LogWarning(msg)
7216 _ShutdownInstanceDisks(self, inst)
7221 class LUInstanceRemove(LogicalUnit):
7222 """Remove an instance.
7225 HPATH = "instance-remove"
7226 HTYPE = constants.HTYPE_INSTANCE
7229 def ExpandNames(self):
7230 self._ExpandAndLockInstance()
7231 self.needed_locks[locking.LEVEL_NODE] = []
7232 self.needed_locks[locking.LEVEL_NODE_RES] = []
7233 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7235 def DeclareLocks(self, level):
7236 if level == locking.LEVEL_NODE:
7237 self._LockInstancesNodes()
7238 elif level == locking.LEVEL_NODE_RES:
7240 self.needed_locks[locking.LEVEL_NODE_RES] = \
7241 self.needed_locks[locking.LEVEL_NODE][:]
7243 def BuildHooksEnv(self):
7246 This runs on master, primary and secondary nodes of the instance.
7249 env = _BuildInstanceHookEnvByObject(self, self.instance)
7250 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7253 def BuildHooksNodes(self):
7254 """Build hooks nodes.
7257 nl = [self.cfg.GetMasterNode()]
7258 nl_post = list(self.instance.all_nodes) + nl
7259 return (nl, nl_post)
7261 def CheckPrereq(self):
7262 """Check prerequisites.
7264 This checks that the instance is in the cluster.
7267 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7268 assert self.instance is not None, \
7269 "Cannot retrieve locked instance %s" % self.op.instance_name
7271 def Exec(self, feedback_fn):
7272 """Remove the instance.
7275 instance = self.instance
7276 logging.info("Shutting down instance %s on node %s",
7277 instance.name, instance.primary_node)
7279 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7280 self.op.shutdown_timeout)
7281 msg = result.fail_msg
7282 if msg:
7283 if self.op.ignore_failures:
7284 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7285 else:
7286 raise errors.OpExecError("Could not shutdown instance %s on"
7288 (instance.name, instance.primary_node, msg))
7290 assert (self.owned_locks(locking.LEVEL_NODE) ==
7291 self.owned_locks(locking.LEVEL_NODE_RES))
7292 assert not (set(instance.all_nodes) -
7293 self.owned_locks(locking.LEVEL_NODE)), \
7294 "Not owning correct locks"
7296 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7299 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7300 """Utility function to remove an instance.
7303 logging.info("Removing block devices for instance %s", instance.name)
7305 if not _RemoveDisks(lu, instance):
7306 if not ignore_failures:
7307 raise errors.OpExecError("Can't remove instance's disks")
7308 feedback_fn("Warning: can't remove instance's disks")
7310 logging.info("Removing instance %s out of cluster config", instance.name)
7312 lu.cfg.RemoveInstance(instance.name)
7314 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7315 "Instance lock removal conflict"
7317 # Remove lock for the instance
7318 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
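# Nothing is released here: recording the name in lu.remove_locks asks the
# surrounding processor to drop (and delete) the instance's lock once the LU
# has finished, mirroring the instance's removal from the configuration.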
7321 class LUInstanceQuery(NoHooksLU):
7322 """Logical unit for querying instances.
7325 # pylint: disable=W0142
7328 def CheckArguments(self):
7329 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7330 self.op.output_fields, self.op.use_locking)
7332 def ExpandNames(self):
7333 self.iq.ExpandNames(self)
7335 def DeclareLocks(self, level):
7336 self.iq.DeclareLocks(self, level)
7338 def Exec(self, feedback_fn):
7339 return self.iq.OldStyleQuery(self)
7342 class LUInstanceFailover(LogicalUnit):
7343 """Failover an instance.
7346 HPATH = "instance-failover"
7347 HTYPE = constants.HTYPE_INSTANCE
7350 def CheckArguments(self):
7351 """Check the arguments.
7354 self.iallocator = getattr(self.op, "iallocator", None)
7355 self.target_node = getattr(self.op, "target_node", None)
7357 def ExpandNames(self):
7358 self._ExpandAndLockInstance()
7360 if self.op.target_node is not None:
7361 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7363 self.needed_locks[locking.LEVEL_NODE] = []
7364 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7366 self.needed_locks[locking.LEVEL_NODE_RES] = []
7367 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7369 ignore_consistency = self.op.ignore_consistency
7370 shutdown_timeout = self.op.shutdown_timeout
7371 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7374 ignore_consistency=ignore_consistency,
7375 shutdown_timeout=shutdown_timeout,
7376 ignore_ipolicy=self.op.ignore_ipolicy)
7377 self.tasklets = [self._migrater]
7379 def DeclareLocks(self, level):
7380 if level == locking.LEVEL_NODE:
7381 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7382 if instance.disk_template in constants.DTS_EXT_MIRROR:
7383 if self.op.target_node is None:
7384 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7386 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7387 self.op.target_node]
7388 del self.recalculate_locks[locking.LEVEL_NODE]
7390 self._LockInstancesNodes()
7391 elif level == locking.LEVEL_NODE_RES:
7393 self.needed_locks[locking.LEVEL_NODE_RES] = \
7394 self.needed_locks[locking.LEVEL_NODE][:]
7396 def BuildHooksEnv(self):
7399 This runs on master, primary and secondary nodes of the instance.
7402 instance = self._migrater.instance
7403 source_node = instance.primary_node
7404 target_node = self.op.target_node
7406 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7407 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7408 "OLD_PRIMARY": source_node,
7409 "NEW_PRIMARY": target_node,
7412 if instance.disk_template in constants.DTS_INT_MIRROR:
7413 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7414 env["NEW_SECONDARY"] = source_node
7416 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7418 env.update(_BuildInstanceHookEnvByObject(self, instance))
7422 def BuildHooksNodes(self):
7423 """Build hooks nodes.
7426 instance = self._migrater.instance
7427 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7428 return (nl, nl + [instance.primary_node])
7431 class LUInstanceMigrate(LogicalUnit):
7432 """Migrate an instance.
7434 This is migration without shutting the instance down, as opposed to
7435 failover, which is done after shutting it down.
7438 HPATH = "instance-migrate"
7439 HTYPE = constants.HTYPE_INSTANCE
7442 def ExpandNames(self):
7443 self._ExpandAndLockInstance()
7445 if self.op.target_node is not None:
7446 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7448 self.needed_locks[locking.LEVEL_NODE] = []
7449 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7451 self.needed_locks[locking.LEVEL_NODE_RES] = []
7452 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7455 TLMigrateInstance(self, self.op.instance_name,
7456 cleanup=self.op.cleanup,
7458 fallback=self.op.allow_failover,
7459 allow_runtime_changes=self.op.allow_runtime_changes,
7460 ignore_ipolicy=self.op.ignore_ipolicy)
7461 self.tasklets = [self._migrater]
7463 def DeclareLocks(self, level):
7464 if level == locking.LEVEL_NODE:
7465 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7466 if instance.disk_template in constants.DTS_EXT_MIRROR:
7467 if self.op.target_node is None:
7468 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7470 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7471 self.op.target_node]
7472 del self.recalculate_locks[locking.LEVEL_NODE]
7474 self._LockInstancesNodes()
7475 elif level == locking.LEVEL_NODE_RES:
7477 self.needed_locks[locking.LEVEL_NODE_RES] = \
7478 self.needed_locks[locking.LEVEL_NODE][:]
7480 def BuildHooksEnv(self):
7483 This runs on master, primary and secondary nodes of the instance.
7486 instance = self._migrater.instance
7487 source_node = instance.primary_node
7488 target_node = self.op.target_node
7489 env = _BuildInstanceHookEnvByObject(self, instance)
7491 "MIGRATE_LIVE": self._migrater.live,
7492 "MIGRATE_CLEANUP": self.op.cleanup,
7493 "OLD_PRIMARY": source_node,
7494 "NEW_PRIMARY": target_node,
7495 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7498 if instance.disk_template in constants.DTS_INT_MIRROR:
7499 env["OLD_SECONDARY"] = target_node
7500 env["NEW_SECONDARY"] = source_node
7502 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7506 def BuildHooksNodes(self):
7507 """Build hooks nodes.
7510 instance = self._migrater.instance
7511 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7512 return (nl, nl + [instance.primary_node])
7515 class LUInstanceMove(LogicalUnit):
7516 """Move an instance by data-copying.
7519 HPATH = "instance-move"
7520 HTYPE = constants.HTYPE_INSTANCE
7523 def ExpandNames(self):
7524 self._ExpandAndLockInstance()
7525 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7526 self.op.target_node = target_node
7527 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7528 self.needed_locks[locking.LEVEL_NODE_RES] = []
7529 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7531 def DeclareLocks(self, level):
7532 if level == locking.LEVEL_NODE:
7533 self._LockInstancesNodes(primary_only=True)
7534 elif level == locking.LEVEL_NODE_RES:
7536 self.needed_locks[locking.LEVEL_NODE_RES] = \
7537 self.needed_locks[locking.LEVEL_NODE][:]
7539 def BuildHooksEnv(self):
7542 This runs on master, primary and secondary nodes of the instance.
7546 "TARGET_NODE": self.op.target_node,
7547 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7549 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7552 def BuildHooksNodes(self):
7553 """Build hooks nodes.
7557 self.cfg.GetMasterNode(),
7558 self.instance.primary_node,
7559 self.op.target_node,
7563 def CheckPrereq(self):
7564 """Check prerequisites.
7566 This checks that the instance is in the cluster.
7569 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7570 assert self.instance is not None, \
7571 "Cannot retrieve locked instance %s" % self.op.instance_name
7573 node = self.cfg.GetNodeInfo(self.op.target_node)
7574 assert node is not None, \
7575 "Cannot retrieve locked node %s" % self.op.target_node
7577 self.target_node = target_node = node.name
7579 if target_node == instance.primary_node:
7580 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7581 (instance.name, target_node),
7584 bep = self.cfg.GetClusterInfo().FillBE(instance)
7586 for idx, dsk in enumerate(instance.disks):
7587 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7588 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7589 " cannot copy" % idx, errors.ECODE_STATE)
7591 _CheckNodeOnline(self, target_node)
7592 _CheckNodeNotDrained(self, target_node)
7593 _CheckNodeVmCapable(self, target_node)
7594 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7595 self.cfg.GetNodeGroup(node.group))
7596 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7597 ignore=self.op.ignore_ipolicy)
7599 if instance.admin_state == constants.ADMINST_UP:
7600 # check memory requirements on the target node
7601 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7602 instance.name, bep[constants.BE_MAXMEM],
7603 instance.hypervisor)
7605 self.LogInfo("Not checking memory on the secondary node as"
7606 " instance will not be started")
7608 # check bridge existence
7609 _CheckInstanceBridgesExist(self, instance, node=target_node)
7611 def Exec(self, feedback_fn):
7612 """Move an instance.
7614 The move is done by shutting it down on its present node, copying
7615 the data over (slow) and starting it on the new node.
7618 instance = self.instance
7620 source_node = instance.primary_node
7621 target_node = self.target_node
7623 self.LogInfo("Shutting down instance %s on source node %s",
7624 instance.name, source_node)
7626 assert (self.owned_locks(locking.LEVEL_NODE) ==
7627 self.owned_locks(locking.LEVEL_NODE_RES))
7629 result = self.rpc.call_instance_shutdown(source_node, instance,
7630 self.op.shutdown_timeout)
7631 msg = result.fail_msg
7633 if self.op.ignore_consistency:
7634 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7635 " Proceeding anyway. Please make sure node"
7636 " %s is down. Error details: %s",
7637 instance.name, source_node, source_node, msg)
7639 raise errors.OpExecError("Could not shutdown instance %s on"
7641 (instance.name, source_node, msg))
7643 # create the target disks
7645 _CreateDisks(self, instance, target_node=target_node)
7646 except errors.OpExecError:
7647 self.LogWarning("Device creation failed, reverting...")
7649 _RemoveDisks(self, instance, target_node=target_node)
7651 self.cfg.ReleaseDRBDMinors(instance.name)
7654 cluster_name = self.cfg.GetClusterInfo().cluster_name
7657 # activate, get path, copy the data over
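# For each disk this assembles the newly created device on the target node
# (to obtain a device path) and then asks the source node to export the
# disk's contents to that path; any per-disk failure is collected in errs.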
7658 for idx, disk in enumerate(instance.disks):
7659 self.LogInfo("Copying data for disk %d", idx)
7660 result = self.rpc.call_blockdev_assemble(target_node, disk,
7661 instance.name, True, idx)
7663 self.LogWarning("Can't assemble newly created disk %d: %s",
7664 idx, result.fail_msg)
7665 errs.append(result.fail_msg)
7667 dev_path = result.payload
7668 result = self.rpc.call_blockdev_export(source_node, disk,
7669 target_node, dev_path,
7672 self.LogWarning("Can't copy data over for disk %d: %s",
7673 idx, result.fail_msg)
7674 errs.append(result.fail_msg)
7678 self.LogWarning("Some disks failed to copy, aborting")
7680 _RemoveDisks(self, instance, target_node=target_node)
7682 self.cfg.ReleaseDRBDMinors(instance.name)
7683 raise errors.OpExecError("Errors during disk copy: %s" %
7686 instance.primary_node = target_node
7687 self.cfg.Update(instance, feedback_fn)
7689 self.LogInfo("Removing the disks on the original node")
7690 _RemoveDisks(self, instance, target_node=source_node)
7692 # Only start the instance if it's marked as up
7693 if instance.admin_state == constants.ADMINST_UP:
7694 self.LogInfo("Starting instance %s on node %s",
7695 instance.name, target_node)
7697 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7698 ignore_secondaries=True)
7700 _ShutdownInstanceDisks(self, instance)
7701 raise errors.OpExecError("Can't activate the instance's disks")
7703 result = self.rpc.call_instance_start(target_node,
7704 (instance, None, None), False)
7705 msg = result.fail_msg
7707 _ShutdownInstanceDisks(self, instance)
7708 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7709 (instance.name, target_node, msg))
7712 class LUNodeMigrate(LogicalUnit):
7713 """Migrate all instances from a node.
7716 HPATH = "node-migrate"
7717 HTYPE = constants.HTYPE_NODE
7720 def CheckArguments(self):
7723 def ExpandNames(self):
7724 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7726 self.share_locks = _ShareAll()
7727 self.needed_locks = {
7728 locking.LEVEL_NODE: [self.op.node_name],
7731 def BuildHooksEnv(self):
7734 This runs on the master, the primary and all the secondaries.
7738 "NODE_NAME": self.op.node_name,
7739 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7742 def BuildHooksNodes(self):
7743 """Build hooks nodes.
7746 nl = [self.cfg.GetMasterNode()]
7749 def CheckPrereq(self):
7752 def Exec(self, feedback_fn):
7753 # Prepare jobs for migration instances
7754 allow_runtime_changes = self.op.allow_runtime_changes
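# One OpInstanceMigrate job is submitted per instance whose primary node is
# the evacuated node; the job IDs are handed back to the caller through
# ResultWithJobs below.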
7756 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7759 iallocator=self.op.iallocator,
7760 target_node=self.op.target_node,
7761 allow_runtime_changes=allow_runtime_changes,
7762 ignore_ipolicy=self.op.ignore_ipolicy)]
7763 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7766 # TODO: Run iallocator in this opcode and pass correct placement options to
7767 # OpInstanceMigrate. Since other jobs can modify the cluster between
7768 # running the iallocator and the actual migration, a good consistency model
7769 # will have to be found.
7771 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7772 frozenset([self.op.node_name]))
7774 return ResultWithJobs(jobs)
7777 class TLMigrateInstance(Tasklet):
7778 """Tasklet class for instance migration.
7781 @ivar live: whether the migration will be done live or non-live;
7782 this variable is initialized only after CheckPrereq has run
7783 @type cleanup: boolean
7784 @ivar cleanup: Whether we clean up from a failed migration
7785 @type iallocator: string
7786 @ivar iallocator: The iallocator used to determine target_node
7787 @type target_node: string
7788 @ivar target_node: If given, the target_node to reallocate the instance to
7789 @type failover: boolean
7790 @ivar failover: Whether operation results in failover or migration
7791 @type fallback: boolean
7792 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7794 @type ignore_consistency: boolean
7795 @ivar ignore_consistency: Whether we should ignore consistency between source and target nodes
7797 @type shutdown_timeout: int
7798 @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
7799 @type ignore_ipolicy: bool
7800 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7805 _MIGRATION_POLL_INTERVAL = 1 # seconds
7806 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7808 def __init__(self, lu, instance_name, cleanup=False,
7809 failover=False, fallback=False,
7810 ignore_consistency=False,
7811 allow_runtime_changes=True,
7812 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7813 ignore_ipolicy=False):
7814 """Initializes this class.
7817 Tasklet.__init__(self, lu)
7820 self.instance_name = instance_name
7821 self.cleanup = cleanup
7822 self.live = False # will be overridden later
7823 self.failover = failover
7824 self.fallback = fallback
7825 self.ignore_consistency = ignore_consistency
7826 self.shutdown_timeout = shutdown_timeout
7827 self.ignore_ipolicy = ignore_ipolicy
7828 self.allow_runtime_changes = allow_runtime_changes
7830 def CheckPrereq(self):
7831 """Check prerequisites.
7833 This checks that the instance is in the cluster.
7836 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7837 instance = self.cfg.GetInstanceInfo(instance_name)
7838 assert instance is not None
7839 self.instance = instance
7840 cluster = self.cfg.GetClusterInfo()
7842 if (not self.cleanup and
7843 not instance.admin_state == constants.ADMINST_UP and
7844 not self.failover and self.fallback):
7845 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7846 " switching to failover")
7847 self.failover = True
7849 if instance.disk_template not in constants.DTS_MIRRORED:
7854 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7855 " %s" % (instance.disk_template, text),
7858 if instance.disk_template in constants.DTS_EXT_MIRROR:
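# Externally mirrored instances can be placed on any node, so either an
# iallocator or an explicit target node is required here; internally
# mirrored (DRBD) instances handled below can only go to their secondary.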
7859 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7861 if self.lu.op.iallocator:
7862 self._RunAllocator()
7864 # We set self.target_node as it is required by
7866 self.target_node = self.lu.op.target_node
7868 # Check that the target node is correct in terms of instance policy
7869 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7870 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7871 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7872 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7873 ignore=self.ignore_ipolicy)
7875 # self.target_node is already populated, either directly or by the iallocator run
7877 target_node = self.target_node
7878 if self.target_node == instance.primary_node:
7879 raise errors.OpPrereqError("Cannot migrate instance %s"
7880 " to its primary (%s)" %
7881 (instance.name, instance.primary_node))
7883 if len(self.lu.tasklets) == 1:
7884 # It is safe to release locks only when we're the only tasklet
7886 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7887 keep=[instance.primary_node, self.target_node])
7890 secondary_nodes = instance.secondary_nodes
7891 if not secondary_nodes:
7892 raise errors.ConfigurationError("No secondary node but using"
7893 " %s disk template" %
7894 instance.disk_template)
7895 target_node = secondary_nodes[0]
7896 if self.lu.op.iallocator or (self.lu.op.target_node and
7897 self.lu.op.target_node != target_node):
7899 text = "failed over"
7902 raise errors.OpPrereqError("Instances with disk template %s cannot"
7903 " be %s to arbitrary nodes"
7904 " (neither an iallocator nor a target"
7905 " node can be passed)" %
7906 (instance.disk_template, text),
7908 nodeinfo = self.cfg.GetNodeInfo(target_node)
7909 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7910 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7911 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7912 ignore=self.ignore_ipolicy)
7914 i_be = cluster.FillBE(instance)
7916 # check memory requirements on the target node
7917 if (not self.cleanup and
7918 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7919 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7920 "migrating instance %s" %
7922 i_be[constants.BE_MINMEM],
7923 instance.hypervisor)
7925 self.lu.LogInfo("Not checking memory on the secondary node as"
7926 " instance will not be started")
7928 # check if failover must be forced instead of migration
7929 if (not self.cleanup and not self.failover and
7930 i_be[constants.BE_ALWAYS_FAILOVER]):
7932 self.lu.LogInfo("Instance configured to always failover; fallback"
7934 self.failover = True
7936 raise errors.OpPrereqError("This instance has been configured to"
7937 " always failover, please allow failover",
7940 # check bridge existence
7941 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7943 if not self.cleanup:
7944 _CheckNodeNotDrained(self.lu, target_node)
7945 if not self.failover:
7946 result = self.rpc.call_instance_migratable(instance.primary_node,
7948 if result.fail_msg and self.fallback:
7949 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7951 self.failover = True
7953 result.Raise("Can't migrate, please use failover",
7954 prereq=True, ecode=errors.ECODE_STATE)
7956 assert not (self.failover and self.cleanup)
7958 if not self.failover:
7959 if self.lu.op.live is not None and self.lu.op.mode is not None:
7960 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7961 " parameters are accepted",
7963 if self.lu.op.live is not None:
7965 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7967 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7968 # reset the 'live' parameter to None so that repeated
7969 # invocations of CheckPrereq do not raise an exception
7970 self.lu.op.live = None
7971 elif self.lu.op.mode is None:
7972 # read the default value from the hypervisor
7973 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7974 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7976 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7978 # Failover is never live
7981 if not (self.failover or self.cleanup):
7982 remote_info = self.rpc.call_instance_info(instance.primary_node,
7984 instance.hypervisor)
7985 remote_info.Raise("Error checking instance on node %s" %
7986 instance.primary_node)
7987 instance_running = bool(remote_info.payload)
7988 if instance_running:
7989 self.current_mem = int(remote_info.payload["memory"])
7991 def _RunAllocator(self):
7992 """Run the allocator based on input opcode.
7995 # FIXME: add a self.ignore_ipolicy option
7996 ial = IAllocator(self.cfg, self.rpc,
7997 mode=constants.IALLOCATOR_MODE_RELOC,
7998 name=self.instance_name,
7999 # TODO See why hail breaks with a single node below
8000 relocate_from=[self.instance.primary_node,
8001 self.instance.primary_node],
8004 ial.Run(self.lu.op.iallocator)
8007 raise errors.OpPrereqError("Can't compute nodes using"
8008 " iallocator '%s': %s" %
8009 (self.lu.op.iallocator, ial.info),
8011 if len(ial.result) != ial.required_nodes:
8012 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8013 " of nodes (%s), required %s" %
8014 (self.lu.op.iallocator, len(ial.result),
8015 ial.required_nodes), errors.ECODE_FAULT)
8016 self.target_node = ial.result[0]
8017 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8018 self.instance_name, self.lu.op.iallocator,
8019 utils.CommaJoin(ial.result))
8021 def _WaitUntilSync(self):
8022 """Poll with custom rpc for disk sync.
8024 This uses our own step-based rpc call.
8027 self.feedback_fn("* wait until resync is done")
8031 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8033 self.instance.disks)
8035 for node, nres in result.items():
8036 nres.Raise("Cannot resync disks on node %s" % node)
8037 node_done, node_percent = nres.payload
8038 all_done = all_done and node_done
8039 if node_percent is not None:
8040 min_percent = min(min_percent, node_percent)
8042 if min_percent < 100:
8043 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8046 def _EnsureSecondary(self, node):
8047 """Demote a node to secondary.
8050 self.feedback_fn("* switching node %s to secondary mode" % node)
8052 for dev in self.instance.disks:
8053 self.cfg.SetDiskID(dev, node)
8055 result = self.rpc.call_blockdev_close(node, self.instance.name,
8056 self.instance.disks)
8057 result.Raise("Cannot change disk to secondary on node %s" % node)
8059 def _GoStandalone(self):
8060 """Disconnect from the network.
8063 self.feedback_fn("* changing into standalone mode")
8064 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8065 self.instance.disks)
8066 for node, nres in result.items():
8067 nres.Raise("Cannot disconnect disks node %s" % node)
8069 def _GoReconnect(self, multimaster):
8070 """Reconnect to the network.
8076 msg = "single-master"
8077 self.feedback_fn("* changing disks into %s mode" % msg)
8078 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8079 self.instance.disks,
8080 self.instance.name, multimaster)
8081 for node, nres in result.items():
8082 nres.Raise("Cannot change disks config on node %s" % node)
8084 def _ExecCleanup(self):
8085 """Try to cleanup after a failed migration.
8087 The cleanup is done by:
8088 - check that the instance is running only on one node
8089 (and update the config if needed)
8090 - change disks on its secondary node to secondary
8091 - wait until disks are fully synchronized
8092 - disconnect from the network
8093 - change disks into single-master mode
8094 - wait again until disks are fully synchronized
8097 instance = self.instance
8098 target_node = self.target_node
8099 source_node = self.source_node
8101 # check running on only one node
8102 self.feedback_fn("* checking where the instance actually runs"
8103 " (if this hangs, the hypervisor might be in"
8105 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8106 for node, result in ins_l.items():
8107 result.Raise("Can't contact node %s" % node)
8109 runningon_source = instance.name in ins_l[source_node].payload
8110 runningon_target = instance.name in ins_l[target_node].payload
8112 if runningon_source and runningon_target:
8113 raise errors.OpExecError("Instance seems to be running on two nodes,"
8114 " or the hypervisor is confused; you will have"
8115 " to ensure manually that it runs only on one"
8116 " and restart this operation")
8118 if not (runningon_source or runningon_target):
8119 raise errors.OpExecError("Instance does not seem to be running at all;"
8120 " in this case it's safer to repair by"
8121 " running 'gnt-instance stop' to ensure disk"
8122 " shutdown, and then restarting it")
8124 if runningon_target:
8125 # the migration has actually succeeded, we need to update the config
8126 self.feedback_fn("* instance running on secondary node (%s),"
8127 " updating config" % target_node)
8128 instance.primary_node = target_node
8129 self.cfg.Update(instance, self.feedback_fn)
8130 demoted_node = source_node
8132 self.feedback_fn("* instance confirmed to be running on its"
8133 " primary node (%s)" % source_node)
8134 demoted_node = target_node
8136 if instance.disk_template in constants.DTS_INT_MIRROR:
8137 self._EnsureSecondary(demoted_node)
8139 self._WaitUntilSync()
8140 except errors.OpExecError:
8141 # we ignore errors here, since if the device is standalone, it
8142 # won't be able to sync
8144 self._GoStandalone()
8145 self._GoReconnect(False)
8146 self._WaitUntilSync()
8148 self.feedback_fn("* done")
8150 def _RevertDiskStatus(self):
8151 """Try to revert the disk status after a failed migration.
8154 target_node = self.target_node
8155 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8159 self._EnsureSecondary(target_node)
8160 self._GoStandalone()
8161 self._GoReconnect(False)
8162 self._WaitUntilSync()
8163 except errors.OpExecError, err:
8164 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8165 " please try to recover the instance manually;"
8166 " error '%s'" % str(err))
8168 def _AbortMigration(self):
8169 """Call the hypervisor code to abort a started migration.
8172 instance = self.instance
8173 target_node = self.target_node
8174 source_node = self.source_node
8175 migration_info = self.migration_info
8177 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8181 abort_msg = abort_result.fail_msg
8183 logging.error("Aborting migration failed on target node %s: %s",
8184 target_node, abort_msg)
8185 # Don't raise an exception here, as we still have to try to revert the
8186 # disk status, even if this step failed.
8188 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8189 instance, False, self.live)
8190 abort_msg = abort_result.fail_msg
8192 logging.error("Aborting migration failed on source node %s: %s",
8193 source_node, abort_msg)
8195 def _ExecMigration(self):
8196 """Migrate an instance.
8198 The migrate is done by:
8199 - change the disks into dual-master mode
8200 - wait until disks are fully synchronized again
8201 - migrate the instance
8202 - change disks on the new secondary node (the old primary) to secondary
8203 - wait until disks are fully synchronized
8204 - change disks into single-master mode
8207 instance = self.instance
8208 target_node = self.target_node
8209 source_node = self.source_node
8211 # Check for hypervisor version mismatch and warn the user.
8212 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8213 None, [self.instance.hypervisor])
8214 for ninfo in nodeinfo.values():
8215 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8217 (_, _, (src_info, )) = nodeinfo[source_node].payload
8218 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8220 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8221 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8222 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8223 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8224 if src_version != dst_version:
8225 self.feedback_fn("* warning: hypervisor version mismatch between"
8226 " source (%s) and target (%s) node" %
8227 (src_version, dst_version))
8229 self.feedback_fn("* checking disk consistency between source and target")
8230 for dev in instance.disks:
8231 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8232 raise errors.OpExecError("Disk %s is degraded or not fully"
8233 " synchronized on target node,"
8234 " aborting migration" % dev.iv_name)
8236 if self.current_mem > self.tgt_free_mem:
8237 if not self.allow_runtime_changes:
8238 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8239 " free memory to fit instance %s on target"
8240 " node %s (have %dMB, need %dMB)" %
8241 (instance.name, target_node,
8242 self.tgt_free_mem, self.current_mem))
8243 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8244 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8247 rpcres.Raise("Cannot modify instance runtime memory")
8249 # First get the migration information from the remote node
8250 result = self.rpc.call_migration_info(source_node, instance)
8251 msg = result.fail_msg
8253 log_err = ("Failed fetching source migration information from %s: %s" %
8255 logging.error(log_err)
8256 raise errors.OpExecError(log_err)
8258 self.migration_info = migration_info = result.payload
8260 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8261 # Then switch the disks to master/master mode
8262 self._EnsureSecondary(target_node)
8263 self._GoStandalone()
8264 self._GoReconnect(True)
8265 self._WaitUntilSync()
8267 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8268 result = self.rpc.call_accept_instance(target_node,
8271 self.nodes_ip[target_node])
8273 msg = result.fail_msg
8275 logging.error("Instance pre-migration failed, trying to revert"
8276 " disk status: %s", msg)
8277 self.feedback_fn("Pre-migration failed, aborting")
8278 self._AbortMigration()
8279 self._RevertDiskStatus()
8280 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8281 (instance.name, msg))
8283 self.feedback_fn("* migrating instance to %s" % target_node)
8284 result = self.rpc.call_instance_migrate(source_node, instance,
8285 self.nodes_ip[target_node],
8287 msg = result.fail_msg
8289 logging.error("Instance migration failed, trying to revert"
8290 " disk status: %s", msg)
8291 self.feedback_fn("Migration failed, aborting")
8292 self._AbortMigration()
8293 self._RevertDiskStatus()
8294 raise errors.OpExecError("Could not migrate instance %s: %s" %
8295 (instance.name, msg))
8297 self.feedback_fn("* starting memory transfer")
8298 last_feedback = time.time()
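# Poll the hypervisor-reported migration status every
# _MIGRATION_POLL_INTERVAL seconds, emitting a progress line roughly every
# _MIGRATION_FEEDBACK_INTERVAL seconds, until the status leaves the
# 'active' state.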
8300 result = self.rpc.call_instance_get_migration_status(source_node,
8302 msg = result.fail_msg
8303 ms = result.payload # MigrationStatus instance
8304 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8305 logging.error("Instance migration failed, trying to revert"
8306 " disk status: %s", msg)
8307 self.feedback_fn("Migration failed, aborting")
8308 self._AbortMigration()
8309 self._RevertDiskStatus()
8310 raise errors.OpExecError("Could not migrate instance %s: %s" %
8311 (instance.name, msg))
8313 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8314 self.feedback_fn("* memory transfer complete")
8317 if (utils.TimeoutExpired(last_feedback,
8318 self._MIGRATION_FEEDBACK_INTERVAL) and
8319 ms.transferred_ram is not None):
8320 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8321 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8322 last_feedback = time.time()
8324 time.sleep(self._MIGRATION_POLL_INTERVAL)
8326 result = self.rpc.call_instance_finalize_migration_src(source_node,
8330 msg = result.fail_msg
8332 logging.error("Instance migration succeeded, but finalization failed"
8333 " on the source node: %s", msg)
8334 raise errors.OpExecError("Could not finalize instance migration: %s" %
8337 instance.primary_node = target_node
8339 # distribute new instance config to the other nodes
8340 self.cfg.Update(instance, self.feedback_fn)
8342 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8346 msg = result.fail_msg
8348 logging.error("Instance migration succeeded, but finalization failed"
8349 " on the target node: %s", msg)
8350 raise errors.OpExecError("Could not finalize instance migration: %s" %
8353 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8354 self._EnsureSecondary(source_node)
8355 self._WaitUntilSync()
8356 self._GoStandalone()
8357 self._GoReconnect(False)
8358 self._WaitUntilSync()
8360 # If the instance's disk template is `rbd' and there was a successful
8361 # migration, unmap the device from the source node.
8362 if self.instance.disk_template == constants.DT_RBD:
8363 disks = _ExpandCheckDisks(instance, instance.disks)
8364 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8366 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8367 msg = result.fail_msg
8369 logging.error("Migration was successful, but couldn't unmap the"
8370 " block device %s on source node %s: %s",
8371 disk.iv_name, source_node, msg)
8372 logging.error("You need to unmap the device %s manually on %s",
8373 disk.iv_name, source_node)
8375 self.feedback_fn("* done")
8377 def _ExecFailover(self):
8378 """Failover an instance.
8380 The failover is done by shutting it down on its present node and
8381 starting it on the secondary.
8384 instance = self.instance
8385 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8387 source_node = instance.primary_node
8388 target_node = self.target_node
8390 if instance.admin_state == constants.ADMINST_UP:
8391 self.feedback_fn("* checking disk consistency between source and target")
8392 for dev in instance.disks:
8393 # for drbd, these are drbd over lvm
8394 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8395 if primary_node.offline:
8396 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8398 (primary_node.name, dev.iv_name, target_node))
8399 elif not self.ignore_consistency:
8400 raise errors.OpExecError("Disk %s is degraded on target node,"
8401 " aborting failover" % dev.iv_name)
8403 self.feedback_fn("* not checking disk consistency as instance is not"
8406 self.feedback_fn("* shutting down instance on source node")
8407 logging.info("Shutting down instance %s on node %s",
8408 instance.name, source_node)
8410 result = self.rpc.call_instance_shutdown(source_node, instance,
8411 self.shutdown_timeout)
8412 msg = result.fail_msg
8414 if self.ignore_consistency or primary_node.offline:
8415 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8416 " proceeding anyway; please make sure node"
8417 " %s is down; error details: %s",
8418 instance.name, source_node, source_node, msg)
8420 raise errors.OpExecError("Could not shutdown instance %s on"
8422 (instance.name, source_node, msg))
8424 self.feedback_fn("* deactivating the instance's disks on source node")
8425 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8426 raise errors.OpExecError("Can't shut down the instance's disks")
8428 instance.primary_node = target_node
8429 # distribute new instance config to the other nodes
8430 self.cfg.Update(instance, self.feedback_fn)
8432 # Only start the instance if it's marked as up
8433 if instance.admin_state == constants.ADMINST_UP:
8434 self.feedback_fn("* activating the instance's disks on target node %s" %
8436 logging.info("Starting instance %s on node %s",
8437 instance.name, target_node)
8439 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8440 ignore_secondaries=True)
8442 _ShutdownInstanceDisks(self.lu, instance)
8443 raise errors.OpExecError("Can't activate the instance's disks")
8445 self.feedback_fn("* starting the instance on the target node %s" %
8447 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8449 msg = result.fail_msg
8451 _ShutdownInstanceDisks(self.lu, instance)
8452 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8453 (instance.name, target_node, msg))
8455 def Exec(self, feedback_fn):
8456 """Perform the migration.
8459 self.feedback_fn = feedback_fn
8460 self.source_node = self.instance.primary_node
8462 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8463 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8464 self.target_node = self.instance.secondary_nodes[0]
8465 # Otherwise self.target_node has been populated either
8466 # directly, or through an iallocator.
8468 self.all_nodes = [self.source_node, self.target_node]
8469 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8470 in self.cfg.GetMultiNodeInfo(self.all_nodes))
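# The replication and migration RPCs address the nodes by their secondary
# IPs, hence the name -> secondary_ip map built above.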
8473 feedback_fn("Failover instance %s" % self.instance.name)
8474 self._ExecFailover()
8476 feedback_fn("Migrating instance %s" % self.instance.name)
8479 return self._ExecCleanup()
8481 return self._ExecMigration()
8484 def _CreateBlockDev(lu, node, instance, device, force_create,
8486 """Create a tree of block devices on a given node.
8488 If this device type has to be created on secondaries, create it and all its children.
8491 If not, just recurse to children keeping the same 'force' value.
8493 @param lu: the lu on whose behalf we execute
8494 @param node: the node on which to create the device
8495 @type instance: L{objects.Instance}
8496 @param instance: the instance which owns the device
8497 @type device: L{objects.Disk}
8498 @param device: the device to create
8499 @type force_create: boolean
8500 @param force_create: whether to force creation of this device; this
8501 will be changed to True whenever we find a device which has the
8502 CreateOnSecondary() attribute
8503 @param info: the extra 'metadata' we should attach to the device
8504 (this will be represented as a LVM tag)
8505 @type force_open: boolean
8506 @param force_open: this parameter will be passed to the
8507 L{backend.BlockdevCreate} function where it specifies
8508 whether we run on the primary node or not, and it affects both
8509 the child assembly and the device's own Open() execution
8512 if device.CreateOnSecondary():
8516 for child in device.children:
8517 _CreateBlockDev(lu, node, instance, child, force_create,
8520 if not force_create:
8523 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8526 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8527 """Create a single block device on a given node.
8529 This will not recurse over children of the device, so they must be created in advance
8532 @param lu: the lu on whose behalf we execute
8533 @param node: the node on which to create the device
8534 @type instance: L{objects.Instance}
8535 @param instance: the instance which owns the device
8536 @type device: L{objects.Disk}
8537 @param device: the device to create
8538 @param info: the extra 'metadata' we should attach to the device
8539 (this will be represented as a LVM tag)
8540 @type force_open: boolean
8541 @param force_open: this parameter will be passed to the
8542 L{backend.BlockdevCreate} function where it specifies
8543 whether we run on the primary node or not, and it affects both
8544 the child assembly and the device's own Open() execution
8547 lu.cfg.SetDiskID(device, node)
8548 result = lu.rpc.call_blockdev_create(node, device, device.size,
8549 instance.name, force_open, info)
8550 result.Raise("Can't create block device %s on"
8551 " node %s for instance %s" % (device, node, instance.name))
8552 if device.physical_id is None:
8553 device.physical_id = result.payload
8556 def _GenerateUniqueNames(lu, exts):
8557 """Generate a suitable LV name.
8559 This will generate a logical volume name for the given instance.
8564 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8565 results.append("%s%s" % (new_id, val))
8569 def _ComputeLDParams(disk_template, disk_params):
8570 """Computes Logical Disk parameters from Disk Template parameters.
8572 @type disk_template: string
8573 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8574 @type disk_params: dict
8575 @param disk_params: disk template parameters; dict(template_name -> parameters)
8577 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8578 contains the LD parameters of the node. The tree is flattened in-order.
8581 if disk_template not in constants.DISK_TEMPLATES:
8582 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8585 dt_params = disk_params[disk_template]
8586 if disk_template == constants.DT_DRBD8:
8588 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8589 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8590 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8591 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8592 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8593 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8594 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8595 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8596 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8597 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8598 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8599 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8603 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8606 result.append(drbd_params)
8610 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8613 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8615 result.append(data_params)
8619 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8622 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8624 result.append(meta_params)
8626 elif (disk_template == constants.DT_FILE or
8627 disk_template == constants.DT_SHARED_FILE):
8628 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8630 elif disk_template == constants.DT_PLAIN:
8632 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8635 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8637 result.append(params)
8639 elif disk_template == constants.DT_BLOCK:
8640 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8642 elif disk_template == constants.DT_RBD:
8644 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8647 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8649 result.append(params)
8654 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8655 iv_name, p_minor, s_minor, drbd_params, data_params,
8657 """Generate a drbd8 device complete with its children.
8660 assert len(vgnames) == len(names) == 2
8661 port = lu.cfg.AllocatePort()
8662 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
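# The resulting device is a small tree: a data LV and a DRBD_META_SIZE
# metadata LV as children of a DRBD8 device whose logical_id ties together
# both nodes, the allocated port, the two minors and the shared secret.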
8664 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8665 logical_id=(vgnames[0], names[0]),
8667 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8668 logical_id=(vgnames[1], names[1]),
8670 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8671 logical_id=(primary, secondary, port,
8674 children=[dev_data, dev_meta],
8675 iv_name=iv_name, params=drbd_params)
8679 def _GenerateDiskTemplate(lu, template_name,
8680 instance_name, primary_node,
8681 secondary_nodes, disk_info,
8682 file_storage_dir, file_driver,
8683 base_index, feedback_fn, disk_params):
8684 """Generate the entire disk layout for a given template type.
8687 #TODO: compute space requirements
8689 vgname = lu.cfg.GetVGName()
8690 disk_count = len(disk_info)
8692 ld_params = _ComputeLDParams(template_name, disk_params)
8693 if template_name == constants.DT_DISKLESS:
8695 elif template_name == constants.DT_PLAIN:
8697 raise errors.ProgrammerError("Wrong template configuration")
8699 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8700 for i in range(disk_count)])
8701 for idx, disk in enumerate(disk_info):
8702 disk_index = idx + base_index
8703 vg = disk.get(constants.IDISK_VG, vgname)
8704 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8705 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8706 size=disk[constants.IDISK_SIZE],
8707 logical_id=(vg, names[idx]),
8708 iv_name="disk/%d" % disk_index,
8709 mode=disk[constants.IDISK_MODE],
8710 params=ld_params[0])
8711 disks.append(disk_dev)
8712 elif template_name == constants.DT_DRBD8:
8713 drbd_params, data_params, meta_params = ld_params
8714 if len(secondary_nodes) != 1:
8715 raise errors.ProgrammerError("Wrong template configuration")
8716 remote_node = secondary_nodes[0]
8717 minors = lu.cfg.AllocateDRBDMinor(
8718 [primary_node, remote_node] * len(disk_info), instance_name)
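# Minors are allocated pairwise ([primary, secondary] repeated per disk),
# so minors[2 * idx] / minors[2 * idx + 1] below are the primary and
# secondary minors of disk number idx.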
8721 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8722 for i in range(disk_count)]):
8723 names.append(lv_prefix + "_data")
8724 names.append(lv_prefix + "_meta")
8725 for idx, disk in enumerate(disk_info):
8726 disk_index = idx + base_index
8727 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8728 data_vg = disk.get(constants.IDISK_VG, vgname)
8729 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8730 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8731 disk[constants.IDISK_SIZE],
8733 names[idx * 2:idx * 2 + 2],
8734 "disk/%d" % disk_index,
8735 minors[idx * 2], minors[idx * 2 + 1],
8736 drbd_params, data_params, meta_params)
8737 disk_dev.mode = disk[constants.IDISK_MODE]
8738 disks.append(disk_dev)
8739 elif template_name == constants.DT_FILE:
8741 raise errors.ProgrammerError("Wrong template configuration")
8743 opcodes.RequireFileStorage()
8745 for idx, disk in enumerate(disk_info):
8746 disk_index = idx + base_index
8747 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8748 size=disk[constants.IDISK_SIZE],
8749 iv_name="disk/%d" % disk_index,
8750 logical_id=(file_driver,
8751 "%s/disk%d" % (file_storage_dir,
8753 mode=disk[constants.IDISK_MODE],
8754 params=ld_params[0])
8755 disks.append(disk_dev)
8756 elif template_name == constants.DT_SHARED_FILE:
8758 raise errors.ProgrammerError("Wrong template configuration")
8760 opcodes.RequireSharedFileStorage()
8762 for idx, disk in enumerate(disk_info):
8763 disk_index = idx + base_index
8764 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8765 size=disk[constants.IDISK_SIZE],
8766 iv_name="disk/%d" % disk_index,
8767 logical_id=(file_driver,
8768 "%s/disk%d" % (file_storage_dir,
8770 mode=disk[constants.IDISK_MODE],
8771 params=ld_params[0])
8772 disks.append(disk_dev)
8773 elif template_name == constants.DT_BLOCK:
8775 raise errors.ProgrammerError("Wrong template configuration")
8777 for idx, disk in enumerate(disk_info):
8778 disk_index = idx + base_index
8779 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8780 size=disk[constants.IDISK_SIZE],
8781 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8782 disk[constants.IDISK_ADOPT]),
8783 iv_name="disk/%d" % disk_index,
8784 mode=disk[constants.IDISK_MODE],
8785 params=ld_params[0])
8786 disks.append(disk_dev)
8787 elif template_name == constants.DT_RBD:
8789 raise errors.ProgrammerError("Wrong template configuration")
8791 names = _GenerateUniqueNames(lu, [".rbd.disk%d" % (base_index + i)
8792 for i in range(disk_count)])
8794 for idx, disk in enumerate(disk_info):
8795 disk_index = idx + base_index
8796 disk_dev = objects.Disk(dev_type=constants.LD_RBD,
8797 size=disk[constants.IDISK_SIZE],
8798 logical_id=("rbd", names[idx]),
8799 iv_name="disk/%d" % disk_index,
8800 mode=disk[constants.IDISK_MODE],
8801 params=ld_params[0])
8802 disks.append(disk_dev)
8805 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8809 def _GetInstanceInfoText(instance):
8810 """Compute that text that should be added to the disk's metadata.
8813 return "originstname+%s" % instance.name
8816 def _CalcEta(time_taken, written, total_size):
8817 """Calculates the ETA based on size written and total size.
8819 @param time_taken: The time taken so far
8820 @param written: amount written so far
8821 @param total_size: The total size of data to be written
8822 @return: The remaining time in seconds
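For example, if 512 MiB out of 1024 MiB were written in 30 seconds, the
average is 30/512 s per MiB and roughly another 30 seconds remain.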
8825 avg_time = time_taken / float(written)
8826 return (total_size - written) * avg_time
8829 def _WipeDisks(lu, instance):
8830 """Wipes instance disks.
8832 @type lu: L{LogicalUnit}
8833 @param lu: the logical unit on whose behalf we execute
8834 @type instance: L{objects.Instance}
8835 @param instance: the instance whose disks we should wipe
8836 @return: the success of the wipe
8839 node = instance.primary_node
8841 for device in instance.disks:
8842 lu.cfg.SetDiskID(device, node)
8844 logging.info("Pause sync of instance %s disks", instance.name)
8845 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8847 for idx, success in enumerate(result.payload):
8849 logging.warn("pause-sync of instance %s for disks %d failed",
8853 for idx, device in enumerate(instance.disks):
8854 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8855 # but at most MAX_WIPE_CHUNK
8856 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8857 constants.MIN_WIPE_CHUNK_PERCENT)
8858 # we _must_ make this an int, otherwise rounding errors will occur
8860 wipe_chunk_size = int(wipe_chunk_size)
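# Illustrative example (assuming a 10% minimum chunk and a 1024 MiB
# MAX_WIPE_CHUNK, which are only assumed values here): a 20480 MiB disk
# would be wiped in 1024 MiB chunks, a 5000 MiB disk in 500 MiB chunks.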
8862 lu.LogInfo("* Wiping disk %d", idx)
8863 logging.info("Wiping disk %d for instance %s, node %s using"
8864 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8869 start_time = time.time()
8871 while offset < size:
8872 wipe_size = min(wipe_chunk_size, size - offset)
8873 logging.debug("Wiping disk %d, offset %s, chunk %s",
8874 idx, offset, wipe_size)
8875 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8876 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8877 (idx, offset, wipe_size))
8880 if now - last_output >= 60:
8881 eta = _CalcEta(now - start_time, offset, size)
8882 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8883 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8886 logging.info("Resume sync of instance %s disks", instance.name)
8888 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8890 for idx, success in enumerate(result.payload):
8892 lu.LogWarning("Resume sync of disk %d failed, please have a"
8893 " look at the status and troubleshoot the issue", idx)
8894 logging.warn("resume-sync of instance %s for disks %d failed",
8898 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8899 """Create all disks for an instance.
8901 This abstracts away some work from AddInstance.
8903 @type lu: L{LogicalUnit}
8904 @param lu: the logical unit on whose behalf we execute
8905 @type instance: L{objects.Instance}
8906 @param instance: the instance whose disks we should create
8908 @param to_skip: list of indices to skip
8909 @type target_node: string
8910 @param target_node: if passed, overrides the target node for creation
8912 @return: the success of the creation
8915 info = _GetInstanceInfoText(instance)
8916 if target_node is None:
8917 pnode = instance.primary_node
8918 all_nodes = instance.all_nodes
8923 if instance.disk_template in constants.DTS_FILEBASED:
8924 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8925 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8927 result.Raise("Failed to create directory '%s' on"
8928 " node %s" % (file_storage_dir, pnode))
8930 # Note: this needs to be kept in sync with adding of disks in
8931 # LUInstanceSetParams
8932 for idx, device in enumerate(instance.disks):
8933 if to_skip and idx in to_skip:
8935 logging.info("Creating volume %s for instance %s",
8936 device.iv_name, instance.name)
8938 for node in all_nodes:
8939 f_create = node == pnode
8940 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8943 def _RemoveDisks(lu, instance, target_node=None):
8944 """Remove all disks for an instance.
8946 This abstracts away some work from `AddInstance()` and
8947 `RemoveInstance()`. Note that in case some of the devices couldn't
8948 be removed, the removal will continue with the other ones (compare
8949 with `_CreateDisks()`).
8951 @type lu: L{LogicalUnit}
8952 @param lu: the logical unit on whose behalf we execute
8953 @type instance: L{objects.Instance}
8954 @param instance: the instance whose disks we should remove
8955 @type target_node: string
8956 @param target_node: used to override the node on which to remove the disks
8958 @return: the success of the removal
8961 logging.info("Removing block devices for instance %s", instance.name)
8964 for device in instance.disks:
8966 edata = [(target_node, device)]
8968 edata = device.ComputeNodeTree(instance.primary_node)
8969 for node, disk in edata:
8970 lu.cfg.SetDiskID(disk, node)
8971 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8973 lu.LogWarning("Could not remove block device %s on node %s,"
8974 " continuing anyway: %s", device.iv_name, node, msg)
8977 # if this is a DRBD disk, return its port to the pool
8978 if device.dev_type in constants.LDS_DRBD:
8979 tcp_port = device.logical_id[2]
8980 lu.cfg.AddTcpUdpPort(tcp_port)
8982 if instance.disk_template == constants.DT_FILE:
8983 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8987 tgt = instance.primary_node
8988 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8990 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8991 file_storage_dir, instance.primary_node, result.fail_msg)
8997 def _ComputeDiskSizePerVG(disk_template, disks):
8998 """Compute disk size requirements in the volume group
9001 def _compute(disks, payload):
9002 """Universal algorithm.
9007 vgs[disk[constants.IDISK_VG]] = \
9008 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9012 # Required free disk space as a function of disk and swap space
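# For example, two 1024 MiB DRBD8 disks in volume group "xenvg" would
# require 2 * (1024 + 128) = 2304 MiB there, i.e. a result of
# {"xenvg": 2304}.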
9014 constants.DT_DISKLESS: {},
9015 constants.DT_PLAIN: _compute(disks, 0),
9016 # 128 MB are added for drbd metadata for each disk
9017 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9018 constants.DT_FILE: {},
9019 constants.DT_SHARED_FILE: {},
9022 if disk_template not in req_size_dict:
9023 raise errors.ProgrammerError("Disk template '%s' size requirement"
9024 " is unknown" % disk_template)
9026 return req_size_dict[disk_template]
9029 def _ComputeDiskSize(disk_template, disks):
9030 """Compute disk size requirements in the volume group
9033 # Required free disk space as a function of disk and swap space
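# For example, DT_PLAIN disks of 512 and 1024 MiB need 1536 MiB in total,
# while DT_DRBD8 adds 128 MiB of metadata per disk for a total of 1792 MiB.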
9035 constants.DT_DISKLESS: None,
9036 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9037 # 128 MB are added for drbd metadata for each disk
9039 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9040 constants.DT_FILE: None,
9041 constants.DT_SHARED_FILE: 0,
9042 constants.DT_BLOCK: 0,
9043 constants.DT_RBD: 0,
9046 if disk_template not in req_size_dict:
9047 raise errors.ProgrammerError("Disk template '%s' size requirement"
9048 " is unknown" % disk_template)
9050 return req_size_dict[disk_template]
9053 def _FilterVmNodes(lu, nodenames):
9054 """Filters out non-vm_capable nodes from a list.
9056 @type lu: L{LogicalUnit}
9057 @param lu: the logical unit for which we check
9058 @type nodenames: list
9059 @param nodenames: the list of nodes on which we should check
9061 @return: the list of vm-capable nodes
9064 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9065 return [name for name in nodenames if name not in vm_nodes]
9068 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9069 """Hypervisor parameter validation.
9071 This function abstracts the hypervisor parameter validation to be
9072 used in both instance create and instance modify.
9074 @type lu: L{LogicalUnit}
9075 @param lu: the logical unit for which we check
9076 @type nodenames: list
9077 @param nodenames: the list of nodes on which we should check
9078 @type hvname: string
9079 @param hvname: the name of the hypervisor we should use
9080 @type hvparams: dict
9081 @param hvparams: the parameters which we need to check
9082 @raise errors.OpPrereqError: if the parameters are not valid
9085 nodenames = _FilterVmNodes(lu, nodenames)
9087 cluster = lu.cfg.GetClusterInfo()
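# Merge the cluster-wide defaults for this hypervisor with the parameters
# being checked, so every node validates the full effective parameter set
# rather than just the delta.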
9088 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9090 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9091 for node in nodenames:
9095 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9098 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9099 """OS parameters validation.
9101 @type lu: L{LogicalUnit}
9102 @param lu: the logical unit for which we check
9103 @type required: boolean
9104 @param required: whether the validation should fail if the OS is not found
9106 @type nodenames: list
9107 @param nodenames: the list of nodes on which we should check
9108 @type osname: string
9109 @param osname: the name of the OS we should use
9110 @type osparams: dict
9111 @param osparams: the parameters which we need to check
9112 @raise errors.OpPrereqError: if the parameters are not valid
9115 nodenames = _FilterVmNodes(lu, nodenames)
9116 result = lu.rpc.call_os_validate(nodenames, required, osname,
9117 [constants.OS_VALIDATE_PARAMETERS],
9119 for node, nres in result.items():
9120 # we don't check for offline cases since this should be run only
9121 # against the master node and/or an instance's nodes
9122 nres.Raise("OS Parameters validation failed on node %s" % node)
9123 if not nres.payload:
9124 lu.LogInfo("OS %s not found on node %s, validation skipped",
9128 class LUInstanceCreate(LogicalUnit):
9129 """Create an instance.
9132 HPATH = "instance-add"
9133 HTYPE = constants.HTYPE_INSTANCE
9136 def CheckArguments(self):
9140 # do not require name_check to ease forward/backward compatibility
9142 if self.op.no_install and self.op.start:
9143 self.LogInfo("No-installation mode selected, disabling startup")
9144 self.op.start = False
9145 # validate/normalize the instance name
9146 self.op.instance_name = \
9147 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9149 if self.op.ip_check and not self.op.name_check:
9150 # TODO: make the ip check more flexible and not depend on the name check
9151 raise errors.OpPrereqError("Cannot do IP address check without a name"
9152 " check", errors.ECODE_INVAL)
9154 # check nics' parameter names
9155 for nic in self.op.nics:
9156 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9158 # check disks. parameter names and consistent adopt/no-adopt strategy
9159 has_adopt = has_no_adopt = False
9160 for disk in self.op.disks:
9161 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9162 if constants.IDISK_ADOPT in disk:
9166 if has_adopt and has_no_adopt:
9167 raise errors.OpPrereqError("Either all disks are adopted or none is",
9170 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9171 raise errors.OpPrereqError("Disk adoption is not supported for the"
9172 " '%s' disk template" %
9173 self.op.disk_template,
9175 if self.op.iallocator is not None:
9176 raise errors.OpPrereqError("Disk adoption not allowed with an"
9177 " iallocator script", errors.ECODE_INVAL)
9178 if self.op.mode == constants.INSTANCE_IMPORT:
9179 raise errors.OpPrereqError("Disk adoption not allowed for"
9180 " instance import", errors.ECODE_INVAL)
9182 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9183 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9184 " but no 'adopt' parameter given" %
9185 self.op.disk_template,
9188 self.adopt_disks = has_adopt
9190 # instance name verification
9191 if self.op.name_check:
9192 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9193 self.op.instance_name = self.hostname1.name
9194 # used in CheckPrereq for ip ping check
9195 self.check_ip = self.hostname1.ip
9197 self.check_ip = None
9199 # file storage checks
9200 if (self.op.file_driver and
9201 not self.op.file_driver in constants.FILE_DRIVER):
9202 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9203 self.op.file_driver, errors.ECODE_INVAL)
9205 if self.op.disk_template == constants.DT_FILE:
9206 opcodes.RequireFileStorage()
9207 elif self.op.disk_template == constants.DT_SHARED_FILE:
9208 opcodes.RequireSharedFileStorage()
9210 ### Node/iallocator related checks
9211 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9213 if self.op.pnode is not None:
9214 if self.op.disk_template in constants.DTS_INT_MIRROR:
9215 if self.op.snode is None:
9216 raise errors.OpPrereqError("The networked disk templates need"
9217 " a mirror node", errors.ECODE_INVAL)
9219 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9221 self.op.snode = None
9223 self._cds = _GetClusterDomainSecret()
9225 if self.op.mode == constants.INSTANCE_IMPORT:
9226 # On import force_variant must be True, because if we forced it at
9227 # initial install, our only chance when importing it back is that it
9229 self.op.force_variant = True
9231 if self.op.no_install:
9232 self.LogInfo("No-installation mode has no effect during import")
9234 elif self.op.mode == constants.INSTANCE_CREATE:
9235 if self.op.os_type is None:
9236 raise errors.OpPrereqError("No guest OS specified",
9238 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9239 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9240 " installation" % self.op.os_type,
9242 if self.op.disk_template is None:
9243 raise errors.OpPrereqError("No disk template specified",
9246 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9247 # Check handshake to ensure both clusters have the same domain secret
9248 src_handshake = self.op.source_handshake
9249 if not src_handshake:
9250 raise errors.OpPrereqError("Missing source handshake",
9253 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9256 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9259 # Load and check source CA
9260 self.source_x509_ca_pem = self.op.source_x509_ca
9261 if not self.source_x509_ca_pem:
9262 raise errors.OpPrereqError("Missing source X509 CA",
9266 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9268 except OpenSSL.crypto.Error, err:
9269 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9270 (err, ), errors.ECODE_INVAL)
9272 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9273 if errcode is not None:
9274 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9277 self.source_x509_ca = cert
9279 src_instance_name = self.op.source_instance_name
9280 if not src_instance_name:
9281 raise errors.OpPrereqError("Missing source instance name",
9284 self.source_instance_name = \
9285 netutils.GetHostname(name=src_instance_name).name
9288 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9289 self.op.mode, errors.ECODE_INVAL)
9291 def ExpandNames(self):
9292 """ExpandNames for CreateInstance.
9294 Figure out the right locks for instance creation.
9297 self.needed_locks = {}
9299 instance_name = self.op.instance_name
9300 # this is just a preventive check, but someone might still add this
9301 # instance in the meantime, and creation will fail at lock-add time
9302 if instance_name in self.cfg.GetInstanceList():
9303 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9304 instance_name, errors.ECODE_EXISTS)
9306 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9308 if self.op.iallocator:
9309 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9310 # specifying a group on instance creation and then selecting nodes from
9312 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9313 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9315 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9316 nodelist = [self.op.pnode]
9317 if self.op.snode is not None:
9318 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9319 nodelist.append(self.op.snode)
9320 self.needed_locks[locking.LEVEL_NODE] = nodelist
9321 # Lock resources of instance's primary and secondary nodes (copy to
9322 # prevent accidental modification)
9323 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9325 # in case of import lock the source node too
9326 if self.op.mode == constants.INSTANCE_IMPORT:
9327 src_node = self.op.src_node
9328 src_path = self.op.src_path
9330 if src_path is None:
9331 self.op.src_path = src_path = self.op.instance_name
9333 if src_node is None:
9334 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9335 self.op.src_node = None
9336 if os.path.isabs(src_path):
9337 raise errors.OpPrereqError("Importing an instance from a path"
9338 " requires a source node option",
9341 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9342 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9343 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9344 if not os.path.isabs(src_path):
9345 self.op.src_path = src_path = \
9346 utils.PathJoin(constants.EXPORT_DIR, src_path)
9348 def _RunAllocator(self):
9349 """Run the allocator based on input opcode.
9352 nics = [n.ToDict() for n in self.nics]
9353 ial = IAllocator(self.cfg, self.rpc,
9354 mode=constants.IALLOCATOR_MODE_ALLOC,
9355 name=self.op.instance_name,
9356 disk_template=self.op.disk_template,
9359 vcpus=self.be_full[constants.BE_VCPUS],
9360 memory=self.be_full[constants.BE_MAXMEM],
9363 hypervisor=self.op.hypervisor,
9366 ial.Run(self.op.iallocator)
9369 raise errors.OpPrereqError("Can't compute nodes using"
9370 " iallocator '%s': %s" %
9371 (self.op.iallocator, ial.info),
9373 if len(ial.result) != ial.required_nodes:
9374 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9375 " of nodes (%s), required %s" %
9376 (self.op.iallocator, len(ial.result),
9377 ial.required_nodes), errors.ECODE_FAULT)
9378 self.op.pnode = ial.result[0]
9379 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9380 self.op.instance_name, self.op.iallocator,
9381 utils.CommaJoin(ial.result))
9382 if ial.required_nodes == 2:
9383 self.op.snode = ial.result[1]
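# The allocator contract enforced above, condensed into a stand-alone check
# for illustration; the names below are made up, the real interface is the
# IAllocator class used in _RunAllocator:
def _ExampleCheckAllocatorResult(success, info, result, required_nodes):
  """Returns (pnode, snode or None) or raises OpPrereqError.

  """
  if not success:
    raise errors.OpPrereqError("iallocator failed: %s" % info,
                               errors.ECODE_NORES)
  if len(result) != required_nodes:
    raise errors.OpPrereqError("iallocator returned %d node(s),"
                               " expected %d" % (len(result), required_nodes),
                               errors.ECODE_FAULT)
  if required_nodes == 2:
    return (result[0], result[1])
  return (result[0], None)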
9385 def BuildHooksEnv(self):
9388 This runs on the master, primary and secondary nodes of the instance.
9392 "ADD_MODE": self.op.mode,
9394 if self.op.mode == constants.INSTANCE_IMPORT:
9395 env["SRC_NODE"] = self.op.src_node
9396 env["SRC_PATH"] = self.op.src_path
9397 env["SRC_IMAGES"] = self.src_images
9399 env.update(_BuildInstanceHookEnv(
9400 name=self.op.instance_name,
9401 primary_node=self.op.pnode,
9402 secondary_nodes=self.secondaries,
9403 status=self.op.start,
9404 os_type=self.op.os_type,
9405 minmem=self.be_full[constants.BE_MINMEM],
9406 maxmem=self.be_full[constants.BE_MAXMEM],
9407 vcpus=self.be_full[constants.BE_VCPUS],
9408 nics=_NICListToTuple(self, self.nics),
9409 disk_template=self.op.disk_template,
9410 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9411 for d in self.disks],
9414 hypervisor_name=self.op.hypervisor,
9420 def BuildHooksNodes(self):
9421 """Build hooks nodes.
9424 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9427 def _ReadExportInfo(self):
9428 """Reads the export information from disk.
9430 It will override the opcode source node and path with the actual
9431 information, if these two were not specified before.
9433 @return: the export information
9436 assert self.op.mode == constants.INSTANCE_IMPORT
9438 src_node = self.op.src_node
9439 src_path = self.op.src_path
9441 if src_node is None:
9442 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9443 exp_list = self.rpc.call_export_list(locked_nodes)
9445 for node in exp_list:
9446 if exp_list[node].fail_msg:
9448 if src_path in exp_list[node].payload:
9450 self.op.src_node = src_node = node
9451 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9455 raise errors.OpPrereqError("No export found for relative path %s" %
9456 src_path, errors.ECODE_INVAL)
9458 _CheckNodeOnline(self, src_node)
9459 result = self.rpc.call_export_info(src_node, src_path)
9460 result.Raise("No export or invalid export found in dir %s" % src_path)
9462 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9463 if not export_info.has_section(constants.INISECT_EXP):
9464 raise errors.ProgrammerError("Corrupted export config",
9465 errors.ECODE_ENVIRON)
9467 ei_version = export_info.get(constants.INISECT_EXP, "version")
9468 if (int(ei_version) != constants.EXPORT_VERSION):
9469 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9470 (ei_version, constants.EXPORT_VERSION),
9471 errors.ECODE_ENVIRON)
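# The export information loaded above is an INI-style file (parsed through
# objects.SerializableConfigParser).  A rough, stdlib-only sketch of the
# version check; the literal section/option names below are assumptions,
# the code above uses the constants.INISECT_* values instead:
def _ExampleParseExportVersion(text):
  import ConfigParser
  import StringIO
  cfg = ConfigParser.SafeConfigParser()
  cfg.readfp(StringIO.StringIO(text))
  if not cfg.has_section("export"):
    raise ValueError("corrupted export config")
  return cfg.getint("export", "version")

# e.g. _ExampleParseExportVersion("[export]\nversion = 0\n") returns 0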
9474 def _ReadExportParams(self, einfo):
9475 """Use export parameters as defaults.
9477 If the opcode doesn't specify (i.e. override) some instance
9478 parameters, try to use them from the export information, if
9482 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9484 if self.op.disk_template is None:
9485 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9486 self.op.disk_template = einfo.get(constants.INISECT_INS,
9488 if self.op.disk_template not in constants.DISK_TEMPLATES:
9489 raise errors.OpPrereqError("Disk template specified in configuration"
9490 " file is not one of the allowed values:"
9491 " %s" % " ".join(constants.DISK_TEMPLATES))
9493 raise errors.OpPrereqError("No disk template specified and the export"
9494 " is missing the disk_template information",
9497 if not self.op.disks:
9499 # TODO: import the disk iv_name too
9500 for idx in range(constants.MAX_DISKS):
9501 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9502 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9503 disks.append({constants.IDISK_SIZE: disk_sz})
9504 self.op.disks = disks
9505 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9506 raise errors.OpPrereqError("No disk info specified and the export"
9507 " is missing the disk information",
9510 if not self.op.nics:
9512 for idx in range(constants.MAX_NICS):
9513 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9515 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9516 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9523 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9524 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9526 if (self.op.hypervisor is None and
9527 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9528 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9530 if einfo.has_section(constants.INISECT_HYP):
9531 # use the export parameters but do not override the ones
9532 # specified by the user
9533 for name, value in einfo.items(constants.INISECT_HYP):
9534 if name not in self.op.hvparams:
9535 self.op.hvparams[name] = value
9537 if einfo.has_section(constants.INISECT_BEP):
9538 # use the parameters, without overriding
9539 for name, value in einfo.items(constants.INISECT_BEP):
9540 if name not in self.op.beparams:
9541 self.op.beparams[name] = value
9542 # Compatibility for the old "memory" be param
9543 if name == constants.BE_MEMORY:
9544 if constants.BE_MAXMEM not in self.op.beparams:
9545 self.op.beparams[constants.BE_MAXMEM] = value
9546 if constants.BE_MINMEM not in self.op.beparams:
9547 self.op.beparams[constants.BE_MINMEM] = value
9549 # try to read the parameters old style, from the main section
9550 for name in constants.BES_PARAMETERS:
9551 if (name not in self.op.beparams and
9552 einfo.has_option(constants.INISECT_INS, name)):
9553 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9555 if einfo.has_section(constants.INISECT_OSP):
9556 # use the parameters, without overriding
9557 for name, value in einfo.items(constants.INISECT_OSP):
9558 if name not in self.op.osparams:
9559 self.op.osparams[name] = value
9561 def _RevertToDefaults(self, cluster):
9562 """Revert the instance parameters to the default values.
9566 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9567 for name in self.op.hvparams.keys():
9568 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9569 del self.op.hvparams[name]
9571 be_defs = cluster.SimpleFillBE({})
9572 for name in self.op.beparams.keys():
9573 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9574 del self.op.beparams[name]
9576 nic_defs = cluster.SimpleFillNIC({})
9577 for nic in self.op.nics:
9578 for name in constants.NICS_PARAMETERS:
9579 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9582 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9583 for name in self.op.osparams.keys():
9584 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9585 del self.op.osparams[name]
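# The pruning above implements the identify_defaults handling: any value
# identical to the cluster default is dropped again, so the new instance
# keeps tracking future changes of that default instead of freezing it.
# The same idea on plain dicts, for illustration:
def _ExamplePruneDefaults(params, defaults):
  """Returns a copy of params without entries equal to their default.

  """
  return dict((name, value) for (name, value) in params.items()
              if name not in defaults or defaults[name] != value)

# _ExamplePruneDefaults({"maxmem": 128, "vcpus": 4}, {"maxmem": 128})
# returns {"vcpus": 4}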
9587 def _CalculateFileStorageDir(self):
9588 """Calculate final instance file storage dir.
9591 # file storage dir calculation/check
9592 self.instance_file_storage_dir = None
9593 if self.op.disk_template in constants.DTS_FILEBASED:
9594 # build the full file storage dir path
9597 if self.op.disk_template == constants.DT_SHARED_FILE:
9598 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9600 get_fsd_fn = self.cfg.GetFileStorageDir
9602 cfg_storagedir = get_fsd_fn()
9603 if not cfg_storagedir:
9604 raise errors.OpPrereqError("Cluster file storage dir not defined")
9605 joinargs.append(cfg_storagedir)
9607 if self.op.file_storage_dir is not None:
9608 joinargs.append(self.op.file_storage_dir)
9610 joinargs.append(self.op.instance_name)
9612 # pylint: disable=W0142
9613 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
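# The resulting layout is, roughly:
#   <cluster file storage dir>[/<per-instance override>]/<instance name>
# A plain os.path sketch of the join above (utils.PathJoin behaves like
# os.path.join but additionally sanity-checks the components); paths below
# are examples only:
def _ExampleFileStorageDir(cluster_dir, override_subdir, instance_name):
  import os.path
  parts = [cluster_dir]
  if override_subdir is not None:
    parts.append(override_subdir)
  parts.append(instance_name)
  return os.path.join(*parts)

# _ExampleFileStorageDir("/srv/ganeti/file-storage", None, "inst1.example.com")
# returns "/srv/ganeti/file-storage/inst1.example.com"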
9615 def CheckPrereq(self): # pylint: disable=R0914
9616 """Check prerequisites.
9619 self._CalculateFileStorageDir()
9621 if self.op.mode == constants.INSTANCE_IMPORT:
9622 export_info = self._ReadExportInfo()
9623 self._ReadExportParams(export_info)
9625 if (not self.cfg.GetVGName() and
9626 self.op.disk_template not in constants.DTS_NOT_LVM):
9627 raise errors.OpPrereqError("Cluster does not support lvm-based"
9628 " instances", errors.ECODE_STATE)
9630 if (self.op.hypervisor is None or
9631 self.op.hypervisor == constants.VALUE_AUTO):
9632 self.op.hypervisor = self.cfg.GetHypervisorType()
9634 cluster = self.cfg.GetClusterInfo()
9635 enabled_hvs = cluster.enabled_hypervisors
9636 if self.op.hypervisor not in enabled_hvs:
9637 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9638 " cluster (%s)" % (self.op.hypervisor,
9639 ",".join(enabled_hvs)),
9642 # Check tag validity
9643 for tag in self.op.tags:
9644 objects.TaggableObject.ValidateTag(tag)
9646 # check hypervisor parameter syntax (locally)
9647 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9648 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9650 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9651 hv_type.CheckParameterSyntax(filled_hvp)
9652 self.hv_full = filled_hvp
9653 # check that we don't specify global parameters on an instance
9654 _CheckGlobalHvParams(self.op.hvparams)
9656 # fill and remember the beparams dict
9657 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9658 for param, value in self.op.beparams.iteritems():
9659 if value == constants.VALUE_AUTO:
9660 self.op.beparams[param] = default_beparams[param]
9661 objects.UpgradeBeParams(self.op.beparams)
9662 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9663 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9665 # build os parameters
9666 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9668 # now that hvp/bep are in final format, let's reset to defaults,
9670 if self.op.identify_defaults:
9671 self._RevertToDefaults(cluster)
9675 for idx, nic in enumerate(self.op.nics):
9676 nic_mode_req = nic.get(constants.INIC_MODE, None)
9677 nic_mode = nic_mode_req
9678 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9679 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9681 # in routed mode, for the first nic, the default ip is 'auto'
9682 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9683 default_ip_mode = constants.VALUE_AUTO
9685 default_ip_mode = constants.VALUE_NONE
9687 # ip validity checks
9688 ip = nic.get(constants.INIC_IP, default_ip_mode)
9689 if ip is None or ip.lower() == constants.VALUE_NONE:
9691 elif ip.lower() == constants.VALUE_AUTO:
9692 if not self.op.name_check:
9693 raise errors.OpPrereqError("IP address set to auto but name checks"
9694 " have been skipped",
9696 nic_ip = self.hostname1.ip
9698 if not netutils.IPAddress.IsValid(ip):
9699 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9703 # TODO: check the ip address for uniqueness
9704 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9705 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9708 # MAC address verification
9709 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9710 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9711 mac = utils.NormalizeAndValidateMac(mac)
9714 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9715 except errors.ReservationError:
9716 raise errors.OpPrereqError("MAC address %s already in use"
9717 " in cluster" % mac,
9718 errors.ECODE_NOTUNIQUE)
9720 # Build nic parameters
9721 link = nic.get(constants.INIC_LINK, None)
9722 if link == constants.VALUE_AUTO:
9723 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9726 nicparams[constants.NIC_MODE] = nic_mode
9728 nicparams[constants.NIC_LINK] = link
9730 check_params = cluster.SimpleFillNIC(nicparams)
9731 objects.NIC.CheckParameterSyntax(check_params)
9732 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9734 # disk checks/pre-build
9735 default_vg = self.cfg.GetVGName()
9737 for disk in self.op.disks:
9738 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9739 if mode not in constants.DISK_ACCESS_SET:
9740 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9741 mode, errors.ECODE_INVAL)
9742 size = disk.get(constants.IDISK_SIZE, None)
9744 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9747 except (TypeError, ValueError):
9748 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9751 data_vg = disk.get(constants.IDISK_VG, default_vg)
9753 constants.IDISK_SIZE: size,
9754 constants.IDISK_MODE: mode,
9755 constants.IDISK_VG: data_vg,
9757 if constants.IDISK_METAVG in disk:
9758 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9759 if constants.IDISK_ADOPT in disk:
9760 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9761 self.disks.append(new_disk)
9763 if self.op.mode == constants.INSTANCE_IMPORT:
9765 for idx in range(len(self.disks)):
9766 option = "disk%d_dump" % idx
9767 if export_info.has_option(constants.INISECT_INS, option):
9768 # FIXME: are the old OSes, disk sizes, etc. useful?
9769 export_name = export_info.get(constants.INISECT_INS, option)
9770 image = utils.PathJoin(self.op.src_path, export_name)
9771 disk_images.append(image)
9773 disk_images.append(False)
9775 self.src_images = disk_images
9777 old_name = export_info.get(constants.INISECT_INS, "name")
9778 if self.op.instance_name == old_name:
9779 for idx, nic in enumerate(self.nics):
9780 if nic.mac == constants.VALUE_AUTO:
9781 nic_mac_ini = "nic%d_mac" % idx
9782 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9784 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9786 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9787 if self.op.ip_check:
9788 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9789 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9790 (self.check_ip, self.op.instance_name),
9791 errors.ECODE_NOTUNIQUE)
9793 #### mac address generation
9794 # By generating the MAC address here, both the allocator and the hooks get
9795 # the real, final MAC address rather than the 'auto' or 'generate' value.
9796 # There is a race condition between the generation and the instance object
9797 # creation, which means that we know the mac is valid now, but we're not
9798 # sure it will be when we actually add the instance. If things go bad
9799 # adding the instance will abort because of a duplicate mac, and the
9800 # creation job will fail.
9801 for nic in self.nics:
9802 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9803 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9807 if self.op.iallocator is not None:
9808 self._RunAllocator()
9810 # Release all unneeded node locks
9811 _ReleaseLocks(self, locking.LEVEL_NODE,
9812 keep=filter(None, [self.op.pnode, self.op.snode,
9814 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9815 keep=filter(None, [self.op.pnode, self.op.snode,
9818 #### node related checks
9820 # check primary node
9821 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9822 assert self.pnode is not None, \
9823 "Cannot retrieve locked node %s" % self.op.pnode
9825 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9826 pnode.name, errors.ECODE_STATE)
9828 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9829 pnode.name, errors.ECODE_STATE)
9830 if not pnode.vm_capable:
9831 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9832 " '%s'" % pnode.name, errors.ECODE_STATE)
9834 self.secondaries = []
9836 # mirror node verification
9837 if self.op.disk_template in constants.DTS_INT_MIRROR:
9838 if self.op.snode == pnode.name:
9839 raise errors.OpPrereqError("The secondary node cannot be the"
9840 " primary node", errors.ECODE_INVAL)
9841 _CheckNodeOnline(self, self.op.snode)
9842 _CheckNodeNotDrained(self, self.op.snode)
9843 _CheckNodeVmCapable(self, self.op.snode)
9844 self.secondaries.append(self.op.snode)
9846 snode = self.cfg.GetNodeInfo(self.op.snode)
9847 if pnode.group != snode.group:
9848 self.LogWarning("The primary and secondary nodes are in two"
9849 " different node groups; the disk parameters"
9850 " from the first disk's node group will be"
9853 nodenames = [pnode.name] + self.secondaries
9855 # Verify instance specs
9857 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9858 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9859 constants.ISPEC_DISK_COUNT: len(self.disks),
9860 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9861 constants.ISPEC_NIC_COUNT: len(self.nics),
9864 group_info = self.cfg.GetNodeGroup(pnode.group)
9865 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9866 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9867 if not self.op.ignore_ipolicy and res:
9868 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9869 " policy: %s") % (pnode.group,
9870 utils.CommaJoin(res)),
9873 # disk parameters (not customizable at instance or node level)
9874 # just use the primary node parameters, ignoring the secondary.
9875 self.diskparams = group_info.diskparams
9877 if not self.adopt_disks:
9878 if self.op.disk_template == constants.DT_RBD:
9879 # _CheckRADOSFreeSpace() is just a placeholder.
9880 # Any function that checks prerequisites can be placed here.
9881 # Check if there is enough space on the RADOS cluster.
9882 _CheckRADOSFreeSpace()
9884 # Check lv size requirements, if not adopting
9885 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9886 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9888 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9889 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9890 disk[constants.IDISK_ADOPT])
9891 for disk in self.disks])
9892 if len(all_lvs) != len(self.disks):
9893 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9895 for lv_name in all_lvs:
9897 # FIXME: lv_name here is "vg/lv"; we need to ensure that other calls
9898 # to ReserveLV use the same syntax
9899 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9900 except errors.ReservationError:
9901 raise errors.OpPrereqError("LV named %s used by another instance" %
9902 lv_name, errors.ECODE_NOTUNIQUE)
9904 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9905 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9907 node_lvs = self.rpc.call_lv_list([pnode.name],
9908 vg_names.payload.keys())[pnode.name]
9909 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9910 node_lvs = node_lvs.payload
9912 delta = all_lvs.difference(node_lvs.keys())
9914 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9915 utils.CommaJoin(delta),
9917 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9919 raise errors.OpPrereqError("Online logical volumes found, cannot"
9920 " adopt: %s" % utils.CommaJoin(online_lvs),
9922 # update the size of disk based on what is found
9923 for dsk in self.disks:
9924 dsk[constants.IDISK_SIZE] = \
9925 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9926 dsk[constants.IDISK_ADOPT])][0]))
9928 elif self.op.disk_template == constants.DT_BLOCK:
9929 # Normalize and de-duplicate device paths
9930 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9931 for disk in self.disks])
9932 if len(all_disks) != len(self.disks):
9933 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9935 baddisks = [d for d in all_disks
9936 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9938 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9939 " cannot be adopted" %
9940 (", ".join(baddisks),
9941 constants.ADOPTABLE_BLOCKDEV_ROOT),
9944 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9945 list(all_disks))[pnode.name]
9946 node_disks.Raise("Cannot get block device information from node %s" %
9948 node_disks = node_disks.payload
9949 delta = all_disks.difference(node_disks.keys())
9951 raise errors.OpPrereqError("Missing block device(s): %s" %
9952 utils.CommaJoin(delta),
9954 for dsk in self.disks:
9955 dsk[constants.IDISK_SIZE] = \
9956 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9958 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9960 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9961 # check OS parameters (remotely)
9962 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9964 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9966 # memory check on primary node
9967 #TODO(dynmem): use MINMEM for checking
9969 _CheckNodeFreeMemory(self, self.pnode.name,
9970 "creating instance %s" % self.op.instance_name,
9971 self.be_full[constants.BE_MAXMEM],
9974 self.dry_run_result = list(nodenames)
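# The ipolicy check above compares the instance spec (ispec) against the
# node group's bounds and collects human-readable violations.  A simplified
# stand-in showing the shape of that computation; the real helper is
# _ComputeIPolicyInstanceSpecViolation and the keys/bounds below are
# illustrative only:
def _ExampleSpecViolations(ispec, bounds):
  """bounds: dict of key -> (min, max); returns a list of violation strings.

  """
  violations = []
  for key, (min_val, max_val) in bounds.items():
    value = ispec.get(key)
    if value is None:
      continue
    if value < min_val or value > max_val:
      violations.append("%s value %s is out of range [%s, %s]" %
                        (key, value, min_val, max_val))
  return violations

# _ExampleSpecViolations({"memory": 64}, {"memory": (128, 32768)})
# returns ["memory value 64 is out of range [128, 32768]"]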
9976 def Exec(self, feedback_fn):
9977 """Create and add the instance to the cluster.
9980 instance = self.op.instance_name
9981 pnode_name = self.pnode.name
9983 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9984 self.owned_locks(locking.LEVEL_NODE)), \
9985 "Node locks differ from node resource locks"
9987 ht_kind = self.op.hypervisor
9988 if ht_kind in constants.HTS_REQ_PORT:
9989 network_port = self.cfg.AllocatePort()
9993 disks = _GenerateDiskTemplate(self,
9994 self.op.disk_template,
9995 instance, pnode_name,
9998 self.instance_file_storage_dir,
9999 self.op.file_driver,
10004 iobj = objects.Instance(name=instance, os=self.op.os_type,
10005 primary_node=pnode_name,
10006 nics=self.nics, disks=disks,
10007 disk_template=self.op.disk_template,
10008 admin_state=constants.ADMINST_DOWN,
10009 network_port=network_port,
10010 beparams=self.op.beparams,
10011 hvparams=self.op.hvparams,
10012 hypervisor=self.op.hypervisor,
10013 osparams=self.op.osparams,
10017 for tag in self.op.tags:
10020 if self.adopt_disks:
10021 if self.op.disk_template == constants.DT_PLAIN:
10022 # rename LVs to the newly-generated names; we need to construct
10023 # 'fake' LV disks with the old data, plus the new unique_id
10024 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10026 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10027 rename_to.append(t_dsk.logical_id)
10028 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10029 self.cfg.SetDiskID(t_dsk, pnode_name)
10030 result = self.rpc.call_blockdev_rename(pnode_name,
10031 zip(tmp_disks, rename_to))
10032 result.Raise("Failed to rename adopted LVs")
10034 feedback_fn("* creating instance disks...")
10036 _CreateDisks(self, iobj)
10037 except errors.OpExecError:
10038 self.LogWarning("Device creation failed, reverting...")
10040 _RemoveDisks(self, iobj)
10042 self.cfg.ReleaseDRBDMinors(instance)
10045 feedback_fn("adding instance %s to cluster config" % instance)
10047 self.cfg.AddInstance(iobj, self.proc.GetECId())
10049 # Declare that we don't want to remove the instance lock anymore, as we've
10050 # added the instance to the config
10051 del self.remove_locks[locking.LEVEL_INSTANCE]
10053 if self.op.mode == constants.INSTANCE_IMPORT:
10054 # Release unused nodes
10055 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10057 # Release all nodes
10058 _ReleaseLocks(self, locking.LEVEL_NODE)
10061 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10062 feedback_fn("* wiping instance disks...")
10064 _WipeDisks(self, iobj)
10065 except errors.OpExecError, err:
10066 logging.exception("Wiping disks failed")
10067 self.LogWarning("Wiping instance disks failed (%s)", err)
10071 # Something is already wrong with the disks, don't do anything else
10073 elif self.op.wait_for_sync:
10074 disk_abort = not _WaitForSync(self, iobj)
10075 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10076 # make sure the disks are not degraded (still sync-ing is ok)
10077 feedback_fn("* checking mirrors status")
10078 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10083 _RemoveDisks(self, iobj)
10084 self.cfg.RemoveInstance(iobj.name)
10085 # Make sure the instance lock gets removed
10086 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10087 raise errors.OpExecError("There are some degraded disks for"
10090 # Release all node resource locks
10091 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10093 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10094 if self.op.mode == constants.INSTANCE_CREATE:
10095 if not self.op.no_install:
10096 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10097 not self.op.wait_for_sync)
10099 feedback_fn("* pausing disk sync to install instance OS")
10100 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10102 for idx, success in enumerate(result.payload):
10104 logging.warn("pause-sync of instance %s for disk %d failed",
10107 feedback_fn("* running the instance OS create scripts...")
10108 # FIXME: pass debug option from opcode to backend
10110 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10111 self.op.debug_level)
10113 feedback_fn("* resuming disk sync")
10114 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10116 for idx, success in enumerate(result.payload):
10118 logging.warn("resume-sync of instance %s for disk %d failed",
10121 os_add_result.Raise("Could not add os for instance %s"
10122 " on node %s" % (instance, pnode_name))
10124 elif self.op.mode == constants.INSTANCE_IMPORT:
10125 feedback_fn("* running the instance OS import scripts...")
10129 for idx, image in enumerate(self.src_images):
10133 # FIXME: pass debug option from opcode to backend
10134 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10135 constants.IEIO_FILE, (image, ),
10136 constants.IEIO_SCRIPT,
10137 (iobj.disks[idx], idx),
10139 transfers.append(dt)
10142 masterd.instance.TransferInstanceData(self, feedback_fn,
10143 self.op.src_node, pnode_name,
10144 self.pnode.secondary_ip,
10146 if not compat.all(import_result):
10147 self.LogWarning("Some disks for instance %s on node %s were not"
10148 " imported successfully" % (instance, pnode_name))
10150 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10151 feedback_fn("* preparing remote import...")
10152 # The source cluster will stop the instance before attempting to make a
10153 # connection. In some cases stopping an instance can take a long time,
10154 # hence the shutdown timeout is added to the connection timeout.
10155 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10156 self.op.source_shutdown_timeout)
10157 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10159 assert iobj.primary_node == self.pnode.name
10161 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10162 self.source_x509_ca,
10163 self._cds, timeouts)
10164 if not compat.all(disk_results):
10165 # TODO: Should the instance still be started, even if some disks
10166 # failed to import (valid for local imports, too)?
10167 self.LogWarning("Some disks for instance %s on node %s were not"
10168 " imported successfully" % (instance, pnode_name))
10170 # Run rename script on newly imported instance
10171 assert iobj.name == instance
10172 feedback_fn("Running rename script for %s" % instance)
10173 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10174 self.source_instance_name,
10175 self.op.debug_level)
10176 if result.fail_msg:
10177 self.LogWarning("Failed to run rename script for %s on node"
10178 " %s: %s" % (instance, pnode_name, result.fail_msg))
10181 # also checked in the prereq part
10182 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10185 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10188 iobj.admin_state = constants.ADMINST_UP
10189 self.cfg.Update(iobj, feedback_fn)
10190 logging.info("Starting instance %s on node %s", instance, pnode_name)
10191 feedback_fn("* starting instance...")
10192 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10194 result.Raise("Could not start instance")
10196 return list(iobj.all_nodes)
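# For orientation, a rough sketch of the opcode that drives this LU.  The
# keyword names mirror the self.op attributes read above; the exact opcode
# class and signature should be taken from ganeti.opcodes, so treat this as
# an assumption-laden example, with example values throughout:
def _ExampleBuildCreateOpCode():
  return opcodes.OpInstanceCreate(instance_name="inst1.example.com",
                                  mode=constants.INSTANCE_CREATE,
                                  disk_template=constants.DT_PLAIN,
                                  disks=[{constants.IDISK_SIZE: 10240}],
                                  nics=[{}],
                                  os_type="debootstrap+default",
                                  pnode="node1.example.com",
                                  start=True,
                                  wait_for_sync=True)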
10199 def _CheckRADOSFreeSpace():
10200 """Compute disk size requirements inside the RADOS cluster.
10203 # For the RADOS cluster we assume there is always enough space.
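# If a real check were ever wanted here, one hypothetical approach would be
# to query the cluster (e.g. "ceph df --format json") and compare the free
# space against the requested disk sizes.  Everything below is illustrative:
# the command, the JSON keys and the helper name are assumptions, not a
# tested implementation.
def _ExampleRadosFreeBytes():
  import json
  result = utils.RunCmd(["ceph", "df", "--format", "json"])
  if result.failed:
    raise errors.OpPrereqError("Cannot query the RADOS cluster: %s" %
                               result.output, errors.ECODE_ENVIRON)
  data = json.loads(result.stdout)
  return data["stats"]["total_avail_bytes"]  # key name is an assumption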
10207 class LUInstanceConsole(NoHooksLU):
10208 """Connect to an instance's console.
10210 This is somewhat special in that it returns the command line that
10211 you need to run on the master node in order to connect to the
10217 def ExpandNames(self):
10218 self.share_locks = _ShareAll()
10219 self._ExpandAndLockInstance()
10221 def CheckPrereq(self):
10222 """Check prerequisites.
10224 This checks that the instance is in the cluster.
10227 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10228 assert self.instance is not None, \
10229 "Cannot retrieve locked instance %s" % self.op.instance_name
10230 _CheckNodeOnline(self, self.instance.primary_node)
10232 def Exec(self, feedback_fn):
10233 """Connect to the console of an instance
10236 instance = self.instance
10237 node = instance.primary_node
10239 node_insts = self.rpc.call_instance_list([node],
10240 [instance.hypervisor])[node]
10241 node_insts.Raise("Can't get node information from %s" % node)
10243 if instance.name not in node_insts.payload:
10244 if instance.admin_state == constants.ADMINST_UP:
10245 state = constants.INSTST_ERRORDOWN
10246 elif instance.admin_state == constants.ADMINST_DOWN:
10247 state = constants.INSTST_ADMINDOWN
10249 state = constants.INSTST_ADMINOFFLINE
10250 raise errors.OpExecError("Instance %s is not running (state %s)" %
10251 (instance.name, state))
10253 logging.debug("Connecting to console of %s on %s", instance.name, node)
10255 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10258 def _GetInstanceConsole(cluster, instance):
10259 """Returns console information for an instance.
10261 @type cluster: L{objects.Cluster}
10262 @type instance: L{objects.Instance}
10266 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10267 # beparams and hvparams are passed separately, to avoid editing the
10268 # instance and then saving the defaults in the instance itself.
10269 hvparams = cluster.FillHV(instance)
10270 beparams = cluster.FillBE(instance)
10271 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10273 assert console.instance == instance.name
10274 assert console.Validate()
10276 return console.ToDict()
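# FillHV/FillBE above merge cluster-level defaults with the per-instance
# overrides without writing the merged result back to the instance.  The
# same idea on plain dicts, for illustration (parameter names and values
# below are examples only):
def _ExampleFillParams(defaults, overrides):
  filled = defaults.copy()
  filled.update(overrides)
  return filled

# _ExampleFillParams({"kernel_path": "/boot/vmlinuz", "serial_console": True},
#                    {"serial_console": False})
# returns {"kernel_path": "/boot/vmlinuz", "serial_console": False}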
10279 class LUInstanceReplaceDisks(LogicalUnit):
10280 """Replace the disks of an instance.
10283 HPATH = "mirrors-replace"
10284 HTYPE = constants.HTYPE_INSTANCE
10287 def CheckArguments(self):
10288 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10289 self.op.iallocator)
10291 def ExpandNames(self):
10292 self._ExpandAndLockInstance()
10294 assert locking.LEVEL_NODE not in self.needed_locks
10295 assert locking.LEVEL_NODE_RES not in self.needed_locks
10296 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10298 assert self.op.iallocator is None or self.op.remote_node is None, \
10299 "Conflicting options"
10301 if self.op.remote_node is not None:
10302 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10304 # Warning: do not remove the locking of the new secondary here
10305 # unless DRBD8.AddChildren is changed to work in parallel;
10306 # currently it doesn't since parallel invocations of
10307 # FindUnusedMinor will conflict
10308 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10309 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10311 self.needed_locks[locking.LEVEL_NODE] = []
10312 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10314 if self.op.iallocator is not None:
10315 # iallocator will select a new node in the same group
10316 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10318 self.needed_locks[locking.LEVEL_NODE_RES] = []
10320 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10321 self.op.iallocator, self.op.remote_node,
10322 self.op.disks, False, self.op.early_release,
10323 self.op.ignore_ipolicy)
10325 self.tasklets = [self.replacer]
10327 def DeclareLocks(self, level):
10328 if level == locking.LEVEL_NODEGROUP:
10329 assert self.op.remote_node is None
10330 assert self.op.iallocator is not None
10331 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10333 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10334 # Lock all groups used by instance optimistically; this requires going
10335 # via the node before it's locked, requiring verification later on
10336 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10337 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10339 elif level == locking.LEVEL_NODE:
10340 if self.op.iallocator is not None:
10341 assert self.op.remote_node is None
10342 assert not self.needed_locks[locking.LEVEL_NODE]
10344 # Lock member nodes of all locked groups
10345 self.needed_locks[locking.LEVEL_NODE] = [node_name
10346 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10347 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10349 self._LockInstancesNodes()
10350 elif level == locking.LEVEL_NODE_RES:
10352 self.needed_locks[locking.LEVEL_NODE_RES] = \
10353 self.needed_locks[locking.LEVEL_NODE]
10355 def BuildHooksEnv(self):
10356 """Build hooks env.
10358 This runs on the master, the primary and all the secondaries.
10361 instance = self.replacer.instance
10363 "MODE": self.op.mode,
10364 "NEW_SECONDARY": self.op.remote_node,
10365 "OLD_SECONDARY": instance.secondary_nodes[0],
10367 env.update(_BuildInstanceHookEnvByObject(self, instance))
10370 def BuildHooksNodes(self):
10371 """Build hooks nodes.
10374 instance = self.replacer.instance
10376 self.cfg.GetMasterNode(),
10377 instance.primary_node,
10379 if self.op.remote_node is not None:
10380 nl.append(self.op.remote_node)
10383 def CheckPrereq(self):
10384 """Check prerequisites.
10387 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10388 self.op.iallocator is None)
10390 # Verify if node group locks are still correct
10391 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10393 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10395 return LogicalUnit.CheckPrereq(self)
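# For orientation, the opcode combinations accepted here (see
# TLReplaceDisks.CheckArguments below):
#   - replace disks on the primary:   mode=constants.REPLACE_DISK_PRI
#   - replace disks on the secondary: mode=constants.REPLACE_DISK_SEC
#   - change the secondary node:      mode=constants.REPLACE_DISK_CHG plus
#     either remote_node=... or iallocator=..., but not both
# A rough sketch of the last case; the exact opcode class and signature are
# assumptions mirroring the self.op attributes used above:
def _ExampleBuildReplaceOpCode():
  return opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
                                        mode=constants.REPLACE_DISK_CHG,
                                        iallocator="hail",
                                        early_release=False)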
10398 class TLReplaceDisks(Tasklet):
10399 """Replaces disks for an instance.
10401 Note: Locking is not within the scope of this class.
10404 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10405 disks, delay_iallocator, early_release, ignore_ipolicy):
10406 """Initializes this class.
10409 Tasklet.__init__(self, lu)
10412 self.instance_name = instance_name
10414 self.iallocator_name = iallocator_name
10415 self.remote_node = remote_node
10417 self.delay_iallocator = delay_iallocator
10418 self.early_release = early_release
10419 self.ignore_ipolicy = ignore_ipolicy
10422 self.instance = None
10423 self.new_node = None
10424 self.target_node = None
10425 self.other_node = None
10426 self.remote_node_info = None
10427 self.node_secondary_ip = None
10430 def CheckArguments(mode, remote_node, iallocator):
10431 """Helper function for users of this class.
10434 # check for valid parameter combination
10435 if mode == constants.REPLACE_DISK_CHG:
10436 if remote_node is None and iallocator is None:
10437 raise errors.OpPrereqError("When changing the secondary either an"
10438 " iallocator script must be used or the"
10439 " new node given", errors.ECODE_INVAL)
10441 if remote_node is not None and iallocator is not None:
10442 raise errors.OpPrereqError("Give either the iallocator or the new"
10443 " secondary, not both", errors.ECODE_INVAL)
10445 elif remote_node is not None or iallocator is not None:
10446 # Not replacing the secondary
10447 raise errors.OpPrereqError("The iallocator and new node options can"
10448 " only be used when changing the"
10449 " secondary node", errors.ECODE_INVAL)
10452 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10453 """Compute a new secondary node using an IAllocator.
10456 ial = IAllocator(lu.cfg, lu.rpc,
10457 mode=constants.IALLOCATOR_MODE_RELOC,
10458 name=instance_name,
10459 relocate_from=list(relocate_from))
10461 ial.Run(iallocator_name)
10463 if not ial.success:
10464 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10465 " %s" % (iallocator_name, ial.info),
10466 errors.ECODE_NORES)
10468 if len(ial.result) != ial.required_nodes:
10469 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10470 " of nodes (%s), required %s" %
10472 len(ial.result), ial.required_nodes),
10473 errors.ECODE_FAULT)
10475 remote_node_name = ial.result[0]
10477 lu.LogInfo("Selected new secondary for instance '%s': %s",
10478 instance_name, remote_node_name)
10480 return remote_node_name
10482 def _FindFaultyDisks(self, node_name):
10483 """Wrapper for L{_FindFaultyInstanceDisks}.
10486 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10489 def _CheckDisksActivated(self, instance):
10490 """Checks if the instance disks are activated.
10492 @param instance: The instance to check disks
10493 @return: True if they are activated, False otherwise
10496 nodes = instance.all_nodes
10498 for idx, dev in enumerate(instance.disks):
10500 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10501 self.cfg.SetDiskID(dev, node)
10503 result = self.rpc.call_blockdev_find(node, dev)
10507 elif result.fail_msg or not result.payload:
10512 def CheckPrereq(self):
10513 """Check prerequisites.
10515 This checks that the instance is in the cluster.
10518 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10519 assert instance is not None, \
10520 "Cannot retrieve locked instance %s" % self.instance_name
10522 if instance.disk_template != constants.DT_DRBD8:
10523 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10524 " instances", errors.ECODE_INVAL)
10526 if len(instance.secondary_nodes) != 1:
10527 raise errors.OpPrereqError("The instance has a strange layout,"
10528 " expected one secondary but found %d" %
10529 len(instance.secondary_nodes),
10530 errors.ECODE_FAULT)
10532 if not self.delay_iallocator:
10533 self._CheckPrereq2()
10535 def _CheckPrereq2(self):
10536 """Check prerequisites, second part.
10538 This function should always be part of CheckPrereq. It was separated and is
10539 now called from Exec because during node evacuation iallocator was only
10540 called with an unmodified cluster model, not taking planned changes into
10544 instance = self.instance
10545 secondary_node = instance.secondary_nodes[0]
10547 if self.iallocator_name is None:
10548 remote_node = self.remote_node
10550 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10551 instance.name, instance.secondary_nodes)
10553 if remote_node is None:
10554 self.remote_node_info = None
10556 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10557 "Remote node '%s' is not locked" % remote_node
10559 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10560 assert self.remote_node_info is not None, \
10561 "Cannot retrieve locked node %s" % remote_node
10563 if remote_node == self.instance.primary_node:
10564 raise errors.OpPrereqError("The specified node is the primary node of"
10565 " the instance", errors.ECODE_INVAL)
10567 if remote_node == secondary_node:
10568 raise errors.OpPrereqError("The specified node is already the"
10569 " secondary node of the instance",
10570 errors.ECODE_INVAL)
10572 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10573 constants.REPLACE_DISK_CHG):
10574 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10575 errors.ECODE_INVAL)
10577 if self.mode == constants.REPLACE_DISK_AUTO:
10578 if not self._CheckDisksActivated(instance):
10579 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10580 " first" % self.instance_name,
10581 errors.ECODE_STATE)
10582 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10583 faulty_secondary = self._FindFaultyDisks(secondary_node)
10585 if faulty_primary and faulty_secondary:
10586 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10587 " one node and can not be repaired"
10588 " automatically" % self.instance_name,
10589 errors.ECODE_STATE)
10592 self.disks = faulty_primary
10593 self.target_node = instance.primary_node
10594 self.other_node = secondary_node
10595 check_nodes = [self.target_node, self.other_node]
10596 elif faulty_secondary:
10597 self.disks = faulty_secondary
10598 self.target_node = secondary_node
10599 self.other_node = instance.primary_node
10600 check_nodes = [self.target_node, self.other_node]
10606 # Non-automatic modes
10607 if self.mode == constants.REPLACE_DISK_PRI:
10608 self.target_node = instance.primary_node
10609 self.other_node = secondary_node
10610 check_nodes = [self.target_node, self.other_node]
10612 elif self.mode == constants.REPLACE_DISK_SEC:
10613 self.target_node = secondary_node
10614 self.other_node = instance.primary_node
10615 check_nodes = [self.target_node, self.other_node]
10617 elif self.mode == constants.REPLACE_DISK_CHG:
10618 self.new_node = remote_node
10619 self.other_node = instance.primary_node
10620 self.target_node = secondary_node
10621 check_nodes = [self.new_node, self.other_node]
10623 _CheckNodeNotDrained(self.lu, remote_node)
10624 _CheckNodeVmCapable(self.lu, remote_node)
10626 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10627 assert old_node_info is not None
10628 if old_node_info.offline and not self.early_release:
10629 # doesn't make sense to delay the release
10630 self.early_release = True
10631 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10632 " early-release mode", secondary_node)
10635 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10638 # If not specified all disks should be replaced
10640 self.disks = range(len(self.instance.disks))
10642 # TODO: This is ugly, but right now we can't distinguish between an
10643 # internally submitted opcode and an external one. We should fix that.
10644 if self.remote_node_info:
10645 # We change the node, lets verify it still meets instance policy
10646 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10647 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10649 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10650 ignore=self.ignore_ipolicy)
10652 # TODO: compute disk parameters
10653 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10654 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10655 if primary_node_info.group != secondary_node_info.group:
10656 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10657 " different node groups; the disk parameters of the"
10658 " primary node's group will be applied.")
10660 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10662 for node in check_nodes:
10663 _CheckNodeOnline(self.lu, node)
10665 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10668 if node_name is not None)
10670 # Release unneeded node and node resource locks
10671 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10672 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10674 # Release any owned node group
10675 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10676 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10678 # Check whether disks are valid
10679 for disk_idx in self.disks:
10680 instance.FindDisk(disk_idx)
10682 # Get secondary node IP addresses
10683 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10684 in self.cfg.GetMultiNodeInfo(touched_nodes))
10686 def Exec(self, feedback_fn):
10687 """Execute disk replacement.
10689 This dispatches the disk replacement to the appropriate handler.
10692 if self.delay_iallocator:
10693 self._CheckPrereq2()
10696 # Verify owned locks before starting operation
10697 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10698 assert set(owned_nodes) == set(self.node_secondary_ip), \
10699 ("Incorrect node locks, owning %s, expected %s" %
10700 (owned_nodes, self.node_secondary_ip.keys()))
10701 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10702 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10704 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10705 assert list(owned_instances) == [self.instance_name], \
10706 "Instance '%s' not locked" % self.instance_name
10708 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10709 "Should not own any node group lock at this point"
10712 feedback_fn("No disks need replacement")
10715 feedback_fn("Replacing disk(s) %s for %s" %
10716 (utils.CommaJoin(self.disks), self.instance.name))
10718 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10720 # Activate the instance disks if we're replacing them on a down instance
10722 _StartInstanceDisks(self.lu, self.instance, True)
10725 # Should we replace the secondary node?
10726 if self.new_node is not None:
10727 fn = self._ExecDrbd8Secondary
10729 fn = self._ExecDrbd8DiskOnly
10731 result = fn(feedback_fn)
10733 # Deactivate the instance disks if we're replacing them on a
10736 _SafeShutdownInstanceDisks(self.lu, self.instance)
10738 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10741 # Verify owned locks
10742 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10743 nodes = frozenset(self.node_secondary_ip)
10744 assert ((self.early_release and not owned_nodes) or
10745 (not self.early_release and not (set(owned_nodes) - nodes))), \
10746 ("Not owning the correct locks, early_release=%s, owned=%r,"
10747 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10751 def _CheckVolumeGroup(self, nodes):
10752 self.lu.LogInfo("Checking volume groups")
10754 vgname = self.cfg.GetVGName()
10756 # Make sure volume group exists on all involved nodes
10757 results = self.rpc.call_vg_list(nodes)
10759 raise errors.OpExecError("Can't list volume groups on the nodes")
10762 res = results[node]
10763 res.Raise("Error checking node %s" % node)
10764 if vgname not in res.payload:
10765 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10768 def _CheckDisksExistence(self, nodes):
10769 # Check disk existence
10770 for idx, dev in enumerate(self.instance.disks):
10771 if idx not in self.disks:
10775 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10776 self.cfg.SetDiskID(dev, node)
10778 result = self.rpc.call_blockdev_find(node, dev)
10780 msg = result.fail_msg
10781 if msg or not result.payload:
10783 msg = "disk not found"
10784 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10787 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10788 for idx, dev in enumerate(self.instance.disks):
10789 if idx not in self.disks:
10792 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10795 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10797 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10798 " replace disks for instance %s" %
10799 (node_name, self.instance.name))
10801 def _CreateNewStorage(self, node_name):
10802 """Create new storage on the primary or secondary node.
10804 This is only used for same-node replaces, not for changing the
10805 secondary node, hence we don't want to modify the existing disk.
10810 for idx, dev in enumerate(self.instance.disks):
10811 if idx not in self.disks:
10814 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10816 self.cfg.SetDiskID(dev, node_name)
10818 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10819 names = _GenerateUniqueNames(self.lu, lv_names)
10821 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10823 vg_data = dev.children[0].logical_id[0]
10824 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10825 logical_id=(vg_data, names[0]), params=data_p)
10826 vg_meta = dev.children[1].logical_id[0]
10827 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10828 logical_id=(vg_meta, names[1]), params=meta_p)
10830 new_lvs = [lv_data, lv_meta]
10831 old_lvs = [child.Copy() for child in dev.children]
10832 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10834 # we pass force_create=True to force the LVM creation
10835 for new_lv in new_lvs:
10836 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10837 _GetInstanceInfoText(self.instance), False)
10841 def _CheckDevices(self, node_name, iv_names):
10842 for name, (dev, _, _) in iv_names.iteritems():
10843 self.cfg.SetDiskID(dev, node_name)
10845 result = self.rpc.call_blockdev_find(node_name, dev)
10847 msg = result.fail_msg
10848 if msg or not result.payload:
10850 msg = "disk not found"
10851 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10854 if result.payload.is_degraded:
10855 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10857 def _RemoveOldStorage(self, node_name, iv_names):
10858 for name, (_, old_lvs, _) in iv_names.iteritems():
10859 self.lu.LogInfo("Removing logical volumes for %s" % name)
10862 self.cfg.SetDiskID(lv, node_name)
10864 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10866 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10867 hint="remove unused LVs manually")
10869 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10870 """Replace a disk on the primary or secondary for DRBD 8.
10872 The algorithm for replace is quite complicated:
10874 1. for each disk to be replaced:
10876 1. create new LVs on the target node with unique names
10877 1. detach old LVs from the drbd device
10878 1. rename old LVs to name_replaced.<time_t>
10879 1. rename new LVs to old LVs
10880 1. attach the new LVs (with the old names now) to the drbd device
10882 1. wait for sync across all devices
10884 1. for each modified disk:
10886 1. remove old LVs (which have the name name_replaced.<time_t>)
10888 Failures are not very well handled.
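# A condensed sketch of the per-disk rename dance performed below: the old
# LVs are first moved out of the way under a unique suffix, then the new LVs
# take over the old names, so the drbd device keeps its expected backing
# names.  The helper and the flat "vg/lv" strings are simplified stand-ins,
# not the real objects.Disk API:
def _ExampleRenamePlan(old_names, new_names, suffix):
  """Returns two lists of (current_name, new_name) rename pairs.

  """
  moved_aside = ["%s_replaced-%s" % (name, suffix) for name in old_names]
  rename_old_aside = zip(old_names, moved_aside)
  rename_new_to_old = zip(new_names, old_names)
  return (rename_old_aside, rename_new_to_old)

# _ExampleRenamePlan(["xenvg/disk0_data"], ["xenvg/new.data"], 1234567890)
# returns ([("xenvg/disk0_data", "xenvg/disk0_data_replaced-1234567890")],
#          [("xenvg/new.data", "xenvg/disk0_data")])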
10893 # Step: check device activation
10894 self.lu.LogStep(1, steps_total, "Check device existence")
10895 self._CheckDisksExistence([self.other_node, self.target_node])
10896 self._CheckVolumeGroup([self.target_node, self.other_node])
10898 # Step: check other node consistency
10899 self.lu.LogStep(2, steps_total, "Check peer consistency")
10900 self._CheckDisksConsistency(self.other_node,
10901 self.other_node == self.instance.primary_node,
10904 # Step: create new storage
10905 self.lu.LogStep(3, steps_total, "Allocate new storage")
10906 iv_names = self._CreateNewStorage(self.target_node)
10908 # Step: for each lv, detach+rename*2+attach
10909 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10910 for dev, old_lvs, new_lvs in iv_names.itervalues():
10911 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10913 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10915 result.Raise("Can't detach drbd from local storage on node"
10916 " %s for device %s" % (self.target_node, dev.iv_name))
10918 #cfg.Update(instance)
10920 # ok, we created the new LVs, so now we know we have the needed
10921 # storage; as such, we proceed on the target node to rename
10922 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10923 # using the assumption that logical_id == physical_id (which in
10924 # turn is the unique_id on that node)
10926 # FIXME(iustin): use a better name for the replaced LVs
10927 temp_suffix = int(time.time())
10928 ren_fn = lambda d, suff: (d.physical_id[0],
10929 d.physical_id[1] + "_replaced-%s" % suff)
10931 # Build the rename list based on what LVs exist on the node
10932 rename_old_to_new = []
10933 for to_ren in old_lvs:
10934 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10935 if not result.fail_msg and result.payload:
10937 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10939 self.lu.LogInfo("Renaming the old LVs on the target node")
10940 result = self.rpc.call_blockdev_rename(self.target_node,
10942 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10944 # Now we rename the new LVs to the old LVs
10945 self.lu.LogInfo("Renaming the new LVs on the target node")
10946 rename_new_to_old = [(new, old.physical_id)
10947 for old, new in zip(old_lvs, new_lvs)]
10948 result = self.rpc.call_blockdev_rename(self.target_node,
10950 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10952 # Intermediate steps of in memory modifications
10953 for old, new in zip(old_lvs, new_lvs):
10954 new.logical_id = old.logical_id
10955 self.cfg.SetDiskID(new, self.target_node)
10957 # We need to modify old_lvs so that removal later removes the
10958 # right LVs, not the newly added ones; note that old_lvs is a
10960 for disk in old_lvs:
10961 disk.logical_id = ren_fn(disk, temp_suffix)
10962 self.cfg.SetDiskID(disk, self.target_node)
10964 # Now that the new lvs have the old name, we can add them to the device
10965 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10966 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10968 msg = result.fail_msg
10970 for new_lv in new_lvs:
10971 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10974 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10975 hint=("manually clean up the unused logical"
10977 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10979 cstep = itertools.count(5)
10981 if self.early_release:
10982 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10983 self._RemoveOldStorage(self.target_node, iv_names)
10984 # TODO: Check if releasing locks early still makes sense
10985 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10987 # Release all resource locks except those used by the instance
10988 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10989 keep=self.node_secondary_ip.keys())
10991 # Release all node locks while waiting for sync
10992 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10994 # TODO: Can the instance lock be downgraded here? Take the optional disk
10995 # shutdown in the caller into consideration.
10998 # This can fail as the old devices are degraded and _WaitForSync
10999 # does a combined result over all disks, so we don't check its return value
11000 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11001 _WaitForSync(self.lu, self.instance)
11003 # Check all devices manually
11004 self._CheckDevices(self.instance.primary_node, iv_names)
11006 # Step: remove old storage
11007 if not self.early_release:
11008 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11009 self._RemoveOldStorage(self.target_node, iv_names)
11011 def _ExecDrbd8Secondary(self, feedback_fn):
11012 """Replace the secondary node for DRBD 8.
11014 The algorithm for replace is quite complicated:
11015 - for all disks of the instance:
11016 - create new LVs on the new node with same names
11017 - shutdown the drbd device on the old secondary
11018 - disconnect the drbd network on the primary
11019 - create the drbd device on the new secondary
11020 - network attach the drbd on the primary, using an artifice:
11021 the drbd code for Attach() will connect to the network if it
11022 finds a device which is connected to the good local disks but
11023 not network enabled
11024 - wait for sync across all devices
11025 - remove all disks from the old secondary
11027 Failures are not very well handled.
11032 pnode = self.instance.primary_node
11034 # Step: check device activation
11035 self.lu.LogStep(1, steps_total, "Check device existence")
11036 self._CheckDisksExistence([self.instance.primary_node])
11037 self._CheckVolumeGroup([self.instance.primary_node])
11039 # Step: check other node consistency
11040 self.lu.LogStep(2, steps_total, "Check peer consistency")
11041 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11043 # Step: create new storage
11044 self.lu.LogStep(3, steps_total, "Allocate new storage")
11045 for idx, dev in enumerate(self.instance.disks):
11046 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11047 (self.new_node, idx))
11048 # we pass force_create=True to force LVM creation
11049 for new_lv in dev.children:
11050 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11051 _GetInstanceInfoText(self.instance), False)
# Step 4: drbd minors and drbd setup changes
11054 # after this, we must manually remove the drbd minors on both the
11055 # error and the success paths
11056 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11057 minors = self.cfg.AllocateDRBDMinor([self.new_node
11058 for dev in self.instance.disks],
11059 self.instance.name)
11060 logging.debug("Allocated minors %r", minors)
11063 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11064 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11065 (self.new_node, idx))
11066 # create new devices on new_node; note that we create two IDs:
11067 # one without port, so the drbd will be activated without
11068 # networking information on the new node at this stage, and one
11069 # with network, for the latter activation in step 4
11070 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
if self.instance.primary_node == o_node1:
  p_minor = o_minor1
else:
  assert self.instance.primary_node == o_node2, "Three-node instance?"
  p_minor = o_minor2
11077 new_alone_id = (self.instance.primary_node, self.new_node, None,
11078 p_minor, new_minor, o_secret)
11079 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11080 p_minor, new_minor, o_secret)
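# Minimal sketch of the two logical IDs built above, with hypothetical
# values (primary "node1", new secondary "node3", port 11000, primary minor
# 0, new minor 2, shared secret "s3cr3t"):
#   new_alone_id = ("node1", "node3", None,  0, 2, "s3cr3t")
#   new_net_id   = ("node1", "node3", 11000, 0, 2, "s3cr3t")
# The port-less tuple lets the DRBD device be brought up on the new node
# without any networking; the networked tuple is what is stored in the
# configuration and used later for the standalone => connected attach.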
11082 iv_names[idx] = (dev, dev.children, new_net_id)
logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
              new_net_id)
11085 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11086 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11087 logical_id=new_alone_id,
11088 children=dev.children,
11090 params=drbd_params)
try:
  _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                        _GetInstanceInfoText(self.instance), False)
except errors.GenericError:
  self.cfg.ReleaseDRBDMinors(self.instance.name)
  raise
11098 # We have new devices, shutdown the drbd on the old secondary
11099 for idx, dev in enumerate(self.instance.disks):
11100 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11101 self.cfg.SetDiskID(dev, self.target_node)
msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
if msg:
  self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                     " node: %s" % (idx, msg),
                     hint=("Please cleanup this device manually as"
                           " soon as possible"))
11109 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11110 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11111 self.instance.disks)[pnode]
msg = result.fail_msg
if msg:
  # detaches didn't succeed (unlikely)
  self.cfg.ReleaseDRBDMinors(self.instance.name)
  raise errors.OpExecError("Can't detach the disks from the network on"
                           " old node: %s" % (msg,))
11120 # if we managed to detach at least one, we update all the disks of
11121 # the instance to point to the new secondary
11122 self.lu.LogInfo("Updating instance configuration")
11123 for dev, _, new_logical_id in iv_names.itervalues():
11124 dev.logical_id = new_logical_id
11125 self.cfg.SetDiskID(dev, self.instance.primary_node)
11127 self.cfg.Update(self.instance, feedback_fn)
11129 # Release all node locks (the configuration has been updated)
11130 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11132 # and now perform the drbd attach
11133 self.lu.LogInfo("Attaching primary drbds to new secondary"
11134 " (standalone => connected)")
11135 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11137 self.node_secondary_ip,
11138 self.instance.disks,
11139 self.instance.name,
11141 for to_node, to_result in result.items():
msg = to_result.fail_msg
if msg:
  self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                     to_node, msg,
                     hint=("please do a gnt-instance info to see the"
                           " status of disks"))
11149 cstep = itertools.count(5)
11151 if self.early_release:
11152 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11153 self._RemoveOldStorage(self.target_node, iv_names)
11154 # TODO: Check if releasing locks early still makes sense
11155 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11157 # Release all resource locks except those used by the instance
11158 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11159 keep=self.node_secondary_ip.keys())
11161 # TODO: Can the instance lock be downgraded here? Take the optional disk
11162 # shutdown in the caller into consideration.
11165 # This can fail as the old devices are degraded and _WaitForSync
11166 # does a combined result over all disks, so we don't check its return value
11167 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11168 _WaitForSync(self.lu, self.instance)
11170 # Check all devices manually
11171 self._CheckDevices(self.instance.primary_node, iv_names)
11173 # Step: remove old storage
11174 if not self.early_release:
11175 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11176 self._RemoveOldStorage(self.target_node, iv_names)
11179 class LURepairNodeStorage(NoHooksLU):
11180 """Repairs the volume group on a node.
11185 def CheckArguments(self):
11186 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11188 storage_type = self.op.storage_type
11190 if (constants.SO_FIX_CONSISTENCY not in
11191 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11192 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11193 " repaired" % storage_type,
11194 errors.ECODE_INVAL)
11196 def ExpandNames(self):
11197 self.needed_locks = {
11198 locking.LEVEL_NODE: [self.op.node_name],
11201 def _CheckFaultyDisks(self, instance, node_name):
11202 """Ensure faulty disks abort the opcode or at least warn."""
try:
  if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                              node_name, True):
    raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                               " node '%s'" % (instance.name, node_name),
                               errors.ECODE_STATE)
except errors.OpPrereqError, err:
  if self.op.ignore_consistency:
    self.proc.LogWarning(str(err.args[0]))
  else:
    raise
11215 def CheckPrereq(self):
11216 """Check prerequisites.
11219 # Check whether any instance on this node has faulty disks
11220 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
if inst.admin_state != constants.ADMINST_UP:
  continue
11223 check_nodes = set(inst.all_nodes)
11224 check_nodes.discard(self.op.node_name)
11225 for inst_node_name in check_nodes:
11226 self._CheckFaultyDisks(inst, inst_node_name)
11228 def Exec(self, feedback_fn):
11229 feedback_fn("Repairing storage unit '%s' on %s ..." %
11230 (self.op.name, self.op.node_name))
11232 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11233 result = self.rpc.call_storage_execute(self.op.node_name,
11234 self.op.storage_type, st_args,
11236 constants.SO_FIX_CONSISTENCY)
11237 result.Raise("Failed to repair storage unit '%s' on %s" %
11238 (self.op.name, self.op.node_name))
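# Illustrative invocation that ends up in this LU (the command line is shown
# as an assumption, not something defined in this module):
#   gnt-node repair-storage node1.example.com lvm-vg xenvg
# which asks node1.example.com to run the SO_FIX_CONSISTENCY operation on
# the volume group "xenvg".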
11241 class LUNodeEvacuate(NoHooksLU):
11242 """Evacuates instances off a list of nodes.
11247 _MODE2IALLOCATOR = {
11248 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11249 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11250 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11252 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11253 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11254 constants.IALLOCATOR_NEVAC_MODES)
11256 def CheckArguments(self):
11257 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11259 def ExpandNames(self):
11260 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11262 if self.op.remote_node is not None:
11263 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11264 assert self.op.remote_node
11266 if self.op.remote_node == self.op.node_name:
11267 raise errors.OpPrereqError("Can not use evacuated node as a new"
11268 " secondary node", errors.ECODE_INVAL)
11270 if self.op.mode != constants.NODE_EVAC_SEC:
11271 raise errors.OpPrereqError("Without the use of an iallocator only"
11272 " secondary instances can be evacuated",
11273 errors.ECODE_INVAL)
11276 self.share_locks = _ShareAll()
11277 self.needed_locks = {
11278 locking.LEVEL_INSTANCE: [],
11279 locking.LEVEL_NODEGROUP: [],
11280 locking.LEVEL_NODE: [],
11283 # Determine nodes (via group) optimistically, needs verification once locks
11284 # have been acquired
11285 self.lock_nodes = self._DetermineNodes()
11287 def _DetermineNodes(self):
11288 """Gets the list of nodes to operate on.
11291 if self.op.remote_node is None:
11292 # Iallocator will choose any node(s) in the same group
11293 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
else:
  group_nodes = frozenset([self.op.remote_node])
11297 # Determine nodes to be locked
11298 return set([self.op.node_name]) | group_nodes
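# Example of the resulting lock set (hypothetical node names): evacuating
# "node1" without an explicit remote node in a group also containing "node2"
# and "node3" returns set(["node1", "node2", "node3"]); with
# remote_node="node9" it returns set(["node1", "node9"]).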
11300 def _DetermineInstances(self):
11301 """Builds list of instances to operate on.
11304 assert self.op.mode in constants.NODE_EVAC_MODES
11306 if self.op.mode == constants.NODE_EVAC_PRI:
11307 # Primary instances only
11308 inst_fn = _GetNodePrimaryInstances
11309 assert self.op.remote_node is None, \
11310 "Evacuating primary instances requires iallocator"
11311 elif self.op.mode == constants.NODE_EVAC_SEC:
11312 # Secondary instances only
11313 inst_fn = _GetNodeSecondaryInstances
else:
  assert self.op.mode == constants.NODE_EVAC_ALL
11317 inst_fn = _GetNodeInstances
11318 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11320 raise errors.OpPrereqError("Due to an issue with the iallocator"
11321 " interface it is not possible to evacuate"
11322 " all instances at once; specify explicitly"
11323 " whether to evacuate primary or secondary"
11325 errors.ECODE_INVAL)
11327 return inst_fn(self.cfg, self.op.node_name)
11329 def DeclareLocks(self, level):
11330 if level == locking.LEVEL_INSTANCE:
11331 # Lock instances optimistically, needs verification once node and group
11332 # locks have been acquired
11333 self.needed_locks[locking.LEVEL_INSTANCE] = \
11334 set(i.name for i in self._DetermineInstances())
11336 elif level == locking.LEVEL_NODEGROUP:
11337 # Lock node groups for all potential target nodes optimistically, needs
11338 # verification once nodes have been acquired
11339 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11340 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11342 elif level == locking.LEVEL_NODE:
11343 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11345 def CheckPrereq(self):
11347 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11348 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11349 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11351 need_nodes = self._DetermineNodes()
11353 if not owned_nodes.issuperset(need_nodes):
11354 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11355 " locks were acquired, current nodes are"
11356 " are '%s', used to be '%s'; retry the"
11358 (self.op.node_name,
11359 utils.CommaJoin(need_nodes),
11360 utils.CommaJoin(owned_nodes)),
11361 errors.ECODE_STATE)
11363 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11364 if owned_groups != wanted_groups:
11365 raise errors.OpExecError("Node groups changed since locks were acquired,"
11366 " current groups are '%s', used to be '%s';"
11367 " retry the operation" %
11368 (utils.CommaJoin(wanted_groups),
11369 utils.CommaJoin(owned_groups)))
11371 # Determine affected instances
11372 self.instances = self._DetermineInstances()
11373 self.instance_names = [i.name for i in self.instances]
11375 if set(self.instance_names) != owned_instances:
11376 raise errors.OpExecError("Instances on node '%s' changed since locks"
11377 " were acquired, current instances are '%s',"
11378 " used to be '%s'; retry the operation" %
11379 (self.op.node_name,
11380 utils.CommaJoin(self.instance_names),
11381 utils.CommaJoin(owned_instances)))
11383 if self.instance_names:
self.LogInfo("Evacuating instances from node '%s': %s",
             self.op.node_name,
             utils.CommaJoin(utils.NiceSort(self.instance_names)))
else:
  self.LogInfo("No instances to evacuate from node '%s'",
               self.op.node_name)
11391 if self.op.remote_node is not None:
11392 for i in self.instances:
11393 if i.primary_node == self.op.remote_node:
11394 raise errors.OpPrereqError("Node %s is the primary node of"
11395 " instance %s, cannot use it as"
11397 (self.op.remote_node, i.name),
11398 errors.ECODE_INVAL)
11400 def Exec(self, feedback_fn):
11401 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11403 if not self.instance_names:
# No instances to evacuate
jobs = []
11407 elif self.op.iallocator is not None:
11408 # TODO: Implement relocation to other group
11409 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11410 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11411 instances=list(self.instance_names))
11413 ial.Run(self.op.iallocator)
11415 if not ial.success:
11416 raise errors.OpPrereqError("Can't compute node evacuation using"
11417 " iallocator '%s': %s" %
11418 (self.op.iallocator, ial.info),
11419 errors.ECODE_NORES)
11421 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11423 elif self.op.remote_node is not None:
assert self.op.mode == constants.NODE_EVAC_SEC
jobs = [
  [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                  remote_node=self.op.remote_node,
                                  disks=[],
                                  mode=constants.REPLACE_DISK_CHG,
                                  early_release=self.op.early_release)]
  for instance_name in self.instance_names
  ]

else:
  raise errors.ProgrammerError("No iallocator or remote node")
11437 return ResultWithJobs(jobs)
11440 def _SetOpEarlyRelease(early_release, op):
11441 """Sets C{early_release} flag on opcodes if available.
11445 op.early_release = early_release
11446 except AttributeError:
11447 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice."""
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
11462 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11463 """Unpacks the result of change-group and node-evacuate iallocator requests.
11465 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11466 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11468 @type lu: L{LogicalUnit}
11469 @param lu: Logical unit instance
11470 @type alloc_result: tuple/list
11471 @param alloc_result: Result from iallocator
11472 @type early_release: bool
11473 @param early_release: Whether to release locks early if possible
11474 @type use_nodes: bool
11475 @param use_nodes: Whether to display node names instead of groups
11478 (moved, failed, jobs) = alloc_result
if failed:
  failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11482 for (name, reason) in failed)
11483 lu.LogWarning("Unable to evacuate instances %s", failreason)
11484 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
if moved:
  lu.LogInfo("Instances to be moved: %s",
11488 utils.CommaJoin("%s (to %s)" %
11489 (name, _NodeEvacDest(use_nodes, group, nodes))
11490 for (name, group, nodes) in moved))
11492 return [map(compat.partial(_SetOpEarlyRelease, early_release),
map(opcodes.OpCode.LoadOpCode, ops))
        for ops in jobs]
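# Shape of alloc_result, sketched with hypothetical names: the iallocator
# returns a (moved, failed, jobs) triple along the lines of
#   moved  = [("inst1", "group-uuid-1", ["node2", "node3"])]
#   failed = [("inst2", "not enough memory")]
#   jobs   = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}], ...]
# The opcodes in each job arrive as serialized dicts and are re-hydrated via
# opcodes.OpCode.LoadOpCode above before the early_release flag is applied.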
11497 class LUInstanceGrowDisk(LogicalUnit):
11498 """Grow a disk of an instance.
11501 HPATH = "disk-grow"
11502 HTYPE = constants.HTYPE_INSTANCE
11505 def ExpandNames(self):
11506 self._ExpandAndLockInstance()
11507 self.needed_locks[locking.LEVEL_NODE] = []
11508 self.needed_locks[locking.LEVEL_NODE_RES] = []
11509 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11510 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11512 def DeclareLocks(self, level):
11513 if level == locking.LEVEL_NODE:
11514 self._LockInstancesNodes()
11515 elif level == locking.LEVEL_NODE_RES:
11517 self.needed_locks[locking.LEVEL_NODE_RES] = \
11518 self.needed_locks[locking.LEVEL_NODE][:]
11520 def BuildHooksEnv(self):
11521 """Build hooks env.
11523 This runs on the master, the primary and all the secondaries.
11527 "DISK": self.op.disk,
11528 "AMOUNT": self.op.amount,
11530 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11533 def BuildHooksNodes(self):
11534 """Build hooks nodes.
nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
11540 def CheckPrereq(self):
11541 """Check prerequisites.
11543 This checks that the instance is in the cluster.
11546 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11547 assert instance is not None, \
11548 "Cannot retrieve locked instance %s" % self.op.instance_name
11549 nodenames = list(instance.all_nodes)
11550 for node in nodenames:
11551 _CheckNodeOnline(self, node)
11553 self.instance = instance
11555 if instance.disk_template not in constants.DTS_GROWABLE:
11556 raise errors.OpPrereqError("Instance's disk layout does not support"
11557 " growing", errors.ECODE_INVAL)
11559 self.disk = instance.FindDisk(self.op.disk)
11561 if instance.disk_template not in (constants.DT_FILE,
11562 constants.DT_SHARED_FILE,
11564 # TODO: check the free disk space for file, when that feature will be
11566 _CheckNodesFreeDiskPerVG(self, nodenames,
11567 self.disk.ComputeGrowth(self.op.amount))
11569 def Exec(self, feedback_fn):
11570 """Execute disk grow.
instance = self.instance
disk = self.disk
11576 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11577 assert (self.owned_locks(locking.LEVEL_NODE) ==
11578 self.owned_locks(locking.LEVEL_NODE_RES))
disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
if not disks_ok:
  raise errors.OpExecError("Cannot activate block device to grow")
11584 feedback_fn("Growing disk %s of instance '%s' by %s" %
11585 (self.op.disk, instance.name,
11586 utils.FormatUnit(self.op.amount, "h")))
11588 # First run all grow ops in dry-run mode
11589 for node in instance.all_nodes:
11590 self.cfg.SetDiskID(disk, node)
11591 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11592 result.Raise("Grow request failed to node %s" % node)
11594 # We know that (as far as we can test) operations across different
11595 # nodes will succeed, time to run it for real
11596 for node in instance.all_nodes:
11597 self.cfg.SetDiskID(disk, node)
11598 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11599 result.Raise("Grow request failed to node %s" % node)
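# Design note: the first loop above runs every grow with the dry-run flag
# (last argument True) and only this second loop passes False; a node that
# would fail the resize can therefore reject the request before any LV has
# actually been changed, keeping the disk size consistent across nodes.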
11601 # TODO: Rewrite code to work properly
11602 # DRBD goes into sync mode for a short amount of time after executing the
11603 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11604 # calling "resize" in sync mode fails. Sleeping for a short amount of
11605 # time is a work-around.
11608 disk.RecordGrow(self.op.amount)
11609 self.cfg.Update(instance, feedback_fn)
11611 # Changes have been recorded, release node lock
11612 _ReleaseLocks(self, locking.LEVEL_NODE)
11614 # Downgrade lock while waiting for sync
11615 self.glm.downgrade(locking.LEVEL_INSTANCE)
11617 if self.op.wait_for_sync:
11618 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11620 self.proc.LogWarning("Disk sync-ing has not returned a good"
11621 " status; please check the instance")
11622 if instance.admin_state != constants.ADMINST_UP:
11623 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11624 elif instance.admin_state != constants.ADMINST_UP:
11625 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11626 " not supposed to be running because no wait for"
11627 " sync mode was requested")
11629 assert self.owned_locks(locking.LEVEL_NODE_RES)
11630 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11633 class LUInstanceQueryData(NoHooksLU):
11634 """Query runtime instance data.
11639 def ExpandNames(self):
11640 self.needed_locks = {}
11642 # Use locking if requested or when non-static information is wanted
11643 if not (self.op.static or self.op.use_locking):
11644 self.LogWarning("Non-static data requested, locks need to be acquired")
11645 self.op.use_locking = True
11647 if self.op.instances or not self.op.use_locking:
11648 # Expand instance names right here
11649 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11651 # Will use acquired locks
11652 self.wanted_names = None
11654 if self.op.use_locking:
11655 self.share_locks = _ShareAll()
11657 if self.wanted_names is None:
11658 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11660 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11662 self.needed_locks[locking.LEVEL_NODE] = []
11663 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11665 def DeclareLocks(self, level):
11666 if self.op.use_locking and level == locking.LEVEL_NODE:
11667 self._LockInstancesNodes()
11669 def CheckPrereq(self):
11670 """Check prerequisites.
11672 This only checks the optional instance list against the existing names.
11675 if self.wanted_names is None:
11676 assert self.op.use_locking, "Locking was not used"
11677 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11679 self.wanted_instances = \
11680 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11682 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11683 """Returns the status of a block device
11686 if self.op.static or not node:
11689 self.cfg.SetDiskID(dev, node)
11691 result = self.rpc.call_blockdev_find(node, dev)
11695 result.Raise("Can't compute disk status for %s" % instance_name)
11697 status = result.payload
11701 return (status.dev_path, status.major, status.minor,
11702 status.sync_percent, status.estimated_time,
11703 status.is_degraded, status.ldisk_status)
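# Sketch of the status tuple returned above, with made-up values for a
# DRBD device in the middle of a resync:
#   ("/dev/drbd0", 147, 0, 87.5, 420, True, constants.LDS_OKAY)
# corresponding to (dev_path, major, minor, sync_percent, estimated_time,
# is_degraded, ldisk_status).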
11705 def _ComputeDiskStatus(self, instance, snode, dev):
11706 """Compute block device status.
11709 if dev.dev_type in constants.LDS_DRBD:
11710 # we change the snode then (otherwise we use the one passed in)
11711 if dev.logical_id[0] == instance.primary_node:
11712 snode = dev.logical_id[1]
else:
  snode = dev.logical_id[0]
11716 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11717 instance.name, dev)
11718 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11721 dev_children = map(compat.partial(self._ComputeDiskStatus,
11728 "iv_name": dev.iv_name,
11729 "dev_type": dev.dev_type,
11730 "logical_id": dev.logical_id,
11731 "physical_id": dev.physical_id,
11732 "pstatus": dev_pstatus,
11733 "sstatus": dev_sstatus,
11734 "children": dev_children,
11739 def Exec(self, feedback_fn):
11740 """Gather and return data"""
result = {}

cluster = self.cfg.GetClusterInfo()
11745 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11746 for i in self.wanted_instances)
11747 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11748 if self.op.static or pnode.offline:
11749 remote_state = None
11751 self.LogWarning("Primary node %s is marked offline, returning static"
11752 " information only for instance %s" %
11753 (pnode.name, instance.name))
11755 remote_info = self.rpc.call_instance_info(instance.primary_node,
11757 instance.hypervisor)
11758 remote_info.Raise("Error checking node %s" % instance.primary_node)
11759 remote_info = remote_info.payload
11760 if remote_info and "state" in remote_info:
11761 remote_state = "up"
11763 if instance.admin_state == constants.ADMINST_UP:
11764 remote_state = "down"
11766 remote_state = instance.admin_state
disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
            instance.disks)
11771 result[instance.name] = {
11772 "name": instance.name,
11773 "config_state": instance.admin_state,
11774 "run_state": remote_state,
11775 "pnode": instance.primary_node,
11776 "snodes": instance.secondary_nodes,
11778 # this happens to be the same format used for hooks
11779 "nics": _NICListToTuple(self, instance.nics),
11780 "disk_template": instance.disk_template,
11782 "hypervisor": instance.hypervisor,
11783 "network_port": instance.network_port,
11784 "hv_instance": instance.hvparams,
11785 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11786 "be_instance": instance.beparams,
11787 "be_actual": cluster.FillBE(instance),
11788 "os_instance": instance.osparams,
11789 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11790 "serial_no": instance.serial_no,
11791 "mtime": instance.mtime,
11792 "ctime": instance.ctime,
11793 "uuid": instance.uuid,
11799 class LUInstanceSetParams(LogicalUnit):
11800 """Modifies an instances's parameters.
11803 HPATH = "instance-modify"
11804 HTYPE = constants.HTYPE_INSTANCE
11807 def CheckArguments(self):
11808 if not (self.op.nics or self.op.disks or self.op.disk_template or
11809 self.op.hvparams or self.op.beparams or self.op.os_name or
11810 self.op.online_inst or self.op.offline_inst or
11811 self.op.runtime_mem):
11812 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11814 if self.op.hvparams:
11815 _CheckGlobalHvParams(self.op.hvparams)
11819 for disk_op, disk_dict in self.op.disks:
11820 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11821 if disk_op == constants.DDM_REMOVE:
11822 disk_addremove += 1
11824 elif disk_op == constants.DDM_ADD:
11825 disk_addremove += 1
11827 if not isinstance(disk_op, int):
11828 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11829 if not isinstance(disk_dict, dict):
11830 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11831 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11833 if disk_op == constants.DDM_ADD:
11834 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11835 if mode not in constants.DISK_ACCESS_SET:
11836 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11837 errors.ECODE_INVAL)
11838 size = disk_dict.get(constants.IDISK_SIZE, None)
11840 raise errors.OpPrereqError("Required disk parameter size missing",
11841 errors.ECODE_INVAL)
11844 except (TypeError, ValueError), err:
11845 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11846 str(err), errors.ECODE_INVAL)
11847 disk_dict[constants.IDISK_SIZE] = size
11849 # modification of disk
11850 if constants.IDISK_SIZE in disk_dict:
11851 raise errors.OpPrereqError("Disk size change not possible, use"
11852 " grow-disk", errors.ECODE_INVAL)
11854 if disk_addremove > 1:
11855 raise errors.OpPrereqError("Only one disk add or remove operation"
11856 " supported at a time", errors.ECODE_INVAL)
11858 if self.op.disks and self.op.disk_template is not None:
11859 raise errors.OpPrereqError("Disk template conversion and other disk"
11860 " changes not supported at the same time",
11861 errors.ECODE_INVAL)
11863 if (self.op.disk_template and
11864 self.op.disk_template in constants.DTS_INT_MIRROR and
11865 self.op.remote_node is None):
11866 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11867 " one requires specifying a secondary node",
11868 errors.ECODE_INVAL)
11872 for nic_op, nic_dict in self.op.nics:
11873 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11874 if nic_op == constants.DDM_REMOVE:
11877 elif nic_op == constants.DDM_ADD:
11880 if not isinstance(nic_op, int):
11881 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11882 if not isinstance(nic_dict, dict):
11883 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11884 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11886 # nic_dict should be a dict
11887 nic_ip = nic_dict.get(constants.INIC_IP, None)
11888 if nic_ip is not None:
11889 if nic_ip.lower() == constants.VALUE_NONE:
11890 nic_dict[constants.INIC_IP] = None
11892 if not netutils.IPAddress.IsValid(nic_ip):
11893 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11894 errors.ECODE_INVAL)
11896 nic_bridge = nic_dict.get("bridge", None)
11897 nic_link = nic_dict.get(constants.INIC_LINK, None)
11898 if nic_bridge and nic_link:
11899 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11900 " at the same time", errors.ECODE_INVAL)
11901 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11902 nic_dict["bridge"] = None
11903 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11904 nic_dict[constants.INIC_LINK] = None
11906 if nic_op == constants.DDM_ADD:
11907 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11908 if nic_mac is None:
11909 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11911 if constants.INIC_MAC in nic_dict:
11912 nic_mac = nic_dict[constants.INIC_MAC]
11913 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11914 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11916 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11917 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11918 " modifying an existing nic",
11919 errors.ECODE_INVAL)
11921 if nic_addremove > 1:
11922 raise errors.OpPrereqError("Only one NIC add or remove operation"
11923 " supported at a time", errors.ECODE_INVAL)
11925 def ExpandNames(self):
11926 self._ExpandAndLockInstance()
11927 # Can't even acquire node locks in shared mode as upcoming changes in
11928 # Ganeti 2.6 will start to modify the node object on disk conversion
11929 self.needed_locks[locking.LEVEL_NODE] = []
11930 self.needed_locks[locking.LEVEL_NODE_RES] = []
11931 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11933 def DeclareLocks(self, level):
11934 if level == locking.LEVEL_NODE:
11935 self._LockInstancesNodes()
11936 if self.op.disk_template and self.op.remote_node:
11937 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11938 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11939 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11941 self.needed_locks[locking.LEVEL_NODE_RES] = \
11942 self.needed_locks[locking.LEVEL_NODE][:]
11944 def BuildHooksEnv(self):
11945 """Build hooks env.
11947 This runs on the master, primary and secondaries.
11951 if constants.BE_MINMEM in self.be_new:
11952 args["minmem"] = self.be_new[constants.BE_MINMEM]
11953 if constants.BE_MAXMEM in self.be_new:
11954 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11955 if constants.BE_VCPUS in self.be_new:
11956 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11957 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11958 # information at all.
11961 nic_override = dict(self.op.nics)
11962 for idx, nic in enumerate(self.instance.nics):
11963 if idx in nic_override:
11964 this_nic_override = nic_override[idx]
11966 this_nic_override = {}
11967 if constants.INIC_IP in this_nic_override:
11968 ip = this_nic_override[constants.INIC_IP]
11971 if constants.INIC_MAC in this_nic_override:
11972 mac = this_nic_override[constants.INIC_MAC]
11975 if idx in self.nic_pnew:
11976 nicparams = self.nic_pnew[idx]
11978 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11979 mode = nicparams[constants.NIC_MODE]
11980 link = nicparams[constants.NIC_LINK]
11981 args["nics"].append((ip, mac, mode, link))
11982 if constants.DDM_ADD in nic_override:
11983 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11984 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11985 nicparams = self.nic_pnew[constants.DDM_ADD]
11986 mode = nicparams[constants.NIC_MODE]
11987 link = nicparams[constants.NIC_LINK]
11988 args["nics"].append((ip, mac, mode, link))
11989 elif constants.DDM_REMOVE in nic_override:
11990 del args["nics"][-1]
11992 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11993 if self.op.disk_template:
11994 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11995 if self.op.runtime_mem:
11996 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12000 def BuildHooksNodes(self):
12001 """Build hooks nodes.
nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
12007 def CheckPrereq(self):
12008 """Check prerequisites.
12010 This only checks the instance list against the existing names.
12013 # checking the new params on the primary/secondary nodes
12015 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12016 cluster = self.cluster = self.cfg.GetClusterInfo()
12017 assert self.instance is not None, \
12018 "Cannot retrieve locked instance %s" % self.op.instance_name
12019 pnode = instance.primary_node
12020 nodelist = list(instance.all_nodes)
12021 pnode_info = self.cfg.GetNodeInfo(pnode)
12022 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12025 if self.op.os_name and not self.op.force:
12026 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12027 self.op.force_variant)
12028 instance_os = self.op.os_name
12030 instance_os = instance.os
12032 if self.op.disk_template:
12033 if instance.disk_template == self.op.disk_template:
12034 raise errors.OpPrereqError("Instance already has disk template %s" %
12035 instance.disk_template, errors.ECODE_INVAL)
12037 if (instance.disk_template,
12038 self.op.disk_template) not in self._DISK_CONVERSIONS:
12039 raise errors.OpPrereqError("Unsupported disk template conversion from"
12040 " %s to %s" % (instance.disk_template,
12041 self.op.disk_template),
12042 errors.ECODE_INVAL)
12043 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12044 msg="cannot change disk template")
12045 if self.op.disk_template in constants.DTS_INT_MIRROR:
12046 if self.op.remote_node == pnode:
12047 raise errors.OpPrereqError("Given new secondary node %s is the same"
12048 " as the primary node of the instance" %
12049 self.op.remote_node, errors.ECODE_STATE)
12050 _CheckNodeOnline(self, self.op.remote_node)
12051 _CheckNodeNotDrained(self, self.op.remote_node)
12052 # FIXME: here we assume that the old instance type is DT_PLAIN
12053 assert instance.disk_template == constants.DT_PLAIN
12054 disks = [{constants.IDISK_SIZE: d.size,
12055 constants.IDISK_VG: d.logical_id[0]}
12056 for d in instance.disks]
12057 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12058 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12060 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12061 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12062 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12063 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12064 ignore=self.op.ignore_ipolicy)
12065 if pnode_info.group != snode_info.group:
12066 self.LogWarning("The primary and secondary nodes are in two"
12067 " different node groups; the disk parameters"
12068 " from the first disk's node group will be"
12071 # hvparams processing
12072 if self.op.hvparams:
12073 hv_type = instance.hypervisor
12074 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12075 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12076 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12079 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12080 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12081 self.hv_proposed = self.hv_new = hv_new # the new actual values
12082 self.hv_inst = i_hvdict # the new dict (without defaults)
12084 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12086 self.hv_new = self.hv_inst = {}
12088 # beparams processing
12089 if self.op.beparams:
12090 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12092 objects.UpgradeBeParams(i_bedict)
12093 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12094 be_new = cluster.SimpleFillBE(i_bedict)
12095 self.be_proposed = self.be_new = be_new # the new actual values
12096 self.be_inst = i_bedict # the new dict (without defaults)
12098 self.be_new = self.be_inst = {}
12099 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12100 be_old = cluster.FillBE(instance)
# CPU param validation -- checking every time a parameter is
12103 # changed to cover all cases where either CPU mask or vcpus have
12105 if (constants.BE_VCPUS in self.be_proposed and
12106 constants.HV_CPU_MASK in self.hv_proposed):
12108 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12109 # Verify mask is consistent with number of vCPUs. Can skip this
12110 # test if only 1 entry in the CPU mask, which means same mask
12111 # is applied to all vCPUs.
12112 if (len(cpu_list) > 1 and
12113 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12114 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12116 (self.be_proposed[constants.BE_VCPUS],
12117 self.hv_proposed[constants.HV_CPU_MASK]),
12118 errors.ECODE_INVAL)
12120 # Only perform this test if a new CPU mask is given
12121 if constants.HV_CPU_MASK in self.hv_new:
12122 # Calculate the largest CPU number requested
12123 max_requested_cpu = max(map(max, cpu_list))
12124 # Check that all of the instance's nodes have enough physical CPUs to
12125 # satisfy the requested CPU mask
12126 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12127 max_requested_cpu + 1, instance.hypervisor)
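# Worked example for the check above, using hypothetical parameters: with
# hv:cpu_mask="0-1:2-3:4-5" ParseMultiCpuMask yields three per-vCPU entries,
# so be:vcpus must also be 3; max_requested_cpu is 5, hence every node of
# the instance needs at least 6 physical CPUs. A single-entry mask such as
# "0-3" applies to all vCPUs and skips the length comparison.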
12129 # osparams processing
12130 if self.op.osparams:
12131 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12132 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12133 self.os_inst = i_osdict # the new dict (without defaults)
12139 #TODO(dynmem): do the appropriate check involving MINMEM
12140 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12141 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12142 mem_check_list = [pnode]
12143 if be_new[constants.BE_AUTO_BALANCE]:
12144 # either we changed auto_balance to yes or it was from before
12145 mem_check_list.extend(instance.secondary_nodes)
12146 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12147 instance.hypervisor)
12148 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12149 [instance.hypervisor])
12150 pninfo = nodeinfo[pnode]
12151 msg = pninfo.fail_msg
12153 # Assume the primary node is unreachable and go ahead
12154 self.warn.append("Can't get info from primary node %s: %s" %
12157 (_, _, (pnhvinfo, )) = pninfo.payload
12158 if not isinstance(pnhvinfo.get("memory_free", None), int):
12159 self.warn.append("Node data from primary node %s doesn't contain"
12160 " free memory information" % pnode)
12161 elif instance_info.fail_msg:
12162 self.warn.append("Can't get instance runtime information: %s" %
12163 instance_info.fail_msg)
12165 if instance_info.payload:
12166 current_mem = int(instance_info.payload["memory"])
12168 # Assume instance not running
12169 # (there is a slight race condition here, but it's not very
12170 # probable, and we have no other way to check)
12171 # TODO: Describe race condition
12173 #TODO(dynmem): do the appropriate check involving MINMEM
12174 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12175 pnhvinfo["memory_free"])
12177 raise errors.OpPrereqError("This change will prevent the instance"
12178 " from starting, due to %d MB of memory"
12179 " missing on its primary node" %
12181 errors.ECODE_NORES)
12183 if be_new[constants.BE_AUTO_BALANCE]:
12184 for node, nres in nodeinfo.items():
if node not in instance.secondary_nodes:
  continue
12187 nres.Raise("Can't get info from secondary node %s" % node,
12188 prereq=True, ecode=errors.ECODE_STATE)
12189 (_, _, (nhvinfo, )) = nres.payload
12190 if not isinstance(nhvinfo.get("memory_free", None), int):
12191 raise errors.OpPrereqError("Secondary node %s didn't return free"
12192 " memory information" % node,
12193 errors.ECODE_STATE)
12194 #TODO(dynmem): do the appropriate check involving MINMEM
12195 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12196 raise errors.OpPrereqError("This change will prevent the instance"
12197 " from failover to its secondary node"
12198 " %s, due to not enough memory" % node,
12199 errors.ECODE_STATE)
12201 if self.op.runtime_mem:
12202 remote_info = self.rpc.call_instance_info(instance.primary_node,
12204 instance.hypervisor)
12205 remote_info.Raise("Error checking node %s" % instance.primary_node)
12206 if not remote_info.payload: # not running already
12207 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12208 errors.ECODE_STATE)
12210 current_memory = remote_info.payload["memory"]
12211 if (not self.op.force and
12212 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12213 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12214 raise errors.OpPrereqError("Instance %s must have memory between %d"
12215 " and %d MB of memory unless --force is"
12216 " given" % (instance.name,
12217 self.be_proposed[constants.BE_MINMEM],
12218 self.be_proposed[constants.BE_MAXMEM]),
12219 errors.ECODE_INVAL)
12221 if self.op.runtime_mem > current_memory:
12222 _CheckNodeFreeMemory(self, instance.primary_node,
12223 "ballooning memory for instance %s" %
12225 self.op.memory - current_memory,
12226 instance.hypervisor)
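# Example of the ballooning rules above (hypothetical numbers): an instance
# with be:minmem=512 and be:maxmem=2048 currently running with 1024 MB may,
# without --force, only be ballooned to a value within [512, 2048]; growing
# it to 2048 additionally requires 2048 - 1024 = 1024 MB of free memory on
# the primary node.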
12230 self.nic_pinst = {}
12231 for nic_op, nic_dict in self.op.nics:
12232 if nic_op == constants.DDM_REMOVE:
12233 if not instance.nics:
12234 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
12235 errors.ECODE_INVAL)
12237 if nic_op != constants.DDM_ADD:
12239 if not instance.nics:
12240 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
12241 " no NICs" % nic_op,
12242 errors.ECODE_INVAL)
12243 if nic_op < 0 or nic_op >= len(instance.nics):
12244 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
12246 (nic_op, len(instance.nics) - 1),
12247 errors.ECODE_INVAL)
12248 old_nic_params = instance.nics[nic_op].nicparams
12249 old_nic_ip = instance.nics[nic_op].ip
12251 old_nic_params = {}
12254 update_params_dict = dict([(key, nic_dict[key])
12255 for key in constants.NICS_PARAMETERS
12256 if key in nic_dict])
12258 if "bridge" in nic_dict:
12259 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12261 new_nic_params = _GetUpdatedParams(old_nic_params,
12262 update_params_dict)
12263 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12264 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12265 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12266 self.nic_pinst[nic_op] = new_nic_params
12267 self.nic_pnew[nic_op] = new_filled_nic_params
12268 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12270 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12271 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12272 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12274 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12276 self.warn.append(msg)
12278 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12279 if new_nic_mode == constants.NIC_MODE_ROUTED:
12280 if constants.INIC_IP in nic_dict:
12281 nic_ip = nic_dict[constants.INIC_IP]
12283 nic_ip = old_nic_ip
12285 raise errors.OpPrereqError("Cannot set the nic ip to None"
12286 " on a routed nic", errors.ECODE_INVAL)
12287 if constants.INIC_MAC in nic_dict:
12288 nic_mac = nic_dict[constants.INIC_MAC]
12289 if nic_mac is None:
12290 raise errors.OpPrereqError("Cannot set the nic mac to None",
12291 errors.ECODE_INVAL)
12292 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12293 # otherwise generate the mac
12294 nic_dict[constants.INIC_MAC] = \
12295 self.cfg.GenerateMAC(self.proc.GetECId())
12297 # or validate/reserve the current one
12299 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12300 except errors.ReservationError:
12301 raise errors.OpPrereqError("MAC address %s already in use"
12302 " in cluster" % nic_mac,
12303 errors.ECODE_NOTUNIQUE)
12306 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12307 raise errors.OpPrereqError("Disk operations not supported for"
12308 " diskless instances",
12309 errors.ECODE_INVAL)
12310 for disk_op, _ in self.op.disks:
12311 if disk_op == constants.DDM_REMOVE:
12312 if len(instance.disks) == 1:
12313 raise errors.OpPrereqError("Cannot remove the last disk of"
12314 " an instance", errors.ECODE_INVAL)
12315 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12316 msg="cannot remove disks")
12318 if (disk_op == constants.DDM_ADD and
12319 len(instance.disks) >= constants.MAX_DISKS):
12320 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12321 " add more" % constants.MAX_DISKS,
12322 errors.ECODE_STATE)
12323 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12325 if disk_op < 0 or disk_op >= len(instance.disks):
12326 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12328 (disk_op, len(instance.disks)),
12329 errors.ECODE_INVAL)
12331 # disabling the instance
12332 if self.op.offline_inst:
12333 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12334 msg="cannot change instance state to offline")
12336 # enabling the instance
12337 if self.op.online_inst:
12338 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12339 msg="cannot make instance go online")
12341 def _ConvertPlainToDrbd(self, feedback_fn):
12342 """Converts an instance from plain to drbd.
12345 feedback_fn("Converting template to drbd")
12346 instance = self.instance
12347 pnode = instance.primary_node
12348 snode = self.op.remote_node
12350 assert instance.disk_template == constants.DT_PLAIN
12352 # create a fake disk info for _GenerateDiskTemplate
12353 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12354 constants.IDISK_VG: d.logical_id[0]}
12355 for d in instance.disks]
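# Sketch of the fake disk_info built above, with hypothetical values for a
# single 10 GiB plain disk living in volume group "xenvg":
#   [{constants.IDISK_SIZE: 10240,
#     constants.IDISK_MODE: constants.DISK_RDWR,
#     constants.IDISK_VG: "xenvg"}]
# _GenerateDiskTemplate only needs size/mode/vg here; the DRBD-specific
# attributes (minors, port, secret) are allocated fresh for the new disks.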
12356 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12357 instance.name, pnode, [snode],
12358 disk_info, None, None, 0, feedback_fn,
12360 info = _GetInstanceInfoText(instance)
12361 feedback_fn("Creating aditional volumes...")
12362 # first, create the missing data and meta devices
12363 for disk in new_disks:
12364 # unfortunately this is... not too nice
12365 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12367 for child in disk.children:
12368 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12369 # at this stage, all new LVs have been created, we can rename the
12371 feedback_fn("Renaming original volumes...")
12372 rename_list = [(o, n.children[0].logical_id)
12373 for (o, n) in zip(instance.disks, new_disks)]
12374 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12375 result.Raise("Failed to rename original LVs")
12377 feedback_fn("Initializing DRBD devices...")
12378 # all child devices are in place, we can now create the DRBD devices
12379 for disk in new_disks:
12380 for node in [pnode, snode]:
12381 f_create = node == pnode
12382 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12384 # at this point, the instance has been modified
12385 instance.disk_template = constants.DT_DRBD8
12386 instance.disks = new_disks
12387 self.cfg.Update(instance, feedback_fn)
12389 # Release node locks while waiting for sync
12390 _ReleaseLocks(self, locking.LEVEL_NODE)
12392 # disks are created, waiting for sync
12393 disk_abort = not _WaitForSync(self, instance,
12394 oneshot=not self.op.wait_for_sync)
12396 raise errors.OpExecError("There are some degraded disks for"
12397 " this instance, please cleanup manually")
12399 # Node resource locks will be released by caller
12401 def _ConvertDrbdToPlain(self, feedback_fn):
12402 """Converts an instance from drbd to plain.
12405 instance = self.instance
12407 assert len(instance.secondary_nodes) == 1
12408 assert instance.disk_template == constants.DT_DRBD8
12410 pnode = instance.primary_node
12411 snode = instance.secondary_nodes[0]
12412 feedback_fn("Converting template to plain")
12414 old_disks = instance.disks
12415 new_disks = [d.children[0] for d in old_disks]
12417 # copy over size and mode
12418 for parent, child in zip(old_disks, new_disks):
12419 child.size = parent.size
12420 child.mode = parent.mode
12422 # update instance structure
12423 instance.disks = new_disks
12424 instance.disk_template = constants.DT_PLAIN
12425 self.cfg.Update(instance, feedback_fn)
12427 # Release locks in case removing disks takes a while
12428 _ReleaseLocks(self, locking.LEVEL_NODE)
12430 feedback_fn("Removing volumes on the secondary node...")
12431 for disk in old_disks:
12432 self.cfg.SetDiskID(disk, snode)
12433 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12435 self.LogWarning("Could not remove block device %s on node %s,"
12436 " continuing anyway: %s", disk.iv_name, snode, msg)
12438 feedback_fn("Removing unneeded volumes on the primary node...")
12439 for idx, disk in enumerate(old_disks):
12440 meta = disk.children[1]
12441 self.cfg.SetDiskID(meta, pnode)
12442 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12444 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12445 " continuing anyway: %s", idx, pnode, msg)
12447 # this is a DRBD disk, return its port to the pool
12448 for disk in old_disks:
12449 tcp_port = disk.logical_id[2]
12450 self.cfg.AddTcpUdpPort(tcp_port)
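# Example (hypothetical values): a DRBD8 logical_id such as
#   ("node1", "node2", 11001, 0, 1, "s3cr3t")
# carries the network port in slot 2, so disk.logical_id[2] == 11001 is the
# TCP/UDP port returned to the cluster-wide pool in the loop above.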
12452 # Node resource locks will be released by caller
12454 def Exec(self, feedback_fn):
12455 """Modifies an instance.
12457 All parameters take effect only at the next restart of the instance.
12460 # Process here the warnings from CheckPrereq, as we don't have a
12461 # feedback_fn there.
12462 for warn in self.warn:
12463 feedback_fn("WARNING: %s" % warn)
12465 assert ((self.op.disk_template is None) ^
12466 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12467 "Not owning any node resource locks"
instance = self.instance
result = []
12473 if self.op.runtime_mem:
12474 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12476 self.op.runtime_mem)
12477 rpcres.Raise("Cannot modify instance runtime memory")
12478 result.append(("runtime_memory", self.op.runtime_mem))
12481 for disk_op, disk_dict in self.op.disks:
12482 if disk_op == constants.DDM_REMOVE:
12483 # remove the last disk
12484 device = instance.disks.pop()
12485 device_idx = len(instance.disks)
12486 for node, disk in device.ComputeNodeTree(instance.primary_node):
12487 self.cfg.SetDiskID(disk, node)
12488 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12490 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12491 " continuing anyway", device_idx, node, msg)
12492 result.append(("disk/%d" % device_idx, "remove"))
12494 # if this is a DRBD disk, return its port to the pool
12495 if device.dev_type in constants.LDS_DRBD:
12496 tcp_port = device.logical_id[2]
12497 self.cfg.AddTcpUdpPort(tcp_port)
12498 elif disk_op == constants.DDM_ADD:
12500 if instance.disk_template in (constants.DT_FILE,
12501 constants.DT_SHARED_FILE):
12502 file_driver, file_path = instance.disks[0].logical_id
12503 file_path = os.path.dirname(file_path)
12505 file_driver = file_path = None
12506 disk_idx_base = len(instance.disks)
12507 new_disk = _GenerateDiskTemplate(self,
12508 instance.disk_template,
12509 instance.name, instance.primary_node,
12510 instance.secondary_nodes,
12516 self.diskparams)[0]
12517 instance.disks.append(new_disk)
12518 info = _GetInstanceInfoText(instance)
12520 logging.info("Creating volume %s for instance %s",
12521 new_disk.iv_name, instance.name)
12522 # Note: this needs to be kept in sync with _CreateDisks
12524 for node in instance.all_nodes:
12525 f_create = node == instance.primary_node
try:
  _CreateBlockDev(self, node, instance, new_disk,
                  f_create, info, f_create)
12529 except errors.OpExecError, err:
self.LogWarning("Failed to create volume %s (%s) on"
                " node %s: %s",
                new_disk.iv_name, new_disk, node, err)
12533 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12534 (new_disk.size, new_disk.mode)))
12536 # change a given disk
12537 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12538 result.append(("disk.mode/%d" % disk_op,
12539 disk_dict[constants.IDISK_MODE]))
12541 if self.op.disk_template:
12543 check_nodes = set(instance.all_nodes)
12544 if self.op.remote_node:
12545 check_nodes.add(self.op.remote_node)
12546 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12547 owned = self.owned_locks(level)
12548 assert not (check_nodes - owned), \
12549 ("Not owning the correct locks, owning %r, expected at least %r" %
12550 (owned, check_nodes))
12552 r_shut = _ShutdownInstanceDisks(self, instance)
12554 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12555 " proceed with disk template conversion")
12556 mode = (instance.disk_template, self.op.disk_template)
12558 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12560 self.cfg.ReleaseDRBDMinors(instance.name)
12562 result.append(("disk_template", self.op.disk_template))
12564 assert instance.disk_template == self.op.disk_template, \
12565 ("Expected disk template '%s', found '%s'" %
12566 (self.op.disk_template, instance.disk_template))
12568 # Release node and resource locks if there are any (they might already have
12569 # been released during disk conversion)
12570 _ReleaseLocks(self, locking.LEVEL_NODE)
12571 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12574 for nic_op, nic_dict in self.op.nics:
12575 if nic_op == constants.DDM_REMOVE:
12576 # remove the last nic
12577 del instance.nics[-1]
12578 result.append(("nic.%d" % len(instance.nics), "remove"))
12579 elif nic_op == constants.DDM_ADD:
12580 # mac and bridge should be set, by now
12581 mac = nic_dict[constants.INIC_MAC]
12582 ip = nic_dict.get(constants.INIC_IP, None)
12583 nicparams = self.nic_pinst[constants.DDM_ADD]
12584 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12585 instance.nics.append(new_nic)
12586 result.append(("nic.%d" % (len(instance.nics) - 1),
12587 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12588 (new_nic.mac, new_nic.ip,
12589 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12590 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12593 for key in (constants.INIC_MAC, constants.INIC_IP):
12594 if key in nic_dict:
12595 setattr(instance.nics[nic_op], key, nic_dict[key])
12596 if nic_op in self.nic_pinst:
12597 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12598 for key, val in nic_dict.iteritems():
12599 result.append(("nic.%s/%d" % (key, nic_op), val))
12602 if self.op.hvparams:
12603 instance.hvparams = self.hv_inst
12604 for key, val in self.op.hvparams.iteritems():
12605 result.append(("hv/%s" % key, val))
12608 if self.op.beparams:
12609 instance.beparams = self.be_inst
12610 for key, val in self.op.beparams.iteritems():
12611 result.append(("be/%s" % key, val))
12614 if self.op.os_name:
12615 instance.os = self.op.os_name
12618 if self.op.osparams:
12619 instance.osparams = self.os_inst
12620 for key, val in self.op.osparams.iteritems():
12621 result.append(("os/%s" % key, val))
12623 # online/offline instance
12624 if self.op.online_inst:
12625 self.cfg.MarkInstanceDown(instance.name)
12626 result.append(("admin_state", constants.ADMINST_DOWN))
12627 if self.op.offline_inst:
12628 self.cfg.MarkInstanceOffline(instance.name)
12629 result.append(("admin_state", constants.ADMINST_OFFLINE))
12631 self.cfg.Update(instance, feedback_fn)
12633 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12634 self.owned_locks(locking.LEVEL_NODE)), \
12635 "All node locks should have been released by now"
12639 _DISK_CONVERSIONS = {
12640 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12641 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12642 }
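# A minimal sketch of the dispatch pattern behind _DISK_CONVERSIONS above: the
# (old template, new template) pair selects the conversion helper, as in
# "mode = (instance.disk_template, self.op.disk_template)" in Exec. The helper
# below is hypothetical and only illustrates the lookup.
def _ExampleConvertDispatch(old_template, new_template, handlers):
  mode = (old_template, new_template)
  if mode not in handlers:
    raise ValueError("Unsupported disk template conversion %r" % (mode, ))
  # Each handler is a callable such as _ConvertPlainToDrbd(lu, feedback_fn)
  return handlers[mode]

# e.g. _ExampleConvertDispatch(constants.DT_PLAIN, constants.DT_DRBD8,
#                              LUInstanceSetParams._DISK_CONVERSIONS)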
12645 class LUInstanceChangeGroup(LogicalUnit):
12646 HPATH = "instance-change-group"
12647 HTYPE = constants.HTYPE_INSTANCE
12650 def ExpandNames(self):
12651 self.share_locks = _ShareAll()
12652 self.needed_locks = {
12653 locking.LEVEL_NODEGROUP: [],
12654 locking.LEVEL_NODE: [],
12657 self._ExpandAndLockInstance()
12659 if self.op.target_groups:
12660 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12661 self.op.target_groups)
12663 self.req_target_uuids = None
12665 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12667 def DeclareLocks(self, level):
12668 if level == locking.LEVEL_NODEGROUP:
12669 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12671 if self.req_target_uuids:
12672 lock_groups = set(self.req_target_uuids)
12674 # Lock all groups used by instance optimistically; this requires going
12675 # via the node before it's locked, requiring verification later on
12676 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12677 lock_groups.update(instance_groups)
12679 # No target groups, need to lock all of them
12680 lock_groups = locking.ALL_SET
12682 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12684 elif level == locking.LEVEL_NODE:
12685 if self.req_target_uuids:
12686 # Lock all nodes used by instances
12687 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12688 self._LockInstancesNodes()
12690 # Lock all nodes in all potential target groups
12691 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12692 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12693 member_nodes = [node_name
12694 for group in lock_groups
12695 for node_name in self.cfg.GetNodeGroup(group).members]
12696 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12698 # Lock all nodes as all groups are potential targets
12699 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12701 def CheckPrereq(self):
12702 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12703 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12704 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12706 assert (self.req_target_uuids is None or
12707 owned_groups.issuperset(self.req_target_uuids))
12708 assert owned_instances == set([self.op.instance_name])
12710 # Get instance information
12711 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12713 # Check if node groups for locked instance are still correct
12714 assert owned_nodes.issuperset(self.instance.all_nodes), \
12715 ("Instance %s's nodes changed while we kept the lock" %
12716 self.op.instance_name)
12718 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12721 if self.req_target_uuids:
12722 # User requested specific target groups
12723 self.target_uuids = self.req_target_uuids
12725 # All groups except those used by the instance are potential targets
12726 self.target_uuids = owned_groups - inst_groups
12728 conflicting_groups = self.target_uuids & inst_groups
12729 if conflicting_groups:
12730 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12731 " used by the instance '%s'" %
12732 (utils.CommaJoin(conflicting_groups),
12733 self.op.instance_name),
12734 errors.ECODE_INVAL)
12736 if not self.target_uuids:
12737 raise errors.OpPrereqError("There are no possible target groups",
12738 errors.ECODE_INVAL)
12740 def BuildHooksEnv(self):
12741 """Build hooks env.
12744 assert self.target_uuids
12747 "TARGET_GROUPS": " ".join(self.target_uuids),
12750 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12754 def BuildHooksNodes(self):
12755 """Build hooks nodes.
12758 mn = self.cfg.GetMasterNode()
12759 return ([mn], [mn])
12761 def Exec(self, feedback_fn):
12762 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12764 assert instances == [self.op.instance_name], "Instance not locked"
12766 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12767 instances=instances, target_groups=list(self.target_uuids))
12769 ial.Run(self.op.iallocator)
12771 if not ial.success:
12772 raise errors.OpPrereqError("Can't compute solution for changing group of"
12773 " instance '%s' using iallocator '%s': %s" %
12774 (self.op.instance_name, self.op.iallocator,
12776 errors.ECODE_NORES)
12778 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12780 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12781 " instance '%s'", len(jobs), self.op.instance_name)
12783 return ResultWithJobs(jobs)
12786 class LUBackupQuery(NoHooksLU):
12787 """Query the exports list
12792 def ExpandNames(self):
12793 self.needed_locks = {}
12794 self.share_locks[locking.LEVEL_NODE] = 1
12795 if not self.op.nodes:
12796 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12798 self.needed_locks[locking.LEVEL_NODE] = \
12799 _GetWantedNodes(self, self.op.nodes)
12801 def Exec(self, feedback_fn):
12802 """Compute the list of all the exported system images.
12805 @return: a dictionary with the structure node->(export-list)
12806 where export-list is a list of the instances exported on
12810 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12811 rpcresult = self.rpc.call_export_list(self.nodes)
12812 result = {}
12813 for node in rpcresult:
12814 if rpcresult[node].fail_msg:
12815 result[node] = False
12816 else:
12817 result[node] = rpcresult[node].payload
12819 return result
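# A small consumption sketch for the node->(export list | False) mapping built
# above; _ExampleSummarizeExports is a hypothetical helper, not used elsewhere.
def _ExampleSummarizeExports(exports):
  ok = dict((node, payload) for (node, payload) in exports.items()
            if payload is not False)
  failed = [node for (node, payload) in exports.items() if payload is False]
  return (ok, failed)

# _ExampleSummarizeExports({"node1": ["inst1"], "node2": False})
# -> ({"node1": ["inst1"]}, ["node2"])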
12822 class LUBackupPrepare(NoHooksLU):
12823 """Prepares an instance for an export and returns useful information.
12828 def ExpandNames(self):
12829 self._ExpandAndLockInstance()
12831 def CheckPrereq(self):
12832 """Check prerequisites.
12835 instance_name = self.op.instance_name
12837 self.instance = self.cfg.GetInstanceInfo(instance_name)
12838 assert self.instance is not None, \
12839 "Cannot retrieve locked instance %s" % self.op.instance_name
12840 _CheckNodeOnline(self, self.instance.primary_node)
12842 self._cds = _GetClusterDomainSecret()
12844 def Exec(self, feedback_fn):
12845 """Prepares an instance for an export.
12848 instance = self.instance
12850 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12851 salt = utils.GenerateSecret(8)
12853 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12854 result = self.rpc.call_x509_cert_create(instance.primary_node,
12855 constants.RIE_CERT_VALIDITY)
12856 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12858 (name, cert_pem) = result.payload
12860 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12864 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12865 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12867 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12873 class LUBackupExport(LogicalUnit):
12874 """Export an instance to an image in the cluster.
12877 HPATH = "instance-export"
12878 HTYPE = constants.HTYPE_INSTANCE
12881 def CheckArguments(self):
12882 """Check the arguments.
12885 self.x509_key_name = self.op.x509_key_name
12886 self.dest_x509_ca_pem = self.op.destination_x509_ca
12888 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12889 if not self.x509_key_name:
12890 raise errors.OpPrereqError("Missing X509 key name for encryption",
12891 errors.ECODE_INVAL)
12893 if not self.dest_x509_ca_pem:
12894 raise errors.OpPrereqError("Missing destination X509 CA",
12895 errors.ECODE_INVAL)
12897 def ExpandNames(self):
12898 self._ExpandAndLockInstance()
12900 # Lock all nodes for local exports
12901 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12902 # FIXME: lock only instance primary and destination node
12904 # Sad but true, for now we have to lock all nodes, as we don't know where
12905 # the previous export might be, and in this LU we search for it and
12906 # remove it from its current node. In the future we could fix this by:
12907 # - making a tasklet to search (share-lock all), then create the
12908 # new one, then one to remove, after
12909 # - removing the removal operation altogether
12910 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12912 def DeclareLocks(self, level):
12913 """Last minute lock declaration."""
12914 # All nodes are locked anyway, so nothing to do here.
12916 def BuildHooksEnv(self):
12917 """Build hooks env.
12919 This will run on the master, primary node and target node.
12923 "EXPORT_MODE": self.op.mode,
12924 "EXPORT_NODE": self.op.target_node,
12925 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12926 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12927 # TODO: Generic function for boolean env variables
12928 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12931 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12935 def BuildHooksNodes(self):
12936 """Build hooks nodes.
12939 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12941 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12942 nl.append(self.op.target_node)
12946 def CheckPrereq(self):
12947 """Check prerequisites.
12949 This checks that the instance and node names are valid.
12952 instance_name = self.op.instance_name
12954 self.instance = self.cfg.GetInstanceInfo(instance_name)
12955 assert self.instance is not None, \
12956 "Cannot retrieve locked instance %s" % self.op.instance_name
12957 _CheckNodeOnline(self, self.instance.primary_node)
12959 if (self.op.remove_instance and
12960 self.instance.admin_state == constants.ADMINST_UP and
12961 not self.op.shutdown):
12962 raise errors.OpPrereqError("Can not remove instance without shutting it"
12965 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12966 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12967 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12968 assert self.dst_node is not None
12970 _CheckNodeOnline(self, self.dst_node.name)
12971 _CheckNodeNotDrained(self, self.dst_node.name)
12974 self.dest_disk_info = None
12975 self.dest_x509_ca = None
12977 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12978 self.dst_node = None
12980 if len(self.op.target_node) != len(self.instance.disks):
12981 raise errors.OpPrereqError(("Received destination information for %s"
12982 " disks, but instance %s has %s disks") %
12983 (len(self.op.target_node), instance_name,
12984 len(self.instance.disks)),
12985 errors.ECODE_INVAL)
12987 cds = _GetClusterDomainSecret()
12989 # Check X509 key name
12991 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12992 except (TypeError, ValueError), err:
12993 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12995 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12996 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12997 errors.ECODE_INVAL)
12999 # Load and verify CA
13001 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13002 except OpenSSL.crypto.Error, err:
13003 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13004 (err, ), errors.ECODE_INVAL)
13006 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13007 if errcode is not None:
13008 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13009 (msg, ), errors.ECODE_INVAL)
13011 self.dest_x509_ca = cert
13013 # Verify target information
13015 for idx, disk_data in enumerate(self.op.target_node):
13017 (host, port, magic) = \
13018 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13019 except errors.GenericError, err:
13020 raise errors.OpPrereqError("Target info for disk %s: %s" %
13021 (idx, err), errors.ECODE_INVAL)
13023 disk_info.append((host, port, magic))
13025 assert len(disk_info) == len(self.op.target_node)
13026 self.dest_disk_info = disk_info
13029 raise errors.ProgrammerError("Unhandled export mode %r" %
13030 self.op.mode)
13032 # instance disk type verification
13033 # TODO: Implement export support for file-based disks
13034 for disk in self.instance.disks:
13035 if disk.dev_type == constants.LD_FILE:
13036 raise errors.OpPrereqError("Export not supported for instances with"
13037 " file-based disks", errors.ECODE_INVAL)
13039 def _CleanupExports(self, feedback_fn):
13040 """Removes exports of current instance from all other nodes.
13042 If an instance in a cluster with nodes A..D was exported to node C, its
13043 exports will be removed from the nodes A, B and D.
13046 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13048 nodelist = self.cfg.GetNodeList()
13049 nodelist.remove(self.dst_node.name)
13051 # on one-node clusters nodelist will be empty after the removal
13052 # if we proceed, the backup would be removed because OpBackupQuery
13053 # substitutes an empty list with the full cluster node list.
13054 iname = self.instance.name
13055 if nodelist:
13056 feedback_fn("Removing old exports for instance %s" % iname)
13057 exportlist = self.rpc.call_export_list(nodelist)
13058 for node in exportlist:
13059 if exportlist[node].fail_msg:
13061 if iname in exportlist[node].payload:
13062 msg = self.rpc.call_export_remove(node, iname).fail_msg
13064 self.LogWarning("Could not remove older export for instance %s"
13065 " on node %s: %s", iname, node, msg)
13067 def Exec(self, feedback_fn):
13068 """Export an instance to an image in the cluster.
13071 assert self.op.mode in constants.EXPORT_MODES
13073 instance = self.instance
13074 src_node = instance.primary_node
13076 if self.op.shutdown:
13077 # shutdown the instance, but not the disks
13078 feedback_fn("Shutting down instance %s" % instance.name)
13079 result = self.rpc.call_instance_shutdown(src_node, instance,
13080 self.op.shutdown_timeout)
13081 # TODO: Maybe ignore failures if ignore_remove_failures is set
13082 result.Raise("Could not shutdown instance %s on"
13083 " node %s" % (instance.name, src_node))
13085 # set the disks ID correctly since call_instance_start needs the
13086 # correct drbd minor to create the symlinks
13087 for disk in instance.disks:
13088 self.cfg.SetDiskID(disk, src_node)
13090 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13092 if activate_disks:
13093 # Activate the instance disks if we're exporting a stopped instance
13094 feedback_fn("Activating disks for %s" % instance.name)
13095 _StartInstanceDisks(self, instance, None)
13098 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13101 helper.CreateSnapshots()
13103 if (self.op.shutdown and
13104 instance.admin_state == constants.ADMINST_UP and
13105 not self.op.remove_instance):
13106 assert not activate_disks
13107 feedback_fn("Starting instance %s" % instance.name)
13108 result = self.rpc.call_instance_start(src_node,
13109 (instance, None, None), False)
13110 msg = result.fail_msg
13111 if msg:
13112 feedback_fn("Failed to start instance: %s" % msg)
13113 _ShutdownInstanceDisks(self, instance)
13114 raise errors.OpExecError("Could not start instance: %s" % msg)
13116 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13117 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13118 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13119 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13120 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13122 (key_name, _, _) = self.x509_key_name
13125 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13128 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13129 key_name, dest_ca_pem,
13134 # Check for backwards compatibility
13135 assert len(dresults) == len(instance.disks)
13136 assert compat.all(isinstance(i, bool) for i in dresults), \
13137 "Not all results are boolean: %r" % dresults
13141 feedback_fn("Deactivating disks for %s" % instance.name)
13142 _ShutdownInstanceDisks(self, instance)
13144 if not (compat.all(dresults) and fin_resu):
13145 failures = []
13146 if not fin_resu:
13147 failures.append("export finalization")
13148 if not compat.all(dresults):
13149 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13150 if not dsk)
13151 failures.append("disk export: disk(s) %s" % fdsk)
13153 raise errors.OpExecError("Export failed, errors in %s" %
13154 utils.CommaJoin(failures))
13156 # At this point, the export was successful, we can cleanup/finish
13158 # Remove instance if requested
13159 if self.op.remove_instance:
13160 feedback_fn("Removing instance %s" % instance.name)
13161 _RemoveInstance(self, feedback_fn, instance,
13162 self.op.ignore_remove_failures)
13164 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13165 self._CleanupExports(feedback_fn)
13167 return fin_resu, dresults
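# Sketch of how a caller could interpret the (fin_resu, dresults) pair
# returned above, mirroring the per-disk checks in Exec; hypothetical helper.
def _ExampleExportSucceeded(fin_resu, dresults):
  # fin_resu: overall finalization status; dresults: one boolean per disk
  return bool(fin_resu) and compat.all(dresults)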
13170 class LUBackupRemove(NoHooksLU):
13171 """Remove exports related to the named instance.
13176 def ExpandNames(self):
13177 self.needed_locks = {}
13178 # We need all nodes to be locked in order for RemoveExport to work, but we
13179 # don't need to lock the instance itself, as nothing will happen to it (and
13180 # we can remove exports also for a removed instance)
13181 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13183 def Exec(self, feedback_fn):
13184 """Remove any export.
13187 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13188 # If the instance was not found we'll try with the name that was passed in.
13189 # This will only work if it was an FQDN, though.
13190 fqdn_warn = False
13191 if not instance_name:
13192 fqdn_warn = True
13193 instance_name = self.op.instance_name
13195 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13196 exportlist = self.rpc.call_export_list(locked_nodes)
13198 for node in exportlist:
13199 msg = exportlist[node].fail_msg
13201 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13203 if instance_name in exportlist[node].payload:
13205 result = self.rpc.call_export_remove(node, instance_name)
13206 msg = result.fail_msg
13208 logging.error("Could not remove export for instance %s"
13209 " on node %s: %s", instance_name, node, msg)
13211 if fqdn_warn and not found:
13212 feedback_fn("Export not found. If trying to remove an export belonging"
13213 " to a deleted instance please use its Fully Qualified"
13217 class LUGroupAdd(LogicalUnit):
13218 """Logical unit for creating node groups.
13221 HPATH = "group-add"
13222 HTYPE = constants.HTYPE_GROUP
13225 def ExpandNames(self):
13226 # We need the new group's UUID here so that we can create and acquire the
13227 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13228 # that it should not check whether the UUID exists in the configuration.
13229 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13230 self.needed_locks = {}
13231 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13233 def CheckPrereq(self):
13234 """Check prerequisites.
13236 This checks that the given group name is not an existing node group
13241 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13242 except errors.OpPrereqError:
13245 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13246 " node group (UUID: %s)" %
13247 (self.op.group_name, existing_uuid),
13248 errors.ECODE_EXISTS)
13250 if self.op.ndparams:
13251 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13253 if self.op.hv_state:
13254 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13256 self.new_hv_state = None
13258 if self.op.disk_state:
13259 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13261 self.new_disk_state = None
13263 if self.op.diskparams:
13264 for templ in constants.DISK_TEMPLATES:
13265 if templ not in self.op.diskparams:
13266 self.op.diskparams[templ] = {}
13267 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13269 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13271 if self.op.ipolicy:
13272 cluster = self.cfg.GetClusterInfo()
13273 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13275 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13276 except errors.ConfigurationError, err:
13277 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13278 errors.ECODE_INVAL)
13280 def BuildHooksEnv(self):
13281 """Build hooks env.
13285 "GROUP_NAME": self.op.group_name,
13288 def BuildHooksNodes(self):
13289 """Build hooks nodes.
13292 mn = self.cfg.GetMasterNode()
13293 return ([mn], [mn])
13295 def Exec(self, feedback_fn):
13296 """Add the node group to the cluster.
13299 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13300 uuid=self.group_uuid,
13301 alloc_policy=self.op.alloc_policy,
13302 ndparams=self.op.ndparams,
13303 diskparams=self.op.diskparams,
13304 ipolicy=self.op.ipolicy,
13305 hv_state_static=self.new_hv_state,
13306 disk_state_static=self.new_disk_state)
13308 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13309 del self.remove_locks[locking.LEVEL_NODEGROUP]
13312 class LUGroupAssignNodes(NoHooksLU):
13313 """Logical unit for assigning nodes to groups.
13318 def ExpandNames(self):
13319 # These raise errors.OpPrereqError on their own:
13320 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13321 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13323 # We want to lock all the affected nodes and groups. We have readily
13324 # available the list of nodes, and the *destination* group. To gather the
13325 # list of "source" groups, we need to fetch node information later on.
13326 self.needed_locks = {
13327 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13328 locking.LEVEL_NODE: self.op.nodes,
13331 def DeclareLocks(self, level):
13332 if level == locking.LEVEL_NODEGROUP:
13333 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13335 # Try to get all affected nodes' groups without having the group or node
13336 # lock yet. Needs verification later in the code flow.
13337 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13339 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13341 def CheckPrereq(self):
13342 """Check prerequisites.
13345 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13346 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13347 frozenset(self.op.nodes))
13349 expected_locks = (set([self.group_uuid]) |
13350 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13351 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13352 if actual_locks != expected_locks:
13353 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13354 " current groups are '%s', used to be '%s'" %
13355 (utils.CommaJoin(expected_locks),
13356 utils.CommaJoin(actual_locks)))
13358 self.node_data = self.cfg.GetAllNodesInfo()
13359 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13360 instance_data = self.cfg.GetAllInstancesInfo()
13362 if self.group is None:
13363 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13364 (self.op.group_name, self.group_uuid))
13366 (new_splits, previous_splits) = \
13367 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13368 for node in self.op.nodes],
13369 self.node_data, instance_data)
13372 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13374 if not self.op.force:
13375 raise errors.OpExecError("The following instances get split by this"
13376 " change and --force was not given: %s" %
13379 self.LogWarning("This operation will split the following instances: %s",
13382 if previous_splits:
13383 self.LogWarning("In addition, these already-split instances continue"
13384 " to be split across groups: %s",
13385 utils.CommaJoin(utils.NiceSort(previous_splits)))
13387 def Exec(self, feedback_fn):
13388 """Assign nodes to a new group.
13391 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13393 self.cfg.AssignGroupNodes(mods)
13396 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13397 """Check for split instances after a node assignment.
13399 This method considers a series of node assignments as an atomic operation,
13400 and returns information about split instances after applying the set of
13403 In particular, it returns information about newly split instances, and
13404 instances that were already split and remain so after the change.
13406 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13407 considered.
13409 @type changes: list of (node_name, new_group_uuid) pairs.
13410 @param changes: list of node assignments to consider.
13411 @param node_data: a dict with data for all nodes
13412 @param instance_data: a dict with all instances to consider
13413 @rtype: a two-tuple
13414 @return: a list of instances that were previously okay and end up split as a
13415 consequence of this change, and a list of instances that were previously
13416 split and this change does not fix.
13419 changed_nodes = dict((node, group) for node, group in changes
13420 if node_data[node].group != group)
13422 all_split_instances = set()
13423 previously_split_instances = set()
13425 def InstanceNodes(instance):
13426 return [instance.primary_node] + list(instance.secondary_nodes)
13428 for inst in instance_data.values():
13429 if inst.disk_template not in constants.DTS_INT_MIRROR:
13432 instance_nodes = InstanceNodes(inst)
13434 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13435 previously_split_instances.add(inst.name)
13437 if len(set(changed_nodes.get(node, node_data[node].group)
13438 for node in instance_nodes)) > 1:
13439 all_split_instances.add(inst.name)
13441 return (list(all_split_instances - previously_split_instances),
13442 list(previously_split_instances & all_split_instances))
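# A self-contained illustration of the "split" test used above: an instance is
# split when its nodes end up in more than one node group. The helper and the
# sample data below are hypothetical.
def _ExampleIsSplit(instance_nodes, node_to_group):
  return len(set(node_to_group[node] for node in instance_nodes)) > 1

# Moving only the secondary of a two-node mirrored instance to another group
# splits it:
# _ExampleIsSplit(["node1", "node2"], {"node1": "g1", "node2": "g2"}) -> True
# _ExampleIsSplit(["node1", "node2"], {"node1": "g1", "node2": "g1"}) -> False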
13445 class _GroupQuery(_QueryBase):
13446 FIELDS = query.GROUP_FIELDS
13448 def ExpandNames(self, lu):
13449 lu.needed_locks = {}
13451 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13452 self._cluster = lu.cfg.GetClusterInfo()
13453 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13456 self.wanted = [name_to_uuid[name]
13457 for name in utils.NiceSort(name_to_uuid.keys())]
13459 # Accept names to be either names or UUIDs.
13462 all_uuid = frozenset(self._all_groups.keys())
13464 for name in self.names:
13465 if name in all_uuid:
13466 self.wanted.append(name)
13467 elif name in name_to_uuid:
13468 self.wanted.append(name_to_uuid[name])
13470 missing.append(name)
13473 raise errors.OpPrereqError("Some groups do not exist: %s" %
13474 utils.CommaJoin(missing),
13475 errors.ECODE_NOENT)
13477 def DeclareLocks(self, lu, level):
13480 def _GetQueryData(self, lu):
13481 """Computes the list of node groups and their attributes.
13484 do_nodes = query.GQ_NODE in self.requested_data
13485 do_instances = query.GQ_INST in self.requested_data
13487 group_to_nodes = None
13488 group_to_instances = None
13490 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13491 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13492 # latter GetAllInstancesInfo() is not enough, for we have to go through
13493 # instance->node. Hence, we will need to process nodes even if we only need
13494 # instance information.
13495 if do_nodes or do_instances:
13496 all_nodes = lu.cfg.GetAllNodesInfo()
13497 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13500 for node in all_nodes.values():
13501 if node.group in group_to_nodes:
13502 group_to_nodes[node.group].append(node.name)
13503 node_to_group[node.name] = node.group
13506 all_instances = lu.cfg.GetAllInstancesInfo()
13507 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13509 for instance in all_instances.values():
13510 node = instance.primary_node
13511 if node in node_to_group:
13512 group_to_instances[node_to_group[node]].append(instance.name)
13515 # Do not pass on node information if it was not requested.
13516 group_to_nodes = None
13518 return query.GroupQueryData(self._cluster,
13519 [self._all_groups[uuid]
13520 for uuid in self.wanted],
13521 group_to_nodes, group_to_instances)
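# Simplified sketch of the group_to_nodes/node_to_group maps built in
# _GetQueryData above; node_groups stands in for the node objects and maps
# node name -> group UUID (hypothetical helper).
def _ExampleGroupMaps(node_groups, wanted_groups):
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  node_to_group = {}
  for name, group in node_groups.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(name)
      node_to_group[name] = group
  return (group_to_nodes, node_to_group)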
13524 class LUGroupQuery(NoHooksLU):
13525 """Logical unit for querying node groups.
13530 def CheckArguments(self):
13531 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13532 self.op.output_fields, False)
13534 def ExpandNames(self):
13535 self.gq.ExpandNames(self)
13537 def DeclareLocks(self, level):
13538 self.gq.DeclareLocks(self, level)
13540 def Exec(self, feedback_fn):
13541 return self.gq.OldStyleQuery(self)
13544 class LUGroupSetParams(LogicalUnit):
13545 """Modifies the parameters of a node group.
13548 HPATH = "group-modify"
13549 HTYPE = constants.HTYPE_GROUP
13552 def CheckArguments(self):
13555 self.op.diskparams,
13556 self.op.alloc_policy,
13558 self.op.disk_state,
13562 if all_changes.count(None) == len(all_changes):
13563 raise errors.OpPrereqError("Please pass at least one modification",
13564 errors.ECODE_INVAL)
13566 def ExpandNames(self):
13567 # This raises errors.OpPrereqError on its own:
13568 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13570 self.needed_locks = {
13571 locking.LEVEL_INSTANCE: [],
13572 locking.LEVEL_NODEGROUP: [self.group_uuid],
13575 self.share_locks[locking.LEVEL_INSTANCE] = 1
13577 def DeclareLocks(self, level):
13578 if level == locking.LEVEL_INSTANCE:
13579 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13581 # Lock instances optimistically, needs verification once group lock has
13582 # been acquired
13583 self.needed_locks[locking.LEVEL_INSTANCE] = \
13584 self.cfg.GetNodeGroupInstances(self.group_uuid)
13586 def CheckPrereq(self):
13587 """Check prerequisites.
13590 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13592 # Check if locked instances are still correct
13593 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13595 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13596 cluster = self.cfg.GetClusterInfo()
13598 if self.group is None:
13599 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13600 (self.op.group_name, self.group_uuid))
13602 if self.op.ndparams:
13603 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13604 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13605 self.new_ndparams = new_ndparams
13607 if self.op.diskparams:
13608 self.new_diskparams = dict()
13609 for templ in constants.DISK_TEMPLATES:
13610 if templ not in self.op.diskparams:
13611 self.op.diskparams[templ] = {}
13612 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13613 self.op.diskparams[templ])
13614 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13615 self.new_diskparams[templ] = new_templ_params
13617 if self.op.hv_state:
13618 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13619 self.group.hv_state_static)
13621 if self.op.disk_state:
13622 self.new_disk_state = \
13623 _MergeAndVerifyDiskState(self.op.disk_state,
13624 self.group.disk_state_static)
13626 if self.op.ipolicy:
13627 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13631 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13632 inst_filter = lambda inst: inst.name in owned_instances
13633 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13635 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13637 new_ipolicy, instances)
13640 self.LogWarning("After the ipolicy change the following instances"
13641 " violate them: %s",
13642 utils.CommaJoin(violations))
13644 def BuildHooksEnv(self):
13645 """Build hooks env.
13649 "GROUP_NAME": self.op.group_name,
13650 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13653 def BuildHooksNodes(self):
13654 """Build hooks nodes.
13657 mn = self.cfg.GetMasterNode()
13658 return ([mn], [mn])
13660 def Exec(self, feedback_fn):
13661 """Modifies the node group.
13666 if self.op.ndparams:
13667 self.group.ndparams = self.new_ndparams
13668 result.append(("ndparams", str(self.group.ndparams)))
13670 if self.op.diskparams:
13671 self.group.diskparams = self.new_diskparams
13672 result.append(("diskparams", str(self.group.diskparams)))
13674 if self.op.alloc_policy:
13675 self.group.alloc_policy = self.op.alloc_policy
13677 if self.op.hv_state:
13678 self.group.hv_state_static = self.new_hv_state
13680 if self.op.disk_state:
13681 self.group.disk_state_static = self.new_disk_state
13683 if self.op.ipolicy:
13684 self.group.ipolicy = self.new_ipolicy
13686 self.cfg.Update(self.group, feedback_fn)
13690 class LUGroupRemove(LogicalUnit):
13691 HPATH = "group-remove"
13692 HTYPE = constants.HTYPE_GROUP
13695 def ExpandNames(self):
13696 # This will raise errors.OpPrereqError on its own:
13697 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13698 self.needed_locks = {
13699 locking.LEVEL_NODEGROUP: [self.group_uuid],
13702 def CheckPrereq(self):
13703 """Check prerequisites.
13705 This checks that the given group name exists as a node group, that it is
13706 empty (i.e., contains no nodes), and that it is not the last group of the
13707 cluster.
13710 # Verify that the group is empty.
13711 group_nodes = [node.name
13712 for node in self.cfg.GetAllNodesInfo().values()
13713 if node.group == self.group_uuid]
13716 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13718 (self.op.group_name,
13719 utils.CommaJoin(utils.NiceSort(group_nodes))),
13720 errors.ECODE_STATE)
13722 # Verify the cluster would not be left group-less.
13723 if len(self.cfg.GetNodeGroupList()) == 1:
13724 raise errors.OpPrereqError("Group '%s' is the only group,"
13725 " cannot be removed" %
13726 self.op.group_name,
13727 errors.ECODE_STATE)
13729 def BuildHooksEnv(self):
13730 """Build hooks env.
13734 "GROUP_NAME": self.op.group_name,
13737 def BuildHooksNodes(self):
13738 """Build hooks nodes.
13741 mn = self.cfg.GetMasterNode()
13742 return ([mn], [mn])
13744 def Exec(self, feedback_fn):
13745 """Remove the node group.
13749 self.cfg.RemoveNodeGroup(self.group_uuid)
13750 except errors.ConfigurationError:
13751 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13752 (self.op.group_name, self.group_uuid))
13754 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13757 class LUGroupRename(LogicalUnit):
13758 HPATH = "group-rename"
13759 HTYPE = constants.HTYPE_GROUP
13762 def ExpandNames(self):
13763 # This raises errors.OpPrereqError on its own:
13764 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13766 self.needed_locks = {
13767 locking.LEVEL_NODEGROUP: [self.group_uuid],
13770 def CheckPrereq(self):
13771 """Check prerequisites.
13773 Ensures requested new name is not yet used.
13777 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13778 except errors.OpPrereqError:
13781 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13782 " node group (UUID: %s)" %
13783 (self.op.new_name, new_name_uuid),
13784 errors.ECODE_EXISTS)
13786 def BuildHooksEnv(self):
13787 """Build hooks env.
13791 "OLD_NAME": self.op.group_name,
13792 "NEW_NAME": self.op.new_name,
13795 def BuildHooksNodes(self):
13796 """Build hooks nodes.
13799 mn = self.cfg.GetMasterNode()
13801 all_nodes = self.cfg.GetAllNodesInfo()
13802 all_nodes.pop(mn, None)
13805 run_nodes.extend(node.name for node in all_nodes.values()
13806 if node.group == self.group_uuid)
13808 return (run_nodes, run_nodes)
13810 def Exec(self, feedback_fn):
13811 """Rename the node group.
13814 group = self.cfg.GetNodeGroup(self.group_uuid)
13816 if group is None:
13817 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13818 (self.op.group_name, self.group_uuid))
13820 group.name = self.op.new_name
13821 self.cfg.Update(group, feedback_fn)
13823 return self.op.new_name
13826 class LUGroupEvacuate(LogicalUnit):
13827 HPATH = "group-evacuate"
13828 HTYPE = constants.HTYPE_GROUP
13831 def ExpandNames(self):
13832 # This raises errors.OpPrereqError on its own:
13833 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13835 if self.op.target_groups:
13836 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13837 self.op.target_groups)
13839 self.req_target_uuids = []
13841 if self.group_uuid in self.req_target_uuids:
13842 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13843 " as a target group (targets are %s)" %
13845 utils.CommaJoin(self.req_target_uuids)),
13846 errors.ECODE_INVAL)
13848 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13850 self.share_locks = _ShareAll()
13851 self.needed_locks = {
13852 locking.LEVEL_INSTANCE: [],
13853 locking.LEVEL_NODEGROUP: [],
13854 locking.LEVEL_NODE: [],
13857 def DeclareLocks(self, level):
13858 if level == locking.LEVEL_INSTANCE:
13859 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13861 # Lock instances optimistically, needs verification once node and group
13862 # locks have been acquired
13863 self.needed_locks[locking.LEVEL_INSTANCE] = \
13864 self.cfg.GetNodeGroupInstances(self.group_uuid)
13866 elif level == locking.LEVEL_NODEGROUP:
13867 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13869 if self.req_target_uuids:
13870 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13872 # Lock all groups used by instances optimistically; this requires going
13873 # via the node before it's locked, requiring verification later on
13874 lock_groups.update(group_uuid
13875 for instance_name in
13876 self.owned_locks(locking.LEVEL_INSTANCE)
13878 self.cfg.GetInstanceNodeGroups(instance_name))
13880 # No target groups, need to lock all of them
13881 lock_groups = locking.ALL_SET
13883 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13885 elif level == locking.LEVEL_NODE:
13886 # This will only lock the nodes in the group to be evacuated which
13887 # contain actual instances
13888 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13889 self._LockInstancesNodes()
13891 # Lock all nodes in group to be evacuated and target groups
13892 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13893 assert self.group_uuid in owned_groups
13894 member_nodes = [node_name
13895 for group in owned_groups
13896 for node_name in self.cfg.GetNodeGroup(group).members]
13897 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13899 def CheckPrereq(self):
13900 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13901 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13902 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13904 assert owned_groups.issuperset(self.req_target_uuids)
13905 assert self.group_uuid in owned_groups
13907 # Check if locked instances are still correct
13908 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13910 # Get instance information
13911 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13913 # Check if node groups for locked instances are still correct
13914 for instance_name in owned_instances:
13915 inst = self.instances[instance_name]
13916 assert owned_nodes.issuperset(inst.all_nodes), \
13917 "Instance %s's nodes changed while we kept the lock" % instance_name
13919 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13922 assert self.group_uuid in inst_groups, \
13923 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13925 if self.req_target_uuids:
13926 # User requested specific target groups
13927 self.target_uuids = self.req_target_uuids
13929 # All groups except the one to be evacuated are potential targets
13930 self.target_uuids = [group_uuid for group_uuid in owned_groups
13931 if group_uuid != self.group_uuid]
13933 if not self.target_uuids:
13934 raise errors.OpPrereqError("There are no possible target groups",
13935 errors.ECODE_INVAL)
13937 def BuildHooksEnv(self):
13938 """Build hooks env.
13942 "GROUP_NAME": self.op.group_name,
13943 "TARGET_GROUPS": " ".join(self.target_uuids),
13946 def BuildHooksNodes(self):
13947 """Build hooks nodes.
13950 mn = self.cfg.GetMasterNode()
13952 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13954 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13956 return (run_nodes, run_nodes)
13958 def Exec(self, feedback_fn):
13959 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13961 assert self.group_uuid not in self.target_uuids
13963 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13964 instances=instances, target_groups=self.target_uuids)
13966 ial.Run(self.op.iallocator)
13968 if not ial.success:
13969 raise errors.OpPrereqError("Can't compute group evacuation using"
13970 " iallocator '%s': %s" %
13971 (self.op.iallocator, ial.info),
13972 errors.ECODE_NORES)
13974 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13976 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13977 len(jobs), self.op.group_name)
13979 return ResultWithJobs(jobs)
13982 class TagsLU(NoHooksLU): # pylint: disable=W0223
13983 """Generic tags LU.
13985 This is an abstract class which is the parent of all the other tags LUs.
13988 def ExpandNames(self):
13989 self.group_uuid = None
13990 self.needed_locks = {}
13991 if self.op.kind == constants.TAG_NODE:
13992 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13993 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13994 elif self.op.kind == constants.TAG_INSTANCE:
13995 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13996 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13997 elif self.op.kind == constants.TAG_NODEGROUP:
13998 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14000 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14001 # not possible to acquire the BGL based on opcode parameters)
14003 def CheckPrereq(self):
14004 """Check prerequisites.
14007 if self.op.kind == constants.TAG_CLUSTER:
14008 self.target = self.cfg.GetClusterInfo()
14009 elif self.op.kind == constants.TAG_NODE:
14010 self.target = self.cfg.GetNodeInfo(self.op.name)
14011 elif self.op.kind == constants.TAG_INSTANCE:
14012 self.target = self.cfg.GetInstanceInfo(self.op.name)
14013 elif self.op.kind == constants.TAG_NODEGROUP:
14014 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14016 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14017 str(self.op.kind), errors.ECODE_INVAL)
14020 class LUTagsGet(TagsLU):
14021 """Returns the tags of a given object.
14026 def ExpandNames(self):
14027 TagsLU.ExpandNames(self)
14029 # Share locks as this is only a read operation
14030 self.share_locks = _ShareAll()
14032 def Exec(self, feedback_fn):
14033 """Returns the tag list.
14036 return list(self.target.GetTags())
14039 class LUTagsSearch(NoHooksLU):
14040 """Searches the tags for a given pattern.
14045 def ExpandNames(self):
14046 self.needed_locks = {}
14048 def CheckPrereq(self):
14049 """Check prerequisites.
14051 This checks the pattern passed for validity by compiling it.
14055 self.re = re.compile(self.op.pattern)
14056 except re.error, err:
14057 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14058 (self.op.pattern, err), errors.ECODE_INVAL)
14060 def Exec(self, feedback_fn):
14061 """Returns the tag list.
14065 tgts = [("/cluster", cfg.GetClusterInfo())]
14066 ilist = cfg.GetAllInstancesInfo().values()
14067 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14068 nlist = cfg.GetAllNodesInfo().values()
14069 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14070 tgts.extend(("/nodegroup/%s" % n.name, n)
14071 for n in cfg.GetAllNodeGroupsInfo().values())
14073 for path, target in tgts:
14074 for tag in target.GetTags():
14075 if self.re.search(tag):
14076 results.append((path, tag))
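# A standalone sketch of the tag search above: compile the pattern once, then
# scan every (path, tags) pair. The helper and the data shape are hypothetical.
def _ExampleSearchTags(pattern, tagged):
  rx = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tagged
          for tag in tags
          if rx.search(tag)]

# _ExampleSearchTags("^env:", [("/instances/web1", ["env:prod", "owner:ops"])])
# -> [("/instances/web1", "env:prod")]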
14080 class LUTagsSet(TagsLU):
14081 """Sets a tag on a given object.
14086 def CheckPrereq(self):
14087 """Check prerequisites.
14089 This checks the type and length of the tag name and value.
14092 TagsLU.CheckPrereq(self)
14093 for tag in self.op.tags:
14094 objects.TaggableObject.ValidateTag(tag)
14096 def Exec(self, feedback_fn):
14101 for tag in self.op.tags:
14102 self.target.AddTag(tag)
14103 except errors.TagError, err:
14104 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14105 self.cfg.Update(self.target, feedback_fn)
14108 class LUTagsDel(TagsLU):
14109 """Delete a list of tags from a given object.
14114 def CheckPrereq(self):
14115 """Check prerequisites.
14117 This checks that we have the given tag.
14120 TagsLU.CheckPrereq(self)
14121 for tag in self.op.tags:
14122 objects.TaggableObject.ValidateTag(tag)
14123 del_tags = frozenset(self.op.tags)
14124 cur_tags = self.target.GetTags()
14126 diff_tags = del_tags - cur_tags
14128 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14129 raise errors.OpPrereqError("Tag(s) %s not found" %
14130 (utils.CommaJoin(diff_names), ),
14131 errors.ECODE_NOENT)
14133 def Exec(self, feedback_fn):
14134 """Remove the tag from the object.
14137 for tag in self.op.tags:
14138 self.target.RemoveTag(tag)
14139 self.cfg.Update(self.target, feedback_fn)
14142 class LUTestDelay(NoHooksLU):
14143 """Sleep for a specified amount of time.
14145 This LU sleeps on the master and/or nodes for a specified amount of
14146 time.
14151 def ExpandNames(self):
14152 """Expand names and set required locks.
14154 This expands the node list, if any.
14157 self.needed_locks = {}
14158 if self.op.on_nodes:
14159 # _GetWantedNodes can be used here, but is not always appropriate to use
14160 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14161 # more information.
14162 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14163 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14165 def _TestDelay(self):
14166 """Do the actual sleep.
14169 if self.op.on_master:
14170 if not utils.TestDelay(self.op.duration):
14171 raise errors.OpExecError("Error during master delay test")
14172 if self.op.on_nodes:
14173 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14174 for node, node_result in result.items():
14175 node_result.Raise("Failure during rpc call to node %s" % node)
14177 def Exec(self, feedback_fn):
14178 """Execute the test delay opcode, with the wanted repetitions.
14181 if self.op.repeat == 0:
14182 self._TestDelay()
14183 else:
14184 top_value = self.op.repeat - 1
14185 for i in range(self.op.repeat):
14186 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14187 self._TestDelay()
14190 class LUTestJqueue(NoHooksLU):
14191 """Utility LU to test some aspects of the job queue.
14196 # Must be lower than default timeout for WaitForJobChange to see whether it
14197 # notices changed jobs
14198 _CLIENT_CONNECT_TIMEOUT = 20.0
14199 _CLIENT_CONFIRM_TIMEOUT = 60.0
14202 def _NotifyUsingSocket(cls, cb, errcls):
14203 """Opens a Unix socket and waits for another program to connect.
14206 @param cb: Callback to send socket name to client
14207 @type errcls: class
14208 @param errcls: Exception class to use for errors
14211 # Using a temporary directory as there's no easy way to create temporary
14212 # sockets without writing a custom loop around tempfile.mktemp and
14213 # socket.socket
14214 tmpdir = tempfile.mkdtemp()
14216 tmpsock = utils.PathJoin(tmpdir, "sock")
14218 logging.debug("Creating temporary socket at %s", tmpsock)
14219 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14224 # Send details to client
14227 # Wait for client to connect before continuing
14228 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14230 (conn, _) = sock.accept()
14231 except socket.error, err:
14232 raise errcls("Client didn't connect in time (%s)" % err)
14236 # Remove as soon as client is connected
14237 shutil.rmtree(tmpdir)
14239 # Wait for client to close
14242 # pylint: disable=E1101
14243 # Instance of '_socketobject' has no ... member
14244 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14246 except socket.error, err:
14247 raise errcls("Client failed to confirm notification (%s)" % err)
14251 def _SendNotification(self, test, arg, sockname):
14252 """Sends a notification to the client.
14255 @param test: Test name
14256 @param arg: Test argument (depends on test)
14257 @type sockname: string
14258 @param sockname: Socket path
14261 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14263 def _Notify(self, prereq, test, arg):
14264 """Notifies the client of a test.
14267 @param prereq: Whether this is a prereq-phase test
14269 @param test: Test name
14270 @param arg: Test argument (depends on test)
14274 errcls = errors.OpPrereqError
14276 errcls = errors.OpExecError
14278 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14282 def CheckArguments(self):
14283 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14284 self.expandnames_calls = 0
14286 def ExpandNames(self):
14287 checkargs_calls = getattr(self, "checkargs_calls", 0)
14288 if checkargs_calls < 1:
14289 raise errors.ProgrammerError("CheckArguments was not called")
14291 self.expandnames_calls += 1
14293 if self.op.notify_waitlock:
14294 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14296 self.LogInfo("Expanding names")
14298 # Get lock on master node (just to get a lock, not for a particular reason)
14299 self.needed_locks = {
14300 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14303 def Exec(self, feedback_fn):
14304 if self.expandnames_calls < 1:
14305 raise errors.ProgrammerError("ExpandNames was not called")
14307 if self.op.notify_exec:
14308 self._Notify(False, constants.JQT_EXEC, None)
14310 self.LogInfo("Executing")
14312 if self.op.log_messages:
14313 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14314 for idx, msg in enumerate(self.op.log_messages):
14315 self.LogInfo("Sending log message %s", idx + 1)
14316 feedback_fn(constants.JQT_MSGPREFIX + msg)
14317 # Report how many test messages have been sent
14318 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14321 raise errors.OpExecError("Opcode failure was requested")
14326 class IAllocator(object):
14327 """IAllocator framework.
14329 An IAllocator instance has four sets of attributes:
14330 - cfg that is needed to query the cluster
14331 - input data (all members of the _KEYS class attribute are required)
14332 - four buffer attributes (in|out_data|text), that represent the
14333 input (to the external script) in text and data structure format,
14334 and the output from it, again in two formats
14335 - the result variables from the script (success, info, nodes) for
14336 easy usage
14339 # pylint: disable=R0902
14340 # lots of instance attributes
14342 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14344 self.rpc = rpc_runner
14345 # init buffer variables
14346 self.in_text = self.out_text = self.in_data = self.out_data = None
14347 # init all input fields so that pylint is happy
14349 self.memory = self.disks = self.disk_template = None
14350 self.os = self.tags = self.nics = self.vcpus = None
14351 self.hypervisor = None
14352 self.relocate_from = None
14354 self.instances = None
14355 self.evac_mode = None
14356 self.target_groups = []
14358 self.required_nodes = None
14359 # init result fields
14360 self.success = self.info = self.result = None
14363 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14365 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14366 " IAllocator" % self.mode)
14368 keyset = [n for (n, _) in keydata]
14371 if key not in keyset:
14372 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14373 " IAllocator" % key)
14374 setattr(self, key, kwargs[key])
14377 if key not in kwargs:
14378 raise errors.ProgrammerError("Missing input parameter '%s' to"
14379 " IAllocator" % key)
14380 self._BuildInputData(compat.partial(fn, self), keydata)
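# Typical construction, mirroring LUInstanceChangeGroup.Exec and
# LUGroupEvacuate.Exec above (values in angle brackets are placeholders):
#
#   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=["<instance name>"],
#                    target_groups=["<target group uuid>"])
#   ial.Run("<iallocator script name>")
#   if not ial.success:
#     # ial.info carries the script's error message
#     raise errors.OpPrereqError(...)
#   # ial.result is then post-processed, e.g. via _LoadNodeEvacResult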
14382 def _ComputeClusterData(self):
14383 """Compute the generic allocator input data.
14385 This is the data that is independent of the actual operation.
14389 cluster_info = cfg.GetClusterInfo()
14392 "version": constants.IALLOCATOR_VERSION,
14393 "cluster_name": cfg.GetClusterName(),
14394 "cluster_tags": list(cluster_info.GetTags()),
14395 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14396 "ipolicy": cluster_info.ipolicy,
14398 ninfo = cfg.GetAllNodesInfo()
14399 iinfo = cfg.GetAllInstancesInfo().values()
14400 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14403 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14405 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14406 hypervisor_name = self.hypervisor
14407 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14408 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14410 hypervisor_name = cluster_info.primary_hypervisor
14412 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14415 self.rpc.call_all_instances_info(node_list,
14416 cluster_info.enabled_hypervisors)
14418 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14420 config_ndata = self._ComputeBasicNodeData(ninfo)
14421 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14422 i_list, config_ndata)
14423 assert len(data["nodes"]) == len(ninfo), \
14424 "Incomplete node data computed"
14426 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14428 self.in_data = data
14431 def _ComputeNodeGroupData(cfg):
14432 """Compute node groups data.
14435 cluster = cfg.GetClusterInfo()
14436 ng = dict((guuid, {
14437 "name": gdata.name,
14438 "alloc_policy": gdata.alloc_policy,
14439 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14441 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14446 def _ComputeBasicNodeData(node_cfg):
14447 """Compute global node data.
14450 @returns: a dict of name: (node dict, node config)
14453 # fill in static (config-based) values
14454 node_results = dict((ninfo.name, {
14455 "tags": list(ninfo.GetTags()),
14456 "primary_ip": ninfo.primary_ip,
14457 "secondary_ip": ninfo.secondary_ip,
14458 "offline": ninfo.offline,
14459 "drained": ninfo.drained,
14460 "master_candidate": ninfo.master_candidate,
14461 "group": ninfo.group,
14462 "master_capable": ninfo.master_capable,
14463 "vm_capable": ninfo.vm_capable,
14465 for ninfo in node_cfg.values())
14467 return node_results
14470 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14472 """Compute global node data.
14474 @param node_results: the basic node structures as filled from the config
14477 #TODO(dynmem): compute the right data on MAX and MIN memory
14478 # make a copy of the current dict
14479 node_results = dict(node_results)
14480 for nname, nresult in node_data.items():
14481 assert nname in node_results, "Missing basic data for node %s" % nname
14482 ninfo = node_cfg[nname]
14484 if not (ninfo.offline or ninfo.drained):
14485 nresult.Raise("Can't get data for node %s" % nname)
14486 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14488 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14490 for attr in ["memory_total", "memory_free", "memory_dom0",
14491 "vg_size", "vg_free", "cpu_total"]:
14492 if attr not in remote_info:
14493 raise errors.OpExecError("Node '%s' didn't return attribute"
14494 " '%s'" % (nname, attr))
14495 if not isinstance(remote_info[attr], int):
14496 raise errors.OpExecError("Node '%s' returned invalid value"
14498 (nname, attr, remote_info[attr]))
14499 # compute memory used by primary instances
14500 i_p_mem = i_p_up_mem = 0
14501 for iinfo, beinfo in i_list:
14502 if iinfo.primary_node == nname:
14503 i_p_mem += beinfo[constants.BE_MAXMEM]
14504 if iinfo.name not in node_iinfo[nname].payload:
14507 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14508 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14509 remote_info["memory_free"] -= max(0, i_mem_diff)
14511 if iinfo.admin_state == constants.ADMINST_UP:
14512 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14514 # compute memory used by instances
14516 "total_memory": remote_info["memory_total"],
14517 "reserved_memory": remote_info["memory_dom0"],
14518 "free_memory": remote_info["memory_free"],
14519 "total_disk": remote_info["vg_size"],
14520 "free_disk": remote_info["vg_free"],
14521 "total_cpus": remote_info["cpu_total"],
14522 "i_pri_memory": i_p_mem,
14523 "i_pri_up_memory": i_p_up_mem,
14525 pnr_dyn.update(node_results[nname])
14526 node_results[nname] = pnr_dyn
14528 return node_results
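# Worked example of the free-memory adjustment above (numbers are made up):
# a running primary instance with BE_MAXMEM = 1024 currently using 512 gives
# i_mem_diff = 1024 - 512 = 512, so a node reporting memory_free = 4096 is
# exported to the allocator with free_memory = 4096 - max(0, 512) = 3584,
# i.e. as if every running primary instance consumed its configured maximum.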
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

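  # Illustrative sketch (hypothetical instance; "disk_space_total" comes from
  # _ComputeDiskSize): one entry of the instance data built above:
  #   {"web1.example.com": {"tags": [], "admin_state": "up", "vcpus": 2,
  #                         "memory": 4096, "os": "debootstrap+default",
  #                         "nodes": ["node1.example.com", "node2.example.com"],
  #                         "nics": [...], "disks": [{"size": 10240,
  #                         "mode": "rw"}], "disk_template": "drbd",
  #                         "hypervisor": "xen-pvm", "disk_space_total": ...}}
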
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request

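  # Illustrative sketch (hypothetical values): an allocation request built by
  # this method looks roughly like the following; the "type" key is added
  # later by _BuildInputData:
  #   {"name": "web1.example.com", "disk_template": "drbd", "tags": [],
  #    "os": "debootstrap+default", "vcpus": 2, "memory": 4096,
  #    "disks": [{"size": 10240, "mode": "rw"}], "disk_space_total": ...,
  #    "nics": [...], "required_nodes": 2, "hypervisor": "xen-pvm"}
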
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests."""
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests."""
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

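  # Illustrative sketch (hypothetical names): the node-evacuate and
  # change-group requests are much smaller, e.g.
  #   {"instances": ["web1.example.com"], "evac_mode": "all"}
  #   {"instances": ["web1.example.com"], "target_groups": ["group2"]}
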
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

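  # Sketch of the serialized input (keys abridged): the text handed to the
  # iallocator script is a JSON document roughly of the form
  #   {"cluster_name": ..., "nodegroups": {...}, "nodes": {...},
  #    "instances": {...}, "request": {...}}
  # where "nodegroups", "nodes" and "instances" come from the _Compute*Data
  # helpers above and "request" is the mode-specific dict returned by fn().
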
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

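  # Illustrative sketch (made-up names; field meanings follow the iallocator
  # protocol): a node-evacuate result accepted by _NEVAC_RESULT is a
  # (moved, failed, jobs) triple, e.g.
  #   [[["inst1.example.com", "group1", ["node2.example.com"]]],
  #    [["inst2.example.com", "disk template not mirrored"]],
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]]
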
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue
      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find group, let's use UUID
        group_name = group_uuid
      else:
        group_name = group["name"]
      result.add(group_name)

    return sorted(result)

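  # Worked example (hypothetical data) for the helper above:
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "group1"}}
  #   _NodesToGroups(node2group, groups, ["node1", "node2", "node3"])
  #   => ["group1", "uuid-b"]
  # "node3" is unknown and ignored; "uuid-b" has no group entry, so its UUID
  # is used in place of the name.
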

class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
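
# Illustrative usage sketch (hypothetical caller; in this module the generic
# query LUs do the equivalent): resolve the implementation for a resource
# type and instantiate it with a filter, a field list and the locking flag:
#   impl = _GetQueryImplementation(constants.QR_NODE)
#   node_query = impl(qlang.MakeSimpleFilter("name", ["node1.example.com"]),
#                     ["name", "pinst_cnt"], False)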