4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
@type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
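As an illustration only (the opcode choices and the extra keyword are made
up for this sketch), an LU's Exec could do::

  jobs = [[opcodes.OpClusterVerifyConfig()],
          [opcodes.OpClusterVerifyGroup(group_name="default")]]
  return ResultWithJobs(jobs, submitted=len(jobs))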
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
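As an illustration only, a minimal hook-less LU obeying the rules above
could look like this (the class name is made up; see NoHooksLU below)::

  class LUExampleNoop(NoHooksLU):
    def ExpandNames(self):
      self.needed_locks = {}

    def CheckPrereq(self):
      pass

    def Exec(self, feedback_fn):
      return True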
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring the
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:

- ExpandNames is left as a purely lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
later methods need no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
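For example, to acquire all node locks in shared mode (a sketch)::

  self.share_locks[locking.LEVEL_NODE] = 1
  self.needed_locks = {
    locking.LEVEL_NODE: locking.ALL_SET,
    }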
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
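A common implementation (sketch) just recomputes the node locks once the
instance locks are held, as described for _LockInstancesNodes below::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()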
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
hook should run after the execution. If there are no nodes for a phase,
an empty list must be returned (not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused-argument and
# could-be-a-function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
@type use_default: boolean
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
values as 'to be deleted' values
@type use_none: boolean
@param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
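A small illustration with made-up values (with the default flags,
C{constants.VALUE_DEFAULT} removes a key and plain values overwrite)::

  _GetUpdatedParams({"a": 1, "b": 2},
                    {"a": 10, "b": constants.VALUE_DEFAULT, "c": 3})
  # -> {"a": 10, "c": 3}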
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
724 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
725 """Return the new version of a instance policy.
727 @param group_policy: whether this policy applies to a group and thus
728 we should support removal of policy entries
731 use_none = use_default = group_policy
732 ipolicy = copy.deepcopy(old_ipolicy)
733 for key, value in new_ipolicy.items():
734 if key not in constants.IPOLICY_ALL_KEYS:
735 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
737 if key in constants.IPOLICY_ISPECS:
738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
739 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
741 use_default=use_default)
743 if not value or value == [constants.VALUE_DEFAULT]:
747 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
748 " on the cluster'" % key,
751 if key in constants.IPOLICY_PARAMETERS:
752 # FIXME: we assume all such values are float
754 ipolicy[key] = float(value)
755 except (TypeError, ValueError), err:
756 raise errors.OpPrereqError("Invalid value for attribute"
757 " '%s': '%s', error: %s" %
758 (key, value, err), errors.ECODE_INVAL)
760 # FIXME: we assume all others are lists; this should be redone
762 ipolicy[key] = list(value)
764 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
765 except errors.ConfigurationError, err:
766 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
771 def _UpdateAndVerifySubDict(base, updates, type_check):
772 """Updates and verifies a dict with sub dicts of the same type.
774 @param base: The dict with the old data
775 @param updates: The dict with the new data
776 @param type_check: Dict suitable to ForceDictType to verify correct types
@return: A new dict with updated and verified values
781 new = _GetUpdatedParams(old, value)
782 utils.ForceDictType(new, type_check)
785 ret = copy.deepcopy(base)
786 ret.update(dict((key, fn(base.get(key, {}), value))
787 for key, value in updates.items()))
791 def _MergeAndVerifyHvState(op_input, obj_input):
792 """Combines the hv state from an opcode with the one of the object
794 @param op_input: The input dict from the opcode
795 @param obj_input: The input dict from the objects
796 @return: The verified and updated dict
800 invalid_hvs = set(op_input) - constants.HYPER_TYPES
802 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
803 " %s" % utils.CommaJoin(invalid_hvs),
805 if obj_input is None:
807 type_check = constants.HVSTS_PARAMETER_TYPES
808 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
813 def _MergeAndVerifyDiskState(op_input, obj_input):
814 """Combines the disk state from an opcode with the one of the object
816 @param op_input: The input dict from the opcode
817 @param obj_input: The input dict from the objects
818 @return: The verified and updated dict
821 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
823 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
824 utils.CommaJoin(invalid_dst),
826 type_check = constants.DSS_PARAMETER_TYPES
827 if obj_input is None:
829 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
831 for key, value in op_input.items())
836 def _ReleaseLocks(lu, level, names=None, keep=None):
837 """Releases locks owned by an LU.
839 @type lu: L{LogicalUnit}
840 @param level: Lock level
841 @type names: list or None
842 @param names: Names of locks to release
843 @type keep: list or None
844 @param keep: Names of locks to retain
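For example, to release all node locks except the one named in C{keep}
(a sketch; the variable names are illustrative)::

  _ReleaseLocks(self, locking.LEVEL_NODE, keep=[instance.primary_node])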
847 assert not (keep is not None and names is not None), \
848 "Only one of the 'names' and the 'keep' parameters can be given"
850 if names is not None:
851 should_release = names.__contains__
853 should_release = lambda name: name not in keep
855 should_release = None
857 owned = lu.owned_locks(level)
859 # Not owning any lock at this level, do nothing
866 # Determine which locks to release
868 if should_release(name):
873 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
875 # Release just some locks
876 lu.glm.release(level, names=release)
878 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
881 lu.glm.release(level)
883 assert not lu.glm.is_owned(level), "No locks should be owned"
886 def _MapInstanceDisksToNodes(instances):
887 """Creates a map from (node, volume) to instance name.
889 @type instances: list of L{objects.Instance}
890 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
893 return dict(((node, vol), inst.name)
894 for inst in instances
895 for (node, vols) in inst.MapLVsByNode().items()
899 def _RunPostHook(lu, node_name):
900 """Runs the post-hook for an opcode on a single node.
903 hm = lu.proc.BuildHooksManager(lu)
905 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
907 # pylint: disable=W0702
908 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
911 def _CheckOutputFields(static, dynamic, selected):
912 """Checks whether all selected fields are valid.
914 @type static: L{utils.FieldSet}
915 @param static: static fields set
916 @type dynamic: L{utils.FieldSet}
917 @param dynamic: dynamic fields set
924 delta = f.NonMatching(selected)
926 raise errors.OpPrereqError("Unknown output fields selected: %s"
927 % ",".join(delta), errors.ECODE_INVAL)
930 def _CheckGlobalHvParams(params):
931 """Validates that given hypervisor params are not global ones.
933 This will ensure that instances don't get customised versions of
937 used_globals = constants.HVC_GLOBALS.intersection(params)
939 msg = ("The following hypervisor parameters are global and cannot"
940 " be customized at instance level, please modify them at"
941 " cluster level: %s" % utils.CommaJoin(used_globals))
942 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
945 def _CheckNodeOnline(lu, node, msg=None):
946 """Ensure that a given node is online.
948 @param lu: the LU on behalf of which we make the check
949 @param node: the node to check
950 @param msg: if passed, should be a message to replace the default one
951 @raise errors.OpPrereqError: if the node is offline
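Typically called from CheckPrereq, for example (a sketch; the opcode
attribute name is illustrative)::

  _CheckNodeOnline(self, self.op.node_name)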
955 msg = "Can't use offline node"
956 if lu.cfg.GetNodeInfo(node).offline:
957 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
960 def _CheckNodeNotDrained(lu, node):
961 """Ensure that a given node is not drained.
963 @param lu: the LU on behalf of which we make the check
964 @param node: the node to check
965 @raise errors.OpPrereqError: if the node is drained
968 if lu.cfg.GetNodeInfo(node).drained:
969 raise errors.OpPrereqError("Can't use drained node %s" % node,
973 def _CheckNodeVmCapable(lu, node):
974 """Ensure that a given node is vm capable.
976 @param lu: the LU on behalf of which we make the check
977 @param node: the node to check
978 @raise errors.OpPrereqError: if the node is not vm capable
981 if not lu.cfg.GetNodeInfo(node).vm_capable:
982 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
986 def _CheckNodeHasOS(lu, node, os_name, force_variant):
987 """Ensure that a node supports a given OS.
989 @param lu: the LU on behalf of which we make the check
990 @param node: the node to check
991 @param os_name: the OS to query about
992 @param force_variant: whether to ignore variant errors
993 @raise errors.OpPrereqError: if the node is not supporting the OS
996 result = lu.rpc.call_os_get(node, os_name)
997 result.Raise("OS '%s' not in supported OS list for node %s" %
999 prereq=True, ecode=errors.ECODE_INVAL)
1000 if not force_variant:
1001 _CheckOSVariant(result.payload, os_name)
1004 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1005 """Ensure that a node has the given secondary ip.
1007 @type lu: L{LogicalUnit}
1008 @param lu: the LU on behalf of which we make the check
1010 @param node: the node to check
1011 @type secondary_ip: string
1012 @param secondary_ip: the ip to check
1013 @type prereq: boolean
1014 @param prereq: whether to throw a prerequisite or an execute error
1015 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1016 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1019 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1020 result.Raise("Failure checking secondary ip on node %s" % node,
1021 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1022 if not result.payload:
1023 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1024 " please fix and re-run this command" % secondary_ip)
1026 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1028 raise errors.OpExecError(msg)
1031 def _GetClusterDomainSecret():
1032 """Reads the cluster domain secret.
1035 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1039 def _CheckInstanceState(lu, instance, req_states, msg=None):
1040 """Ensure that an instance is in one of the required states.
1042 @param lu: the LU on behalf of which we make the check
1043 @param instance: the instance to check
1044 @param msg: if passed, should be a message to replace the default one
1045 @raise errors.OpPrereqError: if the instance is not in the required state
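For example, to require an instance that is either up or down, but not
offline (a sketch)::

  _CheckInstanceState(self, instance, INSTANCE_ONLINE)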
1049 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1050 if instance.admin_state not in req_states:
1051 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1052 (instance.name, instance.admin_state, msg),
1055 if constants.ADMINST_UP not in req_states:
1056 pnode = instance.primary_node
1057 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1058 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1059 prereq=True, ecode=errors.ECODE_ENVIRON)
1061 if instance.name in ins_l.payload:
1062 raise errors.OpPrereqError("Instance %s is running, %s" %
1063 (instance.name, msg), errors.ECODE_STATE)
1066 def _ComputeMinMaxSpec(name, ipolicy, value):
1067 """Computes if value is in the desired range.
1069 @param name: name of the parameter for which we perform the check
1070 @param ipolicy: dictionary containing min, max and std values
1071 @param value: actual value that we want to use
1072 @return: None or element not meeting the criteria
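Example with a made-up policy (returns None because 512 lies inside the
[256, 1024] range)::

  _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE,
                     {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 256},
                      constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 1024}},
                     512)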
1076 if value in [None, constants.VALUE_AUTO]:
1078 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1079 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1080 if value > max_v or min_v > value:
1081 return ("%s value %s is not in range [%s, %s]" %
1082 (name, value, min_v, max_v))
1086 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1087 nic_count, disk_sizes,
1088 _compute_fn=_ComputeMinMaxSpec):
1089 """Verifies ipolicy against provided specs.
1092 @param ipolicy: The ipolicy
1094 @param mem_size: The memory size
1095 @type cpu_count: int
1096 @param cpu_count: Used cpu cores
1097 @type disk_count: int
1098 @param disk_count: Number of disks used
1099 @type nic_count: int
1100 @param nic_count: Number of nics used
1101 @type disk_sizes: list of ints
1102 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1103 @param _compute_fn: The compute function (unittest only)
@return: A list of violations, or an empty list if no violations are found
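A sketch of a call with made-up numbers (1024 MB of memory, 2 VCPUs, one
20 GB disk and one NIC)::

  _ComputeIPolicySpecViolation(ipolicy, 1024, 2, 1, 1, [20480])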
1107 assert disk_count == len(disk_sizes)
1110 (constants.ISPEC_MEM_SIZE, mem_size),
1111 (constants.ISPEC_CPU_COUNT, cpu_count),
1112 (constants.ISPEC_DISK_COUNT, disk_count),
1113 (constants.ISPEC_NIC_COUNT, nic_count),
1114 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1117 (_compute_fn(name, ipolicy, value)
1118 for (name, value) in test_settings))
1121 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1122 _compute_fn=_ComputeIPolicySpecViolation):
1123 """Compute if instance meets the specs of ipolicy.
1126 @param ipolicy: The ipolicy to verify against
1127 @type instance: L{objects.Instance}
1128 @param instance: The instance to verify
1129 @param _compute_fn: The function to verify ipolicy (unittest only)
1130 @see: L{_ComputeIPolicySpecViolation}
1133 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1134 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1135 disk_count = len(instance.disks)
1136 disk_sizes = [disk.size for disk in instance.disks]
1137 nic_count = len(instance.nics)
1139 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1143 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1144 _compute_fn=_ComputeIPolicySpecViolation):
1145 """Compute if instance specs meets the specs of ipolicy.
1148 @param ipolicy: The ipolicy to verify against
@type instance_spec: dict
1150 @param instance_spec: The instance spec to verify
1151 @param _compute_fn: The function to verify ipolicy (unittest only)
1152 @see: L{_ComputeIPolicySpecViolation}
1155 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1156 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1157 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1158 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1159 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1161 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1165 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1167 _compute_fn=_ComputeIPolicyInstanceViolation):
1168 """Compute if instance meets the specs of the new target group.
1170 @param ipolicy: The ipolicy to verify
1171 @param instance: The instance object to verify
1172 @param current_group: The current group of the instance
1173 @param target_group: The new group of the instance
1174 @param _compute_fn: The function to verify ipolicy (unittest only)
1175 @see: L{_ComputeIPolicySpecViolation}
1178 if current_group == target_group:
1181 return _compute_fn(ipolicy, instance)
1184 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1185 _compute_fn=_ComputeIPolicyNodeViolation):
1186 """Checks that the target node is correct in terms of instance policy.
1188 @param ipolicy: The ipolicy to verify
1189 @param instance: The instance object to verify
1190 @param node: The new node to relocate
1191 @param ignore: Ignore violations of the ipolicy
1192 @param _compute_fn: The function to verify ipolicy (unittest only)
1193 @see: L{_ComputeIPolicySpecViolation}
1196 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1197 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1200 msg = ("Instance does not meet target node group's (%s) instance"
1201 " policy: %s") % (node.group, utils.CommaJoin(res))
1205 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1208 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1209 """Computes a set of any instances that would violate the new ipolicy.
1211 @param old_ipolicy: The current (still in-place) ipolicy
1212 @param new_ipolicy: The new (to become) ipolicy
1213 @param instances: List of instances to verify
@return: A set of instances which violate the new ipolicy but did not before
1217 return (_ComputeViolatingInstances(old_ipolicy, instances) -
1218 _ComputeViolatingInstances(new_ipolicy, instances))
1221 def _ExpandItemName(fn, name, kind):
1222 """Expand an item name.
1224 @param fn: the function to use for expansion
1225 @param name: requested item name
1226 @param kind: text description ('Node' or 'Instance')
1227 @return: the resolved (full) name
1228 @raise errors.OpPrereqError: if the item is not found
1231 full_name = fn(name)
1232 if full_name is None:
1233 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1238 def _ExpandNodeName(cfg, name):
1239 """Wrapper over L{_ExpandItemName} for nodes."""
1240 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1243 def _ExpandInstanceName(cfg, name):
1244 """Wrapper over L{_ExpandItemName} for instance."""
1245 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1248 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1249 minmem, maxmem, vcpus, nics, disk_template, disks,
1250 bep, hvp, hypervisor_name, tags):
1251 """Builds instance related env variables for hooks
1253 This builds the hook environment from individual variables.
1256 @param name: the name of the instance
1257 @type primary_node: string
1258 @param primary_node: the name of the instance's primary node
1259 @type secondary_nodes: list
1260 @param secondary_nodes: list of secondary nodes as strings
1261 @type os_type: string
1262 @param os_type: the name of the instance's OS
1263 @type status: string
1264 @param status: the desired status of the instance
1265 @type minmem: string
1266 @param minmem: the minimum memory size of the instance
1267 @type maxmem: string
1268 @param maxmem: the maximum memory size of the instance
1270 @param vcpus: the count of VCPUs the instance has
1272 @param nics: list of tuples (ip, mac, mode, link) representing
1273 the NICs the instance has
1274 @type disk_template: string
1275 @param disk_template: the disk template of the instance
1277 @param disks: the list of (size, mode) pairs
1279 @param bep: the backend parameters for the instance
1281 @param hvp: the hypervisor parameters for the instance
1282 @type hypervisor_name: string
1283 @param hypervisor_name: the hypervisor for the instance
1285 @param tags: list of instance tags as strings
1287 @return: the hook environment for this instance
1292 "INSTANCE_NAME": name,
1293 "INSTANCE_PRIMARY": primary_node,
1294 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1295 "INSTANCE_OS_TYPE": os_type,
1296 "INSTANCE_STATUS": status,
1297 "INSTANCE_MINMEM": minmem,
1298 "INSTANCE_MAXMEM": maxmem,
1299 # TODO(2.7) remove deprecated "memory" value
1300 "INSTANCE_MEMORY": maxmem,
1301 "INSTANCE_VCPUS": vcpus,
1302 "INSTANCE_DISK_TEMPLATE": disk_template,
1303 "INSTANCE_HYPERVISOR": hypervisor_name,
1306 nic_count = len(nics)
1307 for idx, (ip, mac, mode, link) in enumerate(nics):
1310 env["INSTANCE_NIC%d_IP" % idx] = ip
1311 env["INSTANCE_NIC%d_MAC" % idx] = mac
1312 env["INSTANCE_NIC%d_MODE" % idx] = mode
1313 env["INSTANCE_NIC%d_LINK" % idx] = link
1314 if mode == constants.NIC_MODE_BRIDGED:
1315 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1319 env["INSTANCE_NIC_COUNT"] = nic_count
1322 disk_count = len(disks)
1323 for idx, (size, mode) in enumerate(disks):
1324 env["INSTANCE_DISK%d_SIZE" % idx] = size
1325 env["INSTANCE_DISK%d_MODE" % idx] = mode
1329 env["INSTANCE_DISK_COUNT"] = disk_count
1334 env["INSTANCE_TAGS"] = " ".join(tags)
1336 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1337 for key, value in source.items():
1338 env["INSTANCE_%s_%s" % (kind, key)] = value
1343 def _NICListToTuple(lu, nics):
1344 """Build a list of nic information tuples.
1346 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1347 value in LUInstanceQueryData.
1349 @type lu: L{LogicalUnit}
1350 @param lu: the logical unit on whose behalf we execute
1351 @type nics: list of L{objects.NIC}
1352 @param nics: list of nics to convert to hooks tuples
1356 cluster = lu.cfg.GetClusterInfo()
1360 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1361 mode = filled_params[constants.NIC_MODE]
1362 link = filled_params[constants.NIC_LINK]
1363 hooks_nics.append((ip, mac, mode, link))
1367 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1368 """Builds instance related env variables for hooks from an object.
1370 @type lu: L{LogicalUnit}
1371 @param lu: the logical unit on whose behalf we execute
1372 @type instance: L{objects.Instance}
1373 @param instance: the instance for which we should build the
1375 @type override: dict
1376 @param override: dictionary with key/values that will override
1379 @return: the hook environment dictionary
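Typical use from an LU's BuildHooksEnv (a sketch; the override key shown is
one of the argument names built below)::

  env = _BuildInstanceHookEnvByObject(
    self, self.instance, override={"status": constants.ADMINST_DOWN})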
1382 cluster = lu.cfg.GetClusterInfo()
1383 bep = cluster.FillBE(instance)
1384 hvp = cluster.FillHV(instance)
1386 "name": instance.name,
1387 "primary_node": instance.primary_node,
1388 "secondary_nodes": instance.secondary_nodes,
1389 "os_type": instance.os,
1390 "status": instance.admin_state,
1391 "maxmem": bep[constants.BE_MAXMEM],
1392 "minmem": bep[constants.BE_MINMEM],
1393 "vcpus": bep[constants.BE_VCPUS],
1394 "nics": _NICListToTuple(lu, instance.nics),
1395 "disk_template": instance.disk_template,
1396 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1399 "hypervisor_name": instance.hypervisor,
1400 "tags": instance.tags,
1403 args.update(override)
1404 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1407 def _AdjustCandidatePool(lu, exceptions):
1408 """Adjust the candidate pool after node operations.
1411 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1413 lu.LogInfo("Promoted nodes to master candidate role: %s",
1414 utils.CommaJoin(node.name for node in mod_list))
1415 for name in mod_list:
1416 lu.context.ReaddNode(name)
1417 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1419 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1423 def _DecideSelfPromotion(lu, exceptions=None):
1424 """Decide whether I should promote myself as a master candidate.
1427 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1428 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
# the new node will increase mc_max by one, so:
1430 mc_should = min(mc_should + 1, cp_size)
1431 return mc_now < mc_should
1434 def _CalculateGroupIPolicy(cluster, group):
1435 """Calculate instance policy for group.
1438 return cluster.SimpleFillIPolicy(group.ipolicy)
1441 def _ComputeViolatingInstances(ipolicy, instances):
1442 """Computes a set of instances who violates given ipolicy.
1444 @param ipolicy: The ipolicy to verify
@type instances: list of L{objects.Instance}
1446 @param instances: List of instances to verify
1447 @return: A frozenset of instance names violating the ipolicy
1450 return frozenset([inst.name for inst in instances
1451 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1454 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1455 """Check that the brigdes needed by a list of nics exist.
1458 cluster = lu.cfg.GetClusterInfo()
1459 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1460 brlist = [params[constants.NIC_LINK] for params in paramslist
1461 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1463 result = lu.rpc.call_bridges_exist(target_node, brlist)
1464 result.Raise("Error checking bridges on destination node '%s'" %
1465 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1468 def _CheckInstanceBridgesExist(lu, instance, node=None):
1469 """Check that the brigdes needed by an instance exist.
1473 node = instance.primary_node
1474 _CheckNicsBridgesExist(lu, instance.nics, node)
1477 def _CheckOSVariant(os_obj, name):
1478 """Check whether an OS name conforms to the os variants specification.
1480 @type os_obj: L{objects.OS}
1481 @param os_obj: OS object to check
1483 @param name: OS name passed by the user, to check for validity
1486 variant = objects.OS.GetVariant(name)
1487 if not os_obj.supported_variants:
1489 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1490 " passed)" % (os_obj.name, variant),
1494 raise errors.OpPrereqError("OS name must include a variant",
1497 if variant not in os_obj.supported_variants:
1498 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1501 def _GetNodeInstancesInner(cfg, fn):
1502 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1505 def _GetNodeInstances(cfg, node_name):
1506 """Returns a list of all primary and secondary instances on a node.
1510 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1513 def _GetNodePrimaryInstances(cfg, node_name):
1514 """Returns primary instances on a node.
1517 return _GetNodeInstancesInner(cfg,
1518 lambda inst: node_name == inst.primary_node)
1521 def _GetNodeSecondaryInstances(cfg, node_name):
1522 """Returns secondary instances on a node.
1525 return _GetNodeInstancesInner(cfg,
1526 lambda inst: node_name in inst.secondary_nodes)
1529 def _GetStorageTypeArgs(cfg, storage_type):
1530 """Returns the arguments for a storage type.
1533 # Special case for file storage
1534 if storage_type == constants.ST_FILE:
1535 # storage.FileStorage wants a list of storage directories
1536 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1541 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1544 for dev in instance.disks:
1545 cfg.SetDiskID(dev, node_name)
1547 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1548 result.Raise("Failed to get disk status from node %s" % node_name,
1549 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1551 for idx, bdev_status in enumerate(result.payload):
1552 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1558 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1559 """Check the sanity of iallocator and node arguments and use the
1560 cluster-wide iallocator if appropriate.
1562 Check that at most one of (iallocator, node) is specified. If none is
1563 specified, then the LU's opcode's iallocator slot is filled with the
1564 cluster-wide default iallocator.
1566 @type iallocator_slot: string
1567 @param iallocator_slot: the name of the opcode iallocator slot
1568 @type node_slot: string
1569 @param node_slot: the name of the opcode target node slot
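Typical use from an LU's CheckArguments (a sketch; the slot names are
illustrative and must exist as attributes of the LU's opcode)::

  _CheckIAllocatorOrNode(self, "iallocator", "node")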
1572 node = getattr(lu.op, node_slot, None)
1573 iallocator = getattr(lu.op, iallocator_slot, None)
1575 if node is not None and iallocator is not None:
raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1578 elif node is None and iallocator is None:
1579 default_iallocator = lu.cfg.GetDefaultIAllocator()
1580 if default_iallocator:
1581 setattr(lu.op, iallocator_slot, default_iallocator)
1583 raise errors.OpPrereqError("No iallocator or node given and no"
1584 " cluster-wide default iallocator found;"
1585 " please specify either an iallocator or a"
1586 " node, or set a cluster-wide default"
1590 def _GetDefaultIAllocator(cfg, iallocator):
1591 """Decides on which iallocator to use.
1593 @type cfg: L{config.ConfigWriter}
1594 @param cfg: Cluster configuration object
1595 @type iallocator: string or None
1596 @param iallocator: Iallocator specified in opcode
1598 @return: Iallocator name
1602 # Use default iallocator
1603 iallocator = cfg.GetDefaultIAllocator()
1606 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1607 " opcode nor as a cluster-wide default",
1613 class LUClusterPostInit(LogicalUnit):
1614 """Logical unit for running hooks after cluster initialization.
1617 HPATH = "cluster-init"
1618 HTYPE = constants.HTYPE_CLUSTER
1620 def BuildHooksEnv(self):
1625 "OP_TARGET": self.cfg.GetClusterName(),
1628 def BuildHooksNodes(self):
1629 """Build hooks nodes.
1632 return ([], [self.cfg.GetMasterNode()])
1634 def Exec(self, feedback_fn):
1641 class LUClusterDestroy(LogicalUnit):
1642 """Logical unit for destroying the cluster.
1645 HPATH = "cluster-destroy"
1646 HTYPE = constants.HTYPE_CLUSTER
1648 def BuildHooksEnv(self):
1653 "OP_TARGET": self.cfg.GetClusterName(),
1656 def BuildHooksNodes(self):
1657 """Build hooks nodes.
1662 def CheckPrereq(self):
1663 """Check prerequisites.
1665 This checks whether the cluster is empty.
1667 Any errors are signaled by raising errors.OpPrereqError.
1670 master = self.cfg.GetMasterNode()
1672 nodelist = self.cfg.GetNodeList()
1673 if len(nodelist) != 1 or nodelist[0] != master:
1674 raise errors.OpPrereqError("There are still %d node(s) in"
1675 " this cluster." % (len(nodelist) - 1),
1677 instancelist = self.cfg.GetInstanceList()
1679 raise errors.OpPrereqError("There are still %d instance(s) in"
1680 " this cluster." % len(instancelist),
1683 def Exec(self, feedback_fn):
1684 """Destroys the cluster.
1687 master_params = self.cfg.GetMasterNetworkParameters()
1689 # Run post hooks on master node before it's removed
1690 _RunPostHook(self, master_params.name)
1692 ems = self.cfg.GetUseExternalMipScript()
1693 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1696 self.LogWarning("Error disabling the master IP address: %s",
1699 return master_params.name
1702 def _VerifyCertificate(filename):
1703 """Verifies a certificate for L{LUClusterVerifyConfig}.
1705 @type filename: string
1706 @param filename: Path to PEM file
1710 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1711 utils.ReadFile(filename))
1712 except Exception, err: # pylint: disable=W0703
1713 return (LUClusterVerifyConfig.ETYPE_ERROR,
1714 "Failed to load X509 certificate %s: %s" % (filename, err))
1717 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1718 constants.SSL_CERT_EXPIRATION_ERROR)
1721 fnamemsg = "While verifying %s: %s" % (filename, msg)
1726 return (None, fnamemsg)
1727 elif errcode == utils.CERT_WARNING:
1728 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1729 elif errcode == utils.CERT_ERROR:
1730 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1732 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1735 def _GetAllHypervisorParameters(cluster, instances):
1736 """Compute the set of all hypervisor parameters.
1738 @type cluster: L{objects.Cluster}
1739 @param cluster: the cluster object
1740 @param instances: list of L{objects.Instance}
1741 @param instances: additional instances from which to obtain parameters
1742 @rtype: list of (origin, hypervisor, parameters)
1743 @return: a list with all parameters found, indicating the hypervisor they
1744 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1749 for hv_name in cluster.enabled_hypervisors:
1750 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1752 for os_name, os_hvp in cluster.os_hvp.items():
1753 for hv_name, hv_params in os_hvp.items():
1755 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1756 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1758 # TODO: collapse identical parameter values in a single one
1759 for instance in instances:
1760 if instance.hvparams:
1761 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1762 cluster.FillHV(instance)))
1767 class _VerifyErrors(object):
1768 """Mix-in for cluster/group verify LUs.
1770 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1771 self.op and self._feedback_fn to be available.)
1775 ETYPE_FIELD = "code"
1776 ETYPE_ERROR = "ERROR"
1777 ETYPE_WARNING = "WARNING"
1779 def _Error(self, ecode, item, msg, *args, **kwargs):
1780 """Format an error message.
1782 Based on the opcode's error_codes parameter, either format a
1783 parseable error code, or a simpler error string.
1785 This must be called only from Exec and functions called from Exec.
1788 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1789 itype, etxt, _ = ecode
1790 # first complete the msg
1793 # then format the whole message
1794 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1795 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1801 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1802 # and finally report it via the feedback_fn
1803 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1805 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1806 """Log an error message if the passed condition is True.
1810 or self.op.debug_simulate_errors) # pylint: disable=E1101
1812 # If the error code is in the list of ignored errors, demote the error to a
1814 (_, etxt, _) = ecode
1815 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1816 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1819 self._Error(ecode, *args, **kwargs)
# do not mark the operation as failed for WARN-only cases
1822 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1823 self.bad = self.bad or cond
1826 class LUClusterVerify(NoHooksLU):
1827 """Submits all jobs necessary to verify the cluster.
1832 def ExpandNames(self):
1833 self.needed_locks = {}
1835 def Exec(self, feedback_fn):
1838 if self.op.group_name:
1839 groups = [self.op.group_name]
1840 depends_fn = lambda: None
1842 groups = self.cfg.GetNodeGroupList()
1844 # Verify global configuration
1846 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1849 # Always depend on global verification
1850 depends_fn = lambda: [(-len(jobs), [])]
1852 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1853 ignore_errors=self.op.ignore_errors,
1854 depends=depends_fn())]
1855 for group in groups)
1857 # Fix up all parameters
1858 for op in itertools.chain(*jobs): # pylint: disable=W0142
1859 op.debug_simulate_errors = self.op.debug_simulate_errors
1860 op.verbose = self.op.verbose
1861 op.error_codes = self.op.error_codes
1863 op.skip_checks = self.op.skip_checks
1864 except AttributeError:
1865 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1867 return ResultWithJobs(jobs)
1870 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1871 """Verifies the cluster config.
1876 def _VerifyHVP(self, hvp_data):
1877 """Verifies locally the syntax of the hypervisor parameters.
1880 for item, hv_name, hv_params in hvp_data:
1881 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1884 hv_class = hypervisor.GetHypervisor(hv_name)
1885 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1886 hv_class.CheckParameterSyntax(hv_params)
1887 except errors.GenericError, err:
1888 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1890 def ExpandNames(self):
1891 # Information can be safely retrieved as the BGL is acquired in exclusive
1893 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1894 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1895 self.all_node_info = self.cfg.GetAllNodesInfo()
1896 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1897 self.needed_locks = {}
1899 def Exec(self, feedback_fn):
1900 """Verify integrity of cluster, performing various test on nodes.
1904 self._feedback_fn = feedback_fn
1906 feedback_fn("* Verifying cluster config")
1908 for msg in self.cfg.VerifyConfig():
1909 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1911 feedback_fn("* Verifying cluster certificate files")
1913 for cert_filename in constants.ALL_CERT_FILES:
1914 (errcode, msg) = _VerifyCertificate(cert_filename)
1915 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1917 feedback_fn("* Verifying hypervisor parameters")
1919 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1920 self.all_inst_info.values()))
1922 feedback_fn("* Verifying all nodes belong to an existing group")
1924 # We do this verification here because, should this bogus circumstance
1925 # occur, it would never be caught by VerifyGroup, which only acts on
1926 # nodes/instances reachable from existing node groups.
1928 dangling_nodes = set(node.name for node in self.all_node_info.values()
1929 if node.group not in self.all_group_info)
1931 dangling_instances = {}
1932 no_node_instances = []
1934 for inst in self.all_inst_info.values():
1935 if inst.primary_node in dangling_nodes:
1936 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1937 elif inst.primary_node not in self.all_node_info:
1938 no_node_instances.append(inst.name)
1943 utils.CommaJoin(dangling_instances.get(node.name,
1945 for node in dangling_nodes]
1947 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1949 "the following nodes (and their instances) belong to a non"
1950 " existing group: %s", utils.CommaJoin(pretty_dangling))
1952 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1954 "the following instances have a non-existing primary-node:"
1955 " %s", utils.CommaJoin(no_node_instances))
1960 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1961 """Verifies the status of a node group.
1964 HPATH = "cluster-verify"
1965 HTYPE = constants.HTYPE_CLUSTER
1968 _HOOKS_INDENT_RE = re.compile("^", re.M)
1970 class NodeImage(object):
1971 """A class representing the logical and physical status of a node.
1974 @ivar name: the node name to which this object refers
1975 @ivar volumes: a structure as returned from
1976 L{ganeti.backend.GetVolumeList} (runtime)
1977 @ivar instances: a list of running instances (runtime)
1978 @ivar pinst: list of configured primary instances (config)
1979 @ivar sinst: list of configured secondary instances (config)
1980 @ivar sbp: dictionary of {primary-node: list of instances} for all
1981 instances for which this node is secondary (config)
1982 @ivar mfree: free memory, as reported by hypervisor (runtime)
1983 @ivar dfree: free disk, as reported by the node (runtime)
1984 @ivar offline: the offline status (config)
1985 @type rpc_fail: boolean
1986 @ivar rpc_fail: whether the RPC verify call failed (overall,
1987 not whether the individual keys were correct) (runtime)
1988 @type lvm_fail: boolean
1989 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1990 @type hyp_fail: boolean
1991 @ivar hyp_fail: whether the RPC call didn't return the instance list
1992 @type ghost: boolean
1993 @ivar ghost: whether this is a known node or not (config)
1994 @type os_fail: boolean
1995 @ivar os_fail: whether the RPC call didn't return valid OS data
1997 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1998 @type vm_capable: boolean
1999 @ivar vm_capable: whether the node can host instances
2002 def __init__(self, offline=False, name=None, vm_capable=True):
2011 self.offline = offline
2012 self.vm_capable = vm_capable
2013 self.rpc_fail = False
2014 self.lvm_fail = False
2015 self.hyp_fail = False
2017 self.os_fail = False
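# Illustrative note (a sketch, not part of the verification logic): a
# NodeImage starts with only the configuration-derived flags set here; the
# runtime fields from the docstring above (volumes, instances, mfree, dfree,
# oslist, ...) are filled in later by the _UpdateNode* helpers from the
# node_verify RPC results. A hypothetical consumer might then do:
#   nimg = node_image["node1.example.com"]
#   if not (nimg.rpc_fail or nimg.lvm_fail):
#     orphans = [lv for lv in nimg.volumes if lv not in expected_lvs]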
2020 def ExpandNames(self):
2021 # This raises errors.OpPrereqError on its own:
2022 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2024 # Get instances in node group; this is unsafe and needs verification later
2025 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2027 self.needed_locks = {
2028 locking.LEVEL_INSTANCE: inst_names,
2029 locking.LEVEL_NODEGROUP: [self.group_uuid],
2030 locking.LEVEL_NODE: [],
2033 self.share_locks = _ShareAll()
2035 def DeclareLocks(self, level):
2036 if level == locking.LEVEL_NODE:
2037 # Get members of node group; this is unsafe and needs verification later
2038 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2040 all_inst_info = self.cfg.GetAllInstancesInfo()
2042 # In Exec(), we warn about mirrored instances that have primary and
2043 # secondary living in separate node groups. To fully verify that
2044 # volumes for these instances are healthy, we will need to do an
2045 # extra call to their secondaries. We ensure here those nodes will be locked.
2047 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2048 # Important: access only the instances whose lock is owned
2049 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2050 nodes.update(all_inst_info[inst].secondary_nodes)
2052 self.needed_locks[locking.LEVEL_NODE] = nodes
2054 def CheckPrereq(self):
2055 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2056 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2058 group_nodes = set(self.group_info.members)
2059 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2062 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2064 unlocked_instances = \
2065 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2068 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2069 utils.CommaJoin(unlocked_nodes))
2071 if unlocked_instances:
2072 raise errors.OpPrereqError("Missing lock for instances: %s" %
2073 utils.CommaJoin(unlocked_instances))
2075 self.all_node_info = self.cfg.GetAllNodesInfo()
2076 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2078 self.my_node_names = utils.NiceSort(group_nodes)
2079 self.my_inst_names = utils.NiceSort(group_instances)
2081 self.my_node_info = dict((name, self.all_node_info[name])
2082 for name in self.my_node_names)
2084 self.my_inst_info = dict((name, self.all_inst_info[name])
2085 for name in self.my_inst_names)
2087 # We detect here the nodes that will need the extra RPC calls for verifying
2088 # split LV volumes; they should be locked.
2089 extra_lv_nodes = set()
2091 for inst in self.my_inst_info.values():
2092 if inst.disk_template in constants.DTS_INT_MIRROR:
2093 group = self.my_node_info[inst.primary_node].group
2094 for nname in inst.secondary_nodes:
2095 if self.all_node_info[nname].group != group:
2096 extra_lv_nodes.add(nname)
2098 unlocked_lv_nodes = \
2099 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2101 if unlocked_lv_nodes:
2102 raise errors.OpPrereqError("these nodes could be locked: %s" %
2103 utils.CommaJoin(unlocked_lv_nodes))
2104 self.extra_lv_nodes = list(extra_lv_nodes)
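# Illustrative sketch of the extra_lv_nodes computation above (hypothetical
# names): for a DRBD instance whose primary "nodeA" is in the group being
# verified but whose secondary "nodeB" lives in another group, "nodeB" is
# added to self.extra_lv_nodes. Exec() later issues a separate
# call_node_verify with only NV_LVLIST for those nodes, so the instance's
# mirrored volumes can still be checked without verifying the foreign group.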
2106 def _VerifyNode(self, ninfo, nresult):
2107 """Perform some basic validation on data returned from a node.
2109 - check the result data structure is well formed and has all the required fields
2111 - check ganeti version
2113 @type ninfo: L{objects.Node}
2114 @param ninfo: the node to check
2115 @param nresult: the results from the node
2117 @return: whether overall this call was successful (and we can expect
2118 reasonable values in the response)
2122 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2124 # main result, nresult should be a non-empty dict
2125 test = not nresult or not isinstance(nresult, dict)
2126 _ErrorIf(test, constants.CV_ENODERPC, node,
2127 "unable to verify node: no data returned")
2131 # compares ganeti version
2132 local_version = constants.PROTOCOL_VERSION
2133 remote_version = nresult.get("version", None)
2134 test = not (remote_version and
2135 isinstance(remote_version, (list, tuple)) and
2136 len(remote_version) == 2)
2137 _ErrorIf(test, constants.CV_ENODERPC, node,
2138 "connection to node returned invalid data")
2142 test = local_version != remote_version[0]
2143 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2144 "incompatible protocol versions: master %s,"
2145 " node %s", local_version, remote_version[0])
2149 # node seems compatible, we can actually try to look into its results
2151 # full package version
2152 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2153 constants.CV_ENODEVERSION, node,
2154 "software version mismatch: master %s, node %s",
2155 constants.RELEASE_VERSION, remote_version[1],
2156 code=self.ETYPE_WARNING)
2158 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2159 if ninfo.vm_capable and isinstance(hyp_result, dict):
2160 for hv_name, hv_result in hyp_result.iteritems():
2161 test = hv_result is not None
2162 _ErrorIf(test, constants.CV_ENODEHV, node,
2163 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2165 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2166 if ninfo.vm_capable and isinstance(hvp_result, list):
2167 for item, hv_name, hv_result in hvp_result:
2168 _ErrorIf(True, constants.CV_ENODEHV, node,
2169 "hypervisor %s parameter verify failure (source %s): %s",
2170 hv_name, item, hv_result)
2172 test = nresult.get(constants.NV_NODESETUP,
2173 ["Missing NODESETUP results"])
2174 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2179 def _VerifyNodeTime(self, ninfo, nresult,
2180 nvinfo_starttime, nvinfo_endtime):
2181 """Check the node time.
2183 @type ninfo: L{objects.Node}
2184 @param ninfo: the node to check
2185 @param nresult: the remote results for the node
2186 @param nvinfo_starttime: the start time of the RPC call
2187 @param nvinfo_endtime: the end time of the RPC call
2191 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2193 ntime = nresult.get(constants.NV_TIME, None)
2195 ntime_merged = utils.MergeTime(ntime)
2196 except (ValueError, TypeError):
2197 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2200 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2201 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2202 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2203 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2207 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2208 "Node time diverges by at least %s from master node time",
2211 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2212 """Check the node LVM results.
2214 @type ninfo: L{objects.Node}
2215 @param ninfo: the node to check
2216 @param nresult: the remote results for the node
2217 @param vg_name: the configured VG name
2224 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2226 # checks vg existence and size > 20G
2227 vglist = nresult.get(constants.NV_VGLIST, None)
2229 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2231 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2232 constants.MIN_VG_SIZE)
2233 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2236 pvlist = nresult.get(constants.NV_PVLIST, None)
2237 test = pvlist is None
2238 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2240 # check that ':' is not present in PV names, since it's a
2241 # special character for lvcreate (denotes the range of PEs to
2243 for _, pvname, owner_vg in pvlist:
2244 test = ":" in pvname
2245 _ErrorIf(test, constants.CV_ENODELVM, node,
2246 "Invalid character ':' in PV '%s' of VG '%s'",
2249 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2250 """Check the node bridges.
2252 @type ninfo: L{objects.Node}
2253 @param ninfo: the node to check
2254 @param nresult: the remote results for the node
2255 @param bridges: the expected list of bridges
2262 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2264 missing = nresult.get(constants.NV_BRIDGES, None)
2265 test = not isinstance(missing, list)
2266 _ErrorIf(test, constants.CV_ENODENET, node,
2267 "did not return valid bridge information")
2269 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2270 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2272 def _VerifyNodeUserScripts(self, ninfo, nresult):
2273 """Check the results of user scripts presence and executability on the node
2275 @type ninfo: L{objects.Node}
2276 @param ninfo: the node to check
2277 @param nresult: the remote results for the node
2282 test = not constants.NV_USERSCRIPTS in nresult
2283 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2284 "did not return user scripts information")
2286 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2288 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2289 "user scripts not present or not executable: %s" %
2290 utils.CommaJoin(sorted(broken_scripts)))
2292 def _VerifyNodeNetwork(self, ninfo, nresult):
2293 """Check the node network connectivity results.
2295 @type ninfo: L{objects.Node}
2296 @param ninfo: the node to check
2297 @param nresult: the remote results for the node
2301 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2303 test = constants.NV_NODELIST not in nresult
2304 _ErrorIf(test, constants.CV_ENODESSH, node,
2305 "node hasn't returned node ssh connectivity data")
2307 if nresult[constants.NV_NODELIST]:
2308 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2309 _ErrorIf(True, constants.CV_ENODESSH, node,
2310 "ssh communication with node '%s': %s", a_node, a_msg)
2312 test = constants.NV_NODENETTEST not in nresult
2313 _ErrorIf(test, constants.CV_ENODENET, node,
2314 "node hasn't returned node tcp connectivity data")
2316 if nresult[constants.NV_NODENETTEST]:
2317 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2319 _ErrorIf(True, constants.CV_ENODENET, node,
2320 "tcp communication with node '%s': %s",
2321 anode, nresult[constants.NV_NODENETTEST][anode])
2323 test = constants.NV_MASTERIP not in nresult
2324 _ErrorIf(test, constants.CV_ENODENET, node,
2325 "node hasn't returned node master IP reachability data")
2327 if not nresult[constants.NV_MASTERIP]:
2328 if node == self.master_node:
2329 msg = "the master node cannot reach the master IP (not configured?)"
2331 msg = "cannot reach the master IP"
2332 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2334 def _VerifyInstance(self, instance, instanceconfig, node_image,
2336 """Verify an instance.
2338 This function checks to see if the required block devices are
2339 available on the instance's node.
2342 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2343 node_current = instanceconfig.primary_node
2345 node_vol_should = {}
2346 instanceconfig.MapLVsByNode(node_vol_should)
2348 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2349 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2350 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2352 for node in node_vol_should:
2353 n_img = node_image[node]
2354 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2355 # ignore missing volumes on offline or broken nodes
2357 for volume in node_vol_should[node]:
2358 test = volume not in n_img.volumes
2359 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2360 "volume %s missing on node %s", volume, node)
2362 if instanceconfig.admin_state == constants.ADMINST_UP:
2363 pri_img = node_image[node_current]
2364 test = instance not in pri_img.instances and not pri_img.offline
2365 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2366 "instance not running on its primary node %s",
2369 diskdata = [(nname, success, status, idx)
2370 for (nname, disks) in diskstatus.items()
2371 for idx, (success, status) in enumerate(disks)]
2373 for nname, success, bdev_status, idx in diskdata:
2374 # the 'ghost node' construction in Exec() ensures that we have a
2376 snode = node_image[nname]
2377 bad_snode = snode.ghost or snode.offline
2378 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2379 not success and not bad_snode,
2380 constants.CV_EINSTANCEFAULTYDISK, instance,
2381 "couldn't retrieve status for disk/%s on %s: %s",
2382 idx, nname, bdev_status)
2383 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2384 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2385 constants.CV_EINSTANCEFAULTYDISK, instance,
2386 "disk/%s on %s is faulty", idx, nname)
2388 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2389 """Verify if there are any unknown volumes in the cluster.
2391 The .os, .swap and backup volumes are ignored. All other volumes are
2392 reported as unknown.
2394 @type reserved: L{ganeti.utils.FieldSet}
2395 @param reserved: a FieldSet of reserved volume names
2398 for node, n_img in node_image.items():
2399 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2400 # skip non-healthy nodes
2402 for volume in n_img.volumes:
2403 test = ((node not in node_vol_should or
2404 volume not in node_vol_should[node]) and
2405 not reserved.Matches(volume))
2406 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2407 "volume %s is unknown", volume)
2409 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2410 """Verify N+1 Memory Resilience.
2412 Check that if one single node dies we can still start all the
2413 instances it was primary for.
2416 cluster_info = self.cfg.GetClusterInfo()
2417 for node, n_img in node_image.items():
2418 # This code checks that every node which is now listed as
2419 # secondary has enough memory to host all instances it is
2420 # supposed to host, should a single other node in the cluster fail.
2421 # FIXME: not ready for failover to an arbitrary node
2422 # FIXME: does not support file-backed instances
2423 # WARNING: we currently take into account down instances as well
2424 # as up ones, considering that even if they're down someone
2425 # might want to start them even in the event of a node failure.
2427 # we're skipping offline nodes from the N+1 warning, since
2428 # most likely we don't have good memory information from them;
2429 # we already list instances living on such nodes, and that's
2432 #TODO(dynmem): use MINMEM for checking
2433 #TODO(dynmem): also consider ballooning out other instances
2434 for prinode, instances in n_img.sbp.items():
2436 for instance in instances:
2437 bep = cluster_info.FillBE(instance_cfg[instance])
2438 if bep[constants.BE_AUTO_BALANCE]:
2439 needed_mem += bep[constants.BE_MAXMEM]
2440 test = n_img.mfree < needed_mem
2441 self._ErrorIf(test, constants.CV_ENODEN1, node,
2442 "not enough memory to accomodate instance failovers"
2443 " should node %s fail (%dMiB needed, %dMiB available)",
2444 prinode, needed_mem, n_img.mfree)
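# Illustrative example of the N+1 arithmetic above (hypothetical numbers):
# if node "n2" is secondary for two auto-balanced instances whose primary is
# "n1", with BE_MAXMEM values of 2048 and 4096 MiB, then needed_mem is 6144
# MiB; with n_img.mfree == 4096 the check fires and CV_ENODEN1 is reported
# for "n2", naming "n1" as the node whose failure could not be absorbed.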
2447 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2448 (files_all, files_opt, files_mc, files_vm)):
2449 """Verifies file checksums collected from all nodes.
2451 @param errorif: Callback for reporting errors
2452 @param nodeinfo: List of L{objects.Node} objects
2453 @param master_node: Name of master node
2454 @param all_nvinfo: RPC results
2457 # Define functions determining which nodes to consider for a file
2460 (files_mc, lambda node: (node.master_candidate or
2461 node.name == master_node)),
2462 (files_vm, lambda node: node.vm_capable),
2465 # Build mapping from filename to list of nodes which should have the file
2467 for (files, fn) in files2nodefn:
2469 filenodes = nodeinfo
2471 filenodes = filter(fn, nodeinfo)
2472 nodefiles.update((filename,
2473 frozenset(map(operator.attrgetter("name"), filenodes)))
2474 for filename in files)
2476 assert set(nodefiles) == (files_all | files_mc | files_vm)
2478 fileinfo = dict((filename, {}) for filename in nodefiles)
2479 ignore_nodes = set()
2481 for node in nodeinfo:
2483 ignore_nodes.add(node.name)
2486 nresult = all_nvinfo[node.name]
2488 if nresult.fail_msg or not nresult.payload:
2491 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2493 test = not (node_files and isinstance(node_files, dict))
2494 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2495 "Node did not return file checksum data")
2497 ignore_nodes.add(node.name)
2500 # Build per-checksum mapping from filename to nodes having it
2501 for (filename, checksum) in node_files.items():
2502 assert filename in nodefiles
2503 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2505 for (filename, checksums) in fileinfo.items():
2506 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2508 # Nodes having the file
2509 with_file = frozenset(node_name
2510 for nodes in fileinfo[filename].values()
2511 for node_name in nodes) - ignore_nodes
2513 expected_nodes = nodefiles[filename] - ignore_nodes
2515 # Nodes missing file
2516 missing_file = expected_nodes - with_file
2518 if filename in files_opt:
2520 errorif(missing_file and missing_file != expected_nodes,
2521 constants.CV_ECLUSTERFILECHECK, None,
2522 "File %s is optional, but it must exist on all or no"
2523 " nodes (not found on %s)",
2524 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2526 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2527 "File %s is missing from node(s) %s", filename,
2528 utils.CommaJoin(utils.NiceSort(missing_file)))
2530 # Warn if a node has a file it shouldn't
2531 unexpected = with_file - expected_nodes
2533 constants.CV_ECLUSTERFILECHECK, None,
2534 "File %s should not exist on node(s) %s",
2535 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2537 # See if there are multiple versions of the file
2538 test = len(checksums) > 1
2540 variants = ["variant %s on %s" %
2541 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2542 for (idx, (checksum, nodes)) in
2543 enumerate(sorted(checksums.items()))]
2547 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2548 "File %s found with %s different checksums (%s)",
2549 filename, len(checksums), "; ".join(variants))
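# Illustrative sketch of the fileinfo structure built above (hypothetical
# node names): fileinfo maps each filename to a dict of checksum -> set of
# node names; two different checksums for the same ssconf file, split across
# {"n1", "n2"} and {"n3"}, trigger the "different checksums" error. A file
# listed in files_opt that is missing everywhere is accepted, but one present
# on only some of its expected nodes is not.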
2551 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2553 """Verifies and the node DRBD status.
2555 @type ninfo: L{objects.Node}
2556 @param ninfo: the node to check
2557 @param nresult: the remote results for the node
2558 @param instanceinfo: the dict of instances
2559 @param drbd_helper: the configured DRBD usermode helper
2560 @param drbd_map: the DRBD map as returned by
2561 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2565 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2568 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2569 test = (helper_result is None)
2570 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2571 "no drbd usermode helper returned")
2573 status, payload = helper_result
2575 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2576 "drbd usermode helper check unsuccessful: %s", payload)
2577 test = status and (payload != drbd_helper)
2578 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2579 "wrong drbd usermode helper: %s", payload)
2581 # compute the DRBD minors
2583 for minor, instance in drbd_map[node].items():
2584 test = instance not in instanceinfo
2585 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2586 "ghost instance '%s' in temporary DRBD map", instance)
2587 # ghost instance should not be running, but otherwise we
2588 # don't give double warnings (both ghost instance and
2589 # unallocated minor in use)
2591 node_drbd[minor] = (instance, False)
2593 instance = instanceinfo[instance]
2594 node_drbd[minor] = (instance.name,
2595 instance.admin_state == constants.ADMINST_UP)
2597 # and now check them
2598 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2599 test = not isinstance(used_minors, (tuple, list))
2600 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2601 "cannot parse drbd status file: %s", str(used_minors))
2603 # we cannot check drbd status
2606 for minor, (iname, must_exist) in node_drbd.items():
2607 test = minor not in used_minors and must_exist
2608 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2609 "drbd minor %d of instance %s is not active", minor, iname)
2610 for minor in used_minors:
2611 test = minor not in node_drbd
2612 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2613 "unallocated drbd minor %d is in use", minor)
2615 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2616 """Builds the node OS structures.
2618 @type ninfo: L{objects.Node}
2619 @param ninfo: the node to check
2620 @param nresult: the remote results for the node
2621 @param nimg: the node image object
2625 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2627 remote_os = nresult.get(constants.NV_OSLIST, None)
2628 test = (not isinstance(remote_os, list) or
2629 not compat.all(isinstance(v, list) and len(v) == 7
2630 for v in remote_os))
2632 _ErrorIf(test, constants.CV_ENODEOS, node,
2633 "node hasn't returned valid OS data")
2642 for (name, os_path, status, diagnose,
2643 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2645 if name not in os_dict:
2648 # parameters is a list of lists instead of list of tuples due to
2649 # JSON lacking a real tuple type, fix it:
2650 parameters = [tuple(v) for v in parameters]
2651 os_dict[name].append((os_path, status, diagnose,
2652 set(variants), set(parameters), set(api_ver)))
2654 nimg.oslist = os_dict
2656 def _VerifyNodeOS(self, ninfo, nimg, base):
2657 """Verifies the node OS list.
2659 @type ninfo: L{objects.Node}
2660 @param ninfo: the node to check
2661 @param nimg: the node image object
2662 @param base: the 'template' node we match against (e.g. from the master)
2666 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2668 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2670 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2671 for os_name, os_data in nimg.oslist.items():
2672 assert os_data, "Empty OS status for OS %s?!" % os_name
2673 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2674 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2675 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2676 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2677 "OS '%s' has multiple entries (first one shadows the rest): %s",
2678 os_name, utils.CommaJoin([v[0] for v in os_data]))
2679 # comparisons with the 'base' image
2680 test = os_name not in base.oslist
2681 _ErrorIf(test, constants.CV_ENODEOS, node,
2682 "Extra OS %s not present on reference node (%s)",
2686 assert base.oslist[os_name], "Base node has empty OS status?"
2687 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2689 # base OS is invalid, skipping
2691 for kind, a, b in [("API version", f_api, b_api),
2692 ("variants list", f_var, b_var),
2693 ("parameters", beautify_params(f_param),
2694 beautify_params(b_param))]:
2695 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2696 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2697 kind, os_name, base.name,
2698 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2700 # check any missing OSes
2701 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2702 _ErrorIf(missing, constants.CV_ENODEOS, node,
2703 "OSes present on reference node %s but missing on this node: %s",
2704 base.name, utils.CommaJoin(missing))
2706 def _VerifyOob(self, ninfo, nresult):
2707 """Verifies out of band functionality of a node.
2709 @type ninfo: L{objects.Node}
2710 @param ninfo: the node to check
2711 @param nresult: the remote results for the node
2715 # We just have to verify the paths on master and/or master candidates
2716 # as the oob helper is invoked on the master
2717 if ((ninfo.master_candidate or ninfo.master_capable) and
2718 constants.NV_OOB_PATHS in nresult):
2719 for path_result in nresult[constants.NV_OOB_PATHS]:
2720 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2722 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2723 """Verifies and updates the node volume data.
2725 This function will update a L{NodeImage}'s internal structures
2726 with data from the remote call.
2728 @type ninfo: L{objects.Node}
2729 @param ninfo: the node to check
2730 @param nresult: the remote results for the node
2731 @param nimg: the node image object
2732 @param vg_name: the configured VG name
2736 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2738 nimg.lvm_fail = True
2739 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2742 elif isinstance(lvdata, basestring):
2743 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2744 utils.SafeEncode(lvdata))
2745 elif not isinstance(lvdata, dict):
2746 _ErrorIf(True, constants.CV_ENODELVM, node,
2747 "rpc call to node failed (lvlist)")
2749 nimg.volumes = lvdata
2750 nimg.lvm_fail = False
2752 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2753 """Verifies and updates the node instance list.
2755 If the listing was successful, then updates this node's instance
2756 list. Otherwise, it marks the RPC call as failed for the instance
2759 @type ninfo: L{objects.Node}
2760 @param ninfo: the node to check
2761 @param nresult: the remote results for the node
2762 @param nimg: the node image object
2765 idata = nresult.get(constants.NV_INSTANCELIST, None)
2766 test = not isinstance(idata, list)
2767 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2768 "rpc call to node failed (instancelist): %s",
2769 utils.SafeEncode(str(idata)))
2771 nimg.hyp_fail = True
2773 nimg.instances = idata
2775 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2776 """Verifies and computes a node information map
2778 @type ninfo: L{objects.Node}
2779 @param ninfo: the node to check
2780 @param nresult: the remote results for the node
2781 @param nimg: the node image object
2782 @param vg_name: the configured VG name
2786 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2788 # try to read free memory (from the hypervisor)
2789 hv_info = nresult.get(constants.NV_HVINFO, None)
2790 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2791 _ErrorIf(test, constants.CV_ENODEHV, node,
2792 "rpc call to node failed (hvinfo)")
2795 nimg.mfree = int(hv_info["memory_free"])
2796 except (ValueError, TypeError):
2797 _ErrorIf(True, constants.CV_ENODERPC, node,
2798 "node returned invalid nodeinfo, check hypervisor")
2800 # FIXME: devise a free space model for file based instances as well
2801 if vg_name is not None:
2802 test = (constants.NV_VGLIST not in nresult or
2803 vg_name not in nresult[constants.NV_VGLIST])
2804 _ErrorIf(test, constants.CV_ENODELVM, node,
2805 "node didn't return data for the volume group '%s'"
2806 " - it is either missing or broken", vg_name)
2809 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2810 except (ValueError, TypeError):
2811 _ErrorIf(True, constants.CV_ENODERPC, node,
2812 "node returned invalid LVM info, check LVM status")
2814 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2815 """Gets per-disk status information for all instances.
2817 @type nodelist: list of strings
2818 @param nodelist: Node names
2819 @type node_image: dict of (name, L{objects.Node})
2820 @param node_image: Node objects
2821 @type instanceinfo: dict of (name, L{objects.Instance})
2822 @param instanceinfo: Instance objects
2823 @rtype: {instance: {node: [(success, payload)]}}
2824 @return: a dictionary of per-instance dictionaries with nodes as
2825 keys and disk information as values; the disk information is a
2826 list of tuples (success, payload)
2829 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2832 node_disks_devonly = {}
2833 diskless_instances = set()
2834 diskless = constants.DT_DISKLESS
2836 for nname in nodelist:
2837 node_instances = list(itertools.chain(node_image[nname].pinst,
2838 node_image[nname].sinst))
2839 diskless_instances.update(inst for inst in node_instances
2840 if instanceinfo[inst].disk_template == diskless)
2841 disks = [(inst, disk)
2842 for inst in node_instances
2843 for disk in instanceinfo[inst].disks]
2846 # No need to collect data
2849 node_disks[nname] = disks
2851 # Creating copies as SetDiskID below will modify the objects and that can
2852 # lead to incorrect data returned from nodes
2853 devonly = [dev.Copy() for (_, dev) in disks]
2856 self.cfg.SetDiskID(dev, nname)
2858 node_disks_devonly[nname] = devonly
2860 assert len(node_disks) == len(node_disks_devonly)
2862 # Collect data from all nodes with disks
2863 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2866 assert len(result) == len(node_disks)
2870 for (nname, nres) in result.items():
2871 disks = node_disks[nname]
2874 # No data from this node
2875 data = len(disks) * [(False, "node offline")]
2878 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2879 "while getting disk information: %s", msg)
2881 # No data from this node
2882 data = len(disks) * [(False, msg)]
2885 for idx, i in enumerate(nres.payload):
2886 if isinstance(i, (tuple, list)) and len(i) == 2:
2889 logging.warning("Invalid result from node %s, entry %d: %s",
2891 data.append((False, "Invalid result from the remote node"))
2893 for ((inst, _), status) in zip(disks, data):
2894 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2896 # Add empty entries for diskless instances.
2897 for inst in diskless_instances:
2898 assert inst not in instdisk
2901 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2902 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2903 compat.all(isinstance(s, (tuple, list)) and
2904 len(s) == 2 for s in statuses)
2905 for inst, nnames in instdisk.items()
2906 for nname, statuses in nnames.items())
2907 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2912 def _SshNodeSelector(group_uuid, all_nodes):
2913 """Create endless iterators for all potential SSH check hosts.
2916 nodes = [node for node in all_nodes
2917 if (node.group != group_uuid and
2919 keyfunc = operator.attrgetter("group")
2921 return map(itertools.cycle,
2922 [sorted(map(operator.attrgetter("name"), names))
2923 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2927 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2928 """Choose which nodes should talk to which other nodes.
2930 We will make nodes contact all nodes in their group, and one node from every other group.
2933 @warning: This algorithm has a known issue if one node group is much
2934 smaller than others (e.g. just one node). In such a case all other
2935 nodes will talk to the single node.
2938 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2939 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2941 return (online_nodes,
2942 dict((name, sorted([i.next() for i in sel]))
2943 for name in online_nodes))
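# Illustrative example of the SSH check-node selection above (hypothetical
# groups and names, ignoring the additional node filtering): with the group
# being verified g1 = {n1, n2} and other groups g2 = {n3}, g3 = {n4, n5},
# _SshNodeSelector yields one endless iterator per foreign group
# (cycle(["n3"]) and cycle(["n4", "n5"])), so _SelectSshCheckNodes returns
# roughly (["n1", "n2"], {"n1": ["n3", "n4"], "n2": ["n3", "n5"]}), i.e.
# every online node of g1 talks to one representative of each other group.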
2945 def BuildHooksEnv(self):
2948 Cluster-Verify hooks are run only in the post phase; if they fail, their
2949 output is logged in the verify output and the verification fails.
2953 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2956 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2957 for node in self.my_node_info.values())
2961 def BuildHooksNodes(self):
2962 """Build hooks nodes.
2965 return ([], self.my_node_names)
2967 def Exec(self, feedback_fn):
2968 """Verify integrity of the node group, performing various test on nodes.
2971 # This method has too many local variables. pylint: disable=R0914
2972 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2974 if not self.my_node_names:
2976 feedback_fn("* Empty node group, skipping verification")
2980 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2981 verbose = self.op.verbose
2982 self._feedback_fn = feedback_fn
2984 vg_name = self.cfg.GetVGName()
2985 drbd_helper = self.cfg.GetDRBDHelper()
2986 cluster = self.cfg.GetClusterInfo()
2987 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2988 hypervisors = cluster.enabled_hypervisors
2989 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2991 i_non_redundant = [] # Non redundant instances
2992 i_non_a_balanced = [] # Non auto-balanced instances
2993 i_offline = 0 # Count of offline instances
2994 n_offline = 0 # Count of offline nodes
2995 n_drained = 0 # Count of nodes being drained
2996 node_vol_should = {}
2998 # FIXME: verify OS list
3001 filemap = _ComputeAncillaryFiles(cluster, False)
3003 # do local checksums
3004 master_node = self.master_node = self.cfg.GetMasterNode()
3005 master_ip = self.cfg.GetMasterIP()
3007 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3010 if self.cfg.GetUseExternalMipScript():
3011 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3013 node_verify_param = {
3014 constants.NV_FILELIST:
3015 utils.UniqueSequence(filename
3016 for files in filemap
3017 for filename in files),
3018 constants.NV_NODELIST:
3019 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3020 self.all_node_info.values()),
3021 constants.NV_HYPERVISOR: hypervisors,
3022 constants.NV_HVPARAMS:
3023 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3024 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3025 for node in node_data_list
3026 if not node.offline],
3027 constants.NV_INSTANCELIST: hypervisors,
3028 constants.NV_VERSION: None,
3029 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3030 constants.NV_NODESETUP: None,
3031 constants.NV_TIME: None,
3032 constants.NV_MASTERIP: (master_node, master_ip),
3033 constants.NV_OSLIST: None,
3034 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3035 constants.NV_USERSCRIPTS: user_scripts,
3038 if vg_name is not None:
3039 node_verify_param[constants.NV_VGLIST] = None
3040 node_verify_param[constants.NV_LVLIST] = vg_name
3041 node_verify_param[constants.NV_PVLIST] = [vg_name]
3042 node_verify_param[constants.NV_DRBDLIST] = None
3045 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3048 # FIXME: this needs to be changed per node-group, not cluster-wide
3050 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3051 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3052 bridges.add(default_nicpp[constants.NIC_LINK])
3053 for instance in self.my_inst_info.values():
3054 for nic in instance.nics:
3055 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3056 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3057 bridges.add(full_nic[constants.NIC_LINK])
3060 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3062 # Build our expected cluster state
3063 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3065 vm_capable=node.vm_capable))
3066 for node in node_data_list)
3070 for node in self.all_node_info.values():
3071 path = _SupportsOob(self.cfg, node)
3072 if path and path not in oob_paths:
3073 oob_paths.append(path)
3076 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3078 for instance in self.my_inst_names:
3079 inst_config = self.my_inst_info[instance]
3081 for nname in inst_config.all_nodes:
3082 if nname not in node_image:
3083 gnode = self.NodeImage(name=nname)
3084 gnode.ghost = (nname not in self.all_node_info)
3085 node_image[nname] = gnode
3087 inst_config.MapLVsByNode(node_vol_should)
3089 pnode = inst_config.primary_node
3090 node_image[pnode].pinst.append(instance)
3092 for snode in inst_config.secondary_nodes:
3093 nimg = node_image[snode]
3094 nimg.sinst.append(instance)
3095 if pnode not in nimg.sbp:
3096 nimg.sbp[pnode] = []
3097 nimg.sbp[pnode].append(instance)
3099 # At this point, we have the in-memory data structures complete,
3100 # except for the runtime information, which we'll gather next
3102 # Due to the way our RPC system works, exact response times cannot be
3103 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3104 # time before and after executing the request, we can at least have a time window.
3106 nvinfo_starttime = time.time()
3107 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3109 self.cfg.GetClusterName())
3110 nvinfo_endtime = time.time()
3112 if self.extra_lv_nodes and vg_name is not None:
3114 self.rpc.call_node_verify(self.extra_lv_nodes,
3115 {constants.NV_LVLIST: vg_name},
3116 self.cfg.GetClusterName())
3118 extra_lv_nvinfo = {}
3120 all_drbd_map = self.cfg.ComputeDRBDMap()
3122 feedback_fn("* Gathering disk information (%s nodes)" %
3123 len(self.my_node_names))
3124 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3127 feedback_fn("* Verifying configuration file consistency")
3129 # If not all nodes are being checked, we need to make sure the master node
3130 # and a non-checked vm_capable node are in the list.
3131 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3133 vf_nvinfo = all_nvinfo.copy()
3134 vf_node_info = list(self.my_node_info.values())
3135 additional_nodes = []
3136 if master_node not in self.my_node_info:
3137 additional_nodes.append(master_node)
3138 vf_node_info.append(self.all_node_info[master_node])
3139 # Add the first vm_capable node we find which is not included
3140 for node in absent_nodes:
3141 nodeinfo = self.all_node_info[node]
3142 if nodeinfo.vm_capable and not nodeinfo.offline:
3143 additional_nodes.append(node)
3144 vf_node_info.append(self.all_node_info[node])
3146 key = constants.NV_FILELIST
3147 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3148 {key: node_verify_param[key]},
3149 self.cfg.GetClusterName()))
3151 vf_nvinfo = all_nvinfo
3152 vf_node_info = self.my_node_info.values()
3154 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3156 feedback_fn("* Verifying node status")
3160 for node_i in node_data_list:
3162 nimg = node_image[node]
3166 feedback_fn("* Skipping offline node %s" % (node,))
3170 if node == master_node:
3172 elif node_i.master_candidate:
3173 ntype = "master candidate"
3174 elif node_i.drained:
3180 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3182 msg = all_nvinfo[node].fail_msg
3183 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3186 nimg.rpc_fail = True
3189 nresult = all_nvinfo[node].payload
3191 nimg.call_ok = self._VerifyNode(node_i, nresult)
3192 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3193 self._VerifyNodeNetwork(node_i, nresult)
3194 self._VerifyNodeUserScripts(node_i, nresult)
3195 self._VerifyOob(node_i, nresult)
3198 self._VerifyNodeLVM(node_i, nresult, vg_name)
3199 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3202 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3203 self._UpdateNodeInstances(node_i, nresult, nimg)
3204 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3205 self._UpdateNodeOS(node_i, nresult, nimg)
3207 if not nimg.os_fail:
3208 if refos_img is None:
3210 self._VerifyNodeOS(node_i, nimg, refos_img)
3211 self._VerifyNodeBridges(node_i, nresult, bridges)
3213 # Check whether all running instances are primary for the node. (This
3214 # can no longer be done from _VerifyInstance below, since some of the
3215 # wrong instances could be from other node groups.)
3216 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3218 for inst in non_primary_inst:
3219 # FIXME: investigate best way to handle offline insts
3220 if inst.admin_state == constants.ADMINST_OFFLINE:
3222 feedback_fn("* Skipping offline instance %s" % inst.name)
3225 test = inst in self.all_inst_info
3226 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3227 "instance should not run on node %s", node_i.name)
3228 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3229 "node is running unknown instance %s", inst)
3231 for node, result in extra_lv_nvinfo.items():
3232 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3233 node_image[node], vg_name)
3235 feedback_fn("* Verifying instance status")
3236 for instance in self.my_inst_names:
3238 feedback_fn("* Verifying instance %s" % instance)
3239 inst_config = self.my_inst_info[instance]
3240 self._VerifyInstance(instance, inst_config, node_image,
3242 inst_nodes_offline = []
3244 pnode = inst_config.primary_node
3245 pnode_img = node_image[pnode]
3246 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3247 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3248 " primary node failed", instance)
3250 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3252 constants.CV_EINSTANCEBADNODE, instance,
3253 "instance is marked as running and lives on offline node %s",
3254 inst_config.primary_node)
3256 # If the instance is non-redundant we cannot survive losing its primary
3257 # node, so we are not N+1 compliant. On the other hand we have no disk
3258 # templates with more than one secondary so that situation is not well supported either.
3260 # FIXME: does not support file-backed instances
3261 if not inst_config.secondary_nodes:
3262 i_non_redundant.append(instance)
3264 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3265 constants.CV_EINSTANCELAYOUT,
3266 instance, "instance has multiple secondary nodes: %s",
3267 utils.CommaJoin(inst_config.secondary_nodes),
3268 code=self.ETYPE_WARNING)
3270 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3271 pnode = inst_config.primary_node
3272 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3273 instance_groups = {}
3275 for node in instance_nodes:
3276 instance_groups.setdefault(self.all_node_info[node].group,
3280 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3281 # Sort so that we always list the primary node first.
3282 for group, nodes in sorted(instance_groups.items(),
3283 key=lambda (_, nodes): pnode in nodes,
3286 self._ErrorIf(len(instance_groups) > 1,
3287 constants.CV_EINSTANCESPLITGROUPS,
3288 instance, "instance has primary and secondary nodes in"
3289 " different groups: %s", utils.CommaJoin(pretty_list),
3290 code=self.ETYPE_WARNING)
3292 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3293 i_non_a_balanced.append(instance)
3295 for snode in inst_config.secondary_nodes:
3296 s_img = node_image[snode]
3297 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3298 snode, "instance %s, connection to secondary node failed",
3302 inst_nodes_offline.append(snode)
3304 # warn that the instance lives on offline nodes
3305 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3306 "instance has offline secondary node(s) %s",
3307 utils.CommaJoin(inst_nodes_offline))
3308 # ... or ghost/non-vm_capable nodes
3309 for node in inst_config.all_nodes:
3310 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3311 instance, "instance lives on ghost node %s", node)
3312 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3313 instance, "instance lives on non-vm_capable node %s", node)
3315 feedback_fn("* Verifying orphan volumes")
3316 reserved = utils.FieldSet(*cluster.reserved_lvs)
3318 # We will get spurious "unknown volume" warnings if any node of this group
3319 # is secondary for an instance whose primary is in another group. To avoid
3320 # them, we find these instances and add their volumes to node_vol_should.
3321 for inst in self.all_inst_info.values():
3322 for secondary in inst.secondary_nodes:
3323 if (secondary in self.my_node_info
3324 and inst.name not in self.my_inst_info):
3325 inst.MapLVsByNode(node_vol_should)
3328 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3330 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3331 feedback_fn("* Verifying N+1 Memory redundancy")
3332 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3334 feedback_fn("* Other Notes")
3336 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3337 % len(i_non_redundant))
3339 if i_non_a_balanced:
3340 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3341 % len(i_non_a_balanced))
3344 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3347 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3350 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3354 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3355 """Analyze the post-hooks' result
3357 This method analyses the hook result, handles it, and sends some
3358 nicely-formatted feedback back to the user.
3360 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3361 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3362 @param hooks_results: the results of the multi-node hooks rpc call
3363 @param feedback_fn: function used to send feedback back to the caller
3364 @param lu_result: previous Exec result
3365 @return: the new Exec result, based on the previous result
3369 # We only really run POST phase hooks, only for non-empty groups,
3370 # and are only interested in their results
3371 if not self.my_node_names:
3374 elif phase == constants.HOOKS_PHASE_POST:
3375 # Used to change hooks' output to proper indentation
3376 feedback_fn("* Hooks Results")
3377 assert hooks_results, "invalid result from hooks"
3379 for node_name in hooks_results:
3380 res = hooks_results[node_name]
3382 test = msg and not res.offline
3383 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3384 "Communication failure in hooks execution: %s", msg)
3385 if res.offline or msg:
3386 # No need to investigate payload if node is offline or gave an error.
3389 for script, hkr, output in res.payload:
3390 test = hkr == constants.HKR_FAIL
3391 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3392 "Script %s failed, output:", script)
3394 output = self._HOOKS_INDENT_RE.sub(" ", output)
3395 feedback_fn("%s" % output)
3401 class LUClusterVerifyDisks(NoHooksLU):
3402 """Verifies the cluster disks status.
3407 def ExpandNames(self):
3408 self.share_locks = _ShareAll()
3409 self.needed_locks = {
3410 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3413 def Exec(self, feedback_fn):
3414 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3416 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3417 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3418 for group in group_names])
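# Illustrative note (hypothetical group names): with two node groups owned,
# the Exec above returns something like
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="other-group")]])
# i.e. one single-opcode job per node group, submitted as separate jobs.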
3421 class LUGroupVerifyDisks(NoHooksLU):
3422 """Verifies the status of all disks in a node group.
3427 def ExpandNames(self):
3428 # Raises errors.OpPrereqError on its own if group can't be found
3429 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3431 self.share_locks = _ShareAll()
3432 self.needed_locks = {
3433 locking.LEVEL_INSTANCE: [],
3434 locking.LEVEL_NODEGROUP: [],
3435 locking.LEVEL_NODE: [],
3438 def DeclareLocks(self, level):
3439 if level == locking.LEVEL_INSTANCE:
3440 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3442 # Lock instances optimistically, needs verification once node and group
3443 # locks have been acquired
3444 self.needed_locks[locking.LEVEL_INSTANCE] = \
3445 self.cfg.GetNodeGroupInstances(self.group_uuid)
3447 elif level == locking.LEVEL_NODEGROUP:
3448 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3450 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3451 set([self.group_uuid] +
3452 # Lock all groups used by instances optimistically; this requires
3453 # going via the node before it's locked, requiring verification
3456 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3457 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3459 elif level == locking.LEVEL_NODE:
3460 # This will only lock the nodes in the group to be verified which contain
3462 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3463 self._LockInstancesNodes()
3465 # Lock all nodes in group to be verified
3466 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3467 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3468 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3470 def CheckPrereq(self):
3471 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3472 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3473 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3475 assert self.group_uuid in owned_groups
3477 # Check if locked instances are still correct
3478 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3480 # Get instance information
3481 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3483 # Check if node groups for locked instances are still correct
3484 for (instance_name, inst) in self.instances.items():
3485 assert owned_nodes.issuperset(inst.all_nodes), \
3486 "Instance %s's nodes changed while we kept the lock" % instance_name
3488 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3491 assert self.group_uuid in inst_groups, \
3492 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3494 def Exec(self, feedback_fn):
3495 """Verify integrity of cluster disks.
3497 @rtype: tuple of three items
3498 @return: a tuple of (dict of node-to-node_error, list of instances
3499 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3504 res_instances = set()
3507 nv_dict = _MapInstanceDisksToNodes([inst
3508 for inst in self.instances.values()
3509 if inst.admin_state == constants.ADMINST_UP])
3512 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3513 set(self.cfg.GetVmCapableNodeList()))
3515 node_lvs = self.rpc.call_lv_list(nodes, [])
3517 for (node, node_res) in node_lvs.items():
3518 if node_res.offline:
3521 msg = node_res.fail_msg
3523 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3524 res_nodes[node] = msg
3527 for lv_name, (_, _, lv_online) in node_res.payload.items():
3528 inst = nv_dict.pop((node, lv_name), None)
3529 if not (lv_online or inst is None):
3530 res_instances.add(inst)
3532 # any leftover items in nv_dict are missing LVs, let's arrange the data
3534 for key, inst in nv_dict.iteritems():
3535 res_missing.setdefault(inst, []).append(list(key))
3537 return (res_nodes, list(res_instances), res_missing)
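# Illustrative sketch of the return value above (hypothetical data):
#   ({"n3": "connection failed"},          # nodes that could not be queried
#    ["inst1"],                            # instances needing activate-disks
#    {"inst2": [["n1", "xenvg/some-lv"]]}) # instances with missing LVs
# Only instances that are ADMINST_UP are considered, and only vm-capable
# nodes covered by this LU's node locks are queried.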
3540 class LUClusterRepairDiskSizes(NoHooksLU):
3541 """Verifies the cluster disks sizes.
3546 def ExpandNames(self):
3547 if self.op.instances:
3548 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3549 self.needed_locks = {
3550 locking.LEVEL_NODE_RES: [],
3551 locking.LEVEL_INSTANCE: self.wanted_names,
3553 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3555 self.wanted_names = None
3556 self.needed_locks = {
3557 locking.LEVEL_NODE_RES: locking.ALL_SET,
3558 locking.LEVEL_INSTANCE: locking.ALL_SET,
3560 self.share_locks = {
3561 locking.LEVEL_NODE_RES: 1,
3562 locking.LEVEL_INSTANCE: 0,
3565 def DeclareLocks(self, level):
3566 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3567 self._LockInstancesNodes(primary_only=True, level=level)
3569 def CheckPrereq(self):
3570 """Check prerequisites.
3572 This only checks the optional instance list against the existing names.
3575 if self.wanted_names is None:
3576 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3578 self.wanted_instances = \
3579 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3581 def _EnsureChildSizes(self, disk):
3582 """Ensure children of the disk have the needed disk size.
3584 This is valid mainly for DRBD8 and fixes an issue where the
3585 children have smaller disk size.
3587 @param disk: an L{ganeti.objects.Disk} object
3590 if disk.dev_type == constants.LD_DRBD8:
3591 assert disk.children, "Empty children for DRBD8?"
3592 fchild = disk.children[0]
3593 mismatch = fchild.size < disk.size
3595 self.LogInfo("Child disk has size %d, parent %d, fixing",
3596 fchild.size, disk.size)
3597 fchild.size = disk.size
3599 # and we recurse on this child only, not on the metadev
3600 return self._EnsureChildSizes(fchild) or mismatch
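# Illustrative example (hypothetical sizes): for a DRBD8 disk of size 10240
# MiB whose data child records 10236 MiB, _EnsureChildSizes grows the child
# to 10240 and returns True, so the Exec loop below knows it must write the
# updated configuration for that instance.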
3604 def Exec(self, feedback_fn):
3605 """Verify the size of cluster disks.
3608 # TODO: check child disks too
3609 # TODO: check differences in size between primary/secondary nodes
3611 for instance in self.wanted_instances:
3612 pnode = instance.primary_node
3613 if pnode not in per_node_disks:
3614 per_node_disks[pnode] = []
3615 for idx, disk in enumerate(instance.disks):
3616 per_node_disks[pnode].append((instance, idx, disk))
3618 assert not (frozenset(per_node_disks.keys()) -
3619 self.owned_locks(locking.LEVEL_NODE_RES)), \
3620 "Not owning correct locks"
3621 assert not self.owned_locks(locking.LEVEL_NODE)
3624 for node, dskl in per_node_disks.items():
3625 newl = [v[2].Copy() for v in dskl]
3627 self.cfg.SetDiskID(dsk, node)
3628 result = self.rpc.call_blockdev_getsize(node, newl)
3630 self.LogWarning("Failure in blockdev_getsize call to node"
3631 " %s, ignoring", node)
3633 if len(result.payload) != len(dskl):
3634 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3635 " result.payload=%s", node, len(dskl), result.payload)
3636 self.LogWarning("Invalid result from node %s, ignoring node results",
3639 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3641 self.LogWarning("Disk %d of instance %s did not return size"
3642 " information, ignoring", idx, instance.name)
3644 if not isinstance(size, (int, long)):
3645 self.LogWarning("Disk %d of instance %s did not return valid"
3646 " size information, ignoring", idx, instance.name)
3649 if size != disk.size:
3650 self.LogInfo("Disk %d of instance %s has mismatched size,"
3651 " correcting: recorded %d, actual %d", idx,
3652 instance.name, disk.size, size)
3654 self.cfg.Update(instance, feedback_fn)
3655 changed.append((instance.name, idx, size))
3656 if self._EnsureChildSizes(disk):
3657 self.cfg.Update(instance, feedback_fn)
3658 changed.append((instance.name, idx, disk.size))
3662 class LUClusterRename(LogicalUnit):
3663 """Rename the cluster.
3666 HPATH = "cluster-rename"
3667 HTYPE = constants.HTYPE_CLUSTER
3669 def BuildHooksEnv(self):
3674 "OP_TARGET": self.cfg.GetClusterName(),
3675 "NEW_NAME": self.op.name,
3678 def BuildHooksNodes(self):
3679 """Build hooks nodes.
3682 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3684 def CheckPrereq(self):
3685 """Verify that the passed name is a valid one.
3688 hostname = netutils.GetHostname(name=self.op.name,
3689 family=self.cfg.GetPrimaryIPFamily())
3691 new_name = hostname.name
3692 self.ip = new_ip = hostname.ip
3693 old_name = self.cfg.GetClusterName()
3694 old_ip = self.cfg.GetMasterIP()
3695 if new_name == old_name and new_ip == old_ip:
3696 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3697 " cluster has changed",
3699 if new_ip != old_ip:
3700 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3701 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3702 " reachable on the network" %
3703 new_ip, errors.ECODE_NOTUNIQUE)
3705 self.op.name = new_name
3707 def Exec(self, feedback_fn):
3708 """Rename the cluster.
3711 clustername = self.op.name
3714 # shutdown the master IP
3715 master_params = self.cfg.GetMasterNetworkParameters()
3716 ems = self.cfg.GetUseExternalMipScript()
3717 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3719 result.Raise("Could not disable the master role")
3722 cluster = self.cfg.GetClusterInfo()
3723 cluster.cluster_name = clustername
3724 cluster.master_ip = new_ip
3725 self.cfg.Update(cluster, feedback_fn)
3727 # update the known hosts file
3728 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3729 node_list = self.cfg.GetOnlineNodeList()
3730 try:
3731 node_list.remove(master_params.name)
3732 except ValueError:
3733 pass
3734 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3736 master_params.ip = new_ip
3737 result = self.rpc.call_node_activate_master_ip(master_params.name,
3738 master_params, ems)
3739 msg = result.fail_msg
3740 if msg:
3741 self.LogWarning("Could not re-enable the master role on"
3742 " the master, please restart manually: %s", msg)
3744 return clustername
3747 def _ValidateNetmask(cfg, netmask):
3748 """Checks if a netmask is valid.
3750 @type cfg: L{config.ConfigWriter}
3751 @param cfg: The cluster configuration
3752 @type netmask: int
3753 @param netmask: the netmask to be verified
3754 @raise errors.OpPrereqError: if the validation fails
3756 """
3757 ip_family = cfg.GetPrimaryIPFamily()
3758 try:
3759 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3760 except errors.ProgrammerError:
3761 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3762 ip_family, errors.ECODE_INVAL)
3763 if not ipcls.ValidateNetmask(netmask):
3764 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3765 (netmask), errors.ECODE_INVAL)
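# Illustrative sketch (not part of the original module): how a caller can use
# the helper above without dealing with the exception itself.  The function
# name and the sample netmask (a /24) are invented; cfg is the usual
# config.ConfigWriter instance.
def _ExampleNetmaskCheck(cfg):
  """Return True if a /24 netmask is acceptable for the cluster's IP family.

  """
  try:
    _ValidateNetmask(cfg, 24)
  except errors.OpPrereqError:
    return False
  return True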
3768 class LUClusterSetParams(LogicalUnit):
3769 """Change the parameters of the cluster.
3772 HPATH = "cluster-modify"
3773 HTYPE = constants.HTYPE_CLUSTER
3776 def CheckArguments(self):
3780 if self.op.uid_pool:
3781 uidpool.CheckUidPool(self.op.uid_pool)
3783 if self.op.add_uids:
3784 uidpool.CheckUidPool(self.op.add_uids)
3786 if self.op.remove_uids:
3787 uidpool.CheckUidPool(self.op.remove_uids)
3789 if self.op.master_netmask is not None:
3790 _ValidateNetmask(self.cfg, self.op.master_netmask)
3792 if self.op.diskparams:
3793 for dt_params in self.op.diskparams.values():
3794 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3796 def ExpandNames(self):
3797 # FIXME: in the future maybe other cluster params won't require checking on
3798 # all nodes to be modified.
3799 self.needed_locks = {
3800 locking.LEVEL_NODE: locking.ALL_SET,
3801 locking.LEVEL_INSTANCE: locking.ALL_SET,
3802 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3803 }
3804 self.share_locks = {
3805 locking.LEVEL_NODE: 1,
3806 locking.LEVEL_INSTANCE: 1,
3807 locking.LEVEL_NODEGROUP: 1,
3808 }
3810 def BuildHooksEnv(self):
3811 """Build hooks env.
3813 """
3814 return {
3815 "OP_TARGET": self.cfg.GetClusterName(),
3816 "NEW_VG_NAME": self.op.vg_name,
3817 }
3819 def BuildHooksNodes(self):
3820 """Build hooks nodes.
3823 mn = self.cfg.GetMasterNode()
3824 return ([mn], [mn])
3826 def CheckPrereq(self):
3827 """Check prerequisites.
3829 This checks whether the given params don't conflict and
3830 if the given volume group is valid.
3833 if self.op.vg_name is not None and not self.op.vg_name:
3834 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3835 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3836 " instances exist", errors.ECODE_INVAL)
3838 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3839 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3840 raise errors.OpPrereqError("Cannot disable drbd helper while"
3841 " drbd-based instances exist",
3844 node_list = self.owned_locks(locking.LEVEL_NODE)
3846 # if vg_name not None, checks given volume group on all nodes
3847 if self.op.vg_name:
3848 vglist = self.rpc.call_vg_list(node_list)
3849 for node in node_list:
3850 msg = vglist[node].fail_msg
3851 if msg:
3852 # ignoring down node
3853 self.LogWarning("Error while gathering data on node %s"
3854 " (ignoring node): %s", node, msg)
3855 continue
3856 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3857 self.op.vg_name,
3858 constants.MIN_VG_SIZE)
3859 if vgstatus:
3860 raise errors.OpPrereqError("Error on node '%s': %s" %
3861 (node, vgstatus), errors.ECODE_ENVIRON)
3863 if self.op.drbd_helper:
3864 # checks given drbd helper on all nodes
3865 helpers = self.rpc.call_drbd_helper(node_list)
3866 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3867 if ninfo.offline:
3868 self.LogInfo("Not checking drbd helper on offline node %s", node)
3869 continue
3870 msg = helpers[node].fail_msg
3871 if msg:
3872 raise errors.OpPrereqError("Error checking drbd helper on node"
3873 " '%s': %s" % (node, msg),
3874 errors.ECODE_ENVIRON)
3875 node_helper = helpers[node].payload
3876 if node_helper != self.op.drbd_helper:
3877 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3878 (node, node_helper), errors.ECODE_ENVIRON)
3880 self.cluster = cluster = self.cfg.GetClusterInfo()
3881 # validate params changes
3882 if self.op.beparams:
3883 objects.UpgradeBeParams(self.op.beparams)
3884 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3885 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3887 if self.op.ndparams:
3888 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3889 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3891 # TODO: we need a more general way to handle resetting
3892 # cluster-level parameters to default values
3893 if self.new_ndparams["oob_program"] == "":
3894 self.new_ndparams["oob_program"] = \
3895 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3897 if self.op.hv_state:
3898 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3899 self.cluster.hv_state_static)
3900 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3901 for hv, values in new_hv_state.items())
3903 if self.op.disk_state:
3904 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3905 self.cluster.disk_state_static)
3906 self.new_disk_state = \
3907 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3908 for name, values in svalues.items()))
3909 for storage, svalues in new_disk_state.items())
3911 if self.op.ipolicy:
3912 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3913 group_policy=False)
3915 all_instances = self.cfg.GetAllInstancesInfo().values()
3916 violations = set()
3917 for group in self.cfg.GetAllNodeGroupsInfo().values():
3918 instances = frozenset([inst for inst in all_instances
3919 if compat.any(node in group.members
3920 for node in inst.all_nodes)])
3921 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3922 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3923 group),
3924 new_ipolicy, instances)
3926 violations.update(new)
3928 if violations:
3929 self.LogWarning("After the ipolicy change the following instances"
3930 " violate them: %s",
3931 utils.CommaJoin(violations))
3933 if self.op.nicparams:
3934 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3935 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3936 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3937 nic_errors = []
3939 # check all instances for consistency
3940 for instance in self.cfg.GetAllInstancesInfo().values():
3941 for nic_idx, nic in enumerate(instance.nics):
3942 params_copy = copy.deepcopy(nic.nicparams)
3943 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3945 # check parameter syntax
3946 try:
3947 objects.NIC.CheckParameterSyntax(params_filled)
3948 except errors.ConfigurationError, err:
3949 nic_errors.append("Instance %s, nic/%d: %s" %
3950 (instance.name, nic_idx, err))
3952 # if we're moving instances to routed, check that they have an ip
3953 target_mode = params_filled[constants.NIC_MODE]
3954 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3955 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3956 " address" % (instance.name, nic_idx))
3957 if nic_errors:
3958 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3959 "\n".join(nic_errors))
3961 # hypervisor list/parameters
3962 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3963 if self.op.hvparams:
3964 for hv_name, hv_dict in self.op.hvparams.items():
3965 if hv_name not in self.new_hvparams:
3966 self.new_hvparams[hv_name] = hv_dict
3967 else:
3968 self.new_hvparams[hv_name].update(hv_dict)
3970 # disk template parameters
3971 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3972 if self.op.diskparams:
3973 for dt_name, dt_params in self.op.diskparams.items():
3974 if dt_name not in self.new_diskparams:
3975 self.new_diskparams[dt_name] = dt_params
3976 else:
3977 self.new_diskparams[dt_name].update(dt_params)
3979 # os hypervisor parameters
3980 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3981 if self.op.os_hvp:
3982 for os_name, hvs in self.op.os_hvp.items():
3983 if os_name not in self.new_os_hvp:
3984 self.new_os_hvp[os_name] = hvs
3985 else:
3986 for hv_name, hv_dict in hvs.items():
3987 if hv_name not in self.new_os_hvp[os_name]:
3988 self.new_os_hvp[os_name][hv_name] = hv_dict
3989 else:
3990 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3993 self.new_osp = objects.FillDict(cluster.osparams, {})
3994 if self.op.osparams:
3995 for os_name, osp in self.op.osparams.items():
3996 if os_name not in self.new_osp:
3997 self.new_osp[os_name] = {}
3999 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4000 use_none=True)
4002 if not self.new_osp[os_name]:
4003 # we removed all parameters
4004 del self.new_osp[os_name]
4005 else:
4006 # check the parameter validity (remote check)
4007 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4008 os_name, self.new_osp[os_name])
4010 # changes to the hypervisor list
4011 if self.op.enabled_hypervisors is not None:
4012 self.hv_list = self.op.enabled_hypervisors
4013 for hv in self.hv_list:
4014 # if the hypervisor doesn't already exist in the cluster
4015 # hvparams, we initialize it to empty, and then (in both
4016 # cases) we make sure to fill the defaults, as we might not
4017 # have a complete defaults list if the hypervisor wasn't
4019 if hv not in new_hvp:
4020 new_hvp[hv] = {}
4021 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4022 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4023 else:
4024 self.hv_list = cluster.enabled_hypervisors
4026 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4027 # either the enabled list has changed, or the parameters have, validate
4028 for hv_name, hv_params in self.new_hvparams.items():
4029 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4030 (self.op.enabled_hypervisors and
4031 hv_name in self.op.enabled_hypervisors)):
4032 # either this is a new hypervisor, or its parameters have changed
4033 hv_class = hypervisor.GetHypervisor(hv_name)
4034 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4035 hv_class.CheckParameterSyntax(hv_params)
4036 _CheckHVParams(self, node_list, hv_name, hv_params)
4039 # no need to check any newly-enabled hypervisors, since the
4040 # defaults have already been checked in the above code-block
4041 for os_name, os_hvp in self.new_os_hvp.items():
4042 for hv_name, hv_params in os_hvp.items():
4043 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4044 # we need to fill in the new os_hvp on top of the actual hv_p
4045 cluster_defaults = self.new_hvparams.get(hv_name, {})
4046 new_osp = objects.FillDict(cluster_defaults, hv_params)
4047 hv_class = hypervisor.GetHypervisor(hv_name)
4048 hv_class.CheckParameterSyntax(new_osp)
4049 _CheckHVParams(self, node_list, hv_name, new_osp)
4051 if self.op.default_iallocator:
4052 alloc_script = utils.FindFile(self.op.default_iallocator,
4053 constants.IALLOCATOR_SEARCH_PATH,
4054 os.path.isfile)
4055 if alloc_script is None:
4056 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4057 " specified" % self.op.default_iallocator,
4058 errors.ECODE_INVAL)
4060 def Exec(self, feedback_fn):
4061 """Change the parameters of the cluster.
4064 if self.op.vg_name is not None:
4065 new_volume = self.op.vg_name
4066 if not new_volume:
4067 new_volume = None
4068 if new_volume != self.cfg.GetVGName():
4069 self.cfg.SetVGName(new_volume)
4070 else:
4071 feedback_fn("Cluster LVM configuration already in desired"
4072 " state, not changing")
4073 if self.op.drbd_helper is not None:
4074 new_helper = self.op.drbd_helper
4075 if not new_helper:
4076 new_helper = None
4077 if new_helper != self.cfg.GetDRBDHelper():
4078 self.cfg.SetDRBDHelper(new_helper)
4079 else:
4080 feedback_fn("Cluster DRBD helper already in desired state,"
4081 " not changing")
4082 if self.op.hvparams:
4083 self.cluster.hvparams = self.new_hvparams
4084 if self.op.os_hvp:
4085 self.cluster.os_hvp = self.new_os_hvp
4086 if self.op.enabled_hypervisors is not None:
4087 self.cluster.hvparams = self.new_hvparams
4088 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4089 if self.op.beparams:
4090 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4091 if self.op.nicparams:
4092 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4093 if self.op.ipolicy:
4094 self.cluster.ipolicy = self.new_ipolicy
4095 if self.op.osparams:
4096 self.cluster.osparams = self.new_osp
4097 if self.op.ndparams:
4098 self.cluster.ndparams = self.new_ndparams
4099 if self.op.diskparams:
4100 self.cluster.diskparams = self.new_diskparams
4101 if self.op.hv_state:
4102 self.cluster.hv_state_static = self.new_hv_state
4103 if self.op.disk_state:
4104 self.cluster.disk_state_static = self.new_disk_state
4106 if self.op.candidate_pool_size is not None:
4107 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4108 # we need to update the pool size here, otherwise the save will fail
4109 _AdjustCandidatePool(self, [])
4111 if self.op.maintain_node_health is not None:
4112 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4113 feedback_fn("Note: CONFD was disabled at build time, node health"
4114 " maintenance is not useful (still enabling it)")
4115 self.cluster.maintain_node_health = self.op.maintain_node_health
4117 if self.op.prealloc_wipe_disks is not None:
4118 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4120 if self.op.add_uids is not None:
4121 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4123 if self.op.remove_uids is not None:
4124 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4126 if self.op.uid_pool is not None:
4127 self.cluster.uid_pool = self.op.uid_pool
4129 if self.op.default_iallocator is not None:
4130 self.cluster.default_iallocator = self.op.default_iallocator
4132 if self.op.reserved_lvs is not None:
4133 self.cluster.reserved_lvs = self.op.reserved_lvs
4135 if self.op.use_external_mip_script is not None:
4136 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4138 def helper_os(aname, mods, desc):
4139 desc += " OS list"
4140 lst = getattr(self.cluster, aname)
4141 for key, val in mods:
4142 if key == constants.DDM_ADD:
4143 if val in lst:
4144 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4145 else:
4146 lst.append(val)
4147 elif key == constants.DDM_REMOVE:
4148 if val in lst:
4149 lst.remove(val)
4150 else:
4151 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4152 else:
4153 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4155 if self.op.hidden_os:
4156 helper_os("hidden_os", self.op.hidden_os, "hidden")
4158 if self.op.blacklisted_os:
4159 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4161 if self.op.master_netdev:
4162 master_params = self.cfg.GetMasterNetworkParameters()
4163 ems = self.cfg.GetUseExternalMipScript()
4164 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4165 self.cluster.master_netdev)
4166 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4167 master_params, ems)
4168 result.Raise("Could not disable the master ip")
4169 feedback_fn("Changing master_netdev from %s to %s" %
4170 (master_params.netdev, self.op.master_netdev))
4171 self.cluster.master_netdev = self.op.master_netdev
4173 if self.op.master_netmask:
4174 master_params = self.cfg.GetMasterNetworkParameters()
4175 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4176 result = self.rpc.call_node_change_master_netmask(master_params.name,
4177 master_params.netmask,
4178 self.op.master_netmask,
4179 master_params.ip,
4180 master_params.netdev)
4181 if result.fail_msg:
4182 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4183 feedback_fn(msg)
4185 self.cluster.master_netmask = self.op.master_netmask
4187 self.cfg.Update(self.cluster, feedback_fn)
4189 if self.op.master_netdev:
4190 master_params = self.cfg.GetMasterNetworkParameters()
4191 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4192 self.op.master_netdev)
4193 ems = self.cfg.GetUseExternalMipScript()
4194 result = self.rpc.call_node_activate_master_ip(master_params.name,
4195 master_params, ems)
4196 if result.fail_msg:
4197 self.LogWarning("Could not re-enable the master ip on"
4198 " the master, please restart manually: %s",
4199 result.fail_msg)
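# Illustrative sketch (not part of the original module): a minimal opcode that
# exercises the LU above, changing only the candidate pool size and the
# default NIC mode.  The parameter values are invented; attributes that are
# left unset arrive in the LU as None and are therefore not touched.
def _ExampleClusterSetParamsOp():
  """Build an OpClusterSetParams opcode, for illustration only.

  """
  return opcodes.OpClusterSetParams(
    candidate_pool_size=10,
    nicparams={constants.NIC_MODE: constants.NIC_MODE_BRIDGED})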
4202 def _UploadHelper(lu, nodes, fname):
4203 """Helper for uploading a file and showing warnings.
4206 if os.path.exists(fname):
4207 result = lu.rpc.call_upload_file(nodes, fname)
4208 for to_node, to_result in result.items():
4209 msg = to_result.fail_msg
4210 if msg:
4211 msg = ("Copy of file %s to node %s failed: %s" %
4212 (fname, to_node, msg))
4213 lu.proc.LogWarning(msg)
4216 def _ComputeAncillaryFiles(cluster, redist):
4217 """Compute files external to Ganeti which need to be consistent.
4219 @type redist: boolean
4220 @param redist: Whether to include files which need to be redistributed
4223 # Compute files for all nodes
4224 files_all = set([
4225 constants.SSH_KNOWN_HOSTS_FILE,
4226 constants.CONFD_HMAC_KEY,
4227 constants.CLUSTER_DOMAIN_SECRET_FILE,
4228 constants.SPICE_CERT_FILE,
4229 constants.SPICE_CACERT_FILE,
4230 constants.RAPI_USERS_FILE,
4231 ])
4234 files_all.update(constants.ALL_CERT_FILES)
4235 files_all.update(ssconf.SimpleStore().GetFileList())
4237 # we need to ship at least the RAPI certificate
4238 files_all.add(constants.RAPI_CERT_FILE)
4240 if cluster.modify_etc_hosts:
4241 files_all.add(constants.ETC_HOSTS)
4243 # Files which are optional, these must:
4244 # - be present in one other category as well
4245 # - either exist or not exist on all nodes of that category (mc, vm all)
4246 files_opt = set([
4247 constants.RAPI_USERS_FILE,
4248 ])
4250 # Files which should only be on master candidates
4251 files_mc = set()
4253 if not redist:
4254 files_mc.add(constants.CLUSTER_CONF_FILE)
4256 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4258 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4260 # Files which should only be on VM-capable nodes
4261 files_vm = set(filename
4262 for hv_name in cluster.enabled_hypervisors
4263 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4265 files_opt |= set(filename
4266 for hv_name in cluster.enabled_hypervisors
4267 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4269 # Filenames in each category must be unique
4270 all_files_set = files_all | files_mc | files_vm
4271 assert (len(all_files_set) ==
4272 sum(map(len, [files_all, files_mc, files_vm]))), \
4273 "Found file listed in more than one file list"
4275 # Optional files must be present in one other category
4276 assert all_files_set.issuperset(files_opt), \
4277 "Optional file not in a different required list"
4279 return (files_all, files_opt, files_mc, files_vm)
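# Illustrative sketch (not part of the original module): the four sets
# returned above are disjoint, except that files_opt must be a subset of the
# union of the other categories.  A caller can re-check those invariants
# cheaply; the function name is hypothetical and cluster is an
# objects.Cluster instance.
def _ExampleAncillaryFileInvariants(cluster):
  """Re-derive the assertions of _ComputeAncillaryFiles, for illustration.

  """
  (files_all, files_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)
  union = files_all | files_mc | files_vm
  assert len(union) == len(files_all) + len(files_mc) + len(files_vm)
  assert files_opt <= union
  return union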
4282 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4283 """Distribute additional files which are part of the cluster configuration.
4285 ConfigWriter takes care of distributing the config and ssconf files, but
4286 there are more files which should be distributed to all nodes. This function
4287 makes sure those are copied.
4289 @param lu: calling logical unit
4290 @param additional_nodes: list of nodes not in the config to distribute to
4291 @type additional_vm: boolean
4292 @param additional_vm: whether the additional nodes are vm-capable or not
4295 # Gather target nodes
4296 cluster = lu.cfg.GetClusterInfo()
4297 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4299 online_nodes = lu.cfg.GetOnlineNodeList()
4300 vm_nodes = lu.cfg.GetVmCapableNodeList()
4302 if additional_nodes is not None:
4303 online_nodes.extend(additional_nodes)
4304 if additional_vm:
4305 vm_nodes.extend(additional_nodes)
4307 # Never distribute to master node
4308 for nodelist in [online_nodes, vm_nodes]:
4309 if master_info.name in nodelist:
4310 nodelist.remove(master_info.name)
4313 (files_all, _, files_mc, files_vm) = \
4314 _ComputeAncillaryFiles(cluster, True)
4316 # Never re-distribute configuration file from here
4317 assert not (constants.CLUSTER_CONF_FILE in files_all or
4318 constants.CLUSTER_CONF_FILE in files_vm)
4319 assert not files_mc, "Master candidates not handled in this function"
4321 filemap = [
4322 (online_nodes, files_all),
4323 (vm_nodes, files_vm),
4324 ]
4326 # Upload the files
4327 for (node_list, files) in filemap:
4328 for fname in files:
4329 _UploadHelper(lu, node_list, fname)
4332 class LUClusterRedistConf(NoHooksLU):
4333 """Force the redistribution of cluster configuration.
4335 This is a very simple LU.
4340 def ExpandNames(self):
4341 self.needed_locks = {
4342 locking.LEVEL_NODE: locking.ALL_SET,
4344 self.share_locks[locking.LEVEL_NODE] = 1
4346 def Exec(self, feedback_fn):
4347 """Redistribute the configuration.
4350 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4351 _RedistributeAncillaryFiles(self)
4354 class LUClusterActivateMasterIp(NoHooksLU):
4355 """Activate the master IP on the master node.
4358 def Exec(self, feedback_fn):
4359 """Activate the master IP.
4362 master_params = self.cfg.GetMasterNetworkParameters()
4363 ems = self.cfg.GetUseExternalMipScript()
4364 result = self.rpc.call_node_activate_master_ip(master_params.name,
4366 result.Raise("Could not activate the master IP")
4369 class LUClusterDeactivateMasterIp(NoHooksLU):
4370 """Deactivate the master IP on the master node.
4373 def Exec(self, feedback_fn):
4374 """Deactivate the master IP.
4377 master_params = self.cfg.GetMasterNetworkParameters()
4378 ems = self.cfg.GetUseExternalMipScript()
4379 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4381 result.Raise("Could not deactivate the master IP")
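# Illustrative sketch (not part of the original module): the pattern shared by
# the two small LUs above.  Both read the master network parameters and the
# external-MIP-script flag from the configuration and pass them to the node
# daemon; only the RPC differs.  The function name is hypothetical and the
# RPC signature is assumed from the calls above.
def _ExampleToggleMasterIp(lu, activate):
  """Activate or deactivate the master IP, mirroring the LUs above.

  """
  master_params = lu.cfg.GetMasterNetworkParameters()
  ems = lu.cfg.GetUseExternalMipScript()
  if activate:
    fn = lu.rpc.call_node_activate_master_ip
    action = "activate"
  else:
    fn = lu.rpc.call_node_deactivate_master_ip
    action = "deactivate"
  result = fn(master_params.name, master_params, ems)
  result.Raise("Could not %s the master IP" % action)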
4384 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4385 """Sleep and poll for an instance's disk to sync.
4388 if not instance.disks or disks is not None and not disks:
4389 return True
4391 disks = _ExpandCheckDisks(instance, disks)
4393 if not oneshot:
4394 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4396 node = instance.primary_node
4398 for dev in disks:
4399 lu.cfg.SetDiskID(dev, node)
4401 # TODO: Convert to utils.Retry
4403 retries = 0
4404 degr_retries = 10 # in seconds, as we sleep 1 second each time
4405 while True:
4406 max_time = 0
4407 done = True
4408 cumul_degraded = False
4409 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4410 msg = rstats.fail_msg
4411 if msg:
4412 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4413 retries += 1
4414 if retries >= 10:
4415 raise errors.RemoteError("Can't contact node %s for mirror data,"
4416 " aborting." % node)
4417 time.sleep(6)
4418 continue
4419 rstats = rstats.payload
4421 for i, mstat in enumerate(rstats):
4422 if mstat is None:
4423 lu.LogWarning("Can't compute data for node %s/%s",
4424 node, disks[i].iv_name)
4425 continue
4427 cumul_degraded = (cumul_degraded or
4428 (mstat.is_degraded and mstat.sync_percent is None))
4429 if mstat.sync_percent is not None:
4430 done = False
4431 if mstat.estimated_time is not None:
4432 rem_time = ("%s remaining (estimated)" %
4433 utils.FormatSeconds(mstat.estimated_time))
4434 max_time = mstat.estimated_time
4435 else:
4436 rem_time = "no time estimate"
4437 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4438 (disks[i].iv_name, mstat.sync_percent, rem_time))
4440 # if we're done but degraded, let's do a few small retries, to
4441 # make sure we see a stable and not transient situation; therefore
4442 # we force restart of the loop
4443 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4444 logging.info("Degraded disks found, %d retries left", degr_retries)
4445 degr_retries -= 1
4446 time.sleep(1)
4447 continue
4449 if done or oneshot:
4450 break
4452 time.sleep(min(60, max_time))
4454 if done:
4455 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4456 return not cumul_degraded
4459 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4460 """Check that mirrors are not degraded.
4462 The ldisk parameter, if True, will change the test from the
4463 is_degraded attribute (which represents overall non-ok status for
4464 the device(s)) to the ldisk (representing the local storage status).
4467 lu.cfg.SetDiskID(dev, node)
4469 result = True
4471 if on_primary or dev.AssembleOnSecondary():
4472 rstats = lu.rpc.call_blockdev_find(node, dev)
4473 msg = rstats.fail_msg
4474 if msg:
4475 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4476 result = False
4477 elif not rstats.payload:
4478 lu.LogWarning("Can't find disk on node %s", node)
4479 result = False
4480 else:
4481 if ldisk:
4482 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4483 else:
4484 result = result and not rstats.payload.is_degraded
4486 if dev.children:
4487 for child in dev.children:
4488 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4490 return result
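# Illustrative sketch (not part of the original module): the typical calling
# pattern for _CheckDiskConsistency, checking every disk of an instance on
# its primary node.  The function name is hypothetical; lu is any LogicalUnit
# with cfg and rpc set up.
def _ExampleCheckInstanceDisks(lu, instance):
  """Return True if all disks of the instance are healthy on the primary.

  """
  node = instance.primary_node
  return compat.all(_CheckDiskConsistency(lu, dev, node, True)
                    for dev in instance.disks)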
4493 class LUOobCommand(NoHooksLU):
4494 """Logical unit for OOB handling.
4498 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4500 def ExpandNames(self):
4501 """Gather locks we need.
4504 if self.op.node_names:
4505 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4506 lock_names = self.op.node_names
4507 else:
4508 lock_names = locking.ALL_SET
4510 self.needed_locks = {
4511 locking.LEVEL_NODE: lock_names,
4514 def CheckPrereq(self):
4515 """Check prerequisites.
4518 - the node exists in the configuration
4521 Any errors are signaled by raising errors.OpPrereqError.
4525 self.master_node = self.cfg.GetMasterNode()
4527 assert self.op.power_delay >= 0.0
4529 if self.op.node_names:
4530 if (self.op.command in self._SKIP_MASTER and
4531 self.master_node in self.op.node_names):
4532 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4533 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4535 if master_oob_handler:
4536 additional_text = ("run '%s %s %s' if you want to operate on the"
4537 " master regardless") % (master_oob_handler,
4538 self.op.command,
4539 self.master_node)
4540 else:
4541 additional_text = "it does not support out-of-band operations"
4543 raise errors.OpPrereqError(("Operating on the master node %s is not"
4544 " allowed for %s; %s") %
4545 (self.master_node, self.op.command,
4546 additional_text), errors.ECODE_INVAL)
4547 else:
4548 self.op.node_names = self.cfg.GetNodeList()
4549 if self.op.command in self._SKIP_MASTER:
4550 self.op.node_names.remove(self.master_node)
4552 if self.op.command in self._SKIP_MASTER:
4553 assert self.master_node not in self.op.node_names
4555 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4556 if node is None:
4557 raise errors.OpPrereqError("Node %s not found" % node_name,
4558 errors.ECODE_NOENT)
4559 else:
4560 self.nodes.append(node)
4562 if (not self.op.ignore_status and
4563 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4564 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4565 " not marked offline") % node_name,
4568 def Exec(self, feedback_fn):
4569 """Execute OOB and return result if we expect any.
4572 master_node = self.master_node
4573 ret = []
4575 for idx, node in enumerate(utils.NiceSort(self.nodes,
4576 key=lambda node: node.name)):
4577 node_entry = [(constants.RS_NORMAL, node.name)]
4578 ret.append(node_entry)
4580 oob_program = _SupportsOob(self.cfg, node)
4582 if not oob_program:
4583 node_entry.append((constants.RS_UNAVAIL, None))
4584 continue
4586 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4587 self.op.command, oob_program, node.name)
4588 result = self.rpc.call_run_oob(master_node, oob_program,
4589 self.op.command, node.name,
4590 self.op.timeout)
4592 if result.fail_msg:
4593 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4594 node.name, result.fail_msg)
4595 node_entry.append((constants.RS_NODATA, None))
4596 else:
4597 try:
4598 self._CheckPayload(result)
4599 except errors.OpExecError, err:
4600 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4601 node.name, err)
4602 node_entry.append((constants.RS_NODATA, None))
4603 else:
4604 if self.op.command == constants.OOB_HEALTH:
4605 # For health we should log important events
4606 for item, status in result.payload:
4607 if status in [constants.OOB_STATUS_WARNING,
4608 constants.OOB_STATUS_CRITICAL]:
4609 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4610 item, node.name, status)
4612 if self.op.command == constants.OOB_POWER_ON:
4613 node.powered = True
4614 elif self.op.command == constants.OOB_POWER_OFF:
4615 node.powered = False
4616 elif self.op.command == constants.OOB_POWER_STATUS:
4617 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4618 if powered != node.powered:
4619 logging.warning(("Recorded power state (%s) of node '%s' does not"
4620 " match actual power state (%s)"), node.powered,
4621 node.name, powered)
4623 # For configuration changing commands we should update the node
4624 if self.op.command in (constants.OOB_POWER_ON,
4625 constants.OOB_POWER_OFF):
4626 self.cfg.Update(node, feedback_fn)
4628 node_entry.append((constants.RS_NORMAL, result.payload))
4630 if (self.op.command == constants.OOB_POWER_ON and
4631 idx < len(self.nodes) - 1):
4632 time.sleep(self.op.power_delay)
4634 return ret
4636 def _CheckPayload(self, result):
4637 """Checks if the payload is valid.
4639 @param result: RPC result
4640 @raises errors.OpExecError: If payload is not valid
4643 errs = []
4644 if self.op.command == constants.OOB_HEALTH:
4645 if not isinstance(result.payload, list):
4646 errs.append("command 'health' is expected to return a list but got %s" %
4647 type(result.payload))
4648 else:
4649 for item, status in result.payload:
4650 if status not in constants.OOB_STATUSES:
4651 errs.append("health item '%s' has invalid status '%s'" %
4652 (item, status))
4654 if self.op.command == constants.OOB_POWER_STATUS:
4655 if not isinstance(result.payload, dict):
4656 errs.append("power-status is expected to return a dict but got %s" %
4657 type(result.payload))
4659 if self.op.command in [
4660 constants.OOB_POWER_ON,
4661 constants.OOB_POWER_OFF,
4662 constants.OOB_POWER_CYCLE,
4663 ]:
4664 if result.payload is not None:
4665 errs.append("%s is expected to not return payload but got '%s'" %
4666 (self.op.command, result.payload))
4668 if errs:
4669 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4670 utils.CommaJoin(errs))
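# Illustrative sketch (not part of the original module): the payload shapes
# _CheckPayload above accepts, written out as plain data.  Only "health" and
# "power-status" return a payload; the health item name is invented and
# OOB_STATUS_OK is assumed to be one of constants.OOB_STATUSES.
_EXAMPLE_OOB_PAYLOADS = {
  constants.OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True},
  constants.OOB_HEALTH: [("fan0", constants.OOB_STATUS_OK)],
  constants.OOB_POWER_ON: None,
  constants.OOB_POWER_OFF: None,
  constants.OOB_POWER_CYCLE: None,
  }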
4673 class _OsQuery(_QueryBase):
4674 FIELDS = query.OS_FIELDS
4676 def ExpandNames(self, lu):
4677 # Lock all nodes in shared mode
4678 # Temporary removal of locks, should be reverted later
4679 # TODO: reintroduce locks when they are lighter-weight
4680 lu.needed_locks = {}
4681 #self.share_locks[locking.LEVEL_NODE] = 1
4682 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4684 # The following variables interact with _QueryBase._GetNames
4685 if self.names:
4686 self.wanted = self.names
4687 else:
4688 self.wanted = locking.ALL_SET
4690 self.do_locking = self.use_locking
4692 def DeclareLocks(self, lu, level):
4696 def _DiagnoseByOS(rlist):
4697 """Remaps a per-node return list into a per-os per-node dictionary
4699 @param rlist: a map with node names as keys and OS objects as values
4702 @return: a dictionary with osnames as keys and as value another
4703 map, with nodes as keys and tuples of (path, status, diagnose,
4704 variants, parameters, api_versions) as values, eg::
4706 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4707 (/srv/..., False, "invalid api")],
4708 "node2": [(/srv/..., True, "", [], [])]}
4712 all_os = {}
4713 # we build here the list of nodes that didn't fail the RPC (at RPC
4714 # level), so that nodes with a non-responding node daemon don't
4715 # make all OSes invalid
4716 good_nodes = [node_name for node_name in rlist
4717 if not rlist[node_name].fail_msg]
4718 for node_name, nr in rlist.items():
4719 if nr.fail_msg or not nr.payload:
4720 continue
4721 for (name, path, status, diagnose, variants,
4722 params, api_versions) in nr.payload:
4723 if name not in all_os:
4724 # build a list of nodes for this os containing empty lists
4725 # for each node in node_list
4726 all_os[name] = {}
4727 for nname in good_nodes:
4728 all_os[name][nname] = []
4729 # convert params from [name, help] to (name, help)
4730 params = [tuple(v) for v in params]
4731 all_os[name][node_name].append((path, status, diagnose,
4732 variants, params, api_versions))
4734 return all_os
4735 def _GetQueryData(self, lu):
4736 """Computes the list of nodes and their attributes.
4739 # Locking is not used
4740 assert not (compat.any(lu.glm.is_owned(level)
4741 for level in locking.LEVELS
4742 if level != locking.LEVEL_CLUSTER) or
4743 self.do_locking or self.use_locking)
4745 valid_nodes = [node.name
4746 for node in lu.cfg.GetAllNodesInfo().values()
4747 if not node.offline and node.vm_capable]
4748 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4749 cluster = lu.cfg.GetClusterInfo()
4751 data = {}
4753 for (os_name, os_data) in pol.items():
4754 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4755 hidden=(os_name in cluster.hidden_os),
4756 blacklisted=(os_name in cluster.blacklisted_os))
4758 variants = set()
4759 parameters = set()
4760 api_versions = set()
4762 for idx, osl in enumerate(os_data.values()):
4763 info.valid = bool(info.valid and osl and osl[0][1])
4764 if not info.valid:
4765 break
4767 (node_variants, node_params, node_api) = osl[0][3:6]
4768 if idx == 0:
4769 # First entry
4770 variants.update(node_variants)
4771 parameters.update(node_params)
4772 api_versions.update(node_api)
4773 else:
4774 # Filter out inconsistent values
4775 variants.intersection_update(node_variants)
4776 parameters.intersection_update(node_params)
4777 api_versions.intersection_update(node_api)
4779 info.variants = list(variants)
4780 info.parameters = list(parameters)
4781 info.api_versions = list(api_versions)
4783 data[os_name] = info
4785 # Prepare data in requested order
4786 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4787 if name in data]
4790 class LUOsDiagnose(NoHooksLU):
4791 """Logical unit for OS diagnose/query.
4797 def _BuildFilter(fields, names):
4798 """Builds a filter for querying OSes.
4801 name_filter = qlang.MakeSimpleFilter("name", names)
4803 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4804 # respective field is not requested
4805 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4806 for fname in ["hidden", "blacklisted"]
4807 if fname not in fields]
4808 if "valid" not in fields:
4809 status_filter.append([qlang.OP_TRUE, "valid"])
4811 if status_filter:
4812 status_filter.insert(0, qlang.OP_AND)
4813 else:
4814 status_filter = None
4816 if name_filter and status_filter:
4817 return [qlang.OP_AND, name_filter, status_filter]
4818 elif name_filter:
4819 return name_filter
4820 else:
4821 return status_filter
4823 def CheckArguments(self):
4824 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4825 self.op.output_fields, False)
4827 def ExpandNames(self):
4828 self.oq.ExpandNames(self)
4830 def Exec(self, feedback_fn):
4831 return self.oq.OldStyleQuery(self)
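# Illustrative sketch (not part of the original module): the filter that
# _BuildFilter above produces for a plain "list OS names" query (only the
# "name" field requested and no name arguments).  MakeSimpleFilter yields a
# false value for an empty name list, so only the status filter survives and
# hidden, blacklisted and invalid OSes are excluded server-side.
_EXAMPLE_OS_LIST_FILTER = [
  qlang.OP_AND,
  [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  [qlang.OP_TRUE, "valid"],
  ]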
4834 class LUNodeRemove(LogicalUnit):
4835 """Logical unit for removing a node.
4838 HPATH = "node-remove"
4839 HTYPE = constants.HTYPE_NODE
4841 def BuildHooksEnv(self):
4844 This doesn't run on the target node in the pre phase as a failed
4845 node would then be impossible to remove.
4849 "OP_TARGET": self.op.node_name,
4850 "NODE_NAME": self.op.node_name,
4853 def BuildHooksNodes(self):
4854 """Build hooks nodes.
4857 all_nodes = self.cfg.GetNodeList()
4858 try:
4859 all_nodes.remove(self.op.node_name)
4860 except ValueError:
4861 logging.warning("Node '%s', which is about to be removed, was not found"
4862 " in the list of all nodes", self.op.node_name)
4863 return (all_nodes, all_nodes)
4865 def CheckPrereq(self):
4866 """Check prerequisites.
4869 - the node exists in the configuration
4870 - it does not have primary or secondary instances
4871 - it's not the master
4873 Any errors are signaled by raising errors.OpPrereqError.
4876 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4877 node = self.cfg.GetNodeInfo(self.op.node_name)
4878 assert node is not None
4880 masternode = self.cfg.GetMasterNode()
4881 if node.name == masternode:
4882 raise errors.OpPrereqError("Node is the master node, failover to another"
4883 " node is required", errors.ECODE_INVAL)
4885 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4886 if node.name in instance.all_nodes:
4887 raise errors.OpPrereqError("Instance %s is still running on the node,"
4888 " please remove first" % instance_name,
4889 errors.ECODE_INVAL)
4890 self.op.node_name = node.name
4891 self.node = node
4893 def Exec(self, feedback_fn):
4894 """Removes the node from the cluster.
4897 node = self.node
4898 logging.info("Stopping the node daemon and removing configs from node %s",
4899 node.name)
4901 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4903 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4904 "Not owning BGL"
4906 # Promote nodes to master candidate as needed
4907 _AdjustCandidatePool(self, exceptions=[node.name])
4908 self.context.RemoveNode(node.name)
4910 # Run post hooks on the node before it's removed
4911 _RunPostHook(self, node.name)
4913 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4914 msg = result.fail_msg
4915 if msg:
4916 self.LogWarning("Errors encountered on the remote node while leaving"
4917 " the cluster: %s", msg)
4919 # Remove node from our /etc/hosts
4920 if self.cfg.GetClusterInfo().modify_etc_hosts:
4921 master_node = self.cfg.GetMasterNode()
4922 result = self.rpc.call_etc_hosts_modify(master_node,
4923 constants.ETC_HOSTS_REMOVE,
4924 node.name, None)
4925 result.Raise("Can't update hosts file with new host data")
4926 _RedistributeAncillaryFiles(self)
4929 class _NodeQuery(_QueryBase):
4930 FIELDS = query.NODE_FIELDS
4932 def ExpandNames(self, lu):
4933 lu.needed_locks = {}
4934 lu.share_locks = _ShareAll()
4936 if self.names:
4937 self.wanted = _GetWantedNodes(lu, self.names)
4938 else:
4939 self.wanted = locking.ALL_SET
4941 self.do_locking = (self.use_locking and
4942 query.NQ_LIVE in self.requested_data)
4944 if self.do_locking:
4945 # If any non-static field is requested we need to lock the nodes
4946 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4948 def DeclareLocks(self, lu, level):
4951 def _GetQueryData(self, lu):
4952 """Computes the list of nodes and their attributes.
4955 all_info = lu.cfg.GetAllNodesInfo()
4957 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4959 # Gather data as requested
4960 if query.NQ_LIVE in self.requested_data:
4961 # filter out non-vm_capable nodes
4962 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4964 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4965 [lu.cfg.GetHypervisorType()])
4966 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4967 for (name, nresult) in node_data.items()
4968 if not nresult.fail_msg and nresult.payload)
4969 else:
4970 live_data = None
4972 if query.NQ_INST in self.requested_data:
4973 node_to_primary = dict([(name, set()) for name in nodenames])
4974 node_to_secondary = dict([(name, set()) for name in nodenames])
4976 inst_data = lu.cfg.GetAllInstancesInfo()
4978 for inst in inst_data.values():
4979 if inst.primary_node in node_to_primary:
4980 node_to_primary[inst.primary_node].add(inst.name)
4981 for secnode in inst.secondary_nodes:
4982 if secnode in node_to_secondary:
4983 node_to_secondary[secnode].add(inst.name)
4984 else:
4985 node_to_primary = None
4986 node_to_secondary = None
4988 if query.NQ_OOB in self.requested_data:
4989 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4990 for name, node in all_info.iteritems())
4991 else:
4992 oob_support = None
4994 if query.NQ_GROUP in self.requested_data:
4995 groups = lu.cfg.GetAllNodeGroupsInfo()
4996 else:
4997 groups = {}
4999 return query.NodeQueryData([all_info[name] for name in nodenames],
5000 live_data, lu.cfg.GetMasterNode(),
5001 node_to_primary, node_to_secondary, groups,
5002 oob_support, lu.cfg.GetClusterInfo())
5005 class LUNodeQuery(NoHooksLU):
5006 """Logical unit for querying nodes.
5009 # pylint: disable=W0142
5012 def CheckArguments(self):
5013 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5014 self.op.output_fields, self.op.use_locking)
5016 def ExpandNames(self):
5017 self.nq.ExpandNames(self)
5019 def DeclareLocks(self, level):
5020 self.nq.DeclareLocks(self, level)
5022 def Exec(self, feedback_fn):
5023 return self.nq.OldStyleQuery(self)
5026 class LUNodeQueryvols(NoHooksLU):
5027 """Logical unit for getting volumes on node(s).
5031 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5032 _FIELDS_STATIC = utils.FieldSet("node")
5034 def CheckArguments(self):
5035 _CheckOutputFields(static=self._FIELDS_STATIC,
5036 dynamic=self._FIELDS_DYNAMIC,
5037 selected=self.op.output_fields)
5039 def ExpandNames(self):
5040 self.share_locks = _ShareAll()
5041 self.needed_locks = {}
5043 if not self.op.nodes:
5044 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5046 self.needed_locks[locking.LEVEL_NODE] = \
5047 _GetWantedNodes(self, self.op.nodes)
5049 def Exec(self, feedback_fn):
5050 """Computes the list of nodes and their attributes.
5053 nodenames = self.owned_locks(locking.LEVEL_NODE)
5054 volumes = self.rpc.call_node_volumes(nodenames)
5056 ilist = self.cfg.GetAllInstancesInfo()
5057 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5059 output = []
5060 for node in nodenames:
5061 nresult = volumes[node]
5062 if nresult.offline:
5063 continue
5064 msg = nresult.fail_msg
5065 if msg:
5066 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5067 continue
5069 node_vols = sorted(nresult.payload,
5070 key=operator.itemgetter("dev"))
5072 for vol in node_vols:
5073 node_output = []
5074 for field in self.op.output_fields:
5075 if field == "node":
5076 val = node
5077 elif field == "phys":
5078 val = vol["dev"]
5079 elif field == "vg":
5080 val = vol["vg"]
5081 elif field == "name":
5082 val = vol["name"]
5083 elif field == "size":
5084 val = int(float(vol["size"]))
5085 elif field == "instance":
5086 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5087 else:
5088 raise errors.ParameterError(field)
5089 node_output.append(str(val))
5091 output.append(node_output)
5093 return output
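# Illustrative sketch (not part of the original module): the shape of the
# result returned by LUNodeQueryvols.Exec for output_fields
# ["node", "name", "size", "instance"].  Every value is stringified, "-"
# marks a volume not used by any instance, and all names are invented.
_EXAMPLE_NODE_VOLUMES_RESULT = [
  ["node1.example.com", "disk0", "10240", "instance1"],
  ["node1.example.com", "disk1", "2048", "-"],
  ]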
5096 class LUNodeQueryStorage(NoHooksLU):
5097 """Logical unit for getting information on storage units on node(s).
5100 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5103 def CheckArguments(self):
5104 _CheckOutputFields(static=self._FIELDS_STATIC,
5105 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5106 selected=self.op.output_fields)
5108 def ExpandNames(self):
5109 self.share_locks = _ShareAll()
5110 self.needed_locks = {}
5112 if self.op.nodes:
5113 self.needed_locks[locking.LEVEL_NODE] = \
5114 _GetWantedNodes(self, self.op.nodes)
5115 else:
5116 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5118 def Exec(self, feedback_fn):
5119 """Computes the list of nodes and their attributes.
5122 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5124 # Always get name to sort by
5125 if constants.SF_NAME in self.op.output_fields:
5126 fields = self.op.output_fields[:]
5127 else:
5128 fields = [constants.SF_NAME] + self.op.output_fields
5130 # Never ask for node or type as it's only known to the LU
5131 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5132 while extra in fields:
5133 fields.remove(extra)
5135 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5136 name_idx = field_idx[constants.SF_NAME]
5138 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5139 data = self.rpc.call_storage_list(self.nodes,
5140 self.op.storage_type, st_args,
5141 self.op.name, fields)
5143 result = []
5145 for node in utils.NiceSort(self.nodes):
5146 nresult = data[node]
5148 if nresult.offline:
5149 continue
5150 msg = nresult.fail_msg
5151 if msg:
5152 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5153 continue
5155 rows = dict([(row[name_idx], row) for row in nresult.payload])
5157 for name in utils.NiceSort(rows.keys()):
5158 row = rows[name]
5160 out = []
5162 for field in self.op.output_fields:
5163 if field == constants.SF_NODE:
5164 val = node
5165 elif field == constants.SF_TYPE:
5166 val = self.op.storage_type
5167 elif field in field_idx:
5168 val = row[field_idx[field]]
5169 else:
5170 raise errors.ParameterError(field)
5172 out.append(val)
5174 result.append(out)
5176 return result
5179 class _InstanceQuery(_QueryBase):
5180 FIELDS = query.INSTANCE_FIELDS
5182 def ExpandNames(self, lu):
5183 lu.needed_locks = {}
5184 lu.share_locks = _ShareAll()
5186 if self.names:
5187 self.wanted = _GetWantedInstances(lu, self.names)
5188 else:
5189 self.wanted = locking.ALL_SET
5191 self.do_locking = (self.use_locking and
5192 query.IQ_LIVE in self.requested_data)
5193 if self.do_locking:
5194 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5195 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5196 lu.needed_locks[locking.LEVEL_NODE] = []
5197 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5199 self.do_grouplocks = (self.do_locking and
5200 query.IQ_NODES in self.requested_data)
5202 def DeclareLocks(self, lu, level):
5204 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5205 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5207 # Lock all groups used by instances optimistically; this requires going
5208 # via the node before it's locked, requiring verification later on
5209 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5210 frozenset(group_uuid
5211 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5212 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5213 elif level == locking.LEVEL_NODE:
5214 lu._LockInstancesNodes() # pylint: disable=W0212
5217 def _CheckGroupLocks(lu):
5218 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5219 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5221 # Check if node groups for locked instances are still correct
5222 for instance_name in owned_instances:
5223 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5225 def _GetQueryData(self, lu):
5226 """Computes the list of instances and their attributes.
5229 if self.do_grouplocks:
5230 self._CheckGroupLocks(lu)
5232 cluster = lu.cfg.GetClusterInfo()
5233 all_info = lu.cfg.GetAllInstancesInfo()
5235 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5237 instance_list = [all_info[name] for name in instance_names]
5238 nodes = frozenset(itertools.chain(*(inst.all_nodes
5239 for inst in instance_list)))
5240 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5241 bad_nodes = []
5242 offline_nodes = []
5243 wrongnode_inst = set()
5245 # Gather data as requested
5246 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5247 live_data = {}
5248 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5249 for name in nodes:
5250 result = node_data[name]
5251 if result.offline:
5252 # offline nodes will be in both lists
5253 assert result.fail_msg
5254 offline_nodes.append(name)
5255 if result.fail_msg:
5256 bad_nodes.append(name)
5257 elif result.payload:
5258 for inst in result.payload:
5259 if inst in all_info:
5260 if all_info[inst].primary_node == name:
5261 live_data.update(result.payload)
5262 else:
5263 wrongnode_inst.add(inst)
5264 else:
5265 # orphan instance; we don't list it here as we don't
5266 # handle this case yet in the output of instance listing
5267 logging.warning("Orphan instance '%s' found on node %s",
5268 inst, name)
5269 # else no instance is alive
5270 else:
5271 live_data = {}
5273 if query.IQ_DISKUSAGE in self.requested_data:
5274 disk_usage = dict((inst.name,
5275 _ComputeDiskSize(inst.disk_template,
5276 [{constants.IDISK_SIZE: disk.size}
5277 for disk in inst.disks]))
5278 for inst in instance_list)
5279 else:
5280 disk_usage = None
5282 if query.IQ_CONSOLE in self.requested_data:
5283 consinfo = {}
5284 for inst in instance_list:
5285 if inst.name in live_data:
5286 # Instance is running
5287 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5288 else:
5289 consinfo[inst.name] = None
5290 assert set(consinfo.keys()) == set(instance_names)
5291 else:
5292 consinfo = None
5294 if query.IQ_NODES in self.requested_data:
5295 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5296 instance_list)))
5297 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5298 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5299 for uuid in set(map(operator.attrgetter("group"),
5300 nodes.values())))
5301 else:
5302 nodes = None
5303 groups = None
5305 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5306 disk_usage, offline_nodes, bad_nodes,
5307 live_data, wrongnode_inst, consinfo,
5308 nodes, groups)
5311 class LUQuery(NoHooksLU):
5312 """Query for resources/items of a certain kind.
5315 # pylint: disable=W0142
5318 def CheckArguments(self):
5319 qcls = _GetQueryImplementation(self.op.what)
5321 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5323 def ExpandNames(self):
5324 self.impl.ExpandNames(self)
5326 def DeclareLocks(self, level):
5327 self.impl.DeclareLocks(self, level)
5329 def Exec(self, feedback_fn):
5330 return self.impl.NewStyleQuery(self)
5333 class LUQueryFields(NoHooksLU):
5334 """Query for resources/items of a certain kind.
5337 # pylint: disable=W0142
5340 def CheckArguments(self):
5341 self.qcls = _GetQueryImplementation(self.op.what)
5343 def ExpandNames(self):
5344 self.needed_locks = {}
5346 def Exec(self, feedback_fn):
5347 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
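# Illustrative sketch (not part of the original module): the kind of opcode
# that LUQuery and LUQueryFields above handle.  The fields and the qlang
# filter are examples only; "oper_ram" and the "ERROR_down" status value are
# assumed to be valid instance query fields/values.
def _ExampleInstanceQueryOp():
  """Build an OpQuery listing instances that are not in ERROR_down.

  """
  return opcodes.OpQuery(what=constants.QR_INSTANCE,
                         fields=["name", "status", "oper_ram"],
                         qfilter=[qlang.OP_NOT,
                                  [qlang.OP_EQUAL, "status", "ERROR_down"]])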
5350 class LUNodeModifyStorage(NoHooksLU):
5351 """Logical unit for modifying a storage volume on a node.
5356 def CheckArguments(self):
5357 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5359 storage_type = self.op.storage_type
5361 try:
5362 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5363 except KeyError:
5364 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5365 " modified" % storage_type,
5366 errors.ECODE_INVAL)
5368 diff = set(self.op.changes.keys()) - modifiable
5369 if diff:
5370 raise errors.OpPrereqError("The following fields can not be modified for"
5371 " storage units of type '%s': %r" %
5372 (storage_type, list(diff)),
5373 errors.ECODE_INVAL)
5375 def ExpandNames(self):
5376 self.needed_locks = {
5377 locking.LEVEL_NODE: self.op.node_name,
5378 }
5380 def Exec(self, feedback_fn):
5381 """Computes the list of nodes and their attributes.
5384 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5385 result = self.rpc.call_storage_modify(self.op.node_name,
5386 self.op.storage_type, st_args,
5387 self.op.name, self.op.changes)
5388 result.Raise("Failed to modify storage unit '%s' on %s" %
5389 (self.op.name, self.op.node_name))
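# Illustrative sketch (not part of the original module): the opcode handled by
# the LU above, e.g. marking an LVM physical volume as unallocatable on one
# node.  The node name and device path are invented; the changed field must
# be listed in constants.MODIFIABLE_STORAGE_FIELDS for the storage type.
def _ExampleNodeModifyStorageOp():
  """Build an OpNodeModifyStorage opcode, for illustration only.

  """
  return opcodes.OpNodeModifyStorage(node_name="node1.example.com",
                                     storage_type=constants.ST_LVM_PV,
                                     name="/dev/sda3",
                                     changes={constants.SF_ALLOCATABLE: False})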
5392 class LUNodeAdd(LogicalUnit):
5393 """Logical unit for adding node to the cluster.
5396 HPATH = "node-add"
5397 HTYPE = constants.HTYPE_NODE
5398 _NFLAGS = ["master_capable", "vm_capable"]
5400 def CheckArguments(self):
5401 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5402 # validate/normalize the node name
5403 self.hostname = netutils.GetHostname(name=self.op.node_name,
5404 family=self.primary_ip_family)
5405 self.op.node_name = self.hostname.name
5407 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5408 raise errors.OpPrereqError("Cannot readd the master node",
5411 if self.op.readd and self.op.group:
5412 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5413 " being readded", errors.ECODE_INVAL)
5415 def BuildHooksEnv(self):
5418 This will run on all nodes before, and on all nodes + the new node after.
5421 return {
5422 "OP_TARGET": self.op.node_name,
5423 "NODE_NAME": self.op.node_name,
5424 "NODE_PIP": self.op.primary_ip,
5425 "NODE_SIP": self.op.secondary_ip,
5426 "MASTER_CAPABLE": str(self.op.master_capable),
5427 "VM_CAPABLE": str(self.op.vm_capable),
5428 }
5430 def BuildHooksNodes(self):
5431 """Build hooks nodes.
5434 # Exclude added node
5435 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5436 post_nodes = pre_nodes + [self.op.node_name, ]
5438 return (pre_nodes, post_nodes)
5440 def CheckPrereq(self):
5441 """Check prerequisites.
5444 - the new node is not already in the config
5446 - its parameters (single/dual homed) matches the cluster
5448 Any errors are signaled by raising errors.OpPrereqError.
5451 cfg = self.cfg
5452 hostname = self.hostname
5453 node = hostname.name
5454 primary_ip = self.op.primary_ip = hostname.ip
5455 if self.op.secondary_ip is None:
5456 if self.primary_ip_family == netutils.IP6Address.family:
5457 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5458 " IPv4 address must be given as secondary",
5460 self.op.secondary_ip = primary_ip
5462 secondary_ip = self.op.secondary_ip
5463 if not netutils.IP4Address.IsValid(secondary_ip):
5464 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5465 " address" % secondary_ip, errors.ECODE_INVAL)
5467 node_list = cfg.GetNodeList()
5468 if not self.op.readd and node in node_list:
5469 raise errors.OpPrereqError("Node %s is already in the configuration" %
5470 node, errors.ECODE_EXISTS)
5471 elif self.op.readd and node not in node_list:
5472 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5475 self.changed_primary_ip = False
5477 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5478 if self.op.readd and node == existing_node_name:
5479 if existing_node.secondary_ip != secondary_ip:
5480 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5481 " address configuration as before",
5482 errors.ECODE_INVAL)
5483 if existing_node.primary_ip != primary_ip:
5484 self.changed_primary_ip = True
5486 continue
5488 if (existing_node.primary_ip == primary_ip or
5489 existing_node.secondary_ip == primary_ip or
5490 existing_node.primary_ip == secondary_ip or
5491 existing_node.secondary_ip == secondary_ip):
5492 raise errors.OpPrereqError("New node ip address(es) conflict with"
5493 " existing node %s" % existing_node.name,
5494 errors.ECODE_NOTUNIQUE)
5496 # After this 'if' block, None is no longer a valid value for the
5497 # _capable op attributes
5498 if self.op.readd:
5499 old_node = self.cfg.GetNodeInfo(node)
5500 assert old_node is not None, "Can't retrieve locked node %s" % node
5501 for attr in self._NFLAGS:
5502 if getattr(self.op, attr) is None:
5503 setattr(self.op, attr, getattr(old_node, attr))
5504 else:
5505 for attr in self._NFLAGS:
5506 if getattr(self.op, attr) is None:
5507 setattr(self.op, attr, True)
5509 if self.op.readd and not self.op.vm_capable:
5510 pri, sec = cfg.GetNodeInstances(node)
5511 if pri or sec:
5512 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5513 " flag set to false, but it already holds"
5514 " instances" % node,
5517 # check that the type of the node (single versus dual homed) is the
5518 # same as for the master
5519 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5520 master_singlehomed = myself.secondary_ip == myself.primary_ip
5521 newbie_singlehomed = secondary_ip == primary_ip
5522 if master_singlehomed != newbie_singlehomed:
5523 if master_singlehomed:
5524 raise errors.OpPrereqError("The master has no secondary ip but the"
5525 " new node has one",
5526 errors.ECODE_INVAL)
5527 else:
5528 raise errors.OpPrereqError("The master has a secondary ip but the"
5529 " new node doesn't have one",
5530 errors.ECODE_INVAL)
5532 # checks reachability
5533 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5534 raise errors.OpPrereqError("Node not reachable by ping",
5535 errors.ECODE_ENVIRON)
5537 if not newbie_singlehomed:
5538 # check reachability from my secondary ip to newbie's secondary ip
5539 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5540 source=myself.secondary_ip):
5541 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5542 " based ping to node daemon port",
5543 errors.ECODE_ENVIRON)
5545 if self.op.readd:
5546 exceptions = [node]
5547 else:
5548 exceptions = []
5550 if self.op.master_capable:
5551 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5552 else:
5553 self.master_candidate = False
5555 if self.op.readd:
5556 self.new_node = old_node
5557 else:
5558 node_group = cfg.LookupNodeGroup(self.op.group)
5559 self.new_node = objects.Node(name=node,
5560 primary_ip=primary_ip,
5561 secondary_ip=secondary_ip,
5562 master_candidate=self.master_candidate,
5563 offline=False, drained=False,
5564 group=node_group)
5566 if self.op.ndparams:
5567 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5569 if self.op.hv_state:
5570 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5572 if self.op.disk_state:
5573 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5575 def Exec(self, feedback_fn):
5576 """Adds the new node to the cluster.
5579 new_node = self.new_node
5580 node = new_node.name
5582 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5583 "Not owning BGL"
5585 # We are adding a new node, so we assume it is powered
5586 new_node.powered = True
5588 # for re-adds, reset the offline/drained/master-candidate flags;
5589 # we need to reset here, otherwise offline would prevent RPC calls
5590 # later in the procedure; this also means that if the re-add
5591 # fails, we are left with a non-offlined, broken node
5592 if self.op.readd:
5593 new_node.drained = new_node.offline = False # pylint: disable=W0201
5594 self.LogInfo("Readding a node, the offline/drained flags were reset")
5595 # if we demote the node, we do cleanup later in the procedure
5596 new_node.master_candidate = self.master_candidate
5597 if self.changed_primary_ip:
5598 new_node.primary_ip = self.op.primary_ip
5600 # copy the master/vm_capable flags
5601 for attr in self._NFLAGS:
5602 setattr(new_node, attr, getattr(self.op, attr))
5604 # notify the user about any possible mc promotion
5605 if new_node.master_candidate:
5606 self.LogInfo("Node will be a master candidate")
5608 if self.op.ndparams:
5609 new_node.ndparams = self.op.ndparams
5610 else:
5611 new_node.ndparams = {}
5613 if self.op.hv_state:
5614 new_node.hv_state_static = self.new_hv_state
5616 if self.op.disk_state:
5617 new_node.disk_state_static = self.new_disk_state
5619 # check connectivity
5620 result = self.rpc.call_version([node])[node]
5621 result.Raise("Can't get version information from node %s" % node)
5622 if constants.PROTOCOL_VERSION == result.payload:
5623 logging.info("Communication to node %s fine, sw version %s match",
5624 node, result.payload)
5625 else:
5626 raise errors.OpExecError("Version mismatch master version %s,"
5627 " node version %s" %
5628 (constants.PROTOCOL_VERSION, result.payload))
5630 # Add node to our /etc/hosts, and add key to known_hosts
5631 if self.cfg.GetClusterInfo().modify_etc_hosts:
5632 master_node = self.cfg.GetMasterNode()
5633 result = self.rpc.call_etc_hosts_modify(master_node,
5634 constants.ETC_HOSTS_ADD,
5635 self.hostname.name,
5636 self.hostname.ip)
5637 result.Raise("Can't update hosts file with new host data")
5639 if new_node.secondary_ip != new_node.primary_ip:
5640 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5643 node_verify_list = [self.cfg.GetMasterNode()]
5644 node_verify_param = {
5645 constants.NV_NODELIST: ([node], {}),
5646 # TODO: do a node-net-test as well?
5647 }
5649 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5650 self.cfg.GetClusterName())
5651 for verifier in node_verify_list:
5652 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5653 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5654 if nl_payload:
5655 for failed in nl_payload:
5656 feedback_fn("ssh/hostname verification failed"
5657 " (checking from %s): %s" %
5658 (verifier, nl_payload[failed]))
5659 raise errors.OpExecError("ssh/hostname verification failed")
5661 if self.op.readd:
5662 _RedistributeAncillaryFiles(self)
5663 self.context.ReaddNode(new_node)
5664 # make sure we redistribute the config
5665 self.cfg.Update(new_node, feedback_fn)
5666 # and make sure the new node will not have old files around
5667 if not new_node.master_candidate:
5668 result = self.rpc.call_node_demote_from_mc(new_node.name)
5669 msg = result.fail_msg
5671 self.LogWarning("Node failed to demote itself from master"
5672 " candidate status: %s" % msg)
5674 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5675 additional_vm=self.op.vm_capable)
5676 self.context.AddNode(new_node, self.proc.GetECId())
5679 class LUNodeSetParams(LogicalUnit):
5680 """Modifies the parameters of a node.
5682 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5683 to the node role (as _ROLE_*)
5684 @cvar _R2F: a dictionary from node role to tuples of flags
5685 @cvar _FLAGS: a list of attribute names corresponding to the flags
5688 HPATH = "node-modify"
5689 HTYPE = constants.HTYPE_NODE
5691   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5692   _F2R = {
5693     (True, False, False): _ROLE_CANDIDATE,
5694     (False, True, False): _ROLE_DRAINED,
5695     (False, False, True): _ROLE_OFFLINE,
5696     (False, False, False): _ROLE_REGULAR,
5697     }
5698   _R2F = dict((v, k) for k, v in _F2R.items())
5699 _FLAGS = ["master_candidate", "drained", "offline"]
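  # Editorial sketch (not part of the original source): with the tables above,
  # a master-candidate node maps as
  #   _F2R[(True, False, False)] == _ROLE_CANDIDATE
  # and the reverse table recovers the flag tuple,
  #   _R2F[_ROLE_OFFLINE] == (False, False, True)
  # Exec later zips the old and new flag tuples with _FLAGS to report which of
  # the three node attributes actually changed.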
5701 def CheckArguments(self):
5702 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5703 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5704 self.op.master_capable, self.op.vm_capable,
5705 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5707 if all_mods.count(None) == len(all_mods):
5708 raise errors.OpPrereqError("Please pass at least one modification",
5710 if all_mods.count(True) > 1:
5711 raise errors.OpPrereqError("Can't set the node into more than one"
5712 " state at the same time",
5715 # Boolean value that tells us whether we might be demoting from MC
5716 self.might_demote = (self.op.master_candidate == False or
5717 self.op.offline == True or
5718 self.op.drained == True or
5719 self.op.master_capable == False)
5721 if self.op.secondary_ip:
5722 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5723 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5724 " address" % self.op.secondary_ip,
5727 self.lock_all = self.op.auto_promote and self.might_demote
5728 self.lock_instances = self.op.secondary_ip is not None
5730 def _InstanceFilter(self, instance):
5731 """Filter for getting affected instances.
5734 return (instance.disk_template in constants.DTS_INT_MIRROR and
5735 self.op.node_name in instance.all_nodes)
5737 def ExpandNames(self):
5739 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5741 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5743 # Since modifying a node can have severe effects on currently running
5744     # operations, the resource lock is at least acquired in shared mode
5745 self.needed_locks[locking.LEVEL_NODE_RES] = \
5746 self.needed_locks[locking.LEVEL_NODE]
5748 # Get node resource and instance locks in shared mode; they are not used
5749 # for anything but read-only access
5750 self.share_locks[locking.LEVEL_NODE_RES] = 1
5751 self.share_locks[locking.LEVEL_INSTANCE] = 1
5753 if self.lock_instances:
5754 self.needed_locks[locking.LEVEL_INSTANCE] = \
5755 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5757 def BuildHooksEnv(self):
5760 This runs on the master node.
5764 "OP_TARGET": self.op.node_name,
5765 "MASTER_CANDIDATE": str(self.op.master_candidate),
5766 "OFFLINE": str(self.op.offline),
5767 "DRAINED": str(self.op.drained),
5768 "MASTER_CAPABLE": str(self.op.master_capable),
5769 "VM_CAPABLE": str(self.op.vm_capable),
5772 def BuildHooksNodes(self):
5773 """Build hooks nodes.
5776 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5779 def CheckPrereq(self):
5780 """Check prerequisites.
5782 This only checks the instance list against the existing names.
5785 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5787 if self.lock_instances:
5788 affected_instances = \
5789 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5791 # Verify instance locks
5792 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5793 wanted_instances = frozenset(affected_instances.keys())
5794 if wanted_instances - owned_instances:
5795 raise errors.OpPrereqError("Instances affected by changing node %s's"
5796 " secondary IP address have changed since"
5797 " locks were acquired, wanted '%s', have"
5798 " '%s'; retry the operation" %
5800 utils.CommaJoin(wanted_instances),
5801 utils.CommaJoin(owned_instances)),
5804 affected_instances = None
5806 if (self.op.master_candidate is not None or
5807 self.op.drained is not None or
5808 self.op.offline is not None):
5809 # we can't change the master's node flags
5810 if self.op.node_name == self.cfg.GetMasterNode():
5811 raise errors.OpPrereqError("The master role can be changed"
5812 " only via master-failover",
5815 if self.op.master_candidate and not node.master_capable:
5816 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5817 " it a master candidate" % node.name,
5820 if self.op.vm_capable == False:
5821 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5823 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5824 " the vm_capable flag" % node.name,
5827 if node.master_candidate and self.might_demote and not self.lock_all:
5828 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5829 # check if after removing the current node, we're missing master
5831 (mc_remaining, mc_should, _) = \
5832 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5833 if mc_remaining < mc_should:
5834 raise errors.OpPrereqError("Not enough master candidates, please"
5835 " pass auto promote option to allow"
5836 " promotion", errors.ECODE_STATE)
5838 self.old_flags = old_flags = (node.master_candidate,
5839 node.drained, node.offline)
5840 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5841 self.old_role = old_role = self._F2R[old_flags]
5843 # Check for ineffective changes
5844 for attr in self._FLAGS:
5845 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5846 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5847 setattr(self.op, attr, None)
5849 # Past this point, any flag change to False means a transition
5850 # away from the respective state, as only real changes are kept
5852 # TODO: We might query the real power state if it supports OOB
5853 if _SupportsOob(self.cfg, node):
5854 if self.op.offline is False and not (node.powered or
5855 self.op.powered == True):
5856 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5857 " offline status can be reset") %
5859 elif self.op.powered is not None:
5860 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5861 " as it does not support out-of-band"
5862 " handling") % self.op.node_name)
5864     # If we're being de-offlined/drained, we'll MC ourselves if needed
5865 if (self.op.drained == False or self.op.offline == False or
5866 (self.op.master_capable and not node.master_capable)):
5867 if _DecideSelfPromotion(self):
5868 self.op.master_candidate = True
5869 self.LogInfo("Auto-promoting node to master candidate")
5871 # If we're no longer master capable, we'll demote ourselves from MC
5872 if self.op.master_capable == False and node.master_candidate:
5873 self.LogInfo("Demoting from master candidate")
5874 self.op.master_candidate = False
5877 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5878 if self.op.master_candidate:
5879 new_role = self._ROLE_CANDIDATE
5880 elif self.op.drained:
5881 new_role = self._ROLE_DRAINED
5882 elif self.op.offline:
5883 new_role = self._ROLE_OFFLINE
5884 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5885 # False is still in new flags, which means we're un-setting (the
5887 new_role = self._ROLE_REGULAR
5888 else: # no new flags, nothing, keep old role
5891 self.new_role = new_role
5893 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5894 # Trying to transition out of offline status
5895 # TODO: Use standard RPC runner, but make sure it works when the node is
5896 # still marked offline
5897 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5899 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5900 " to report its version: %s" %
5901 (node.name, result.fail_msg),
5904 self.LogWarning("Transitioning node from offline to online state"
5905 " without using re-add. Please make sure the node"
5908 if self.op.secondary_ip:
5909 # Ok even without locking, because this can't be changed by any LU
5910 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5911 master_singlehomed = master.secondary_ip == master.primary_ip
5912 if master_singlehomed and self.op.secondary_ip:
5913 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5914 " homed cluster", errors.ECODE_INVAL)
5916 assert not (frozenset(affected_instances) -
5917 self.owned_locks(locking.LEVEL_INSTANCE))
5920 if affected_instances:
5921 raise errors.OpPrereqError("Cannot change secondary IP address:"
5922 " offline node has instances (%s)"
5923 " configured to use it" %
5924 utils.CommaJoin(affected_instances.keys()))
5926 # On online nodes, check that no instances are running, and that
5927 # the node has the new ip and we can reach it.
5928 for instance in affected_instances.values():
5929 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5930 msg="cannot change secondary ip")
5932 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5933 if master.name != node.name:
5934 # check reachability from master secondary ip to new secondary ip
5935 if not netutils.TcpPing(self.op.secondary_ip,
5936 constants.DEFAULT_NODED_PORT,
5937 source=master.secondary_ip):
5938 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5939 " based ping to node daemon port",
5940 errors.ECODE_ENVIRON)
5942 if self.op.ndparams:
5943 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5944 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5945 self.new_ndparams = new_ndparams
5947 if self.op.hv_state:
5948 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5949 self.node.hv_state_static)
5951 if self.op.disk_state:
5952 self.new_disk_state = \
5953 _MergeAndVerifyDiskState(self.op.disk_state,
5954 self.node.disk_state_static)
5956 def Exec(self, feedback_fn):
5961 old_role = self.old_role
5962 new_role = self.new_role
5966 if self.op.ndparams:
5967 node.ndparams = self.new_ndparams
5969 if self.op.powered is not None:
5970 node.powered = self.op.powered
5972 if self.op.hv_state:
5973 node.hv_state_static = self.new_hv_state
5975 if self.op.disk_state:
5976 node.disk_state_static = self.new_disk_state
5978 for attr in ["master_capable", "vm_capable"]:
5979 val = getattr(self.op, attr)
5981 setattr(node, attr, val)
5982 result.append((attr, str(val)))
5984 if new_role != old_role:
5985 # Tell the node to demote itself, if no longer MC and not offline
5986 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5987 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5989 self.LogWarning("Node failed to demote itself: %s", msg)
5991 new_flags = self._R2F[new_role]
5992 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5994 result.append((desc, str(nf)))
5995 (node.master_candidate, node.drained, node.offline) = new_flags
5997 # we locked all nodes, we adjust the CP before updating this node
5999 _AdjustCandidatePool(self, [node.name])
6001 if self.op.secondary_ip:
6002 node.secondary_ip = self.op.secondary_ip
6003 result.append(("secondary_ip", self.op.secondary_ip))
6005 # this will trigger configuration file update, if needed
6006 self.cfg.Update(node, feedback_fn)
6008 # this will trigger job queue propagation or cleanup if the mc
6010 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6011 self.context.ReaddNode(node)
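# Editorial sketch (not part of the original source): the "result" list built
# in Exec above collects (parameter, new value) pairs describing the applied
# changes, for example
#   [("master_candidate", "True"), ("secondary_ip", "192.0.2.10")]
# (the IP address is purely illustrative); the client can use it to report
# exactly what was modified on the node.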
6016 class LUNodePowercycle(NoHooksLU):
6017 """Powercycles a node.
6022 def CheckArguments(self):
6023 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6024 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6025 raise errors.OpPrereqError("The node is the master and the force"
6026 " parameter was not set",
6029 def ExpandNames(self):
6030 """Locking for PowercycleNode.
6032 This is a last-resort option and shouldn't block on other
6033 jobs. Therefore, we grab no locks.
6036 self.needed_locks = {}
6038 def Exec(self, feedback_fn):
6042 result = self.rpc.call_node_powercycle(self.op.node_name,
6043 self.cfg.GetHypervisorType())
6044 result.Raise("Failed to schedule the reboot")
6045 return result.payload
6048 class LUClusterQuery(NoHooksLU):
6049 """Query cluster configuration.
6054 def ExpandNames(self):
6055 self.needed_locks = {}
6057 def Exec(self, feedback_fn):
6058 """Return cluster config.
6061 cluster = self.cfg.GetClusterInfo()
6064 # Filter just for enabled hypervisors
6065 for os_name, hv_dict in cluster.os_hvp.items():
6066 os_hvp[os_name] = {}
6067 for hv_name, hv_params in hv_dict.items():
6068 if hv_name in cluster.enabled_hypervisors:
6069 os_hvp[os_name][hv_name] = hv_params
6071 # Convert ip_family to ip_version
6072 primary_ip_version = constants.IP4_VERSION
6073 if cluster.primary_ip_family == netutils.IP6Address.family:
6074 primary_ip_version = constants.IP6_VERSION
6077 "software_version": constants.RELEASE_VERSION,
6078 "protocol_version": constants.PROTOCOL_VERSION,
6079 "config_version": constants.CONFIG_VERSION,
6080 "os_api_version": max(constants.OS_API_VERSIONS),
6081 "export_version": constants.EXPORT_VERSION,
6082 "architecture": (platform.architecture()[0], platform.machine()),
6083 "name": cluster.cluster_name,
6084 "master": cluster.master_node,
6085 "default_hypervisor": cluster.primary_hypervisor,
6086 "enabled_hypervisors": cluster.enabled_hypervisors,
6087 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6088 for hypervisor_name in cluster.enabled_hypervisors]),
6090 "beparams": cluster.beparams,
6091 "osparams": cluster.osparams,
6092 "ipolicy": cluster.ipolicy,
6093 "nicparams": cluster.nicparams,
6094 "ndparams": cluster.ndparams,
6095 "candidate_pool_size": cluster.candidate_pool_size,
6096 "master_netdev": cluster.master_netdev,
6097 "master_netmask": cluster.master_netmask,
6098 "use_external_mip_script": cluster.use_external_mip_script,
6099 "volume_group_name": cluster.volume_group_name,
6100 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6101 "file_storage_dir": cluster.file_storage_dir,
6102 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6103 "maintain_node_health": cluster.maintain_node_health,
6104 "ctime": cluster.ctime,
6105 "mtime": cluster.mtime,
6106 "uuid": cluster.uuid,
6107 "tags": list(cluster.GetTags()),
6108 "uid_pool": cluster.uid_pool,
6109 "default_iallocator": cluster.default_iallocator,
6110 "reserved_lvs": cluster.reserved_lvs,
6111 "primary_ip_version": primary_ip_version,
6112 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6113 "hidden_os": cluster.hidden_os,
6114 "blacklisted_os": cluster.blacklisted_os,
6120 class LUClusterConfigQuery(NoHooksLU):
6121 """Return configuration values.
6125 _FIELDS_DYNAMIC = utils.FieldSet()
6126 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6127 "watcher_pause", "volume_group_name")
6129 def CheckArguments(self):
6130 _CheckOutputFields(static=self._FIELDS_STATIC,
6131 dynamic=self._FIELDS_DYNAMIC,
6132 selected=self.op.output_fields)
6134 def ExpandNames(self):
6135 self.needed_locks = {}
6137 def Exec(self, feedback_fn):
6138 """Dump a representation of the cluster config to the standard output.
6142 for field in self.op.output_fields:
6143 if field == "cluster_name":
6144 entry = self.cfg.GetClusterName()
6145 elif field == "master_node":
6146 entry = self.cfg.GetMasterNode()
6147 elif field == "drain_flag":
6148 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6149 elif field == "watcher_pause":
6150 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6151 elif field == "volume_group_name":
6152 entry = self.cfg.GetVGName()
6154 raise errors.ParameterError(field)
6155 values.append(entry)
6159 class LUInstanceActivateDisks(NoHooksLU):
6160 """Bring up an instance's disks.
6165 def ExpandNames(self):
6166 self._ExpandAndLockInstance()
6167 self.needed_locks[locking.LEVEL_NODE] = []
6168 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6170 def DeclareLocks(self, level):
6171 if level == locking.LEVEL_NODE:
6172 self._LockInstancesNodes()
6174 def CheckPrereq(self):
6175 """Check prerequisites.
6177 This checks that the instance is in the cluster.
6180 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6181 assert self.instance is not None, \
6182 "Cannot retrieve locked instance %s" % self.op.instance_name
6183 _CheckNodeOnline(self, self.instance.primary_node)
6185 def Exec(self, feedback_fn):
6186 """Activate the disks.
6189 disks_ok, disks_info = \
6190 _AssembleInstanceDisks(self, self.instance,
6191 ignore_size=self.op.ignore_size)
6193 raise errors.OpExecError("Cannot activate block devices")
6198 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6200 """Prepare the block devices for an instance.
6202 This sets up the block devices on all nodes.
6204 @type lu: L{LogicalUnit}
6205 @param lu: the logical unit on whose behalf we execute
6206 @type instance: L{objects.Instance}
6207 @param instance: the instance for whose disks we assemble
6208 @type disks: list of L{objects.Disk} or None
6209 @param disks: which disks to assemble (or all, if None)
6210 @type ignore_secondaries: boolean
6211 @param ignore_secondaries: if true, errors on secondary nodes
6212 won't result in an error return from the function
6213 @type ignore_size: boolean
6214 @param ignore_size: if true, the current known size of the disk
6215 will not be used during the disk activation, useful for cases
6216 when the size is wrong
6217   @return: a (disks_ok, device_info) tuple; device_info is a list of
6218       (host, instance_visible_name, node_visible_name)
6219       tuples with the mapping from node devices to instance devices
6224 iname = instance.name
6225 disks = _ExpandCheckDisks(instance, disks)
6227   # With the two-pass mechanism we try to reduce the window of
6228   # opportunity for the race condition of switching DRBD to primary
6229   # before handshaking occurred, but we do not eliminate it
6231 # The proper fix would be to wait (with some limits) until the
6232 # connection has been made and drbd transitions from WFConnection
6233 # into any other network-connected state (Connected, SyncTarget,
6236 # 1st pass, assemble on all nodes in secondary mode
6237 for idx, inst_disk in enumerate(disks):
6238 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6240 node_disk = node_disk.Copy()
6241 node_disk.UnsetSize()
6242 lu.cfg.SetDiskID(node_disk, node)
6243 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6244 msg = result.fail_msg
6246 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6247 " (is_primary=False, pass=1): %s",
6248 inst_disk.iv_name, node, msg)
6249 if not ignore_secondaries:
6252 # FIXME: race condition on drbd migration to primary
6254 # 2nd pass, do only the primary node
6255 for idx, inst_disk in enumerate(disks):
6258 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6259 if node != instance.primary_node:
6262 node_disk = node_disk.Copy()
6263 node_disk.UnsetSize()
6264 lu.cfg.SetDiskID(node_disk, node)
6265 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6266 msg = result.fail_msg
6268 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6269 " (is_primary=True, pass=2): %s",
6270 inst_disk.iv_name, node, msg)
6273 dev_path = result.payload
6275 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6277 # leave the disks configured for the primary node
6278 # this is a workaround that would be fixed better by
6279 # improving the logical/physical id handling
6281 lu.cfg.SetDiskID(disk, instance.primary_node)
6283 return disks_ok, device_info
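# Editorial usage sketch (not part of the original source): callers unpack the
# (disks_ok, device_info) pair returned above and abort when assembly failed,
# roughly as _StartInstanceDisks below does:
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
# Each device_info entry is a (primary node, disk iv_name, device path) tuple
# describing where the assembled disk is visible.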
6286 def _StartInstanceDisks(lu, instance, force):
6287 """Start the disks of an instance.
6290 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6291 ignore_secondaries=force)
6293 _ShutdownInstanceDisks(lu, instance)
6294 if force is not None and not force:
6295 lu.proc.LogWarning("", hint="If the message above refers to a"
6297 " you can retry the operation using '--force'.")
6298 raise errors.OpExecError("Disk consistency error")
6301 class LUInstanceDeactivateDisks(NoHooksLU):
6302 """Shutdown an instance's disks.
6307 def ExpandNames(self):
6308 self._ExpandAndLockInstance()
6309 self.needed_locks[locking.LEVEL_NODE] = []
6310 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6312 def DeclareLocks(self, level):
6313 if level == locking.LEVEL_NODE:
6314 self._LockInstancesNodes()
6316 def CheckPrereq(self):
6317 """Check prerequisites.
6319 This checks that the instance is in the cluster.
6322 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6323 assert self.instance is not None, \
6324 "Cannot retrieve locked instance %s" % self.op.instance_name
6326 def Exec(self, feedback_fn):
6327 """Deactivate the disks
6330 instance = self.instance
6332 _ShutdownInstanceDisks(self, instance)
6334 _SafeShutdownInstanceDisks(self, instance)
6337 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6338 """Shutdown block devices of an instance.
6340   This function checks that an instance is not running before calling
6341 _ShutdownInstanceDisks.
6344 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6345 _ShutdownInstanceDisks(lu, instance, disks=disks)
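# Editorial sketch (not part of the original source): both shutdown helpers
# accept an optional subset of disks; a hypothetical caller that only wants to
# stop the first disk, with the safety check, could do
#   _SafeShutdownInstanceDisks(self, instance, disks=[instance.disks[0]])
# _ExpandCheckDisks below verifies that the subset really belongs to the
# instance and defaults to all disks when None is passed.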
6348 def _ExpandCheckDisks(instance, disks):
6349 """Return the instance disks selected by the disks list
6351 @type disks: list of L{objects.Disk} or None
6352 @param disks: selected disks
6353 @rtype: list of L{objects.Disk}
6354 @return: selected instance disks to act on
6358 return instance.disks
6360 if not set(disks).issubset(instance.disks):
6361 raise errors.ProgrammerError("Can only act on disks belonging to the"
6366 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6367 """Shutdown block devices of an instance.
6369 This does the shutdown on all nodes of the instance.
6371   If ignore_primary is false, errors on the primary node are
6376 disks = _ExpandCheckDisks(instance, disks)
6379 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6380 lu.cfg.SetDiskID(top_disk, node)
6381 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6382 msg = result.fail_msg
6384 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6385 disk.iv_name, node, msg)
6386 if ((node == instance.primary_node and not ignore_primary) or
6387 (node != instance.primary_node and not result.offline)):
6392 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6393 """Checks if a node has enough free memory.
6395   This function checks if a given node has the needed amount of free
6396   memory. In case the node has less memory or we cannot get the
6397   information from the node, this function raises an OpPrereqError
6400 @type lu: C{LogicalUnit}
6401 @param lu: a logical unit from which we get configuration data
6403 @param node: the node to check
6404 @type reason: C{str}
6405 @param reason: string to use in the error message
6406 @type requested: C{int}
6407 @param requested: the amount of memory in MiB to check for
6408 @type hypervisor_name: C{str}
6409 @param hypervisor_name: the hypervisor to ask for memory stats
6411 @return: node current free memory
6412 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6413 we cannot check the node
6416 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6417 nodeinfo[node].Raise("Can't get data from node %s" % node,
6418 prereq=True, ecode=errors.ECODE_ENVIRON)
6419 (_, _, (hv_info, )) = nodeinfo[node].payload
6421 free_mem = hv_info.get("memory_free", None)
6422 if not isinstance(free_mem, int):
6423 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6424 " was '%s'" % (node, free_mem),
6425 errors.ECODE_ENVIRON)
6426 if requested > free_mem:
6427 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6428 " needed %s MiB, available %s MiB" %
6429 (node, reason, requested, free_mem),
6434 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6435   """Checks if nodes have enough free disk space in all the VGs.
6437   This function checks if all given nodes have the needed amount of
6438   free disk. In case any node has less disk or we cannot get the
6439   information from the node, this function raises an OpPrereqError
6442 @type lu: C{LogicalUnit}
6443 @param lu: a logical unit from which we get configuration data
6444 @type nodenames: C{list}
6445 @param nodenames: the list of node names to check
6446 @type req_sizes: C{dict}
6447 @param req_sizes: the hash of vg and corresponding amount of disk in
6449 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6450 or we cannot check the node
6453 for vg, req_size in req_sizes.items():
6454 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
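# Editorial sketch (not part of the original source): req_sizes maps each
# volume group name to the total space, in MiB, required on it; a hypothetical
# call such as
#   _CheckNodesFreeDiskPerVG(self, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 10240, "metavg": 128})
# checks for 10 GiB on "xenvg" and 128 MiB on "metavg" on both nodes (node and
# VG names here are illustrative only).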
6457 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6458 """Checks if nodes have enough free disk space in the specified VG.
6460   This function checks if all given nodes have the needed amount of
6461   free disk. In case any node has less disk or we cannot get the
6462   information from the node, this function raises an OpPrereqError
6465 @type lu: C{LogicalUnit}
6466 @param lu: a logical unit from which we get configuration data
6467 @type nodenames: C{list}
6468 @param nodenames: the list of node names to check
6470 @param vg: the volume group to check
6471 @type requested: C{int}
6472 @param requested: the amount of disk in MiB to check for
6473 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6474 or we cannot check the node
6477 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6478 for node in nodenames:
6479 info = nodeinfo[node]
6480 info.Raise("Cannot get current information from node %s" % node,
6481 prereq=True, ecode=errors.ECODE_ENVIRON)
6482 (_, (vg_info, ), _) = info.payload
6483 vg_free = vg_info.get("vg_free", None)
6484 if not isinstance(vg_free, int):
6485 raise errors.OpPrereqError("Can't compute free disk space on node"
6486 " %s for vg %s, result was '%s'" %
6487 (node, vg, vg_free), errors.ECODE_ENVIRON)
6488 if requested > vg_free:
6489 raise errors.OpPrereqError("Not enough disk space on target node %s"
6490 " vg %s: required %d MiB, available %d MiB" %
6491 (node, vg, requested, vg_free),
6495 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6496 """Checks if nodes have enough physical CPUs
6498 This function checks if all given nodes have the needed number of
6499   physical CPUs. In case any node has fewer CPUs or we cannot get the
6500 information from the node, this function raises an OpPrereqError
6503 @type lu: C{LogicalUnit}
6504 @param lu: a logical unit from which we get configuration data
6505 @type nodenames: C{list}
6506 @param nodenames: the list of node names to check
6507 @type requested: C{int}
6508 @param requested: the minimum acceptable number of physical CPUs
6509 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6510 or we cannot check the node
6513 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6514 for node in nodenames:
6515 info = nodeinfo[node]
6516 info.Raise("Cannot get current information from node %s" % node,
6517 prereq=True, ecode=errors.ECODE_ENVIRON)
6518 (_, _, (hv_info, )) = info.payload
6519 num_cpus = hv_info.get("cpu_total", None)
6520 if not isinstance(num_cpus, int):
6521 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6522 " on node %s, result was '%s'" %
6523 (node, num_cpus), errors.ECODE_ENVIRON)
6524 if requested > num_cpus:
6525 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6526 "required" % (node, num_cpus, requested),
6530 class LUInstanceStartup(LogicalUnit):
6531 """Starts an instance.
6534 HPATH = "instance-start"
6535 HTYPE = constants.HTYPE_INSTANCE
6538 def CheckArguments(self):
6540 if self.op.beparams:
6541 # fill the beparams dict
6542 objects.UpgradeBeParams(self.op.beparams)
6543 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6545 def ExpandNames(self):
6546 self._ExpandAndLockInstance()
6547 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6549 def DeclareLocks(self, level):
6550 if level == locking.LEVEL_NODE_RES:
6551 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6553 def BuildHooksEnv(self):
6556 This runs on master, primary and secondary nodes of the instance.
6560 "FORCE": self.op.force,
6563 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6567 def BuildHooksNodes(self):
6568 """Build hooks nodes.
6571 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6574 def CheckPrereq(self):
6575 """Check prerequisites.
6577 This checks that the instance is in the cluster.
6580 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6581 assert self.instance is not None, \
6582 "Cannot retrieve locked instance %s" % self.op.instance_name
6585 if self.op.hvparams:
6586 # check hypervisor parameter syntax (locally)
6587 cluster = self.cfg.GetClusterInfo()
6588 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6589 filled_hvp = cluster.FillHV(instance)
6590 filled_hvp.update(self.op.hvparams)
6591 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6592 hv_type.CheckParameterSyntax(filled_hvp)
6593 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6595 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6597 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6599 if self.primary_offline and self.op.ignore_offline_nodes:
6600 self.proc.LogWarning("Ignoring offline primary node")
6602 if self.op.hvparams or self.op.beparams:
6603 self.proc.LogWarning("Overridden parameters are ignored")
6605 _CheckNodeOnline(self, instance.primary_node)
6607 bep = self.cfg.GetClusterInfo().FillBE(instance)
6608 bep.update(self.op.beparams)
6610 # check bridges existence
6611 _CheckInstanceBridgesExist(self, instance)
6613 remote_info = self.rpc.call_instance_info(instance.primary_node,
6615 instance.hypervisor)
6616 remote_info.Raise("Error checking node %s" % instance.primary_node,
6617 prereq=True, ecode=errors.ECODE_ENVIRON)
6618 if not remote_info.payload: # not running already
6619 _CheckNodeFreeMemory(self, instance.primary_node,
6620 "starting instance %s" % instance.name,
6621 bep[constants.BE_MINMEM], instance.hypervisor)
6623 def Exec(self, feedback_fn):
6624 """Start the instance.
6627 instance = self.instance
6628 force = self.op.force
6630 if not self.op.no_remember:
6631 self.cfg.MarkInstanceUp(instance.name)
6633 if self.primary_offline:
6634 assert self.op.ignore_offline_nodes
6635 self.proc.LogInfo("Primary node offline, marked instance as started")
6637 node_current = instance.primary_node
6639 _StartInstanceDisks(self, instance, force)
6642 self.rpc.call_instance_start(node_current,
6643 (instance, self.op.hvparams,
6645 self.op.startup_paused)
6646 msg = result.fail_msg
6648 _ShutdownInstanceDisks(self, instance)
6649 raise errors.OpExecError("Could not start instance: %s" % msg)
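# Editorial sketch (not part of the original source): the hvparams/beparams
# accepted by LUInstanceStartup are temporary overrides; a hypothetical opcode
# such as
#   opcodes.OpInstanceStartup(instance_name="inst1.example.com",
#                             beparams={constants.BE_MAXMEM: 2048})
# is merged with the cluster/instance defaults in CheckPrereq above and passed
# to the node daemon for this start only, without being written back to the
# instance configuration.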
6652 class LUInstanceReboot(LogicalUnit):
6653 """Reboot an instance.
6656 HPATH = "instance-reboot"
6657 HTYPE = constants.HTYPE_INSTANCE
6660 def ExpandNames(self):
6661 self._ExpandAndLockInstance()
6663 def BuildHooksEnv(self):
6666 This runs on master, primary and secondary nodes of the instance.
6670 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6671 "REBOOT_TYPE": self.op.reboot_type,
6672 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6675 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6679 def BuildHooksNodes(self):
6680 """Build hooks nodes.
6683 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6686 def CheckPrereq(self):
6687 """Check prerequisites.
6689 This checks that the instance is in the cluster.
6692 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6693 assert self.instance is not None, \
6694 "Cannot retrieve locked instance %s" % self.op.instance_name
6695 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6696 _CheckNodeOnline(self, instance.primary_node)
6698 # check bridges existence
6699 _CheckInstanceBridgesExist(self, instance)
6701 def Exec(self, feedback_fn):
6702 """Reboot the instance.
6705 instance = self.instance
6706 ignore_secondaries = self.op.ignore_secondaries
6707 reboot_type = self.op.reboot_type
6709 remote_info = self.rpc.call_instance_info(instance.primary_node,
6711 instance.hypervisor)
6712 remote_info.Raise("Error checking node %s" % instance.primary_node)
6713 instance_running = bool(remote_info.payload)
6715 node_current = instance.primary_node
6717 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6718 constants.INSTANCE_REBOOT_HARD]:
6719 for disk in instance.disks:
6720 self.cfg.SetDiskID(disk, node_current)
6721 result = self.rpc.call_instance_reboot(node_current, instance,
6723 self.op.shutdown_timeout)
6724 result.Raise("Could not reboot instance")
6726 if instance_running:
6727 result = self.rpc.call_instance_shutdown(node_current, instance,
6728 self.op.shutdown_timeout)
6729 result.Raise("Could not shutdown instance for full reboot")
6730 _ShutdownInstanceDisks(self, instance)
6732 self.LogInfo("Instance %s was already stopped, starting now",
6734 _StartInstanceDisks(self, instance, ignore_secondaries)
6735 result = self.rpc.call_instance_start(node_current,
6736 (instance, None, None), False)
6737 msg = result.fail_msg
6739 _ShutdownInstanceDisks(self, instance)
6740 raise errors.OpExecError("Could not start instance for"
6741 " full reboot: %s" % msg)
6743 self.cfg.MarkInstanceUp(instance.name)
6746 class LUInstanceShutdown(LogicalUnit):
6747 """Shutdown an instance.
6750 HPATH = "instance-stop"
6751 HTYPE = constants.HTYPE_INSTANCE
6754 def ExpandNames(self):
6755 self._ExpandAndLockInstance()
6757 def BuildHooksEnv(self):
6760 This runs on master, primary and secondary nodes of the instance.
6763 env = _BuildInstanceHookEnvByObject(self, self.instance)
6764 env["TIMEOUT"] = self.op.timeout
6767 def BuildHooksNodes(self):
6768 """Build hooks nodes.
6771 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6774 def CheckPrereq(self):
6775 """Check prerequisites.
6777 This checks that the instance is in the cluster.
6780 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6781 assert self.instance is not None, \
6782 "Cannot retrieve locked instance %s" % self.op.instance_name
6784 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6786 self.primary_offline = \
6787 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6789 if self.primary_offline and self.op.ignore_offline_nodes:
6790 self.proc.LogWarning("Ignoring offline primary node")
6792 _CheckNodeOnline(self, self.instance.primary_node)
6794 def Exec(self, feedback_fn):
6795 """Shutdown the instance.
6798 instance = self.instance
6799 node_current = instance.primary_node
6800 timeout = self.op.timeout
6802 if not self.op.no_remember:
6803 self.cfg.MarkInstanceDown(instance.name)
6805 if self.primary_offline:
6806 assert self.op.ignore_offline_nodes
6807 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6809 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6810 msg = result.fail_msg
6812 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6814 _ShutdownInstanceDisks(self, instance)
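# Editorial note (not part of the original source): with no_remember set, e.g.
#   opcodes.OpInstanceShutdown(instance_name="inst1.example.com",
#                              no_remember=True)
# the instance is stopped but MarkInstanceDown is skipped above, so the
# configured admin state is left untouched; LUInstanceStartup honours the same
# flag for the opposite transition.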
6817 class LUInstanceReinstall(LogicalUnit):
6818 """Reinstall an instance.
6821 HPATH = "instance-reinstall"
6822 HTYPE = constants.HTYPE_INSTANCE
6825 def ExpandNames(self):
6826 self._ExpandAndLockInstance()
6828 def BuildHooksEnv(self):
6831 This runs on master, primary and secondary nodes of the instance.
6834 return _BuildInstanceHookEnvByObject(self, self.instance)
6836 def BuildHooksNodes(self):
6837 """Build hooks nodes.
6840 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6843 def CheckPrereq(self):
6844 """Check prerequisites.
6846 This checks that the instance is in the cluster and is not running.
6849 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6850 assert instance is not None, \
6851 "Cannot retrieve locked instance %s" % self.op.instance_name
6852 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6853 " offline, cannot reinstall")
6854 for node in instance.secondary_nodes:
6855 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6856 " cannot reinstall")
6858 if instance.disk_template == constants.DT_DISKLESS:
6859 raise errors.OpPrereqError("Instance '%s' has no disks" %
6860 self.op.instance_name,
6862 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6864 if self.op.os_type is not None:
6866 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6867 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6868 instance_os = self.op.os_type
6870 instance_os = instance.os
6872 nodelist = list(instance.all_nodes)
6874 if self.op.osparams:
6875 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6876 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6877 self.os_inst = i_osdict # the new dict (without defaults)
6881 self.instance = instance
6883 def Exec(self, feedback_fn):
6884 """Reinstall the instance.
6887 inst = self.instance
6889 if self.op.os_type is not None:
6890 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6891 inst.os = self.op.os_type
6892 # Write to configuration
6893 self.cfg.Update(inst, feedback_fn)
6895 _StartInstanceDisks(self, inst, None)
6897 feedback_fn("Running the instance OS create scripts...")
6898 # FIXME: pass debug option from opcode to backend
6899 result = self.rpc.call_instance_os_add(inst.primary_node,
6900 (inst, self.os_inst), True,
6901 self.op.debug_level)
6902 result.Raise("Could not install OS for instance %s on node %s" %
6903 (inst.name, inst.primary_node))
6905 _ShutdownInstanceDisks(self, inst)
6908 class LUInstanceRecreateDisks(LogicalUnit):
6909 """Recreate an instance's missing disks.
6912 HPATH = "instance-recreate-disks"
6913 HTYPE = constants.HTYPE_INSTANCE
6916 _MODIFYABLE = frozenset([
6917 constants.IDISK_SIZE,
6918 constants.IDISK_MODE,
6921 # New or changed disk parameters may have different semantics
6922 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6923 constants.IDISK_ADOPT,
6925     # TODO: Implement support for changing the VG while recreating
6927 constants.IDISK_METAVG,
6930 def CheckArguments(self):
6931 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6932 # Normalize and convert deprecated list of disk indices
6933 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
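      # Editorial sketch (not part of the original source): the normalization
      # above turns the deprecated index-only form, e.g. [2, 0], into the
      # newer form of (index, parameters) pairs, [(0, {}), (2, {})]; an
      # explicit modern entry would look like, for example,
      # (0, {constants.IDISK_SIZE: 2048}).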
6935 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6937 raise errors.OpPrereqError("Some disks have been specified more than"
6938 " once: %s" % utils.CommaJoin(duplicates),
6941 for (idx, params) in self.op.disks:
6942 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6943 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6945 raise errors.OpPrereqError("Parameters for disk %s try to change"
6946                                    " unmodifiable parameter(s): %s" %
6947 (idx, utils.CommaJoin(unsupported)),
6950 def ExpandNames(self):
6951 self._ExpandAndLockInstance()
6952 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6954 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6955 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6957 self.needed_locks[locking.LEVEL_NODE] = []
6958 self.needed_locks[locking.LEVEL_NODE_RES] = []
6960 def DeclareLocks(self, level):
6961 if level == locking.LEVEL_NODE:
6962 # if we replace the nodes, we only need to lock the old primary,
6963 # otherwise we need to lock all nodes for disk re-creation
6964 primary_only = bool(self.op.nodes)
6965 self._LockInstancesNodes(primary_only=primary_only)
6966 elif level == locking.LEVEL_NODE_RES:
6968 self.needed_locks[locking.LEVEL_NODE_RES] = \
6969 self.needed_locks[locking.LEVEL_NODE][:]
6971 def BuildHooksEnv(self):
6974 This runs on master, primary and secondary nodes of the instance.
6977 return _BuildInstanceHookEnvByObject(self, self.instance)
6979 def BuildHooksNodes(self):
6980 """Build hooks nodes.
6983 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6986 def CheckPrereq(self):
6987 """Check prerequisites.
6989 This checks that the instance is in the cluster and is not running.
6992 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6993 assert instance is not None, \
6994 "Cannot retrieve locked instance %s" % self.op.instance_name
6996 if len(self.op.nodes) != len(instance.all_nodes):
6997 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6998 " %d replacement nodes were specified" %
6999 (instance.name, len(instance.all_nodes),
7000 len(self.op.nodes)),
7002 assert instance.disk_template != constants.DT_DRBD8 or \
7003 len(self.op.nodes) == 2
7004 assert instance.disk_template != constants.DT_PLAIN or \
7005 len(self.op.nodes) == 1
7006 primary_node = self.op.nodes[0]
7008 primary_node = instance.primary_node
7009 _CheckNodeOnline(self, primary_node)
7011 if instance.disk_template == constants.DT_DISKLESS:
7012 raise errors.OpPrereqError("Instance '%s' has no disks" %
7013 self.op.instance_name, errors.ECODE_INVAL)
7015 # if we replace nodes *and* the old primary is offline, we don't
7017 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7018 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7019 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7020 if not (self.op.nodes and old_pnode.offline):
7021 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7022 msg="cannot recreate disks")
7025 self.disks = dict(self.op.disks)
7027 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7029 maxidx = max(self.disks.keys())
7030 if maxidx >= len(instance.disks):
7031 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7034 if (self.op.nodes and
7035 sorted(self.disks.keys()) != range(len(instance.disks))):
7036 raise errors.OpPrereqError("Can't recreate disks partially and"
7037 " change the nodes at the same time",
7040 self.instance = instance
7042 def Exec(self, feedback_fn):
7043 """Recreate the disks.
7046 instance = self.instance
7048 assert (self.owned_locks(locking.LEVEL_NODE) ==
7049 self.owned_locks(locking.LEVEL_NODE_RES))
7052 mods = [] # keeps track of needed changes
7054 for idx, disk in enumerate(instance.disks):
7056 changes = self.disks[idx]
7058 # Disk should not be recreated
7062 # update secondaries for disks, if needed
7063 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7064 # need to update the nodes and minors
7065 assert len(self.op.nodes) == 2
7066 assert len(disk.logical_id) == 6 # otherwise disk internals
7068 (_, _, old_port, _, _, old_secret) = disk.logical_id
7069 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7070 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7071 new_minors[0], new_minors[1], old_secret)
7072 assert len(disk.logical_id) == len(new_id)
7076 mods.append((idx, new_id, changes))
7078 # now that we have passed all asserts above, we can apply the mods
7079 # in a single run (to avoid partial changes)
7080 for idx, new_id, changes in mods:
7081 disk = instance.disks[idx]
7082 if new_id is not None:
7083 assert disk.dev_type == constants.LD_DRBD8
7084 disk.logical_id = new_id
7086 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7087 mode=changes.get(constants.IDISK_MODE, None))
7089 # change primary node, if needed
7091 instance.primary_node = self.op.nodes[0]
7092 self.LogWarning("Changing the instance's nodes, you will have to"
7093 " remove any disks left on the older nodes manually")
7096 self.cfg.Update(instance, feedback_fn)
7098 _CreateDisks(self, instance, to_skip=to_skip)
7101 class LUInstanceRename(LogicalUnit):
7102 """Rename an instance.
7105 HPATH = "instance-rename"
7106 HTYPE = constants.HTYPE_INSTANCE
7108 def CheckArguments(self):
7112 if self.op.ip_check and not self.op.name_check:
7113 # TODO: make the ip check more flexible and not depend on the name check
7114 raise errors.OpPrereqError("IP address check requires a name check",
7117 def BuildHooksEnv(self):
7120 This runs on master, primary and secondary nodes of the instance.
7123 env = _BuildInstanceHookEnvByObject(self, self.instance)
7124 env["INSTANCE_NEW_NAME"] = self.op.new_name
7127 def BuildHooksNodes(self):
7128 """Build hooks nodes.
7131 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7134 def CheckPrereq(self):
7135 """Check prerequisites.
7137 This checks that the instance is in the cluster and is not running.
7140 self.op.instance_name = _ExpandInstanceName(self.cfg,
7141 self.op.instance_name)
7142 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7143 assert instance is not None
7144 _CheckNodeOnline(self, instance.primary_node)
7145 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7146 msg="cannot rename")
7147 self.instance = instance
7149 new_name = self.op.new_name
7150 if self.op.name_check:
7151 hostname = netutils.GetHostname(name=new_name)
7152 if hostname.name != new_name:
7153 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7155 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7156 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7157 " same as given hostname '%s'") %
7158 (hostname.name, self.op.new_name),
7160 new_name = self.op.new_name = hostname.name
7161 if (self.op.ip_check and
7162 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7163 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7164 (hostname.ip, new_name),
7165 errors.ECODE_NOTUNIQUE)
7167 instance_list = self.cfg.GetInstanceList()
7168 if new_name in instance_list and new_name != instance.name:
7169 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7170 new_name, errors.ECODE_EXISTS)
7172 def Exec(self, feedback_fn):
7173 """Rename the instance.
7176 inst = self.instance
7177 old_name = inst.name
7179 rename_file_storage = False
7180 if (inst.disk_template in constants.DTS_FILEBASED and
7181 self.op.new_name != inst.name):
7182 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7183 rename_file_storage = True
7185 self.cfg.RenameInstance(inst.name, self.op.new_name)
7186 # Change the instance lock. This is definitely safe while we hold the BGL.
7187 # Otherwise the new lock would have to be added in acquired mode.
7189 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7190 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7192 # re-read the instance from the configuration after rename
7193 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7195 if rename_file_storage:
7196 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7197 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7198 old_file_storage_dir,
7199 new_file_storage_dir)
7200 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7201 " (but the instance has been renamed in Ganeti)" %
7202 (inst.primary_node, old_file_storage_dir,
7203 new_file_storage_dir))
7205 _StartInstanceDisks(self, inst, None)
7207 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7208 old_name, self.op.debug_level)
7209 msg = result.fail_msg
7211 msg = ("Could not run OS rename script for instance %s on node %s"
7212 " (but the instance has been renamed in Ganeti): %s" %
7213 (inst.name, inst.primary_node, msg))
7214 self.proc.LogWarning(msg)
7216 _ShutdownInstanceDisks(self, inst)
7221 class LUInstanceRemove(LogicalUnit):
7222 """Remove an instance.
7225 HPATH = "instance-remove"
7226 HTYPE = constants.HTYPE_INSTANCE
7229 def ExpandNames(self):
7230 self._ExpandAndLockInstance()
7231 self.needed_locks[locking.LEVEL_NODE] = []
7232 self.needed_locks[locking.LEVEL_NODE_RES] = []
7233 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7235 def DeclareLocks(self, level):
7236 if level == locking.LEVEL_NODE:
7237 self._LockInstancesNodes()
7238 elif level == locking.LEVEL_NODE_RES:
7240 self.needed_locks[locking.LEVEL_NODE_RES] = \
7241 self.needed_locks[locking.LEVEL_NODE][:]
7243 def BuildHooksEnv(self):
7246 This runs on master, primary and secondary nodes of the instance.
7249 env = _BuildInstanceHookEnvByObject(self, self.instance)
7250 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7253 def BuildHooksNodes(self):
7254 """Build hooks nodes.
7257 nl = [self.cfg.GetMasterNode()]
7258 nl_post = list(self.instance.all_nodes) + nl
7259 return (nl, nl_post)
7261 def CheckPrereq(self):
7262 """Check prerequisites.
7264 This checks that the instance is in the cluster.
7267 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7268 assert self.instance is not None, \
7269 "Cannot retrieve locked instance %s" % self.op.instance_name
7271 def Exec(self, feedback_fn):
7272 """Remove the instance.
7275 instance = self.instance
7276 logging.info("Shutting down instance %s on node %s",
7277 instance.name, instance.primary_node)
7279 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7280 self.op.shutdown_timeout)
7281 msg = result.fail_msg
7283 if self.op.ignore_failures:
7284 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7286 raise errors.OpExecError("Could not shutdown instance %s on"
7288 (instance.name, instance.primary_node, msg))
7290 assert (self.owned_locks(locking.LEVEL_NODE) ==
7291 self.owned_locks(locking.LEVEL_NODE_RES))
7292 assert not (set(instance.all_nodes) -
7293 self.owned_locks(locking.LEVEL_NODE)), \
7294 "Not owning correct locks"
7296 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7299 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7300 """Utility function to remove an instance.
7303 logging.info("Removing block devices for instance %s", instance.name)
7305 if not _RemoveDisks(lu, instance):
7306 if not ignore_failures:
7307 raise errors.OpExecError("Can't remove instance's disks")
7308 feedback_fn("Warning: can't remove instance's disks")
7310 logging.info("Removing instance %s out of cluster config", instance.name)
7312 lu.cfg.RemoveInstance(instance.name)
7314 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7315 "Instance lock removal conflict"
7317 # Remove lock for the instance
7318 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7321 class LUInstanceQuery(NoHooksLU):
7322 """Logical unit for querying instances.
7325 # pylint: disable=W0142
7328 def CheckArguments(self):
7329 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7330 self.op.output_fields, self.op.use_locking)
7332 def ExpandNames(self):
7333 self.iq.ExpandNames(self)
7335 def DeclareLocks(self, level):
7336 self.iq.DeclareLocks(self, level)
7338 def Exec(self, feedback_fn):
7339 return self.iq.OldStyleQuery(self)
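# Editorial sketch (not part of the original source): qlang.MakeSimpleFilter
# turns a plain list of names into a query filter; roughly,
#   qlang.MakeSimpleFilter("name", ["inst1", "inst2"])
# yields a filter of the form ["|", ["=", "name", "inst1"],
# ["=", "name", "inst2"]], while an empty name list yields None, meaning
# "no filtering, return all instances".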
7342 class LUInstanceFailover(LogicalUnit):
7343 """Failover an instance.
7346 HPATH = "instance-failover"
7347 HTYPE = constants.HTYPE_INSTANCE
7350 def CheckArguments(self):
7351 """Check the arguments.
7354 self.iallocator = getattr(self.op, "iallocator", None)
7355 self.target_node = getattr(self.op, "target_node", None)
7357 def ExpandNames(self):
7358 self._ExpandAndLockInstance()
7360 if self.op.target_node is not None:
7361 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7363 self.needed_locks[locking.LEVEL_NODE] = []
7364 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7366 self.needed_locks[locking.LEVEL_NODE_RES] = []
7367 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7369 ignore_consistency = self.op.ignore_consistency
7370 shutdown_timeout = self.op.shutdown_timeout
7371 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7374 ignore_consistency=ignore_consistency,
7375 shutdown_timeout=shutdown_timeout,
7376 ignore_ipolicy=self.op.ignore_ipolicy)
7377 self.tasklets = [self._migrater]
7379 def DeclareLocks(self, level):
7380 if level == locking.LEVEL_NODE:
7381 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7382 if instance.disk_template in constants.DTS_EXT_MIRROR:
7383 if self.op.target_node is None:
7384 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7386 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7387 self.op.target_node]
7388 del self.recalculate_locks[locking.LEVEL_NODE]
7390 self._LockInstancesNodes()
7391 elif level == locking.LEVEL_NODE_RES:
7393 self.needed_locks[locking.LEVEL_NODE_RES] = \
7394 self.needed_locks[locking.LEVEL_NODE][:]
7396 def BuildHooksEnv(self):
7399 This runs on master, primary and secondary nodes of the instance.
7402 instance = self._migrater.instance
7403 source_node = instance.primary_node
7404 target_node = self.op.target_node
7406 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7407 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7408 "OLD_PRIMARY": source_node,
7409 "NEW_PRIMARY": target_node,
7412 if instance.disk_template in constants.DTS_INT_MIRROR:
7413 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7414 env["NEW_SECONDARY"] = source_node
7416 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7418 env.update(_BuildInstanceHookEnvByObject(self, instance))
7422 def BuildHooksNodes(self):
7423 """Build hooks nodes.
7426 instance = self._migrater.instance
7427 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7428 return (nl, nl + [instance.primary_node])
7431 class LUInstanceMigrate(LogicalUnit):
7432 """Migrate an instance.
7434 This is migration without shutting down, compared to the failover,
7435 which is done with shutdown.
7438 HPATH = "instance-migrate"
7439 HTYPE = constants.HTYPE_INSTANCE
7442 def ExpandNames(self):
7443 self._ExpandAndLockInstance()
7445 if self.op.target_node is not None:
7446 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7448 self.needed_locks[locking.LEVEL_NODE] = []
7449 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7451 self.needed_locks[locking.LEVEL_NODE] = []
7452 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7454 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7455 cleanup=self.op.cleanup,
7457 fallback=self.op.allow_failover,
7458 ignore_ipolicy=self.op.ignore_ipolicy)
7459 self.tasklets = [self._migrater]
7461 def DeclareLocks(self, level):
7462 if level == locking.LEVEL_NODE:
7463 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7464 if instance.disk_template in constants.DTS_EXT_MIRROR:
7465 if self.op.target_node is None:
7466 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7468 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7469 self.op.target_node]
7470 del self.recalculate_locks[locking.LEVEL_NODE]
7472 self._LockInstancesNodes()
7473 elif level == locking.LEVEL_NODE_RES:
7475 self.needed_locks[locking.LEVEL_NODE_RES] = \
7476 self.needed_locks[locking.LEVEL_NODE][:]
7478 def BuildHooksEnv(self):
7481 This runs on master, primary and secondary nodes of the instance.
7484 instance = self._migrater.instance
7485 source_node = instance.primary_node
7486 target_node = self.op.target_node
7487 env = _BuildInstanceHookEnvByObject(self, instance)
7489 "MIGRATE_LIVE": self._migrater.live,
7490 "MIGRATE_CLEANUP": self.op.cleanup,
7491 "OLD_PRIMARY": source_node,
7492 "NEW_PRIMARY": target_node,
7495 if instance.disk_template in constants.DTS_INT_MIRROR:
7496 env["OLD_SECONDARY"] = target_node
7497 env["NEW_SECONDARY"] = source_node
7499 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7503 def BuildHooksNodes(self):
7504 """Build hooks nodes.
7507 instance = self._migrater.instance
7508 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7509 return (nl, nl + [instance.primary_node])
7512 class LUInstanceMove(LogicalUnit):
7513 """Move an instance by data-copying.
7516 HPATH = "instance-move"
7517 HTYPE = constants.HTYPE_INSTANCE
7520 def ExpandNames(self):
7521 self._ExpandAndLockInstance()
7522 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7523 self.op.target_node = target_node
7524 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7525 self.needed_locks[locking.LEVEL_NODE_RES] = []
7526 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7528 def DeclareLocks(self, level):
7529 if level == locking.LEVEL_NODE:
7530 self._LockInstancesNodes(primary_only=True)
7531 elif level == locking.LEVEL_NODE_RES:
7533 self.needed_locks[locking.LEVEL_NODE_RES] = \
7534 self.needed_locks[locking.LEVEL_NODE][:]
7536 def BuildHooksEnv(self):
7539 This runs on master, primary and secondary nodes of the instance.
7543 "TARGET_NODE": self.op.target_node,
7544 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7546 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7549 def BuildHooksNodes(self):
7550 """Build hooks nodes.
7554 self.cfg.GetMasterNode(),
7555 self.instance.primary_node,
7556 self.op.target_node,
7560 def CheckPrereq(self):
7561 """Check prerequisites.
7563 This checks that the instance is in the cluster.
7566 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7567 assert self.instance is not None, \
7568 "Cannot retrieve locked instance %s" % self.op.instance_name
7570 node = self.cfg.GetNodeInfo(self.op.target_node)
7571 assert node is not None, \
7572 "Cannot retrieve locked node %s" % self.op.target_node
7574 self.target_node = target_node = node.name
7576 if target_node == instance.primary_node:
7577 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7578 (instance.name, target_node),
7581 bep = self.cfg.GetClusterInfo().FillBE(instance)
7583 for idx, dsk in enumerate(instance.disks):
7584 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7585 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7586 " cannot copy" % idx, errors.ECODE_STATE)
7588 _CheckNodeOnline(self, target_node)
7589 _CheckNodeNotDrained(self, target_node)
7590 _CheckNodeVmCapable(self, target_node)
7591 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7592 self.cfg.GetNodeGroup(node.group))
7593 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7594 ignore=self.op.ignore_ipolicy)
7596 if instance.admin_state == constants.ADMINST_UP:
7597 # check memory requirements on the target node
7598 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7599 instance.name, bep[constants.BE_MAXMEM],
7600 instance.hypervisor)
7602 self.LogInfo("Not checking memory on the secondary node as"
7603 " instance will not be started")
7605 # check bridge existence
7606 _CheckInstanceBridgesExist(self, instance, node=target_node)
7608 def Exec(self, feedback_fn):
7609 """Move an instance.
7611 The move is done by shutting it down on its present node, copying
7612 the data over (slow) and starting it on the new node.
7615 instance = self.instance
7617 source_node = instance.primary_node
7618 target_node = self.target_node
7620 self.LogInfo("Shutting down instance %s on source node %s",
7621 instance.name, source_node)
7623 assert (self.owned_locks(locking.LEVEL_NODE) ==
7624 self.owned_locks(locking.LEVEL_NODE_RES))
7626 result = self.rpc.call_instance_shutdown(source_node, instance,
7627 self.op.shutdown_timeout)
7628 msg = result.fail_msg
7630 if self.op.ignore_consistency:
7631 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7632 " Proceeding anyway. Please make sure node"
7633 " %s is down. Error details: %s",
7634 instance.name, source_node, source_node, msg)
7636 raise errors.OpExecError("Could not shutdown instance %s on"
7638 (instance.name, source_node, msg))
7640 # create the target disks
7642 _CreateDisks(self, instance, target_node=target_node)
7643 except errors.OpExecError:
7644 self.LogWarning("Device creation failed, reverting...")
7646 _RemoveDisks(self, instance, target_node=target_node)
7648 self.cfg.ReleaseDRBDMinors(instance.name)
7651 cluster_name = self.cfg.GetClusterInfo().cluster_name
7654 # activate, get path, copy the data over
7655 for idx, disk in enumerate(instance.disks):
7656 self.LogInfo("Copying data for disk %d", idx)
7657 result = self.rpc.call_blockdev_assemble(target_node, disk,
7658 instance.name, True, idx)
7660 self.LogWarning("Can't assemble newly created disk %d: %s",
7661 idx, result.fail_msg)
7662 errs.append(result.fail_msg)
7664 dev_path = result.payload
7665 result = self.rpc.call_blockdev_export(source_node, disk,
7666 target_node, dev_path,
7669 self.LogWarning("Can't copy data over for disk %d: %s",
7670 idx, result.fail_msg)
7671 errs.append(result.fail_msg)
7675 self.LogWarning("Some disks failed to copy, aborting")
7677 _RemoveDisks(self, instance, target_node=target_node)
7679 self.cfg.ReleaseDRBDMinors(instance.name)
7680 raise errors.OpExecError("Errors during disk copy: %s" %
7683 instance.primary_node = target_node
7684 self.cfg.Update(instance, feedback_fn)
7686 self.LogInfo("Removing the disks on the original node")
7687 _RemoveDisks(self, instance, target_node=source_node)
7689 # Only start the instance if it's marked as up
7690 if instance.admin_state == constants.ADMINST_UP:
7691 self.LogInfo("Starting instance %s on node %s",
7692 instance.name, target_node)
7694 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7695 ignore_secondaries=True)
7697 _ShutdownInstanceDisks(self, instance)
7698 raise errors.OpExecError("Can't activate the instance's disks")
7700 result = self.rpc.call_instance_start(target_node,
7701 (instance, None, None), False)
7702 msg = result.fail_msg
7704 _ShutdownInstanceDisks(self, instance)
7705 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7706 (instance.name, target_node, msg))
7709 class LUNodeMigrate(LogicalUnit):
7710 """Migrate all instances from a node.
7713 HPATH = "node-migrate"
7714 HTYPE = constants.HTYPE_NODE
7717 def CheckArguments(self):
7720 def ExpandNames(self):
7721 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7723 self.share_locks = _ShareAll()
7724 self.needed_locks = {
7725 locking.LEVEL_NODE: [self.op.node_name],
7728 def BuildHooksEnv(self):
7731 This runs on the master, the primary and all the secondaries.
7735 "NODE_NAME": self.op.node_name,
7738 def BuildHooksNodes(self):
7739 """Build hooks nodes.
7742 nl = [self.cfg.GetMasterNode()]
7745 def CheckPrereq(self):
7748 def Exec(self, feedback_fn):
7749 # Prepare jobs for migration instances
7751 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7754 iallocator=self.op.iallocator,
7755 target_node=self.op.target_node,
7756 ignore_ipolicy=self.op.ignore_ipolicy)]
7757 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7760 # TODO: Run iallocator in this opcode and pass correct placement options to
7761 # OpInstanceMigrate. Since other jobs can modify the cluster between
7762 # running the iallocator and the actual migration, a good consistency model
7763 # will have to be found.
7765 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7766 frozenset([self.op.node_name]))
7768 return ResultWithJobs(jobs)
7771 class TLMigrateInstance(Tasklet):
7772 """Tasklet class for instance migration.
7775 @ivar live: whether the migration will be done live or non-live;
7776 this variable is initialized only after CheckPrereq has run
7777 @type cleanup: boolean
7778 @ivar cleanup: Whether we clean up from a failed migration
7779 @type iallocator: string
7780 @ivar iallocator: The iallocator used to determine target_node
7781 @type target_node: string
7782 @ivar target_node: If given, the target_node to reallocate the instance to
7783 @type failover: boolean
7784 @ivar failover: Whether operation results in failover or migration
7785 @type fallback: boolean
7786 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7788 @type ignore_consistency: boolean
7789 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
7791 @type shutdown_timeout: int
7792 @ivar shutdown_timeout: In case of failover, the timeout used for the instance shutdown
7793 @type ignore_ipolicy: bool
7794 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7799 _MIGRATION_POLL_INTERVAL = 1 # seconds
7800 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7802 def __init__(self, lu, instance_name, cleanup=False,
7803 failover=False, fallback=False,
7804 ignore_consistency=False,
7805 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7806 ignore_ipolicy=False):
7807 """Initializes this class.
7810 Tasklet.__init__(self, lu)
7813 self.instance_name = instance_name
7814 self.cleanup = cleanup
7815 self.live = False # will be overridden later
7816 self.failover = failover
7817 self.fallback = fallback
7818 self.ignore_consistency = ignore_consistency
7819 self.shutdown_timeout = shutdown_timeout
7820 self.ignore_ipolicy = ignore_ipolicy
7822 def CheckPrereq(self):
7823 """Check prerequisites.
7825 This checks that the instance is in the cluster.
7828 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7829 instance = self.cfg.GetInstanceInfo(instance_name)
7830 assert instance is not None
7831 self.instance = instance
7832 cluster = self.cfg.GetClusterInfo()
7834 if (not self.cleanup and
7835 not instance.admin_state == constants.ADMINST_UP and
7836 not self.failover and self.fallback):
7837 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7838 " switching to failover")
7839 self.failover = True
7841 if instance.disk_template not in constants.DTS_MIRRORED:
7846 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7847 " %s" % (instance.disk_template, text),
7850 if instance.disk_template in constants.DTS_EXT_MIRROR:
7851 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7853 if self.lu.op.iallocator:
7854 self._RunAllocator()
7856 # We set self.target_node as it is required by
7858 self.target_node = self.lu.op.target_node
7860 # Check that the target node is correct in terms of instance policy
7861 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7862 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7863 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7864 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7865 ignore=self.ignore_ipolicy)
7867 # self.target_node is already populated, either directly or by the iallocator run
7869 target_node = self.target_node
7870 if self.target_node == instance.primary_node:
7871 raise errors.OpPrereqError("Cannot migrate instance %s"
7872 " to its primary (%s)" %
7873 (instance.name, instance.primary_node))
7875 if len(self.lu.tasklets) == 1:
7876 # It is safe to release locks only when we're the only tasklet
7878 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7879 keep=[instance.primary_node, self.target_node])
7882 secondary_nodes = instance.secondary_nodes
7883 if not secondary_nodes:
7884 raise errors.ConfigurationError("No secondary node but using"
7885 " %s disk template" %
7886 instance.disk_template)
7887 target_node = secondary_nodes[0]
7888 if self.lu.op.iallocator or (self.lu.op.target_node and
7889 self.lu.op.target_node != target_node):
7891 text = "failed over"
7894 raise errors.OpPrereqError("Instances with disk template %s cannot"
7895 " be %s to arbitrary nodes"
7896 " (neither an iallocator nor a target"
7897 " node can be passed)" %
7898 (instance.disk_template, text),
7900 nodeinfo = self.cfg.GetNodeInfo(target_node)
7901 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7902 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7903 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7904 ignore=self.ignore_ipolicy)
7906 i_be = cluster.FillBE(instance)
7908 # check memory requirements on the secondary node
7909 if (not self.cleanup and
7910 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7911 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7912 "migrating instance %s" %
7914 i_be[constants.BE_MINMEM],
7915 instance.hypervisor)
7917 self.lu.LogInfo("Not checking memory on the secondary node as"
7918 " instance will not be started")
7920 # check if failover must be forced instead of migration
7921 if (not self.cleanup and not self.failover and
7922 i_be[constants.BE_ALWAYS_FAILOVER]):
7924 self.lu.LogInfo("Instance configured to always failover; fallback"
7926 self.failover = True
7928 raise errors.OpPrereqError("This instance has been configured to"
7929 " always failover, please allow failover",
7932 # check bridge existence
7933 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7935 if not self.cleanup:
7936 _CheckNodeNotDrained(self.lu, target_node)
7937 if not self.failover:
7938 result = self.rpc.call_instance_migratable(instance.primary_node,
7940 if result.fail_msg and self.fallback:
7941 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7943 self.failover = True
7945 result.Raise("Can't migrate, please use failover",
7946 prereq=True, ecode=errors.ECODE_STATE)
7948 assert not (self.failover and self.cleanup)
7950 if not self.failover:
7951 if self.lu.op.live is not None and self.lu.op.mode is not None:
7952 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7953 " parameters are accepted",
7955 if self.lu.op.live is not None:
7957 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7959 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7960 # reset the 'live' parameter to None so that repeated
7961 # invocations of CheckPrereq do not raise an exception
7962 self.lu.op.live = None
7963 elif self.lu.op.mode is None:
7964 # read the default value from the hypervisor
7965 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7966 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7968 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
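# In short, the precedence implemented above is: an explicit 'live' boolean is
# translated into a migration mode, an explicit 'mode' is used as given, and
# otherwise the hypervisor's HV_MIGRATION_MODE default decides whether the
# migration will be live.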
7970 # Failover is never live
7973 if not (self.failover or self.cleanup):
7974 remote_info = self.rpc.call_instance_info(instance.primary_node,
7976 instance.hypervisor)
7977 remote_info.Raise("Error checking instance on node %s" %
7978 instance.primary_node)
7979 instance_running = bool(remote_info.payload)
7980 if instance_running:
7981 self.current_mem = int(remote_info.payload["memory"])
7983 def _RunAllocator(self):
7984 """Run the allocator based on input opcode.
7987 # FIXME: add a self.ignore_ipolicy option
7988 ial = IAllocator(self.cfg, self.rpc,
7989 mode=constants.IALLOCATOR_MODE_RELOC,
7990 name=self.instance_name,
7991 # TODO See why hail breaks with a single node below
7992 relocate_from=[self.instance.primary_node,
7993 self.instance.primary_node],
7996 ial.Run(self.lu.op.iallocator)
7999 raise errors.OpPrereqError("Can't compute nodes using"
8000 " iallocator '%s': %s" %
8001 (self.lu.op.iallocator, ial.info),
8003 if len(ial.result) != ial.required_nodes:
8004 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8005 " of nodes (%s), required %s" %
8006 (self.lu.op.iallocator, len(ial.result),
8007 ial.required_nodes), errors.ECODE_FAULT)
8008 self.target_node = ial.result[0]
8009 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8010 self.instance_name, self.lu.op.iallocator,
8011 utils.CommaJoin(ial.result))
8013 def _WaitUntilSync(self):
8014 """Poll with custom rpc for disk sync.
8016 This uses our own step-based rpc call.
8019 self.feedback_fn("* wait until resync is done")
8023 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8025 self.instance.disks)
8027 for node, nres in result.items():
8028 nres.Raise("Cannot resync disks on node %s" % node)
8029 node_done, node_percent = nres.payload
8030 all_done = all_done and node_done
8031 if node_percent is not None:
8032 min_percent = min(min_percent, node_percent)
8034 if min_percent < 100:
8035 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8038 def _EnsureSecondary(self, node):
8039 """Demote a node to secondary.
8042 self.feedback_fn("* switching node %s to secondary mode" % node)
8044 for dev in self.instance.disks:
8045 self.cfg.SetDiskID(dev, node)
8047 result = self.rpc.call_blockdev_close(node, self.instance.name,
8048 self.instance.disks)
8049 result.Raise("Cannot change disk to secondary on node %s" % node)
8051 def _GoStandalone(self):
8052 """Disconnect from the network.
8055 self.feedback_fn("* changing into standalone mode")
8056 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8057 self.instance.disks)
8058 for node, nres in result.items():
8059 nres.Raise("Cannot disconnect disks node %s" % node)
8061 def _GoReconnect(self, multimaster):
8062 """Reconnect to the network.
8068 msg = "single-master"
8069 self.feedback_fn("* changing disks into %s mode" % msg)
8070 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8071 self.instance.disks,
8072 self.instance.name, multimaster)
8073 for node, nres in result.items():
8074 nres.Raise("Cannot change disks config on node %s" % node)
8076 def _ExecCleanup(self):
8077 """Try to cleanup after a failed migration.
8079 The cleanup is done by:
8080 - check that the instance is running only on one node
8081 (and update the config if needed)
8082 - change disks on its secondary node to secondary
8083 - wait until disks are fully synchronized
8084 - disconnect from the network
8085 - change disks into single-master mode
8086 - wait again until disks are fully synchronized
8089 instance = self.instance
8090 target_node = self.target_node
8091 source_node = self.source_node
8093 # check running on only one node
8094 self.feedback_fn("* checking where the instance actually runs"
8095 " (if this hangs, the hypervisor might be in"
8097 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8098 for node, result in ins_l.items():
8099 result.Raise("Can't contact node %s" % node)
8101 runningon_source = instance.name in ins_l[source_node].payload
8102 runningon_target = instance.name in ins_l[target_node].payload
8104 if runningon_source and runningon_target:
8105 raise errors.OpExecError("Instance seems to be running on two nodes,"
8106 " or the hypervisor is confused; you will have"
8107 " to ensure manually that it runs only on one"
8108 " and restart this operation")
8110 if not (runningon_source or runningon_target):
8111 raise errors.OpExecError("Instance does not seem to be running at all;"
8112 " in this case it's safer to repair by"
8113 " running 'gnt-instance stop' to ensure disk"
8114 " shutdown, and then restarting it")
8116 if runningon_target:
8117 # the migration has actually succeeded, we need to update the config
8118 self.feedback_fn("* instance running on secondary node (%s),"
8119 " updating config" % target_node)
8120 instance.primary_node = target_node
8121 self.cfg.Update(instance, self.feedback_fn)
8122 demoted_node = source_node
8124 self.feedback_fn("* instance confirmed to be running on its"
8125 " primary node (%s)" % source_node)
8126 demoted_node = target_node
8128 if instance.disk_template in constants.DTS_INT_MIRROR:
8129 self._EnsureSecondary(demoted_node)
8131 self._WaitUntilSync()
8132 except errors.OpExecError:
8133 # we ignore errors here, since if the device is standalone, it
8134 # won't be able to sync
8136 self._GoStandalone()
8137 self._GoReconnect(False)
8138 self._WaitUntilSync()
8140 self.feedback_fn("* done")
8142 def _RevertDiskStatus(self):
8143 """Try to revert the disk status after a failed migration.
8146 target_node = self.target_node
8147 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8151 self._EnsureSecondary(target_node)
8152 self._GoStandalone()
8153 self._GoReconnect(False)
8154 self._WaitUntilSync()
8155 except errors.OpExecError, err:
8156 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8157 " please try to recover the instance manually;"
8158 " error '%s'" % str(err))
8160 def _AbortMigration(self):
8161 """Call the hypervisor code to abort a started migration.
8164 instance = self.instance
8165 target_node = self.target_node
8166 source_node = self.source_node
8167 migration_info = self.migration_info
8169 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8173 abort_msg = abort_result.fail_msg
8175 logging.error("Aborting migration failed on target node %s: %s",
8176 target_node, abort_msg)
8177 # Don't raise an exception here, as we still have to try to revert the
8178 # disk status, even if this step failed.
8180 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8181 instance, False, self.live)
8182 abort_msg = abort_result.fail_msg
8184 logging.error("Aborting migration failed on source node %s: %s",
8185 source_node, abort_msg)
8187 def _ExecMigration(self):
8188 """Migrate an instance.
8190 The migrate is done by:
8191 - change the disks into dual-master mode
8192 - wait until disks are fully synchronized again
8193 - migrate the instance
8194 - change disks on the new secondary node (the old primary) to secondary
8195 - wait until disks are fully synchronized
8196 - change disks into single-master mode
8199 instance = self.instance
8200 target_node = self.target_node
8201 source_node = self.source_node
8203 # Check for hypervisor version mismatch and warn the user.
8204 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8205 None, [self.instance.hypervisor])
8206 for ninfo in nodeinfo.values():
8207 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8209 (_, _, (src_info, )) = nodeinfo[source_node].payload
8210 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8212 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8213 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8214 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8215 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8216 if src_version != dst_version:
8217 self.feedback_fn("* warning: hypervisor version mismatch between"
8218 " source (%s) and target (%s) node" %
8219 (src_version, dst_version))
8221 self.feedback_fn("* checking disk consistency between source and target")
8222 for dev in instance.disks:
8223 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8224 raise errors.OpExecError("Disk %s is degraded or not fully"
8225 " synchronized on target node,"
8226 " aborting migration" % dev.iv_name)
8228 if self.current_mem > self.tgt_free_mem:
8229 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8230 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8233 rpcres.Raise("Cannot modify instance runtime memory")
8235 # First get the migration information from the remote node
8236 result = self.rpc.call_migration_info(source_node, instance)
8237 msg = result.fail_msg
8239 log_err = ("Failed fetching source migration information from %s: %s" %
8241 logging.error(log_err)
8242 raise errors.OpExecError(log_err)
8244 self.migration_info = migration_info = result.payload
8246 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8247 # Then switch the disks to master/master mode
8248 self._EnsureSecondary(target_node)
8249 self._GoStandalone()
8250 self._GoReconnect(True)
8251 self._WaitUntilSync()
8253 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8254 result = self.rpc.call_accept_instance(target_node,
8257 self.nodes_ip[target_node])
8259 msg = result.fail_msg
8261 logging.error("Instance pre-migration failed, trying to revert"
8262 " disk status: %s", msg)
8263 self.feedback_fn("Pre-migration failed, aborting")
8264 self._AbortMigration()
8265 self._RevertDiskStatus()
8266 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8267 (instance.name, msg))
8269 self.feedback_fn("* migrating instance to %s" % target_node)
8270 result = self.rpc.call_instance_migrate(source_node, instance,
8271 self.nodes_ip[target_node],
8273 msg = result.fail_msg
8275 logging.error("Instance migration failed, trying to revert"
8276 " disk status: %s", msg)
8277 self.feedback_fn("Migration failed, aborting")
8278 self._AbortMigration()
8279 self._RevertDiskStatus()
8280 raise errors.OpExecError("Could not migrate instance %s: %s" %
8281 (instance.name, msg))
8283 self.feedback_fn("* starting memory transfer")
8284 last_feedback = time.time()
8286 result = self.rpc.call_instance_get_migration_status(source_node,
8288 msg = result.fail_msg
8289 ms = result.payload # MigrationStatus instance
8290 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8291 logging.error("Instance migration failed, trying to revert"
8292 " disk status: %s", msg)
8293 self.feedback_fn("Migration failed, aborting")
8294 self._AbortMigration()
8295 self._RevertDiskStatus()
8296 raise errors.OpExecError("Could not migrate instance %s: %s" %
8297 (instance.name, msg))
8299 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8300 self.feedback_fn("* memory transfer complete")
8303 if (utils.TimeoutExpired(last_feedback,
8304 self._MIGRATION_FEEDBACK_INTERVAL) and
8305 ms.transferred_ram is not None):
8306 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8307 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8308 last_feedback = time.time()
8310 time.sleep(self._MIGRATION_POLL_INTERVAL)
8312 result = self.rpc.call_instance_finalize_migration_src(source_node,
8316 msg = result.fail_msg
8318 logging.error("Instance migration succeeded, but finalization failed"
8319 " on the source node: %s", msg)
8320 raise errors.OpExecError("Could not finalize instance migration: %s" %
8323 instance.primary_node = target_node
8325 # distribute new instance config to the other nodes
8326 self.cfg.Update(instance, self.feedback_fn)
8328 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8332 msg = result.fail_msg
8334 logging.error("Instance migration succeeded, but finalization failed"
8335 " on the target node: %s", msg)
8336 raise errors.OpExecError("Could not finalize instance migration: %s" %
8339 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8340 self._EnsureSecondary(source_node)
8341 self._WaitUntilSync()
8342 self._GoStandalone()
8343 self._GoReconnect(False)
8344 self._WaitUntilSync()
8346 # If the instance's disk template is `rbd' and there was a successful
8347 # migration, unmap the device from the source node.
8348 if self.instance.disk_template == constants.DT_RBD:
8349 disks = _ExpandCheckDisks(instance, instance.disks)
8350 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8352 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8353 msg = result.fail_msg
8355 logging.error("Migration was successful, but couldn't unmap the"
8356 " block device %s on source node %s: %s",
8357 disk.iv_name, source_node, msg)
8358 logging.error("You need to unmap the device %s manually on %s",
8359 disk.iv_name, source_node)
8361 self.feedback_fn("* done")
8363 def _ExecFailover(self):
8364 """Failover an instance.
8366 The failover is done by shutting it down on its present node and
8367 starting it on the secondary.
8370 instance = self.instance
8371 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8373 source_node = instance.primary_node
8374 target_node = self.target_node
8376 if instance.admin_state == constants.ADMINST_UP:
8377 self.feedback_fn("* checking disk consistency between source and target")
8378 for dev in instance.disks:
8379 # for drbd, these are drbd over lvm
8380 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8381 if primary_node.offline:
8382 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8384 (primary_node.name, dev.iv_name, target_node))
8385 elif not self.ignore_consistency:
8386 raise errors.OpExecError("Disk %s is degraded on target node,"
8387 " aborting failover" % dev.iv_name)
8389 self.feedback_fn("* not checking disk consistency as instance is not"
8392 self.feedback_fn("* shutting down instance on source node")
8393 logging.info("Shutting down instance %s on node %s",
8394 instance.name, source_node)
8396 result = self.rpc.call_instance_shutdown(source_node, instance,
8397 self.shutdown_timeout)
8398 msg = result.fail_msg
8400 if self.ignore_consistency or primary_node.offline:
8401 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8402 " proceeding anyway; please make sure node"
8403 " %s is down; error details: %s",
8404 instance.name, source_node, source_node, msg)
8406 raise errors.OpExecError("Could not shutdown instance %s on"
8408 (instance.name, source_node, msg))
8410 self.feedback_fn("* deactivating the instance's disks on source node")
8411 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8412 raise errors.OpExecError("Can't shut down the instance's disks")
8414 instance.primary_node = target_node
8415 # distribute new instance config to the other nodes
8416 self.cfg.Update(instance, self.feedback_fn)
8418 # Only start the instance if it's marked as up
8419 if instance.admin_state == constants.ADMINST_UP:
8420 self.feedback_fn("* activating the instance's disks on target node %s" %
8422 logging.info("Starting instance %s on node %s",
8423 instance.name, target_node)
8425 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8426 ignore_secondaries=True)
8428 _ShutdownInstanceDisks(self.lu, instance)
8429 raise errors.OpExecError("Can't activate the instance's disks")
8431 self.feedback_fn("* starting the instance on the target node %s" %
8433 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8435 msg = result.fail_msg
8437 _ShutdownInstanceDisks(self.lu, instance)
8438 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8439 (instance.name, target_node, msg))
8441 def Exec(self, feedback_fn):
8442 """Perform the migration.
8445 self.feedback_fn = feedback_fn
8446 self.source_node = self.instance.primary_node
8448 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8449 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8450 self.target_node = self.instance.secondary_nodes[0]
8451 # Otherwise self.target_node has been populated either
8452 # directly, or through an iallocator.
8454 self.all_nodes = [self.source_node, self.target_node]
8455 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8456 in self.cfg.GetMultiNodeInfo(self.all_nodes))
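# nodes_ip maps each node involved in the operation to its secondary IP;
# these addresses are used by the DRBD network RPCs (_GoStandalone,
# _GoReconnect) and by the accept/migrate calls above.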
8459 feedback_fn("Failover instance %s" % self.instance.name)
8460 self._ExecFailover()
8462 feedback_fn("Migrating instance %s" % self.instance.name)
8465 return self._ExecCleanup()
8467 return self._ExecMigration()
8470 def _CreateBlockDev(lu, node, instance, device, force_create,
8472 """Create a tree of block devices on a given node.
8474 If this device type has to be created on secondaries, create it and all its children.
8477 If not, just recurse to its children, keeping the same 'force' value.
8479 @param lu: the lu on whose behalf we execute
8480 @param node: the node on which to create the device
8481 @type instance: L{objects.Instance}
8482 @param instance: the instance which owns the device
8483 @type device: L{objects.Disk}
8484 @param device: the device to create
8485 @type force_create: boolean
8486 @param force_create: whether to force creation of this device; this
8487 will be changed to True whenever we find a device which has
8488 CreateOnSecondary() attribute
8489 @param info: the extra 'metadata' we should attach to the device
8490 (this will be represented as a LVM tag)
8491 @type force_open: boolean
8492 @param force_open: this parameter will be passed to the
8493 L{backend.BlockdevCreate} function where it specifies
8494 whether we run on primary or not, and it affects both
8495 the child assembly and the device's own Open() execution
8498 if device.CreateOnSecondary():
8502 for child in device.children:
8503 _CreateBlockDev(lu, node, instance, child, force_create,
8506 if not force_create:
8509 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8512 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8513 """Create a single block device on a given node.
8515 This will not recurse over children of the device, so they must be
8518 @param lu: the lu on whose behalf we execute
8519 @param node: the node on which to create the device
8520 @type instance: L{objects.Instance}
8521 @param instance: the instance which owns the device
8522 @type device: L{objects.Disk}
8523 @param device: the device to create
8524 @param info: the extra 'metadata' we should attach to the device
8525 (this will be represented as a LVM tag)
8526 @type force_open: boolean
8527 @param force_open: this parameter will be passed to the
8528 L{backend.BlockdevCreate} function where it specifies
8529 whether we run on primary or not, and it affects both
8530 the child assembly and the device's own Open() execution
8533 lu.cfg.SetDiskID(device, node)
8534 result = lu.rpc.call_blockdev_create(node, device, device.size,
8535 instance.name, force_open, info)
8536 result.Raise("Can't create block device %s on"
8537 " node %s for instance %s" % (device, node, instance.name))
8538 if device.physical_id is None:
8539 device.physical_id = result.payload
8542 def _GenerateUniqueNames(lu, exts):
8543 """Generate a suitable LV name.
8545 This will generate a logical volume name for the given instance.
8550 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8551 results.append("%s%s" % (new_id, val))
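# Illustrative: with exts == [".disk0", ".disk1"] this yields names of the
# form "<unique-id>.disk0" and "<unique-id>.disk1", where <unique-id> comes
# from cfg.GenerateUniqueID.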
8555 def _ComputeLDParams(disk_template, disk_params):
8556 """Computes Logical Disk parameters from Disk Template parameters.
8558 @type disk_template: string
8559 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8560 @type disk_params: dict
8561 @param disk_params: disk template parameters; dict(template_name -> parameters)
8563 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8564 contains the LD parameters of the node. The tree is flattened in-order.
8567 if disk_template not in constants.DISK_TEMPLATES:
8568 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8571 dt_params = disk_params[disk_template]
8572 if disk_template == constants.DT_DRBD8:
8574 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8575 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8576 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8577 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8578 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8579 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8580 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8581 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8582 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8583 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8584 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8585 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8589 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8592 result.append(drbd_params)
8596 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8599 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8601 result.append(data_params)
8605 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8608 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8610 result.append(meta_params)
8612 elif (disk_template == constants.DT_FILE or
8613 disk_template == constants.DT_SHARED_FILE):
8614 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8616 elif disk_template == constants.DT_PLAIN:
8618 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8621 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8623 result.append(params)
8625 elif disk_template == constants.DT_BLOCK:
8626 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8628 elif disk_template == constants.DT_RBD:
8630 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8633 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8635 result.append(params)
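# Illustrative shapes of the result, based on the branches above: for
# DT_DRBD8 the list is [drbd_params, data_lv_params, meta_lv_params],
# matching the in-order flattening of a DRBD8 device over its data and meta
# LVs; for DT_PLAIN it is a single dict of LV parameters, and for DT_RBD a
# single dict carrying the pool name.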
8640 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8641 iv_name, p_minor, s_minor, drbd_params, data_params,
8643 """Generate a drbd8 device complete with its children.
8646 assert len(vgnames) == len(names) == 2
8647 port = lu.cfg.AllocatePort()
8648 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8650 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8651 logical_id=(vgnames[0], names[0]),
8653 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8654 logical_id=(vgnames[1], names[1]),
8656 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8657 logical_id=(primary, secondary, port,
8660 children=[dev_data, dev_meta],
8661 iv_name=iv_name, params=drbd_params)
8665 def _GenerateDiskTemplate(lu, template_name,
8666 instance_name, primary_node,
8667 secondary_nodes, disk_info,
8668 file_storage_dir, file_driver,
8669 base_index, feedback_fn, disk_params):
8670 """Generate the entire disk layout for a given template type.
8673 #TODO: compute space requirements
8675 vgname = lu.cfg.GetVGName()
8676 disk_count = len(disk_info)
8678 ld_params = _ComputeLDParams(template_name, disk_params)
8679 if template_name == constants.DT_DISKLESS:
8681 elif template_name == constants.DT_PLAIN:
8683 raise errors.ProgrammerError("Wrong template configuration")
8685 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8686 for i in range(disk_count)])
8687 for idx, disk in enumerate(disk_info):
8688 disk_index = idx + base_index
8689 vg = disk.get(constants.IDISK_VG, vgname)
8690 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8691 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8692 size=disk[constants.IDISK_SIZE],
8693 logical_id=(vg, names[idx]),
8694 iv_name="disk/%d" % disk_index,
8695 mode=disk[constants.IDISK_MODE],
8696 params=ld_params[0])
8697 disks.append(disk_dev)
8698 elif template_name == constants.DT_DRBD8:
8699 drbd_params, data_params, meta_params = ld_params
8700 if len(secondary_nodes) != 1:
8701 raise errors.ProgrammerError("Wrong template configuration")
8702 remote_node = secondary_nodes[0]
8703 minors = lu.cfg.AllocateDRBDMinor(
8704 [primary_node, remote_node] * len(disk_info), instance_name)
8707 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8708 for i in range(disk_count)]):
8709 names.append(lv_prefix + "_data")
8710 names.append(lv_prefix + "_meta")
8711 for idx, disk in enumerate(disk_info):
8712 disk_index = idx + base_index
8713 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8714 data_vg = disk.get(constants.IDISK_VG, vgname)
8715 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8716 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8717 disk[constants.IDISK_SIZE],
8719 names[idx * 2:idx * 2 + 2],
8720 "disk/%d" % disk_index,
8721 minors[idx * 2], minors[idx * 2 + 1],
8722 drbd_params, data_params, meta_params)
8723 disk_dev.mode = disk[constants.IDISK_MODE]
8724 disks.append(disk_dev)
8725 elif template_name == constants.DT_FILE:
8727 raise errors.ProgrammerError("Wrong template configuration")
8729 opcodes.RequireFileStorage()
8731 for idx, disk in enumerate(disk_info):
8732 disk_index = idx + base_index
8733 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8734 size=disk[constants.IDISK_SIZE],
8735 iv_name="disk/%d" % disk_index,
8736 logical_id=(file_driver,
8737 "%s/disk%d" % (file_storage_dir,
8739 mode=disk[constants.IDISK_MODE],
8740 params=ld_params[0])
8741 disks.append(disk_dev)
8742 elif template_name == constants.DT_SHARED_FILE:
8744 raise errors.ProgrammerError("Wrong template configuration")
8746 opcodes.RequireSharedFileStorage()
8748 for idx, disk in enumerate(disk_info):
8749 disk_index = idx + base_index
8750 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8751 size=disk[constants.IDISK_SIZE],
8752 iv_name="disk/%d" % disk_index,
8753 logical_id=(file_driver,
8754 "%s/disk%d" % (file_storage_dir,
8756 mode=disk[constants.IDISK_MODE],
8757 params=ld_params[0])
8758 disks.append(disk_dev)
8759 elif template_name == constants.DT_BLOCK:
8761 raise errors.ProgrammerError("Wrong template configuration")
8763 for idx, disk in enumerate(disk_info):
8764 disk_index = idx + base_index
8765 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8766 size=disk[constants.IDISK_SIZE],
8767 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8768 disk[constants.IDISK_ADOPT]),
8769 iv_name="disk/%d" % disk_index,
8770 mode=disk[constants.IDISK_MODE],
8771 params=ld_params[0])
8772 disks.append(disk_dev)
8773 elif template_name == constants.DT_RBD:
8775 raise errors.ProgrammerError("Wrong template configuration")
8777 names = _GenerateUniqueNames(lu, [".rbd.disk%d" % (base_index + i)
8778 for i in range(disk_count)])
8780 for idx, disk in enumerate(disk_info):
8781 disk_index = idx + base_index
8782 disk_dev = objects.Disk(dev_type=constants.LD_RBD,
8783 size=disk[constants.IDISK_SIZE],
8784 logical_id=("rbd", names[idx]),
8785 iv_name="disk/%d" % disk_index,
8786 mode=disk[constants.IDISK_MODE],
8787 params=ld_params[0])
8788 disks.append(disk_dev)
8791 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8795 def _GetInstanceInfoText(instance):
8796 Compute the text that should be added to the disk's metadata.
8799 return "originstname+%s" % instance.name
8802 def _CalcEta(time_taken, written, total_size):
8803 """Calculates the ETA based on size written and total size.
8805 @param time_taken: The time taken so far
8806 @param written: amount written so far
8807 @param total_size: The total size of data to be written
8808 @return: The remaining time in seconds
8811 avg_time = time_taken / float(written)
8812 return (total_size - written) * avg_time
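# Worked example (hypothetical numbers): if 512 MiB out of 2048 MiB were
# written in 30 seconds, the average cost is 30/512 seconds per MiB, so the
# remaining 1536 MiB give an ETA of (2048 - 512) * 30 / 512.0 = 90 seconds.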
8815 def _WipeDisks(lu, instance):
8816 """Wipes instance disks.
8818 @type lu: L{LogicalUnit}
8819 @param lu: the logical unit on whose behalf we execute
8820 @type instance: L{objects.Instance}
8821 @param instance: the instance whose disks we should create
8822 @return: the success of the wipe
8825 node = instance.primary_node
8827 for device in instance.disks:
8828 lu.cfg.SetDiskID(device, node)
8830 logging.info("Pause sync of instance %s disks", instance.name)
8831 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8833 for idx, success in enumerate(result.payload):
8835 logging.warn("pause-sync of instance %s for disk %d failed",
8839 for idx, device in enumerate(instance.disks):
8840 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
8841 # at most MAX_WIPE_CHUNK
8842 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8843 constants.MIN_WIPE_CHUNK_PERCENT)
8844 # we _must_ make this an int, otherwise rounding errors will occur
8846 wipe_chunk_size = int(wipe_chunk_size)
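# Example (assuming, purely for illustration, MIN_WIPE_CHUNK_PERCENT == 10
# and MAX_WIPE_CHUNK == 1024): a 4096 MiB disk would use
# min(1024, 4096 / 100.0 * 10) == 409.6, i.e. 409 MiB chunks after int().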
8848 lu.LogInfo("* Wiping disk %d", idx)
8849 logging.info("Wiping disk %d for instance %s, node %s using"
8850 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8855 start_time = time.time()
8857 while offset < size:
8858 wipe_size = min(wipe_chunk_size, size - offset)
8859 logging.debug("Wiping disk %d, offset %s, chunk %s",
8860 idx, offset, wipe_size)
8861 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8862 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8863 (idx, offset, wipe_size))
8866 if now - last_output >= 60:
8867 eta = _CalcEta(now - start_time, offset, size)
8868 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8869 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8872 logging.info("Resume sync of instance %s disks", instance.name)
8874 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8876 for idx, success in enumerate(result.payload):
8878 lu.LogWarning("Resume sync of disk %d failed, please have a"
8879 " look at the status and troubleshoot the issue", idx)
8880 logging.warn("resume-sync of instance %s for disk %d failed",
8884 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8885 """Create all disks for an instance.
8887 This abstracts away some work from AddInstance.
8889 @type lu: L{LogicalUnit}
8890 @param lu: the logical unit on whose behalf we execute
8891 @type instance: L{objects.Instance}
8892 @param instance: the instance whose disks we should create
8894 @param to_skip: list of indices to skip
8895 @type target_node: string
8896 @param target_node: if passed, overrides the target node for creation
8898 @return: the success of the creation
8901 info = _GetInstanceInfoText(instance)
8902 if target_node is None:
8903 pnode = instance.primary_node
8904 all_nodes = instance.all_nodes
8909 if instance.disk_template in constants.DTS_FILEBASED:
8910 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8911 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8913 result.Raise("Failed to create directory '%s' on"
8914 " node %s" % (file_storage_dir, pnode))
8916 # Note: this needs to be kept in sync with adding of disks in
8917 # LUInstanceSetParams
8918 for idx, device in enumerate(instance.disks):
8919 if to_skip and idx in to_skip:
8921 logging.info("Creating volume %s for instance %s",
8922 device.iv_name, instance.name)
8924 for node in all_nodes:
8925 f_create = node == pnode
8926 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8929 def _RemoveDisks(lu, instance, target_node=None):
8930 """Remove all disks for an instance.
8932 This abstracts away some work from `AddInstance()` and
8933 `RemoveInstance()`. Note that in case some of the devices couldn't
8934 be removed, the removal will continue with the other ones (compare
8935 with `_CreateDisks()`).
8937 @type lu: L{LogicalUnit}
8938 @param lu: the logical unit on whose behalf we execute
8939 @type instance: L{objects.Instance}
8940 @param instance: the instance whose disks we should remove
8941 @type target_node: string
8942 @param target_node: used to override the node on which to remove the disks
8944 @return: the success of the removal
8947 logging.info("Removing block devices for instance %s", instance.name)
8950 for device in instance.disks:
8952 edata = [(target_node, device)]
8954 edata = device.ComputeNodeTree(instance.primary_node)
8955 for node, disk in edata:
8956 lu.cfg.SetDiskID(disk, node)
8957 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8959 lu.LogWarning("Could not remove block device %s on node %s,"
8960 " continuing anyway: %s", device.iv_name, node, msg)
8963 # if this is a DRBD disk, return its port to the pool
8964 if device.dev_type in constants.LDS_DRBD:
8965 tcp_port = device.logical_id[2]
8966 lu.cfg.AddTcpUdpPort(tcp_port)
8968 if instance.disk_template == constants.DT_FILE:
8969 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8973 tgt = instance.primary_node
8974 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8976 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8977 file_storage_dir, instance.primary_node, result.fail_msg)
8983 def _ComputeDiskSizePerVG(disk_template, disks):
8984 """Compute disk size requirements in the volume group
8987 def _compute(disks, payload):
8988 """Universal algorithm.
8993 vgs[disk[constants.IDISK_VG]] = \
8994 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8998 # Required free disk space as a function of disk and swap space
9000 constants.DT_DISKLESS: {},
9001 constants.DT_PLAIN: _compute(disks, 0),
9002 # 128 MB are added for drbd metadata for each disk
9003 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9004 constants.DT_FILE: {},
9005 constants.DT_SHARED_FILE: {},
9008 if disk_template not in req_size_dict:
9009 raise errors.ProgrammerError("Disk template '%s' size requirement"
9010 " is unknown" % disk_template)
9012 return req_size_dict[disk_template]
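# Illustrative example (hypothetical VG names): for DT_DRBD8 and
# disks == [{IDISK_VG: "xenvg", IDISK_SIZE: 1024},
#           {IDISK_VG: "fastvg", IDISK_SIZE: 512}]
# the result would be {"xenvg": 1024 + DRBD_META_SIZE,
#                      "fastvg": 512 + DRBD_META_SIZE}.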
9015 def _ComputeDiskSize(disk_template, disks):
9016 """Compute disk size requirements in the volume group
9019 # Required free disk space as a function of disk and swap space
9021 constants.DT_DISKLESS: None,
9022 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9023 # 128 MB are added for drbd metadata for each disk
9025 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9026 constants.DT_FILE: None,
9027 constants.DT_SHARED_FILE: 0,
9028 constants.DT_BLOCK: 0,
9029 constants.DT_RBD: 0,
9032 if disk_template not in req_size_dict:
9033 raise errors.ProgrammerError("Disk template '%s' size requirement"
9034 " is unknown" % disk_template)
9036 return req_size_dict[disk_template]
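# Illustrative example: two disks of 1024 MiB each require 2048 MiB for
# DT_PLAIN and 2048 + 2 * DRBD_META_SIZE MiB for DT_DRBD8, while the
# file-based, block and RBD templates consume no volume group space here.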
9039 def _FilterVmNodes(lu, nodenames):
9040 """Filters out non-vm_capable nodes from a list.
9042 @type lu: L{LogicalUnit}
9043 @param lu: the logical unit for which we check
9044 @type nodenames: list
9045 @param nodenames: the list of nodes on which we should check
9047 @return: the list of vm-capable nodes
9050 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
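# note: despite its name, vm_nodes holds the non-vm_capable node names
# returned by GetNonVmCapableNodeList; the comprehension below keeps only
# the names not in that set, i.e. the vm_capable nodes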
9051 return [name for name in nodenames if name not in vm_nodes]
9054 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9055 """Hypervisor parameter validation.
9057 This function abstract the hypervisor parameter validation to be
9058 used in both instance create and instance modify.
9060 @type lu: L{LogicalUnit}
9061 @param lu: the logical unit for which we check
9062 @type nodenames: list
9063 @param nodenames: the list of nodes on which we should check
9064 @type hvname: string
9065 @param hvname: the name of the hypervisor we should use
9066 @type hvparams: dict
9067 @param hvparams: the parameters which we need to check
9068 @raise errors.OpPrereqError: if the parameters are not valid
9071 nodenames = _FilterVmNodes(lu, nodenames)
9073 cluster = lu.cfg.GetClusterInfo()
9074 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
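# hvfull is the cluster-level parameter dict for this hypervisor overlaid
# with the new values, so the nodes validate the effective configuration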
9076 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9077 for node in nodenames:
9081 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9084 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9085 """OS parameters validation.
9087 @type lu: L{LogicalUnit}
9088 @param lu: the logical unit for which we check
9089 @type required: boolean
9090 @param required: whether the validation should fail if the OS is not found
9092 @type nodenames: list
9093 @param nodenames: the list of nodes on which we should check
9094 @type osname: string
9095 @param osname: the name of the OS we should use
9096 @type osparams: dict
9097 @param osparams: the parameters which we need to check
9098 @raise errors.OpPrereqError: if the parameters are not valid
9101 nodenames = _FilterVmNodes(lu, nodenames)
9102 result = lu.rpc.call_os_validate(nodenames, required, osname,
9103 [constants.OS_VALIDATE_PARAMETERS],
9105 for node, nres in result.items():
9106 # we don't check for offline cases since this should be run only
9107 # against the master node and/or an instance's nodes
9108 nres.Raise("OS Parameters validation failed on node %s" % node)
9109 if not nres.payload:
9110 lu.LogInfo("OS %s not found on node %s, validation skipped",
9114 class LUInstanceCreate(LogicalUnit):
9115 """Create an instance.
9118 HPATH = "instance-add"
9119 HTYPE = constants.HTYPE_INSTANCE
9122 def CheckArguments(self):
9126 # do not require name_check to ease forward/backward compatibility
9128 if self.op.no_install and self.op.start:
9129 self.LogInfo("No-installation mode selected, disabling startup")
9130 self.op.start = False
9131 # validate/normalize the instance name
9132 self.op.instance_name = \
9133 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9135 if self.op.ip_check and not self.op.name_check:
9136 # TODO: make the ip check more flexible and not depend on the name check
9137 raise errors.OpPrereqError("Cannot do IP address check without a name"
9138 " check", errors.ECODE_INVAL)
9140 # check nics' parameter names
9141 for nic in self.op.nics:
9142 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9144 # check disks. parameter names and consistent adopt/no-adopt strategy
9145 has_adopt = has_no_adopt = False
9146 for disk in self.op.disks:
9147 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9148 if constants.IDISK_ADOPT in disk:
9152 if has_adopt and has_no_adopt:
9153 raise errors.OpPrereqError("Either all disks are adopted or none is",
9156 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9157 raise errors.OpPrereqError("Disk adoption is not supported for the"
9158 " '%s' disk template" %
9159 self.op.disk_template,
9161 if self.op.iallocator is not None:
9162 raise errors.OpPrereqError("Disk adoption not allowed with an"
9163 " iallocator script", errors.ECODE_INVAL)
9164 if self.op.mode == constants.INSTANCE_IMPORT:
9165 raise errors.OpPrereqError("Disk adoption not allowed for"
9166 " instance import", errors.ECODE_INVAL)
9168 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9169 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9170 " but no 'adopt' parameter given" %
9171 self.op.disk_template,
9174 self.adopt_disks = has_adopt
9176 # instance name verification
9177 if self.op.name_check:
9178 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9179 self.op.instance_name = self.hostname1.name
9180 # used in CheckPrereq for ip ping check
9181 self.check_ip = self.hostname1.ip
9183 self.check_ip = None
9185 # file storage checks
9186 if (self.op.file_driver and
9187 not self.op.file_driver in constants.FILE_DRIVER):
9188 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9189 self.op.file_driver, errors.ECODE_INVAL)
9191 if self.op.disk_template == constants.DT_FILE:
9192 opcodes.RequireFileStorage()
9193 elif self.op.disk_template == constants.DT_SHARED_FILE:
9194 opcodes.RequireSharedFileStorage()
9196 ### Node/iallocator related checks
9197 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9199 if self.op.pnode is not None:
9200 if self.op.disk_template in constants.DTS_INT_MIRROR:
9201 if self.op.snode is None:
9202 raise errors.OpPrereqError("The networked disk templates need"
9203 " a mirror node", errors.ECODE_INVAL)
9205 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9207 self.op.snode = None
9209 self._cds = _GetClusterDomainSecret()
9211 if self.op.mode == constants.INSTANCE_IMPORT:
9212 # On import force_variant must be True, because if we forced it at
9213 # initial install, our only chance when importing it back is that it works again
9215 self.op.force_variant = True
9217 if self.op.no_install:
9218 self.LogInfo("No-installation mode has no effect during import")
9220 elif self.op.mode == constants.INSTANCE_CREATE:
9221 if self.op.os_type is None:
9222 raise errors.OpPrereqError("No guest OS specified",
9224 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9225 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9226 " installation" % self.op.os_type,
9228 if self.op.disk_template is None:
9229 raise errors.OpPrereqError("No disk template specified",
9232 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9233 # Check handshake to ensure both clusters have the same domain secret
9234 src_handshake = self.op.source_handshake
9235 if not src_handshake:
9236 raise errors.OpPrereqError("Missing source handshake",
9239 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9242 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9245 # Load and check source CA
9246 self.source_x509_ca_pem = self.op.source_x509_ca
9247 if not self.source_x509_ca_pem:
9248 raise errors.OpPrereqError("Missing source X509 CA",
9252 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9254 except OpenSSL.crypto.Error, err:
9255 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9256 (err, ), errors.ECODE_INVAL)
9258 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9259 if errcode is not None:
9260 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9263 self.source_x509_ca = cert
9265 src_instance_name = self.op.source_instance_name
9266 if not src_instance_name:
9267 raise errors.OpPrereqError("Missing source instance name",
9270 self.source_instance_name = \
9271 netutils.GetHostname(name=src_instance_name).name
9274 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9275 self.op.mode, errors.ECODE_INVAL)
9277 def ExpandNames(self):
9278 """ExpandNames for CreateInstance.
9280 Figure out the right locks for instance creation.
9283 self.needed_locks = {}
9285 instance_name = self.op.instance_name
9286 # this is just a preventive check, but someone might still add this
9287 # instance in the meantime, and creation will fail at lock-add time
9288 if instance_name in self.cfg.GetInstanceList():
9289 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9290 instance_name, errors.ECODE_EXISTS)
9292 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9294 if self.op.iallocator:
9295 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9296 # specifying a group on instance creation and then selecting nodes from that group
9298 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9299 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9301 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9302 nodelist = [self.op.pnode]
9303 if self.op.snode is not None:
9304 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9305 nodelist.append(self.op.snode)
9306 self.needed_locks[locking.LEVEL_NODE] = nodelist
9307 # Lock resources of instance's primary and secondary nodes (copy to
9308 # prevent accidental modification)
9309 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9311 # in case of import, lock the source node too
9312 if self.op.mode == constants.INSTANCE_IMPORT:
9313 src_node = self.op.src_node
9314 src_path = self.op.src_path
9316 if src_path is None:
9317 self.op.src_path = src_path = self.op.instance_name
9319 if src_node is None:
9320 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9321 self.op.src_node = None
9322 if os.path.isabs(src_path):
9323 raise errors.OpPrereqError("Importing an instance from a path"
9324 " requires a source node option",
9327 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9328 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9329 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9330 if not os.path.isabs(src_path):
9331 self.op.src_path = src_path = \
9332 utils.PathJoin(constants.EXPORT_DIR, src_path)
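# Illustrative sketch only: after ExpandNames the lock declarations end up in
# one of two shapes (node names below are hypothetical):
#
#   with an iallocator:  needed_locks[locking.LEVEL_NODE] is locking.ALL_SET
#   with explicit nodes: needed_locks[locking.LEVEL_NODE] == ["node1", "node2"]
#                        needed_locks[locking.LEVEL_NODE_RES] == ["node1", "node2"]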
9334 def _RunAllocator(self):
9335 """Run the allocator based on input opcode.
9338 nics = [n.ToDict() for n in self.nics]
9339 ial = IAllocator(self.cfg, self.rpc,
9340 mode=constants.IALLOCATOR_MODE_ALLOC,
9341 name=self.op.instance_name,
9342 disk_template=self.op.disk_template,
9345 vcpus=self.be_full[constants.BE_VCPUS],
9346 memory=self.be_full[constants.BE_MAXMEM],
9349 hypervisor=self.op.hypervisor,
9352 ial.Run(self.op.iallocator)
9355 raise errors.OpPrereqError("Can't compute nodes using"
9356 " iallocator '%s': %s" %
9357 (self.op.iallocator, ial.info),
9359 if len(ial.result) != ial.required_nodes:
9360 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9361 " of nodes (%s), required %s" %
9362 (self.op.iallocator, len(ial.result),
9363 ial.required_nodes), errors.ECODE_FAULT)
9364 self.op.pnode = ial.result[0]
9365 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9366 self.op.instance_name, self.op.iallocator,
9367 utils.CommaJoin(ial.result))
9368 if ial.required_nodes == 2:
9369 self.op.snode = ial.result[1]
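# Illustrative sketch only: for a two-node (DRBD) allocation the iallocator is
# expected to answer with exactly two names, consumed as above, e.g.
# (hypothetical values):
#
#   ial.success == True, ial.required_nodes == 2
#   ial.result == ["node3.example.com", "node7.example.com"]
#   -> self.op.pnode == "node3.example.com", self.op.snode == "node7.example.com"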
9371 def BuildHooksEnv(self):
9374 This runs on master, primary and secondary nodes of the instance.
9378 "ADD_MODE": self.op.mode,
9380 if self.op.mode == constants.INSTANCE_IMPORT:
9381 env["SRC_NODE"] = self.op.src_node
9382 env["SRC_PATH"] = self.op.src_path
9383 env["SRC_IMAGES"] = self.src_images
9385 env.update(_BuildInstanceHookEnv(
9386 name=self.op.instance_name,
9387 primary_node=self.op.pnode,
9388 secondary_nodes=self.secondaries,
9389 status=self.op.start,
9390 os_type=self.op.os_type,
9391 minmem=self.be_full[constants.BE_MINMEM],
9392 maxmem=self.be_full[constants.BE_MAXMEM],
9393 vcpus=self.be_full[constants.BE_VCPUS],
9394 nics=_NICListToTuple(self, self.nics),
9395 disk_template=self.op.disk_template,
9396 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9397 for d in self.disks],
9400 hypervisor_name=self.op.hypervisor,
9406 def BuildHooksNodes(self):
9407 """Build hooks nodes.
9410 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9413 def _ReadExportInfo(self):
9414 """Reads the export information from disk.
9416 It will override the opcode source node and path with the actual
9417 information, if these two were not specified before.
9419 @return: the export information
9422 assert self.op.mode == constants.INSTANCE_IMPORT
9424 src_node = self.op.src_node
9425 src_path = self.op.src_path
9427 if src_node is None:
9428 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9429 exp_list = self.rpc.call_export_list(locked_nodes)
9431 for node in exp_list:
9432 if exp_list[node].fail_msg:
9434 if src_path in exp_list[node].payload:
9436 self.op.src_node = src_node = node
9437 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9441 raise errors.OpPrereqError("No export found for relative path %s" %
9442 src_path, errors.ECODE_INVAL)
9444 _CheckNodeOnline(self, src_node)
9445 result = self.rpc.call_export_info(src_node, src_path)
9446 result.Raise("No export or invalid export found in dir %s" % src_path)
9448 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9449 if not export_info.has_section(constants.INISECT_EXP):
9450 raise errors.ProgrammerError("Corrupted export config",
9451 errors.ECODE_ENVIRON)
9453 ei_version = export_info.get(constants.INISECT_EXP, "version")
9454 if (int(ei_version) != constants.EXPORT_VERSION):
9455 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9456 (ei_version, constants.EXPORT_VERSION),
9457 errors.ECODE_ENVIRON)
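# Illustrative sketch only: the export data parsed above is ConfigParser-style
# INI text, one section per constants.INISECT_* value; hypothetical content
# (option names follow the einfo.get() calls in _ReadExportParams below):
#
#   [<INISECT_EXP>]
#   version = 0
#   os = debootstrap+default
#
#   [<INISECT_INS>]
#   disk_template = drbd
#   disk0_size = 10240
#   nic0_mac = aa:00:00:12:34:56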
9460 def _ReadExportParams(self, einfo):
9461 """Use export parameters as defaults.
9463 In case the opcode doesn't specify (as in override) some instance
9464 parameters, then try to use them from the export information, if that declares them.
9468 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9470 if self.op.disk_template is None:
9471 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9472 self.op.disk_template = einfo.get(constants.INISECT_INS,
9474 if self.op.disk_template not in constants.DISK_TEMPLATES:
9475 raise errors.OpPrereqError("Disk template specified in configuration"
9476 " file is not one of the allowed values:"
9477 " %s" % " ".join(constants.DISK_TEMPLATES))
9479 raise errors.OpPrereqError("No disk template specified and the export"
9480 " is missing the disk_template information",
9483 if not self.op.disks:
9485 # TODO: import the disk iv_name too
9486 for idx in range(constants.MAX_DISKS):
9487 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9488 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9489 disks.append({constants.IDISK_SIZE: disk_sz})
9490 self.op.disks = disks
9491 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9492 raise errors.OpPrereqError("No disk info specified and the export"
9493 " is missing the disk information",
9496 if not self.op.nics:
9498 for idx in range(constants.MAX_NICS):
9499 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9501 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9502 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9509 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9510 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9512 if (self.op.hypervisor is None and
9513 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9514 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9516 if einfo.has_section(constants.INISECT_HYP):
9517 # use the export parameters but do not override the ones
9518 # specified by the user
9519 for name, value in einfo.items(constants.INISECT_HYP):
9520 if name not in self.op.hvparams:
9521 self.op.hvparams[name] = value
9523 if einfo.has_section(constants.INISECT_BEP):
9524 # use the parameters, without overriding
9525 for name, value in einfo.items(constants.INISECT_BEP):
9526 if name not in self.op.beparams:
9527 self.op.beparams[name] = value
9528 # Compatibility for the old "memory" be param
9529 if name == constants.BE_MEMORY:
9530 if constants.BE_MAXMEM not in self.op.beparams:
9531 self.op.beparams[constants.BE_MAXMEM] = value
9532 if constants.BE_MINMEM not in self.op.beparams:
9533 self.op.beparams[constants.BE_MINMEM] = value
9535 # try to read the parameters old style, from the main section
9536 for name in constants.BES_PARAMETERS:
9537 if (name not in self.op.beparams and
9538 einfo.has_option(constants.INISECT_INS, name)):
9539 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9541 if einfo.has_section(constants.INISECT_OSP):
9542 # use the parameters, without overriding
9543 for name, value in einfo.items(constants.INISECT_OSP):
9544 if name not in self.op.osparams:
9545 self.op.osparams[name] = value
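# Illustrative sketch only: the precedence implemented above is "opcode wins,
# export fills the blanks", e.g. for hypervisor parameters (hypothetical
# values):
#
#   export hypervisor section:  root_path = /dev/vda1, kernel_path = /boot/vmlinuz
#   opcode hvparams:            {"root_path": "/dev/vda2"}
#   resulting op.hvparams:      {"root_path": "/dev/vda2",
#                                "kernel_path": "/boot/vmlinuz"}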
9547 def _RevertToDefaults(self, cluster):
9548 """Revert the instance parameters to the default values.
9552 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9553 for name in self.op.hvparams.keys():
9554 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9555 del self.op.hvparams[name]
9557 be_defs = cluster.SimpleFillBE({})
9558 for name in self.op.beparams.keys():
9559 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9560 del self.op.beparams[name]
9562 nic_defs = cluster.SimpleFillNIC({})
9563 for nic in self.op.nics:
9564 for name in constants.NICS_PARAMETERS:
9565 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9568 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9569 for name in self.op.osparams.keys():
9570 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9571 del self.op.osparams[name]
9573 def _CalculateFileStorageDir(self):
9574 """Calculate final instance file storage dir.
9577 # file storage dir calculation/check
9578 self.instance_file_storage_dir = None
9579 if self.op.disk_template in constants.DTS_FILEBASED:
9580 # build the full file storage dir path
9583 if self.op.disk_template == constants.DT_SHARED_FILE:
9584 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9586 get_fsd_fn = self.cfg.GetFileStorageDir
9588 cfg_storagedir = get_fsd_fn()
9589 if not cfg_storagedir:
9590 raise errors.OpPrereqError("Cluster file storage dir not defined")
9591 joinargs.append(cfg_storagedir)
9593 if self.op.file_storage_dir is not None:
9594 joinargs.append(self.op.file_storage_dir)
9596 joinargs.append(self.op.instance_name)
9598 # pylint: disable=W0142
9599 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
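# Illustrative sketch only: for file-based disk templates the final directory
# is the cluster-level storage dir, the optional opcode-supplied subdirectory
# and the instance name, joined in that order (paths are hypothetical):
#
#   utils.PathJoin("/srv/ganeti/file-storage", "web", "inst1.example.com")
#   -> "/srv/ganeti/file-storage/web/inst1.example.com"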
9601 def CheckPrereq(self): # pylint: disable=R0914
9602 """Check prerequisites.
9605 self._CalculateFileStorageDir()
9607 if self.op.mode == constants.INSTANCE_IMPORT:
9608 export_info = self._ReadExportInfo()
9609 self._ReadExportParams(export_info)
9611 if (not self.cfg.GetVGName() and
9612 self.op.disk_template not in constants.DTS_NOT_LVM):
9613 raise errors.OpPrereqError("Cluster does not support lvm-based"
9614 " instances", errors.ECODE_STATE)
9616 if (self.op.hypervisor is None or
9617 self.op.hypervisor == constants.VALUE_AUTO):
9618 self.op.hypervisor = self.cfg.GetHypervisorType()
9620 cluster = self.cfg.GetClusterInfo()
9621 enabled_hvs = cluster.enabled_hypervisors
9622 if self.op.hypervisor not in enabled_hvs:
9623 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9624 " cluster (%s)" % (self.op.hypervisor,
9625 ",".join(enabled_hvs)),
9628 # Check tag validity
9629 for tag in self.op.tags:
9630 objects.TaggableObject.ValidateTag(tag)
9632 # check hypervisor parameter syntax (locally)
9633 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9634 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9636 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9637 hv_type.CheckParameterSyntax(filled_hvp)
9638 self.hv_full = filled_hvp
9639 # check that we don't specify global parameters on an instance
9640 _CheckGlobalHvParams(self.op.hvparams)
9642 # fill and remember the beparams dict
9643 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9644 for param, value in self.op.beparams.iteritems():
9645 if value == constants.VALUE_AUTO:
9646 self.op.beparams[param] = default_beparams[param]
9647 objects.UpgradeBeParams(self.op.beparams)
9648 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9649 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9651 # build os parameters
9652 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9654 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
9656 if self.op.identify_defaults:
9657 self._RevertToDefaults(cluster)
9661 for idx, nic in enumerate(self.op.nics):
9662 nic_mode_req = nic.get(constants.INIC_MODE, None)
9663 nic_mode = nic_mode_req
9664 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9665 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9667 # in routed mode, for the first nic, the default ip is 'auto'
9668 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9669 default_ip_mode = constants.VALUE_AUTO
9671 default_ip_mode = constants.VALUE_NONE
9673 # ip validity checks
9674 ip = nic.get(constants.INIC_IP, default_ip_mode)
9675 if ip is None or ip.lower() == constants.VALUE_NONE:
9677 elif ip.lower() == constants.VALUE_AUTO:
9678 if not self.op.name_check:
9679 raise errors.OpPrereqError("IP address set to auto but name checks"
9680 " have been skipped",
9682 nic_ip = self.hostname1.ip
9684 if not netutils.IPAddress.IsValid(ip):
9685 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9689 # TODO: check the ip address for uniqueness
9690 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9691 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9694 # MAC address verification
9695 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9696 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9697 mac = utils.NormalizeAndValidateMac(mac)
9700 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9701 except errors.ReservationError:
9702 raise errors.OpPrereqError("MAC address %s already in use"
9703 " in cluster" % mac,
9704 errors.ECODE_NOTUNIQUE)
9706 # Build nic parameters
9707 link = nic.get(constants.INIC_LINK, None)
9708 if link == constants.VALUE_AUTO:
9709 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9712 nicparams[constants.NIC_MODE] = nic_mode
9714 nicparams[constants.NIC_LINK] = link
9716 check_params = cluster.SimpleFillNIC(nicparams)
9717 objects.NIC.CheckParameterSyntax(check_params)
9718 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
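# Illustrative sketch only: each opcode NIC dict is turned into an objects.NIC;
# nicparams keeps only the explicitly requested overrides, while the cluster
# defaults are merely used for syntax checking via SimpleFillNIC, e.g.
# (hypothetical values):
#
#   input:  {constants.INIC_MODE: "bridged", constants.INIC_MAC: "auto"}
#   output: objects.NIC(mac="aa:00:00:12:34:56", ip=None,
#                       nicparams={constants.NIC_MODE: "bridged"})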
9720 # disk checks/pre-build
9721 default_vg = self.cfg.GetVGName()
9723 for disk in self.op.disks:
9724 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9725 if mode not in constants.DISK_ACCESS_SET:
9726 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9727 mode, errors.ECODE_INVAL)
9728 size = disk.get(constants.IDISK_SIZE, None)
9730 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9733 except (TypeError, ValueError):
9734 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9737 data_vg = disk.get(constants.IDISK_VG, default_vg)
9739 constants.IDISK_SIZE: size,
9740 constants.IDISK_MODE: mode,
9741 constants.IDISK_VG: data_vg,
9743 if constants.IDISK_METAVG in disk:
9744 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9745 if constants.IDISK_ADOPT in disk:
9746 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9747 self.disks.append(new_disk)
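# Illustrative sketch only: after the loop above every entry of self.disks is a
# dict in a known shape, e.g. (hypothetical values):
#
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
#
# with IDISK_METAVG / IDISK_ADOPT added only when they were given.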
9749 if self.op.mode == constants.INSTANCE_IMPORT:
9751 for idx in range(len(self.disks)):
9752 option = "disk%d_dump" % idx
9753 if export_info.has_option(constants.INISECT_INS, option):
9754 # FIXME: are the old os-es, disk sizes, etc. useful?
9755 export_name = export_info.get(constants.INISECT_INS, option)
9756 image = utils.PathJoin(self.op.src_path, export_name)
9757 disk_images.append(image)
9759 disk_images.append(False)
9761 self.src_images = disk_images
9763 old_name = export_info.get(constants.INISECT_INS, "name")
9764 if self.op.instance_name == old_name:
9765 for idx, nic in enumerate(self.nics):
9766 if nic.mac == constants.VALUE_AUTO:
9767 nic_mac_ini = "nic%d_mac" % idx
9768 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9770 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9772 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9773 if self.op.ip_check:
9774 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9775 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9776 (self.check_ip, self.op.instance_name),
9777 errors.ECODE_NOTUNIQUE)
9779 #### mac address generation
9780 # By generating here the mac address both the allocator and the hooks get
9781 # the real final mac address rather than the 'auto' or 'generate' value.
9782 # There is a race condition between the generation and the instance object
9783 # creation, which means that we know the mac is valid now, but we're not
9784 # sure it will be when we actually add the instance. If things go bad
9785 # adding the instance will abort because of a duplicate mac, and the
9786 # creation job will fail.
9787 for nic in self.nics:
9788 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9789 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9793 if self.op.iallocator is not None:
9794 self._RunAllocator()
9796 # Release all unneeded node locks
9797 _ReleaseLocks(self, locking.LEVEL_NODE,
9798 keep=filter(None, [self.op.pnode, self.op.snode,
9800 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9801 keep=filter(None, [self.op.pnode, self.op.snode,
9804 #### node related checks
9806 # check primary node
9807 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9808 assert self.pnode is not None, \
9809 "Cannot retrieve locked node %s" % self.op.pnode
9811 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9812 pnode.name, errors.ECODE_STATE)
9814 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9815 pnode.name, errors.ECODE_STATE)
9816 if not pnode.vm_capable:
9817 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9818 " '%s'" % pnode.name, errors.ECODE_STATE)
9820 self.secondaries = []
9822 # mirror node verification
9823 if self.op.disk_template in constants.DTS_INT_MIRROR:
9824 if self.op.snode == pnode.name:
9825 raise errors.OpPrereqError("The secondary node cannot be the"
9826 " primary node", errors.ECODE_INVAL)
9827 _CheckNodeOnline(self, self.op.snode)
9828 _CheckNodeNotDrained(self, self.op.snode)
9829 _CheckNodeVmCapable(self, self.op.snode)
9830 self.secondaries.append(self.op.snode)
9832 snode = self.cfg.GetNodeInfo(self.op.snode)
9833 if pnode.group != snode.group:
9834 self.LogWarning("The primary and secondary nodes are in two"
9835 " different node groups; the disk parameters"
9836 " from the first disk's node group will be used")
9839 nodenames = [pnode.name] + self.secondaries
9841 # Verify instance specs
9843 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9844 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9845 constants.ISPEC_DISK_COUNT: len(self.disks),
9846 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9847 constants.ISPEC_NIC_COUNT: len(self.nics),
9850 group_info = self.cfg.GetNodeGroup(pnode.group)
9851 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9852 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9853 if not self.op.ignore_ipolicy and res:
9854 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9855 " policy: %s") % (pnode.group,
9856 utils.CommaJoin(res)),
9859 # disk parameters (not customizable at instance or node level)
9860 # just use the primary node parameters, ignoring the secondary.
9861 self.diskparams = group_info.diskparams
9863 if not self.adopt_disks:
9864 if self.op.disk_template == constants.DT_RBD:
9865 # _CheckRADOSFreeSpace() is just a placeholder.
9866 # Any function that checks prerequisites can be placed here.
9867 # Check if there is enough space on the RADOS cluster.
9868 _CheckRADOSFreeSpace()
9870 # Check lv size requirements, if not adopting
9871 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9872 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9874 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9875 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9876 disk[constants.IDISK_ADOPT])
9877 for disk in self.disks])
9878 if len(all_lvs) != len(self.disks):
9879 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9881 for lv_name in all_lvs:
9883 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9884 # to ReserveLV use the same syntax
9885 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9886 except errors.ReservationError:
9887 raise errors.OpPrereqError("LV named %s used by another instance" %
9888 lv_name, errors.ECODE_NOTUNIQUE)
9890 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9891 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9893 node_lvs = self.rpc.call_lv_list([pnode.name],
9894 vg_names.payload.keys())[pnode.name]
9895 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9896 node_lvs = node_lvs.payload
9898 delta = all_lvs.difference(node_lvs.keys())
9900 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9901 utils.CommaJoin(delta),
9903 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9905 raise errors.OpPrereqError("Online logical volumes found, cannot"
9906 " adopt: %s" % utils.CommaJoin(online_lvs),
9908 # update the disk sizes based on what was found
9909 for dsk in self.disks:
9910 dsk[constants.IDISK_SIZE] = \
9911 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9912 dsk[constants.IDISK_ADOPT])][0]))
9914 elif self.op.disk_template == constants.DT_BLOCK:
9915 # Normalize and de-duplicate device paths
9916 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9917 for disk in self.disks])
9918 if len(all_disks) != len(self.disks):
9919 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9921 baddisks = [d for d in all_disks
9922 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9924 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9925 " cannot be adopted" %
9926 (", ".join(baddisks),
9927 constants.ADOPTABLE_BLOCKDEV_ROOT),
9930 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9931 list(all_disks))[pnode.name]
9932 node_disks.Raise("Cannot get block device information from node %s" %
9934 node_disks = node_disks.payload
9935 delta = all_disks.difference(node_disks.keys())
9937 raise errors.OpPrereqError("Missing block device(s): %s" %
9938 utils.CommaJoin(delta),
9940 for dsk in self.disks:
9941 dsk[constants.IDISK_SIZE] = \
9942 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
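# Illustrative sketch only: in both adoption branches above the user-supplied
# size is ignored and replaced by what the node reports for the adopted volume
# or device, e.g. (hypothetical values):
#
#   requested: {constants.IDISK_VG: "xenvg", constants.IDISK_ADOPT: "inst1-data"}
#   node reports "xenvg/inst1-data" with 10240.00 MiB
#   -> the disk dict ends up with constants.IDISK_SIZE == 10240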
9944 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9946 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9947 # check OS parameters (remotely)
9948 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9950 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9952 # memory check on primary node
9953 # TODO(dynmem): use MINMEM for checking
9955 _CheckNodeFreeMemory(self, self.pnode.name,
9956 "creating instance %s" % self.op.instance_name,
9957 self.be_full[constants.BE_MAXMEM],
9960 self.dry_run_result = list(nodenames)
9962 def Exec(self, feedback_fn):
9963 """Create and add the instance to the cluster.
9966 instance = self.op.instance_name
9967 pnode_name = self.pnode.name
9969 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9970 self.owned_locks(locking.LEVEL_NODE)), \
9971 "Node locks differ from node resource locks"
9973 ht_kind = self.op.hypervisor
9974 if ht_kind in constants.HTS_REQ_PORT:
9975 network_port = self.cfg.AllocatePort()
9979 disks = _GenerateDiskTemplate(self,
9980 self.op.disk_template,
9981 instance, pnode_name,
9984 self.instance_file_storage_dir,
9985 self.op.file_driver,
9990 iobj = objects.Instance(name=instance, os=self.op.os_type,
9991 primary_node=pnode_name,
9992 nics=self.nics, disks=disks,
9993 disk_template=self.op.disk_template,
9994 admin_state=constants.ADMINST_DOWN,
9995 network_port=network_port,
9996 beparams=self.op.beparams,
9997 hvparams=self.op.hvparams,
9998 hypervisor=self.op.hypervisor,
9999 osparams=self.op.osparams,
10003 for tag in self.op.tags:
10006 if self.adopt_disks:
10007 if self.op.disk_template == constants.DT_PLAIN:
10008 # rename LVs to the newly-generated names; we need to construct
10009 # 'fake' LV disks with the old data, plus the new unique_id
10010 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10012 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10013 rename_to.append(t_dsk.logical_id)
10014 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10015 self.cfg.SetDiskID(t_dsk, pnode_name)
10016 result = self.rpc.call_blockdev_rename(pnode_name,
10017 zip(tmp_disks, rename_to))
10018 result.Raise("Failed to rename adopted LVs")
10020 feedback_fn("* creating instance disks...")
10022 _CreateDisks(self, iobj)
10023 except errors.OpExecError:
10024 self.LogWarning("Device creation failed, reverting...")
10026 _RemoveDisks(self, iobj)
10028 self.cfg.ReleaseDRBDMinors(instance)
10031 feedback_fn("adding instance %s to cluster config" % instance)
10033 self.cfg.AddInstance(iobj, self.proc.GetECId())
10035 # Declare that we don't want to remove the instance lock anymore, as we've
10036 # added the instance to the config
10037 del self.remove_locks[locking.LEVEL_INSTANCE]
10039 if self.op.mode == constants.INSTANCE_IMPORT:
10040 # Release unused nodes
10041 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10043 # Release all nodes
10044 _ReleaseLocks(self, locking.LEVEL_NODE)
10047 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10048 feedback_fn("* wiping instance disks...")
10050 _WipeDisks(self, iobj)
10051 except errors.OpExecError, err:
10052 logging.exception("Wiping disks failed")
10053 self.LogWarning("Wiping instance disks failed (%s)", err)
10057 # Something is already wrong with the disks, don't do anything else
10059 elif self.op.wait_for_sync:
10060 disk_abort = not _WaitForSync(self, iobj)
10061 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10062 # make sure the disks are not degraded (still sync-ing is ok)
10063 feedback_fn("* checking mirrors status")
10064 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10069 _RemoveDisks(self, iobj)
10070 self.cfg.RemoveInstance(iobj.name)
10071 # Make sure the instance lock gets removed
10072 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10073 raise errors.OpExecError("There are some degraded disks for"
10076 # Release all node resource locks
10077 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10079 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10080 if self.op.mode == constants.INSTANCE_CREATE:
10081 if not self.op.no_install:
10082 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10083 not self.op.wait_for_sync)
10085 feedback_fn("* pausing disk sync to install instance OS")
10086 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10088 for idx, success in enumerate(result.payload):
10090 logging.warn("pause-sync of instance %s for disk %d failed",
10093 feedback_fn("* running the instance OS create scripts...")
10094 # FIXME: pass debug option from opcode to backend
10096 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10097 self.op.debug_level)
10099 feedback_fn("* resuming disk sync")
10100 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10102 for idx, success in enumerate(result.payload):
10104 logging.warn("resume-sync of instance %s for disk %d failed",
10107 os_add_result.Raise("Could not add os for instance %s"
10108 " on node %s" % (instance, pnode_name))
10110 elif self.op.mode == constants.INSTANCE_IMPORT:
10111 feedback_fn("* running the instance OS import scripts...")
10115 for idx, image in enumerate(self.src_images):
10119 # FIXME: pass debug option from opcode to backend
10120 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10121 constants.IEIO_FILE, (image, ),
10122 constants.IEIO_SCRIPT,
10123 (iobj.disks[idx], idx),
10125 transfers.append(dt)
10128 masterd.instance.TransferInstanceData(self, feedback_fn,
10129 self.op.src_node, pnode_name,
10130 self.pnode.secondary_ip,
10132 if not compat.all(import_result):
10133 self.LogWarning("Some disks for instance %s on node %s were not"
10134 " imported successfully" % (instance, pnode_name))
10136 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10137 feedback_fn("* preparing remote import...")
10138 # The source cluster will stop the instance before attempting to make a
10139 # connection. In some cases stopping an instance can take a long time,
10140 # hence the shutdown timeout is added to the connection timeout.
10141 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10142 self.op.source_shutdown_timeout)
10143 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10145 assert iobj.primary_node == self.pnode.name
10147 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10148 self.source_x509_ca,
10149 self._cds, timeouts)
10150 if not compat.all(disk_results):
10151 # TODO: Should the instance still be started, even if some disks
10152 # failed to import (valid for local imports, too)?
10153 self.LogWarning("Some disks for instance %s on node %s were not"
10154 " imported successfully" % (instance, pnode_name))
10156 # Run rename script on newly imported instance
10157 assert iobj.name == instance
10158 feedback_fn("Running rename script for %s" % instance)
10159 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10160 self.source_instance_name,
10161 self.op.debug_level)
10162 if result.fail_msg:
10163 self.LogWarning("Failed to run rename script for %s on node"
10164 " %s: %s" % (instance, pnode_name, result.fail_msg))
10167 # also checked in the prereq part
10168 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10171 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10174 iobj.admin_state = constants.ADMINST_UP
10175 self.cfg.Update(iobj, feedback_fn)
10176 logging.info("Starting instance %s on node %s", instance, pnode_name)
10177 feedback_fn("* starting instance...")
10178 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10180 result.Raise("Could not start instance")
10182 return list(iobj.all_nodes)
10185 def _CheckRADOSFreeSpace():
10186 """Compute disk size requirements inside the RADOS cluster.
10189 # For the RADOS cluster we assume there is always enough space.
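# A minimal illustrative sketch (not used anywhere in this module) of what a
# real RADOS capacity check could look like; the helper name and both
# parameters are assumptions (how avail_mb is obtained, e.g. from "rados df"
# or librados, is deliberately left out), only the error convention follows
# the rest of this file.
def _CheckRADOSFreeSpaceSketch(avail_mb, req_mb):
  """Fail if the RADOS pool cannot hold the requested disks (sketch only)."""
  if avail_mb < req_mb:
    raise errors.OpPrereqError("Not enough space in the RADOS pool: %d MiB"
                               " available, %d MiB required" %
                               (avail_mb, req_mb), errors.ECODE_NORES)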
10193 class LUInstanceConsole(NoHooksLU):
10194 """Connect to an instance's console.
10196 This is somewhat special in that it returns the command line that
10197 you need to run on the master node in order to connect to the console.
10203 def ExpandNames(self):
10204 self.share_locks = _ShareAll()
10205 self._ExpandAndLockInstance()
10207 def CheckPrereq(self):
10208 """Check prerequisites.
10210 This checks that the instance is in the cluster.
10213 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10214 assert self.instance is not None, \
10215 "Cannot retrieve locked instance %s" % self.op.instance_name
10216 _CheckNodeOnline(self, self.instance.primary_node)
10218 def Exec(self, feedback_fn):
10219 """Connect to the console of an instance
10222 instance = self.instance
10223 node = instance.primary_node
10225 node_insts = self.rpc.call_instance_list([node],
10226 [instance.hypervisor])[node]
10227 node_insts.Raise("Can't get node information from %s" % node)
10229 if instance.name not in node_insts.payload:
10230 if instance.admin_state == constants.ADMINST_UP:
10231 state = constants.INSTST_ERRORDOWN
10232 elif instance.admin_state == constants.ADMINST_DOWN:
10233 state = constants.INSTST_ADMINDOWN
10235 state = constants.INSTST_ADMINOFFLINE
10236 raise errors.OpExecError("Instance %s is not running (state %s)" %
10237 (instance.name, state))
10239 logging.debug("Connecting to console of %s on %s", instance.name, node)
10241 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10244 def _GetInstanceConsole(cluster, instance):
10245 """Returns console information for an instance.
10247 @type cluster: L{objects.Cluster}
10248 @type instance: L{objects.Instance}
10252 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10253 # beparams and hvparams are passed separately, to avoid editing the
10254 # instance and then saving the defaults in the instance itself.
10255 hvparams = cluster.FillHV(instance)
10256 beparams = cluster.FillBE(instance)
10257 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10259 assert console.instance == instance.name
10260 assert console.Validate()
10262 return console.ToDict()
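# Illustrative usage sketch (an assumption, not part of Ganeti): the dict
# returned by _GetInstanceConsole is what a front-end command would consume;
# the helper name below is hypothetical.
def _LogInstanceConsoleSketch(cfg, instance):
  """Log the serialized console description for an instance (sketch only)."""
  console = _GetInstanceConsole(cfg.GetClusterInfo(), instance)
  logging.debug("Console information for %s: %s", instance.name, console)
  return console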
10265 class LUInstanceReplaceDisks(LogicalUnit):
10266 """Replace the disks of an instance.
10269 HPATH = "mirrors-replace"
10270 HTYPE = constants.HTYPE_INSTANCE
10273 def CheckArguments(self):
10274 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10275 self.op.iallocator)
10277 def ExpandNames(self):
10278 self._ExpandAndLockInstance()
10280 assert locking.LEVEL_NODE not in self.needed_locks
10281 assert locking.LEVEL_NODE_RES not in self.needed_locks
10282 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10284 assert self.op.iallocator is None or self.op.remote_node is None, \
10285 "Conflicting options"
10287 if self.op.remote_node is not None:
10288 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10290 # Warning: do not remove the locking of the new secondary here
10291 # unless DRBD8.AddChildren is changed to work in parallel;
10292 # currently it doesn't since parallel invocations of
10293 # FindUnusedMinor will conflict
10294 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10295 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10297 self.needed_locks[locking.LEVEL_NODE] = []
10298 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10300 if self.op.iallocator is not None:
10301 # iallocator will select a new node in the same group
10302 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10304 self.needed_locks[locking.LEVEL_NODE_RES] = []
10306 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10307 self.op.iallocator, self.op.remote_node,
10308 self.op.disks, False, self.op.early_release,
10309 self.op.ignore_ipolicy)
10311 self.tasklets = [self.replacer]
10313 def DeclareLocks(self, level):
10314 if level == locking.LEVEL_NODEGROUP:
10315 assert self.op.remote_node is None
10316 assert self.op.iallocator is not None
10317 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10319 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10320 # Lock all groups used by instance optimistically; this requires going
10321 # via the node before it's locked, requiring verification later on
10322 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10323 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10325 elif level == locking.LEVEL_NODE:
10326 if self.op.iallocator is not None:
10327 assert self.op.remote_node is None
10328 assert not self.needed_locks[locking.LEVEL_NODE]
10330 # Lock member nodes of all locked groups
10331 self.needed_locks[locking.LEVEL_NODE] = [node_name
10332 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10333 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10335 self._LockInstancesNodes()
10336 elif level == locking.LEVEL_NODE_RES:
10338 self.needed_locks[locking.LEVEL_NODE_RES] = \
10339 self.needed_locks[locking.LEVEL_NODE]
10341 def BuildHooksEnv(self):
10342 """Build hooks env.
10344 This runs on the master, the primary and all the secondaries.
10347 instance = self.replacer.instance
10349 "MODE": self.op.mode,
10350 "NEW_SECONDARY": self.op.remote_node,
10351 "OLD_SECONDARY": instance.secondary_nodes[0],
10353 env.update(_BuildInstanceHookEnvByObject(self, instance))
10356 def BuildHooksNodes(self):
10357 """Build hooks nodes.
10360 instance = self.replacer.instance
10362 self.cfg.GetMasterNode(),
10363 instance.primary_node,
10365 if self.op.remote_node is not None:
10366 nl.append(self.op.remote_node)
10369 def CheckPrereq(self):
10370 """Check prerequisites.
10373 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10374 self.op.iallocator is None)
10376 # Verify if node group locks are still correct
10377 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10379 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10381 return LogicalUnit.CheckPrereq(self)
10384 class TLReplaceDisks(Tasklet):
10385 """Replaces disks for an instance.
10387 Note: Locking is not within the scope of this class.
10390 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10391 disks, delay_iallocator, early_release, ignore_ipolicy):
10392 """Initializes this class.
10395 Tasklet.__init__(self, lu)
10398 self.instance_name = instance_name
10400 self.iallocator_name = iallocator_name
10401 self.remote_node = remote_node
10403 self.delay_iallocator = delay_iallocator
10404 self.early_release = early_release
10405 self.ignore_ipolicy = ignore_ipolicy
10408 self.instance = None
10409 self.new_node = None
10410 self.target_node = None
10411 self.other_node = None
10412 self.remote_node_info = None
10413 self.node_secondary_ip = None
10416 def CheckArguments(mode, remote_node, iallocator):
10417 """Helper function for users of this class.
10420 # check for valid parameter combination
10421 if mode == constants.REPLACE_DISK_CHG:
10422 if remote_node is None and iallocator is None:
10423 raise errors.OpPrereqError("When changing the secondary either an"
10424 " iallocator script must be used or the"
10425 " new node given", errors.ECODE_INVAL)
10427 if remote_node is not None and iallocator is not None:
10428 raise errors.OpPrereqError("Give either the iallocator or the new"
10429 " secondary, not both", errors.ECODE_INVAL)
10431 elif remote_node is not None or iallocator is not None:
10432 # Not replacing the secondary
10433 raise errors.OpPrereqError("The iallocator and new node options can"
10434 " only be used when changing the"
10435 " secondary node", errors.ECODE_INVAL)
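# Illustrative summary only: the argument combinations accepted above are
#
#   mode in (REPLACE_DISK_PRI, REPLACE_DISK_SEC, REPLACE_DISK_AUTO):
#       neither remote_node nor iallocator may be given
#   mode == REPLACE_DISK_CHG:
#       exactly one of remote_node or iallocator must be given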
10438 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10439 """Compute a new secondary node using an IAllocator.
10442 ial = IAllocator(lu.cfg, lu.rpc,
10443 mode=constants.IALLOCATOR_MODE_RELOC,
10444 name=instance_name,
10445 relocate_from=list(relocate_from))
10447 ial.Run(iallocator_name)
10449 if not ial.success:
10450 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10451 " %s" % (iallocator_name, ial.info),
10452 errors.ECODE_NORES)
10454 if len(ial.result) != ial.required_nodes:
10455 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10456 " of nodes (%s), required %s" %
10458 len(ial.result), ial.required_nodes),
10459 errors.ECODE_FAULT)
10461 remote_node_name = ial.result[0]
10463 lu.LogInfo("Selected new secondary for instance '%s': %s",
10464 instance_name, remote_node_name)
10466 return remote_node_name
10468 def _FindFaultyDisks(self, node_name):
10469 """Wrapper for L{_FindFaultyInstanceDisks}.
10472 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10475 def _CheckDisksActivated(self, instance):
10476 """Checks if the instance disks are activated.
10478 @param instance: The instance whose disks to check
10479 @return: True if they are activated, False otherwise
10482 nodes = instance.all_nodes
10484 for idx, dev in enumerate(instance.disks):
10486 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10487 self.cfg.SetDiskID(dev, node)
10489 result = self.rpc.call_blockdev_find(node, dev)
10493 elif result.fail_msg or not result.payload:
10498 def CheckPrereq(self):
10499 """Check prerequisites.
10501 This checks that the instance is in the cluster.
10504 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10505 assert instance is not None, \
10506 "Cannot retrieve locked instance %s" % self.instance_name
10508 if instance.disk_template != constants.DT_DRBD8:
10509 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10510 " instances", errors.ECODE_INVAL)
10512 if len(instance.secondary_nodes) != 1:
10513 raise errors.OpPrereqError("The instance has a strange layout,"
10514 " expected one secondary but found %d" %
10515 len(instance.secondary_nodes),
10516 errors.ECODE_FAULT)
10518 if not self.delay_iallocator:
10519 self._CheckPrereq2()
10521 def _CheckPrereq2(self):
10522 """Check prerequisites, second part.
10524 This function should always be part of CheckPrereq. It was separated and is
10525 now called from Exec because during node evacuation iallocator was only
10526 called with an unmodified cluster model, not taking planned changes into account.
10530 instance = self.instance
10531 secondary_node = instance.secondary_nodes[0]
10533 if self.iallocator_name is None:
10534 remote_node = self.remote_node
10536 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10537 instance.name, instance.secondary_nodes)
10539 if remote_node is None:
10540 self.remote_node_info = None
10542 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10543 "Remote node '%s' is not locked" % remote_node
10545 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10546 assert self.remote_node_info is not None, \
10547 "Cannot retrieve locked node %s" % remote_node
10549 if remote_node == self.instance.primary_node:
10550 raise errors.OpPrereqError("The specified node is the primary node of"
10551 " the instance", errors.ECODE_INVAL)
10553 if remote_node == secondary_node:
10554 raise errors.OpPrereqError("The specified node is already the"
10555 " secondary node of the instance",
10556 errors.ECODE_INVAL)
10558 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10559 constants.REPLACE_DISK_CHG):
10560 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10561 errors.ECODE_INVAL)
10563 if self.mode == constants.REPLACE_DISK_AUTO:
10564 if not self._CheckDisksActivated(instance):
10565 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10566 " first" % self.instance_name,
10567 errors.ECODE_STATE)
10568 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10569 faulty_secondary = self._FindFaultyDisks(secondary_node)
10571 if faulty_primary and faulty_secondary:
10572 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10573 " one node and can not be repaired"
10574 " automatically" % self.instance_name,
10575 errors.ECODE_STATE)
10578 self.disks = faulty_primary
10579 self.target_node = instance.primary_node
10580 self.other_node = secondary_node
10581 check_nodes = [self.target_node, self.other_node]
10582 elif faulty_secondary:
10583 self.disks = faulty_secondary
10584 self.target_node = secondary_node
10585 self.other_node = instance.primary_node
10586 check_nodes = [self.target_node, self.other_node]
10592 # Non-automatic modes
10593 if self.mode == constants.REPLACE_DISK_PRI:
10594 self.target_node = instance.primary_node
10595 self.other_node = secondary_node
10596 check_nodes = [self.target_node, self.other_node]
10598 elif self.mode == constants.REPLACE_DISK_SEC:
10599 self.target_node = secondary_node
10600 self.other_node = instance.primary_node
10601 check_nodes = [self.target_node, self.other_node]
10603 elif self.mode == constants.REPLACE_DISK_CHG:
10604 self.new_node = remote_node
10605 self.other_node = instance.primary_node
10606 self.target_node = secondary_node
10607 check_nodes = [self.new_node, self.other_node]
10609 _CheckNodeNotDrained(self.lu, remote_node)
10610 _CheckNodeVmCapable(self.lu, remote_node)
10612 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10613 assert old_node_info is not None
10614 if old_node_info.offline and not self.early_release:
10615 # doesn't make sense to delay the release
10616 self.early_release = True
10617 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10618 " early-release mode", secondary_node)
10621 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10624 # If not specified, all disks should be replaced
10626 self.disks = range(len(self.instance.disks))
10628 # TODO: This is ugly, but right now we can't distinguish between internally
10629 # submitted opcodes and external ones. We should fix that.
10630 if self.remote_node_info:
10631 # We change the node; let's verify it still meets the instance policy
10632 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10633 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10635 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10636 ignore=self.ignore_ipolicy)
10638 # TODO: compute disk parameters
10639 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10640 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10641 if primary_node_info.group != secondary_node_info.group:
10642 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10643 " different node groups; the disk parameters of the"
10644 " primary node's group will be applied.")
10646 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10648 for node in check_nodes:
10649 _CheckNodeOnline(self.lu, node)
10651 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10654 if node_name is not None)
10656 # Release unneeded node and node resource locks
10657 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10658 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10660 # Release any owned node group
10661 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10662 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10664 # Check whether disks are valid
10665 for disk_idx in self.disks:
10666 instance.FindDisk(disk_idx)
10668 # Get secondary node IP addresses
10669 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10670 in self.cfg.GetMultiNodeInfo(touched_nodes))
10672 def Exec(self, feedback_fn):
10673 """Execute disk replacement.
10675 This dispatches the disk replacement to the appropriate handler.
10678 if self.delay_iallocator:
10679 self._CheckPrereq2()
10682 # Verify owned locks before starting operation
10683 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10684 assert set(owned_nodes) == set(self.node_secondary_ip), \
10685 ("Incorrect node locks, owning %s, expected %s" %
10686 (owned_nodes, self.node_secondary_ip.keys()))
10687 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10688 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10690 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10691 assert list(owned_instances) == [self.instance_name], \
10692 "Instance '%s' not locked" % self.instance_name
10694 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10695 "Should not own any node group lock at this point"
10698 feedback_fn("No disks need replacement")
10701 feedback_fn("Replacing disk(s) %s for %s" %
10702 (utils.CommaJoin(self.disks), self.instance.name))
10704 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10706 # Activate the instance disks if we're replacing them on a down instance
10708 _StartInstanceDisks(self.lu, self.instance, True)
10711 # Should we replace the secondary node?
10712 if self.new_node is not None:
10713 fn = self._ExecDrbd8Secondary
10715 fn = self._ExecDrbd8DiskOnly
10717 result = fn(feedback_fn)
10719 # Deactivate the instance disks if we're replacing them on a
10722 _SafeShutdownInstanceDisks(self.lu, self.instance)
10724 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10727 # Verify owned locks
10728 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10729 nodes = frozenset(self.node_secondary_ip)
10730 assert ((self.early_release and not owned_nodes) or
10731 (not self.early_release and not (set(owned_nodes) - nodes))), \
10732 ("Not owning the correct locks, early_release=%s, owned=%r,"
10733 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10737 def _CheckVolumeGroup(self, nodes):
10738 self.lu.LogInfo("Checking volume groups")
10740 vgname = self.cfg.GetVGName()
10742 # Make sure volume group exists on all involved nodes
10743 results = self.rpc.call_vg_list(nodes)
10745 raise errors.OpExecError("Can't list volume groups on the nodes")
10748 res = results[node]
10749 res.Raise("Error checking node %s" % node)
10750 if vgname not in res.payload:
10751 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10754 def _CheckDisksExistence(self, nodes):
10755 # Check disk existence
10756 for idx, dev in enumerate(self.instance.disks):
10757 if idx not in self.disks:
10761 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10762 self.cfg.SetDiskID(dev, node)
10764 result = self.rpc.call_blockdev_find(node, dev)
10766 msg = result.fail_msg
10767 if msg or not result.payload:
10769 msg = "disk not found"
10770 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10773 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10774 for idx, dev in enumerate(self.instance.disks):
10775 if idx not in self.disks:
10778 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10781 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10783 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10784 " replace disks for instance %s" %
10785 (node_name, self.instance.name))
10787 def _CreateNewStorage(self, node_name):
10788 """Create new storage on the primary or secondary node.
10790 This is only used for same-node replaces, not for changing the
10791 secondary node, hence we don't want to modify the existing disk.
10796 for idx, dev in enumerate(self.instance.disks):
10797 if idx not in self.disks:
10800 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10802 self.cfg.SetDiskID(dev, node_name)
10804 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10805 names = _GenerateUniqueNames(self.lu, lv_names)
10807 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10809 vg_data = dev.children[0].logical_id[0]
10810 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10811 logical_id=(vg_data, names[0]), params=data_p)
10812 vg_meta = dev.children[1].logical_id[0]
10813 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10814 logical_id=(vg_meta, names[1]), params=meta_p)
10816 new_lvs = [lv_data, lv_meta]
10817 old_lvs = [child.Copy() for child in dev.children]
10818 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10820 # we pass force_create=True to force the LVM creation
10821 for new_lv in new_lvs:
10822 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10823 _GetInstanceInfoText(self.instance), False)
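# Illustrative sketch only: the iv_names mapping built by _CreateNewStorage
# ties every DRBD disk to the volumes involved in the replace, e.g.
#
#   iv_names["disk/0"] == (dev,          # the DRBD disk object
#                          old_lvs,      # the LVs currently attached (copies)
#                          new_lvs)      # the freshly created data/meta LVs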
10827 def _CheckDevices(self, node_name, iv_names):
10828 for name, (dev, _, _) in iv_names.iteritems():
10829 self.cfg.SetDiskID(dev, node_name)
10831 result = self.rpc.call_blockdev_find(node_name, dev)
10833 msg = result.fail_msg
10834 if msg or not result.payload:
10836 msg = "disk not found"
10837 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10840 if result.payload.is_degraded:
10841 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10843 def _RemoveOldStorage(self, node_name, iv_names):
10844 for name, (_, old_lvs, _) in iv_names.iteritems():
10845 self.lu.LogInfo("Remove logical volumes for %s" % name)
10848 self.cfg.SetDiskID(lv, node_name)
10850 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10852 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10853 hint="remove unused LVs manually")
10855 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10856 """Replace a disk on the primary or secondary for DRBD 8.
10858 The algorithm for replace is quite complicated:
10860 1. for each disk to be replaced:
10862 1. create new LVs on the target node with unique names
10863 1. detach old LVs from the drbd device
10864 1. rename old LVs to name_replaced.<time_t>
10865 1. rename new LVs to old LVs
10866 1. attach the new LVs (with the old names now) to the drbd device
10868 1. wait for sync across all devices
10870 1. for each modified disk:
10872 1. remove old LVs (which have the name name_replaced.<time_t>)
10874 Failures are not very well handled.
10879 # Step: check device activation
10880 self.lu.LogStep(1, steps_total, "Check device existence")
10881 self._CheckDisksExistence([self.other_node, self.target_node])
10882 self._CheckVolumeGroup([self.target_node, self.other_node])
10884 # Step: check other node consistency
10885 self.lu.LogStep(2, steps_total, "Check peer consistency")
10886 self._CheckDisksConsistency(self.other_node,
10887 self.other_node == self.instance.primary_node,
10890 # Step: create new storage
10891 self.lu.LogStep(3, steps_total, "Allocate new storage")
10892 iv_names = self._CreateNewStorage(self.target_node)
10894 # Step: for each lv, detach+rename*2+attach
10895 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10896 for dev, old_lvs, new_lvs in iv_names.itervalues():
10897 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10899 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10901 result.Raise("Can't detach drbd from local storage on node"
10902 " %s for device %s" % (self.target_node, dev.iv_name))
10904 #cfg.Update(instance)
10906 # ok, we created the new LVs, so now we know we have the needed
10907 # storage; as such, we proceed on the target node to rename
10908 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10909 # using the assumption that logical_id == physical_id (which in
10910 # turn is the unique_id on that node)
10912 # FIXME(iustin): use a better name for the replaced LVs
10913 temp_suffix = int(time.time())
10914 ren_fn = lambda d, suff: (d.physical_id[0],
10915 d.physical_id[1] + "_replaced-%s" % suff)
10917 # Build the rename list based on what LVs exist on the node
10918 rename_old_to_new = []
10919 for to_ren in old_lvs:
10920 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10921 if not result.fail_msg and result.payload:
10923 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10925 self.lu.LogInfo("Renaming the old LVs on the target node")
10926 result = self.rpc.call_blockdev_rename(self.target_node,
10928 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10930 # Now we rename the new LVs to the old LVs
10931 self.lu.LogInfo("Renaming the new LVs on the target node")
10932 rename_new_to_old = [(new, old.physical_id)
10933 for old, new in zip(old_lvs, new_lvs)]
10934 result = self.rpc.call_blockdev_rename(self.target_node,
10936 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10938 # Intermediate steps of in-memory modifications
10939 for old, new in zip(old_lvs, new_lvs):
10940 new.logical_id = old.logical_id
10941 self.cfg.SetDiskID(new, self.target_node)
10943 # We need to modify old_lvs so that removal later removes the
10944 # right LVs, not the newly added ones; note that old_lvs is a copy
10946 for disk in old_lvs:
10947 disk.logical_id = ren_fn(disk, temp_suffix)
10948 self.cfg.SetDiskID(disk, self.target_node)
10950 # Now that the new LVs have the old names, we can add them to the device
10951 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10952 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10954 msg = result.fail_msg
10956 for new_lv in new_lvs:
10957 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10960 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10961 hint=("cleanup manually the unused logical"
10963 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10965 cstep = itertools.count(5)
10967 if self.early_release:
10968 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10969 self._RemoveOldStorage(self.target_node, iv_names)
10970 # TODO: Check if releasing locks early still makes sense
10971 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10973 # Release all resource locks except those used by the instance
10974 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10975 keep=self.node_secondary_ip.keys())
10977 # Release all node locks while waiting for sync
10978 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10980 # TODO: Can the instance lock be downgraded here? Take the optional disk
10981 # shutdown in the caller into consideration.
10984 # This can fail as the old devices are degraded and _WaitForSync
10985 # does a combined result over all disks, so we don't check its return value
10986 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10987 _WaitForSync(self.lu, self.instance)
10989 # Check all devices manually
10990 self._CheckDevices(self.instance.primary_node, iv_names)
10992 # Step: remove old storage
10993 if not self.early_release:
10994 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10995 self._RemoveOldStorage(self.target_node, iv_names)
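# Illustrative sketch only: for one disk the rename dance in _ExecDrbd8DiskOnly
# works out as follows (LV names are hypothetical, <t> is int(time.time())):
#
#   old data LV: xenvg/old-uuid.disk0_data -> xenvg/old-uuid.disk0_data_replaced-<t>
#   new data LV: xenvg/new-uuid.disk0_data -> xenvg/old-uuid.disk0_data
#
# so the DRBD device keeps the same backing LV names while the storage
# underneath has been replaced; the *_replaced-<t> volumes are removed later.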
10997 def _ExecDrbd8Secondary(self, feedback_fn):
10998 """Replace the secondary node for DRBD 8.
11000 The algorithm for replace is quite complicated:
11001 - for all disks of the instance:
11002 - create new LVs on the new node with same names
11003 - shutdown the drbd device on the old secondary
11004 - disconnect the drbd network on the primary
11005 - create the drbd device on the new secondary
11006 - network attach the drbd on the primary, using an artifice:
11007 the drbd code for Attach() will connect to the network if it
11008 finds a device which is connected to the good local disks but
11009 not network enabled
11010 - wait for sync across all devices
11011 - remove all disks from the old secondary
11013 Failures are not very well handled.
11015 """
11017 steps_total = 6
11018 pnode = self.instance.primary_node
11020 # Step: check device activation
11021 self.lu.LogStep(1, steps_total, "Check device existence")
11022 self._CheckDisksExistence([self.instance.primary_node])
11023 self._CheckVolumeGroup([self.instance.primary_node])
11025 # Step: check other node consistency
11026 self.lu.LogStep(2, steps_total, "Check peer consistency")
11027 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11029 # Step: create new storage
11030 self.lu.LogStep(3, steps_total, "Allocate new storage")
11031 for idx, dev in enumerate(self.instance.disks):
11032 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11033 (self.new_node, idx))
11034 # we pass force_create=True to force LVM creation
11035 for new_lv in dev.children:
11036 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11037 _GetInstanceInfoText(self.instance), False)
11039 # Step 4: drbd minors and drbd setup changes
11040 # after this, we must manually remove the drbd minors on both the
11041 # error and the success paths
11042 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11043 minors = self.cfg.AllocateDRBDMinor([self.new_node
11044 for dev in self.instance.disks],
11045 self.instance.name)
11046 logging.debug("Allocated minors %r", minors)
11048 iv_names = {}
11049 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11050 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11051 (self.new_node, idx))
11052 # create new devices on new_node; note that we create two IDs:
11053 # one without port, so the drbd will be activated without
11054 # networking information on the new node at this stage, and one
11055 # with network, for the later activation in step 4
11056 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11057 if self.instance.primary_node == o_node1:
11058 p_minor = o_minor1
11059 else:
11060 assert self.instance.primary_node == o_node2, "Three-node instance?"
11061 p_minor = o_minor2
11063 new_alone_id = (self.instance.primary_node, self.new_node, None,
11064 p_minor, new_minor, o_secret)
11065 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11066 p_minor, new_minor, o_secret)
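# Editor's illustration only (hypothetical values): a DRBD8 logical_id is the
# 6-tuple (node_a, node_b, port, minor_a, minor_b, secret).  For a disk such as
#   ("node1.example.com", "old-sec.example.com", 11000, 0, 3, "s3cr3t")
# and a freshly allocated minor 5 on the new secondary, the two IDs built above
# would look like:
#   new_alone_id = ("node1.example.com", "new-sec.example.com", None, 0, 5, "s3cr3t")
#   new_net_id   = ("node1.example.com", "new-sec.example.com", 11000, 0, 5, "s3cr3t")
# i.e. the same tuple with and without the TCP port, so the device can first be
# brought up unconnected and attached to the network later on.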
11068 iv_names[idx] = (dev, dev.children, new_net_id)
11069 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11070 new_net_id)
11071 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11072 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11073 logical_id=new_alone_id,
11074 children=dev.children,
11075 size=dev.size,
11076 params=drbd_params)
11077 try:
11078 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11079 _GetInstanceInfoText(self.instance), False)
11080 except errors.GenericError:
11081 self.cfg.ReleaseDRBDMinors(self.instance.name)
11082 raise
11084 # We have new devices, shutdown the drbd on the old secondary
11085 for idx, dev in enumerate(self.instance.disks):
11086 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11087 self.cfg.SetDiskID(dev, self.target_node)
11088 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11089 if msg:
11090 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11091 " node: %s" % (idx, msg),
11092 hint=("Please cleanup this device manually as"
11093 " soon as possible"))
11095 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11096 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11097 self.instance.disks)[pnode]
11099 msg = result.fail_msg
11100 if msg:
11101 # detaches didn't succeed (unlikely)
11102 self.cfg.ReleaseDRBDMinors(self.instance.name)
11103 raise errors.OpExecError("Can't detach the disks from the network on"
11104 " old node: %s" % (msg,))
11106 # if we managed to detach at least one, we update all the disks of
11107 # the instance to point to the new secondary
11108 self.lu.LogInfo("Updating instance configuration")
11109 for dev, _, new_logical_id in iv_names.itervalues():
11110 dev.logical_id = new_logical_id
11111 self.cfg.SetDiskID(dev, self.instance.primary_node)
11113 self.cfg.Update(self.instance, feedback_fn)
11115 # Release all node locks (the configuration has been updated)
11116 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11118 # and now perform the drbd attach
11119 self.lu.LogInfo("Attaching primary drbds to new secondary"
11120 " (standalone => connected)")
11121 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11122 self.new_node],
11123 self.node_secondary_ip,
11124 self.instance.disks,
11125 self.instance.name,
11126 False)
11127 for to_node, to_result in result.items():
11128 msg = to_result.fail_msg
11129 if msg:
11130 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11131 to_node, msg,
11132 hint=("please do a gnt-instance info to see the"
11133 " status of disks"))
11135 cstep = itertools.count(5)
11137 if self.early_release:
11138 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11139 self._RemoveOldStorage(self.target_node, iv_names)
11140 # TODO: Check if releasing locks early still makes sense
11141 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11142 else:
11143 # Release all resource locks except those used by the instance
11144 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11145 keep=self.node_secondary_ip.keys())
11147 # TODO: Can the instance lock be downgraded here? Take the optional disk
11148 # shutdown in the caller into consideration.
11151 # This can fail as the old devices are degraded and _WaitForSync
11152 # does a combined result over all disks, so we don't check its return value
11153 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11154 _WaitForSync(self.lu, self.instance)
11156 # Check all devices manually
11157 self._CheckDevices(self.instance.primary_node, iv_names)
11159 # Step: remove old storage
11160 if not self.early_release:
11161 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11162 self._RemoveOldStorage(self.target_node, iv_names)
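# For reference (not part of the original code): the secondary-replacement path
# above typically runs behind invocations such as
#   gnt-instance replace-disks -n node3.example.com inst1.example.com
#   gnt-instance replace-disks -I hail inst1.example.com
# i.e. moving the DRBD secondary either to an explicitly named node or to one
# picked by an iallocator; the node and instance names here are hypothetical.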
11165 class LURepairNodeStorage(NoHooksLU):
11166 """Repairs the volume group on a node.
11171 def CheckArguments(self):
11172 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11174 storage_type = self.op.storage_type
11176 if (constants.SO_FIX_CONSISTENCY not in
11177 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11178 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11179 " repaired" % storage_type,
11180 errors.ECODE_INVAL)
11182 def ExpandNames(self):
11183 self.needed_locks = {
11184 locking.LEVEL_NODE: [self.op.node_name],
11185 }
11187 def _CheckFaultyDisks(self, instance, node_name):
11188 """Ensure faulty disks abort the opcode or at least warn."""
11189 try:
11190 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11191 node_name, True):
11192 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11193 " node '%s'" % (instance.name, node_name),
11194 errors.ECODE_STATE)
11195 except errors.OpPrereqError, err:
11196 if self.op.ignore_consistency:
11197 self.proc.LogWarning(str(err.args[0]))
11198 else:
11199 raise
11201 def CheckPrereq(self):
11202 """Check prerequisites.
11205 # Check whether any instance on this node has faulty disks
11206 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11207 if inst.admin_state != constants.ADMINST_UP:
11208 continue
11209 check_nodes = set(inst.all_nodes)
11210 check_nodes.discard(self.op.node_name)
11211 for inst_node_name in check_nodes:
11212 self._CheckFaultyDisks(inst, inst_node_name)
11214 def Exec(self, feedback_fn):
11215 feedback_fn("Repairing storage unit '%s' on %s ..." %
11216 (self.op.name, self.op.node_name))
11218 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11219 result = self.rpc.call_storage_execute(self.op.node_name,
11220 self.op.storage_type, st_args,
11221 self.op.name,
11222 constants.SO_FIX_CONSISTENCY)
11223 result.Raise("Failed to repair storage unit '%s' on %s" %
11224 (self.op.name, self.op.node_name))
11227 class LUNodeEvacuate(NoHooksLU):
11228 """Evacuates instances off a list of nodes.
11233 _MODE2IALLOCATOR = {
11234 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11235 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11236 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11237 }
11238 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11239 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11240 constants.IALLOCATOR_NEVAC_MODES)
11242 def CheckArguments(self):
11243 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11245 def ExpandNames(self):
11246 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11248 if self.op.remote_node is not None:
11249 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11250 assert self.op.remote_node
11252 if self.op.remote_node == self.op.node_name:
11253 raise errors.OpPrereqError("Can not use evacuated node as a new"
11254 " secondary node", errors.ECODE_INVAL)
11256 if self.op.mode != constants.NODE_EVAC_SEC:
11257 raise errors.OpPrereqError("Without the use of an iallocator only"
11258 " secondary instances can be evacuated",
11259 errors.ECODE_INVAL)
11262 self.share_locks = _ShareAll()
11263 self.needed_locks = {
11264 locking.LEVEL_INSTANCE: [],
11265 locking.LEVEL_NODEGROUP: [],
11266 locking.LEVEL_NODE: [],
11267 }
11269 # Determine nodes (via group) optimistically, needs verification once locks
11270 # have been acquired
11271 self.lock_nodes = self._DetermineNodes()
11273 def _DetermineNodes(self):
11274 """Gets the list of nodes to operate on.
11277 if self.op.remote_node is None:
11278 # Iallocator will choose any node(s) in the same group
11279 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11280 else:
11281 group_nodes = frozenset([self.op.remote_node])
11283 # Determine nodes to be locked
11284 return set([self.op.node_name]) | group_nodes
11286 def _DetermineInstances(self):
11287 """Builds list of instances to operate on.
11290 assert self.op.mode in constants.NODE_EVAC_MODES
11292 if self.op.mode == constants.NODE_EVAC_PRI:
11293 # Primary instances only
11294 inst_fn = _GetNodePrimaryInstances
11295 assert self.op.remote_node is None, \
11296 "Evacuating primary instances requires iallocator"
11297 elif self.op.mode == constants.NODE_EVAC_SEC:
11298 # Secondary instances only
11299 inst_fn = _GetNodeSecondaryInstances
11300 else:
11301 # All instances
11302 assert self.op.mode == constants.NODE_EVAC_ALL
11303 inst_fn = _GetNodeInstances
11304 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11306 raise errors.OpPrereqError("Due to an issue with the iallocator"
11307 " interface it is not possible to evacuate"
11308 " all instances at once; specify explicitly"
11309 " whether to evacuate primary or secondary"
11311 errors.ECODE_INVAL)
11313 return inst_fn(self.cfg, self.op.node_name)
11315 def DeclareLocks(self, level):
11316 if level == locking.LEVEL_INSTANCE:
11317 # Lock instances optimistically, needs verification once node and group
11318 # locks have been acquired
11319 self.needed_locks[locking.LEVEL_INSTANCE] = \
11320 set(i.name for i in self._DetermineInstances())
11322 elif level == locking.LEVEL_NODEGROUP:
11323 # Lock node groups for all potential target nodes optimistically, needs
11324 # verification once nodes have been acquired
11325 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11326 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11328 elif level == locking.LEVEL_NODE:
11329 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11331 def CheckPrereq(self):
11333 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11334 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11335 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11337 need_nodes = self._DetermineNodes()
11339 if not owned_nodes.issuperset(need_nodes):
11340 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11341 " locks were acquired, current nodes are"
11342 " are '%s', used to be '%s'; retry the"
11344 (self.op.node_name,
11345 utils.CommaJoin(need_nodes),
11346 utils.CommaJoin(owned_nodes)),
11347 errors.ECODE_STATE)
11349 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11350 if owned_groups != wanted_groups:
11351 raise errors.OpExecError("Node groups changed since locks were acquired,"
11352 " current groups are '%s', used to be '%s';"
11353 " retry the operation" %
11354 (utils.CommaJoin(wanted_groups),
11355 utils.CommaJoin(owned_groups)))
11357 # Determine affected instances
11358 self.instances = self._DetermineInstances()
11359 self.instance_names = [i.name for i in self.instances]
11361 if set(self.instance_names) != owned_instances:
11362 raise errors.OpExecError("Instances on node '%s' changed since locks"
11363 " were acquired, current instances are '%s',"
11364 " used to be '%s'; retry the operation" %
11365 (self.op.node_name,
11366 utils.CommaJoin(self.instance_names),
11367 utils.CommaJoin(owned_instances)))
11369 if self.instance_names:
11370 self.LogInfo("Evacuating instances from node '%s': %s",
11371 self.op.node_name,
11372 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11373 else:
11374 self.LogInfo("No instances to evacuate from node '%s'",
11375 self.op.node_name)
11377 if self.op.remote_node is not None:
11378 for i in self.instances:
11379 if i.primary_node == self.op.remote_node:
11380 raise errors.OpPrereqError("Node %s is the primary node of"
11381 " instance %s, cannot use it as"
11383 (self.op.remote_node, i.name),
11384 errors.ECODE_INVAL)
11386 def Exec(self, feedback_fn):
11387 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11389 if not self.instance_names:
11390 # No instances to evacuate
11391 jobs = []
11393 elif self.op.iallocator is not None:
11394 # TODO: Implement relocation to other group
11395 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11396 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11397 instances=list(self.instance_names))
11399 ial.Run(self.op.iallocator)
11401 if not ial.success:
11402 raise errors.OpPrereqError("Can't compute node evacuation using"
11403 " iallocator '%s': %s" %
11404 (self.op.iallocator, ial.info),
11405 errors.ECODE_NORES)
11407 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11409 elif self.op.remote_node is not None:
11410 assert self.op.mode == constants.NODE_EVAC_SEC
11411 jobs = [
11412 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11413 remote_node=self.op.remote_node,
11414 disks=[],
11415 mode=constants.REPLACE_DISK_CHG,
11416 early_release=self.op.early_release)]
11417 for instance_name in self.instance_names
11418 ]
11420 else:
11421 raise errors.ProgrammerError("No iallocator or remote node")
11423 return ResultWithJobs(jobs)
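# Sketch (hypothetical instance/node names) of the job list built in the
# remote-node case above: one single-opcode job per evacuated instance, e.g.
#   jobs = [
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                     remote_node="node4.example.com",
#                                     disks=[],
#                                     mode=constants.REPLACE_DISK_CHG,
#                                     early_release=False)],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2.example.com",
#                                     remote_node="node4.example.com",
#                                     disks=[],
#                                     mode=constants.REPLACE_DISK_CHG,
#                                     early_release=False)],
#   ]
# so each instance is handled by its own job and failures stay independent.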
11426 def _SetOpEarlyRelease(early_release, op):
11427 """Sets C{early_release} flag on opcodes if available.
11429 """
11430 try:
11431 op.early_release = early_release
11432 except AttributeError:
11433 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11435 return op
11438 def _NodeEvacDest(use_nodes, group, nodes):
11439 """Returns group or nodes depending on caller's choice.
11443 return utils.CommaJoin(nodes)
11448 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11449 """Unpacks the result of change-group and node-evacuate iallocator requests.
11451 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11452 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11454 @type lu: L{LogicalUnit}
11455 @param lu: Logical unit instance
11456 @type alloc_result: tuple/list
11457 @param alloc_result: Result from iallocator
11458 @type early_release: bool
11459 @param early_release: Whether to release locks early if possible
11460 @type use_nodes: bool
11461 @param use_nodes: Whether to display node names instead of groups
11463 """
11464 (moved, failed, jobs) = alloc_result
11466 if failed:
11467 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11468 for (name, reason) in failed)
11469 lu.LogWarning("Unable to evacuate instances %s", failreason)
11470 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11472 if moved:
11473 lu.LogInfo("Instances to be moved: %s",
11474 utils.CommaJoin("%s (to %s)" %
11475 (name, _NodeEvacDest(use_nodes, group, nodes))
11476 for (name, group, nodes) in moved))
11478 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11479 map(opcodes.OpCode.LoadOpCode, ops))
11480 for ops in jobs]
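# Rough sketch (hypothetical data) of the iallocator result this helper
# unpacks; the exact field contents depend on the iallocator backend:
#   alloc_result = (
#     [("inst1.example.com", "group-uuid-1", ["node2.example.com"])],  # moved
#     [("inst9.example.com", "instance is not redundant")],            # failed
#     [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]],                 # jobs
#   )
# Each serialized opcode list is turned back into opcode objects with
# opcodes.OpCode.LoadOpCode, the early_release flag is applied where the opcode
# supports it, and the resulting per-job lists are returned for submission.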
11483 class LUInstanceGrowDisk(LogicalUnit):
11484 """Grow a disk of an instance.
11487 HPATH = "disk-grow"
11488 HTYPE = constants.HTYPE_INSTANCE
11489 REQ_BGL = False
11491 def ExpandNames(self):
11492 self._ExpandAndLockInstance()
11493 self.needed_locks[locking.LEVEL_NODE] = []
11494 self.needed_locks[locking.LEVEL_NODE_RES] = []
11495 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11496 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11498 def DeclareLocks(self, level):
11499 if level == locking.LEVEL_NODE:
11500 self._LockInstancesNodes()
11501 elif level == locking.LEVEL_NODE_RES:
11503 self.needed_locks[locking.LEVEL_NODE_RES] = \
11504 self.needed_locks[locking.LEVEL_NODE][:]
11506 def BuildHooksEnv(self):
11507 """Build hooks env.
11509 This runs on the master, the primary and all the secondaries.
11511 """
11512 env = {
11513 "DISK": self.op.disk,
11514 "AMOUNT": self.op.amount,
11515 }
11516 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11517 return env
11519 def BuildHooksNodes(self):
11520 """Build hooks nodes.
11523 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11526 def CheckPrereq(self):
11527 """Check prerequisites.
11529 This checks that the instance is in the cluster.
11531 """
11532 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11533 assert instance is not None, \
11534 "Cannot retrieve locked instance %s" % self.op.instance_name
11535 nodenames = list(instance.all_nodes)
11536 for node in nodenames:
11537 _CheckNodeOnline(self, node)
11539 self.instance = instance
11541 if instance.disk_template not in constants.DTS_GROWABLE:
11542 raise errors.OpPrereqError("Instance's disk layout does not support"
11543 " growing", errors.ECODE_INVAL)
11545 self.disk = instance.FindDisk(self.op.disk)
11547 if instance.disk_template not in (constants.DT_FILE,
11548 constants.DT_SHARED_FILE,
11549 constants.DT_RBD):
11550 # TODO: check the free disk space for file, when that feature will be
11551 # supported
11552 _CheckNodesFreeDiskPerVG(self, nodenames,
11553 self.disk.ComputeGrowth(self.op.amount))
11555 def Exec(self, feedback_fn):
11556 """Execute disk grow.
11559 instance = self.instance
11562 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11563 assert (self.owned_locks(locking.LEVEL_NODE) ==
11564 self.owned_locks(locking.LEVEL_NODE_RES))
11566 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11567 if not disks_ok:
11568 raise errors.OpExecError("Cannot activate block device to grow")
11570 feedback_fn("Growing disk %s of instance '%s' by %s" %
11571 (self.op.disk, instance.name,
11572 utils.FormatUnit(self.op.amount, "h")))
11574 # First run all grow ops in dry-run mode
11575 for node in instance.all_nodes:
11576 self.cfg.SetDiskID(disk, node)
11577 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11578 result.Raise("Grow request failed to node %s" % node)
11580 # We know that (as far as we can test) operations across different
11581 # nodes will succeed, time to run it for real
11582 for node in instance.all_nodes:
11583 self.cfg.SetDiskID(disk, node)
11584 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11585 result.Raise("Grow request failed to node %s" % node)
11587 # TODO: Rewrite code to work properly
11588 # DRBD goes into sync mode for a short amount of time after executing the
11589 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11590 # calling "resize" in sync mode fails. Sleeping for a short amount of
11591 # time is a work-around.
11593 time.sleep(5)
11594 disk.RecordGrow(self.op.amount)
11595 self.cfg.Update(instance, feedback_fn)
11597 # Changes have been recorded, release node lock
11598 _ReleaseLocks(self, locking.LEVEL_NODE)
11600 # Downgrade lock while waiting for sync
11601 self.glm.downgrade(locking.LEVEL_INSTANCE)
11603 if self.op.wait_for_sync:
11604 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11605 if disk_abort:
11606 self.proc.LogWarning("Disk sync-ing has not returned a good"
11607 " status; please check the instance")
11608 if instance.admin_state != constants.ADMINST_UP:
11609 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11610 elif instance.admin_state != constants.ADMINST_UP:
11611 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11612 " not supposed to be running because no wait for"
11613 " sync mode was requested")
11615 assert self.owned_locks(locking.LEVEL_NODE_RES)
11616 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
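# For reference (not part of the original code): this LU typically sits behind
# a command line such as
#   gnt-instance grow-disk --no-wait-for-sync inst1.example.com 0 2g
# (hypothetical instance name) which grows disk 0 by 2 GiB; the amount is the
# increment to add, not the new total size, matching self.op.amount above.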
11619 class LUInstanceQueryData(NoHooksLU):
11620 """Query runtime instance data.
11625 def ExpandNames(self):
11626 self.needed_locks = {}
11628 # Use locking if requested or when non-static information is wanted
11629 if not (self.op.static or self.op.use_locking):
11630 self.LogWarning("Non-static data requested, locks need to be acquired")
11631 self.op.use_locking = True
11633 if self.op.instances or not self.op.use_locking:
11634 # Expand instance names right here
11635 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11636 else:
11637 # Will use acquired locks
11638 self.wanted_names = None
11640 if self.op.use_locking:
11641 self.share_locks = _ShareAll()
11643 if self.wanted_names is None:
11644 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11645 else:
11646 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11648 self.needed_locks[locking.LEVEL_NODE] = []
11649 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11651 def DeclareLocks(self, level):
11652 if self.op.use_locking and level == locking.LEVEL_NODE:
11653 self._LockInstancesNodes()
11655 def CheckPrereq(self):
11656 """Check prerequisites.
11658 This only checks the optional instance list against the existing names.
11660 """
11661 if self.wanted_names is None:
11662 assert self.op.use_locking, "Locking was not used"
11663 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11665 self.wanted_instances = \
11666 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11668 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11669 """Returns the status of a block device
11672 if self.op.static or not node:
11675 self.cfg.SetDiskID(dev, node)
11677 result = self.rpc.call_blockdev_find(node, dev)
11681 result.Raise("Can't compute disk status for %s" % instance_name)
11683 status = result.payload
11687 return (status.dev_path, status.major, status.minor,
11688 status.sync_percent, status.estimated_time,
11689 status.is_degraded, status.ldisk_status)
11691 def _ComputeDiskStatus(self, instance, snode, dev):
11692 """Compute block device status.
11695 if dev.dev_type in constants.LDS_DRBD:
11696 # we change the snode then (otherwise we use the one passed in)
11697 if dev.logical_id[0] == instance.primary_node:
11698 snode = dev.logical_id[1]
11699 else:
11700 snode = dev.logical_id[0]
11702 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11703 instance.name, dev)
11704 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11706 if dev.children:
11707 dev_children = map(compat.partial(self._ComputeDiskStatus,
11708 instance, snode),
11709 dev.children)
11710 else:
11711 dev_children = []
11713 return {
11714 "iv_name": dev.iv_name,
11715 "dev_type": dev.dev_type,
11716 "logical_id": dev.logical_id,
11717 "physical_id": dev.physical_id,
11718 "pstatus": dev_pstatus,
11719 "sstatus": dev_sstatus,
11720 "children": dev_children,
11725 def Exec(self, feedback_fn):
11726 """Gather and return data"""
11729 cluster = self.cfg.GetClusterInfo()
11731 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11732 for i in self.wanted_instances)
11733 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11734 if self.op.static or pnode.offline:
11735 remote_state = None
11736 if pnode.offline:
11737 self.LogWarning("Primary node %s is marked offline, returning static"
11738 " information only for instance %s" %
11739 (pnode.name, instance.name))
11740 else:
11741 remote_info = self.rpc.call_instance_info(instance.primary_node,
11742 instance.name,
11743 instance.hypervisor)
11744 remote_info.Raise("Error checking node %s" % instance.primary_node)
11745 remote_info = remote_info.payload
11746 if remote_info and "state" in remote_info:
11747 remote_state = "up"
11748 else:
11749 if instance.admin_state == constants.ADMINST_UP:
11750 remote_state = "down"
11751 else:
11752 remote_state = instance.admin_state
11754 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11755 instance.disks)
11757 result[instance.name] = {
11758 "name": instance.name,
11759 "config_state": instance.admin_state,
11760 "run_state": remote_state,
11761 "pnode": instance.primary_node,
11762 "snodes": instance.secondary_nodes,
11764 # this happens to be the same format used for hooks
11765 "nics": _NICListToTuple(self, instance.nics),
11766 "disk_template": instance.disk_template,
11768 "hypervisor": instance.hypervisor,
11769 "network_port": instance.network_port,
11770 "hv_instance": instance.hvparams,
11771 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11772 "be_instance": instance.beparams,
11773 "be_actual": cluster.FillBE(instance),
11774 "os_instance": instance.osparams,
11775 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11776 "serial_no": instance.serial_no,
11777 "mtime": instance.mtime,
11778 "ctime": instance.ctime,
11779 "uuid": instance.uuid,
11785 class LUInstanceSetParams(LogicalUnit):
11786 """Modifies an instances's parameters.
11789 HPATH = "instance-modify"
11790 HTYPE = constants.HTYPE_INSTANCE
11792 REQ_BGL = False
11793 def CheckArguments(self):
11794 if not (self.op.nics or self.op.disks or self.op.disk_template or
11795 self.op.hvparams or self.op.beparams or self.op.os_name or
11796 self.op.online_inst or self.op.offline_inst or
11797 self.op.runtime_mem):
11798 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11800 if self.op.hvparams:
11801 _CheckGlobalHvParams(self.op.hvparams)
11803 # Disk validation
11804 disk_addremove = 0
11805 for disk_op, disk_dict in self.op.disks:
11806 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11807 if disk_op == constants.DDM_REMOVE:
11808 disk_addremove += 1
11809 continue
11810 elif disk_op == constants.DDM_ADD:
11811 disk_addremove += 1
11812 else:
11813 if not isinstance(disk_op, int):
11814 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11815 if not isinstance(disk_dict, dict):
11816 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11817 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11819 if disk_op == constants.DDM_ADD:
11820 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11821 if mode not in constants.DISK_ACCESS_SET:
11822 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11823 errors.ECODE_INVAL)
11824 size = disk_dict.get(constants.IDISK_SIZE, None)
11825 if size is None:
11826 raise errors.OpPrereqError("Required disk parameter size missing",
11827 errors.ECODE_INVAL)
11828 try:
11829 size = int(size)
11830 except (TypeError, ValueError), err:
11831 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11832 str(err), errors.ECODE_INVAL)
11833 disk_dict[constants.IDISK_SIZE] = size
11834 else:
11835 # modification of disk
11836 if constants.IDISK_SIZE in disk_dict:
11837 raise errors.OpPrereqError("Disk size change not possible, use"
11838 " grow-disk", errors.ECODE_INVAL)
11840 if disk_addremove > 1:
11841 raise errors.OpPrereqError("Only one disk add or remove operation"
11842 " supported at a time", errors.ECODE_INVAL)
11844 if self.op.disks and self.op.disk_template is not None:
11845 raise errors.OpPrereqError("Disk template conversion and other disk"
11846 " changes not supported at the same time",
11847 errors.ECODE_INVAL)
11849 if (self.op.disk_template and
11850 self.op.disk_template in constants.DTS_INT_MIRROR and
11851 self.op.remote_node is None):
11852 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11853 " one requires specifying a secondary node",
11854 errors.ECODE_INVAL)
11856 # NIC validation
11857 nic_addremove = 0
11858 for nic_op, nic_dict in self.op.nics:
11859 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11860 if nic_op == constants.DDM_REMOVE:
11861 nic_addremove += 1
11862 continue
11863 elif nic_op == constants.DDM_ADD:
11864 nic_addremove += 1
11865 else:
11866 if not isinstance(nic_op, int):
11867 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11868 if not isinstance(nic_dict, dict):
11869 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11870 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11872 # nic_dict should be a dict
11873 nic_ip = nic_dict.get(constants.INIC_IP, None)
11874 if nic_ip is not None:
11875 if nic_ip.lower() == constants.VALUE_NONE:
11876 nic_dict[constants.INIC_IP] = None
11877 else:
11878 if not netutils.IPAddress.IsValid(nic_ip):
11879 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11880 errors.ECODE_INVAL)
11882 nic_bridge = nic_dict.get("bridge", None)
11883 nic_link = nic_dict.get(constants.INIC_LINK, None)
11884 if nic_bridge and nic_link:
11885 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11886 " at the same time", errors.ECODE_INVAL)
11887 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11888 nic_dict["bridge"] = None
11889 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11890 nic_dict[constants.INIC_LINK] = None
11892 if nic_op == constants.DDM_ADD:
11893 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11894 if nic_mac is None:
11895 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11897 if constants.INIC_MAC in nic_dict:
11898 nic_mac = nic_dict[constants.INIC_MAC]
11899 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11900 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11902 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11903 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11904 " modifying an existing nic",
11905 errors.ECODE_INVAL)
11907 if nic_addremove > 1:
11908 raise errors.OpPrereqError("Only one NIC add or remove operation"
11909 " supported at a time", errors.ECODE_INVAL)
11911 def ExpandNames(self):
11912 self._ExpandAndLockInstance()
11913 # Can't even acquire node locks in shared mode as upcoming changes in
11914 # Ganeti 2.6 will start to modify the node object on disk conversion
11915 self.needed_locks[locking.LEVEL_NODE] = []
11916 self.needed_locks[locking.LEVEL_NODE_RES] = []
11917 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11919 def DeclareLocks(self, level):
11920 if level == locking.LEVEL_NODE:
11921 self._LockInstancesNodes()
11922 if self.op.disk_template and self.op.remote_node:
11923 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11924 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11925 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11927 self.needed_locks[locking.LEVEL_NODE_RES] = \
11928 self.needed_locks[locking.LEVEL_NODE][:]
11930 def BuildHooksEnv(self):
11931 """Build hooks env.
11933 This runs on the master, primary and secondaries.
11935 """
11936 args = dict()
11937 if constants.BE_MINMEM in self.be_new:
11938 args["minmem"] = self.be_new[constants.BE_MINMEM]
11939 if constants.BE_MAXMEM in self.be_new:
11940 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11941 if constants.BE_VCPUS in self.be_new:
11942 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11943 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11944 # information at all.
11946 args["nics"] = []
11947 nic_override = dict(self.op.nics)
11948 for idx, nic in enumerate(self.instance.nics):
11949 if idx in nic_override:
11950 this_nic_override = nic_override[idx]
11952 this_nic_override = {}
11953 if constants.INIC_IP in this_nic_override:
11954 ip = this_nic_override[constants.INIC_IP]
11955 else:
11956 ip = nic.ip
11957 if constants.INIC_MAC in this_nic_override:
11958 mac = this_nic_override[constants.INIC_MAC]
11959 else:
11960 mac = nic.mac
11961 if idx in self.nic_pnew:
11962 nicparams = self.nic_pnew[idx]
11963 else:
11964 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11965 mode = nicparams[constants.NIC_MODE]
11966 link = nicparams[constants.NIC_LINK]
11967 args["nics"].append((ip, mac, mode, link))
11968 if constants.DDM_ADD in nic_override:
11969 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11970 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11971 nicparams = self.nic_pnew[constants.DDM_ADD]
11972 mode = nicparams[constants.NIC_MODE]
11973 link = nicparams[constants.NIC_LINK]
11974 args["nics"].append((ip, mac, mode, link))
11975 elif constants.DDM_REMOVE in nic_override:
11976 del args["nics"][-1]
11978 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11979 if self.op.disk_template:
11980 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11981 if self.op.runtime_mem:
11982 env["RUNTIME_MEMORY"] = self.op.runtime_mem
11986 def BuildHooksNodes(self):
11987 """Build hooks nodes.
11990 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11993 def CheckPrereq(self):
11994 """Check prerequisites.
11996 This only checks the instance list against the existing names.
11998 """
11999 # checking the new params on the primary/secondary nodes
12001 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12002 cluster = self.cluster = self.cfg.GetClusterInfo()
12003 assert self.instance is not None, \
12004 "Cannot retrieve locked instance %s" % self.op.instance_name
12005 pnode = instance.primary_node
12006 nodelist = list(instance.all_nodes)
12007 pnode_info = self.cfg.GetNodeInfo(pnode)
12008 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12011 if self.op.os_name and not self.op.force:
12012 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12013 self.op.force_variant)
12014 instance_os = self.op.os_name
12015 else:
12016 instance_os = instance.os
12018 if self.op.disk_template:
12019 if instance.disk_template == self.op.disk_template:
12020 raise errors.OpPrereqError("Instance already has disk template %s" %
12021 instance.disk_template, errors.ECODE_INVAL)
12023 if (instance.disk_template,
12024 self.op.disk_template) not in self._DISK_CONVERSIONS:
12025 raise errors.OpPrereqError("Unsupported disk template conversion from"
12026 " %s to %s" % (instance.disk_template,
12027 self.op.disk_template),
12028 errors.ECODE_INVAL)
12029 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12030 msg="cannot change disk template")
12031 if self.op.disk_template in constants.DTS_INT_MIRROR:
12032 if self.op.remote_node == pnode:
12033 raise errors.OpPrereqError("Given new secondary node %s is the same"
12034 " as the primary node of the instance" %
12035 self.op.remote_node, errors.ECODE_STATE)
12036 _CheckNodeOnline(self, self.op.remote_node)
12037 _CheckNodeNotDrained(self, self.op.remote_node)
12038 # FIXME: here we assume that the old instance type is DT_PLAIN
12039 assert instance.disk_template == constants.DT_PLAIN
12040 disks = [{constants.IDISK_SIZE: d.size,
12041 constants.IDISK_VG: d.logical_id[0]}
12042 for d in instance.disks]
12043 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12044 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12046 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12047 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12048 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12049 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12050 ignore=self.op.ignore_ipolicy)
12051 if pnode_info.group != snode_info.group:
12052 self.LogWarning("The primary and secondary nodes are in two"
12053 " different node groups; the disk parameters"
12054 " from the first disk's node group will be"
12057 # hvparams processing
12058 if self.op.hvparams:
12059 hv_type = instance.hypervisor
12060 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12061 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12062 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12065 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12066 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12067 self.hv_proposed = self.hv_new = hv_new # the new actual values
12068 self.hv_inst = i_hvdict # the new dict (without defaults)
12069 else:
12070 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12071 instance.hvparams)
12072 self.hv_new = self.hv_inst = {}
12074 # beparams processing
12075 if self.op.beparams:
12076 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12077 use_none=True)
12078 objects.UpgradeBeParams(i_bedict)
12079 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12080 be_new = cluster.SimpleFillBE(i_bedict)
12081 self.be_proposed = self.be_new = be_new # the new actual values
12082 self.be_inst = i_bedict # the new dict (without defaults)
12083 else:
12084 self.be_new = self.be_inst = {}
12085 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12086 be_old = cluster.FillBE(instance)
12088 # CPU param validation -- checking every time a parameter is
12089 # changed to cover all cases where either CPU mask or vcpus have
12090 # changed
12091 if (constants.BE_VCPUS in self.be_proposed and
12092 constants.HV_CPU_MASK in self.hv_proposed):
12093 cpu_list = \
12094 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12095 # Verify mask is consistent with number of vCPUs. Can skip this
12096 # test if only 1 entry in the CPU mask, which means same mask
12097 # is applied to all vCPUs.
12098 if (len(cpu_list) > 1 and
12099 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12100 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12101 " CPU mask [%s]" %
12102 (self.be_proposed[constants.BE_VCPUS],
12103 self.hv_proposed[constants.HV_CPU_MASK]),
12104 errors.ECODE_INVAL)
12106 # Only perform this test if a new CPU mask is given
12107 if constants.HV_CPU_MASK in self.hv_new:
12108 # Calculate the largest CPU number requested
12109 max_requested_cpu = max(map(max, cpu_list))
12110 # Check that all of the instance's nodes have enough physical CPUs to
12111 # satisfy the requested CPU mask
12112 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12113 max_requested_cpu + 1, instance.hypervisor)
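# Worked example (hypothetical values) for the check above: with
# BE_VCPUS = 2 and HV_CPU_MASK = "1-3:4-6", ParseMultiCpuMask yields two
# per-vCPU entries, so the lengths match; the largest CPU requested is 6, so
# every node must expose at least 7 physical CPUs.  A single-entry mask such as
# "0-3" applies to all vCPUs and skips the length comparison.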
12115 # osparams processing
12116 if self.op.osparams:
12117 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12118 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12119 self.os_inst = i_osdict # the new dict (without defaults)
12120 else:
12121 self.os_inst = {}
12123 self.warn = []
12125 #TODO(dynmem): do the appropriate check involving MINMEM
12126 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12127 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12128 mem_check_list = [pnode]
12129 if be_new[constants.BE_AUTO_BALANCE]:
12130 # either we changed auto_balance to yes or it was from before
12131 mem_check_list.extend(instance.secondary_nodes)
12132 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12133 instance.hypervisor)
12134 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12135 [instance.hypervisor])
12136 pninfo = nodeinfo[pnode]
12137 msg = pninfo.fail_msg
12138 if msg:
12139 # Assume the primary node is unreachable and go ahead
12140 self.warn.append("Can't get info from primary node %s: %s" %
12141 (pnode, msg))
12142 else:
12143 (_, _, (pnhvinfo, )) = pninfo.payload
12144 if not isinstance(pnhvinfo.get("memory_free", None), int):
12145 self.warn.append("Node data from primary node %s doesn't contain"
12146 " free memory information" % pnode)
12147 elif instance_info.fail_msg:
12148 self.warn.append("Can't get instance runtime information: %s" %
12149 instance_info.fail_msg)
12150 else:
12151 if instance_info.payload:
12152 current_mem = int(instance_info.payload["memory"])
12153 else:
12154 # Assume instance not running
12155 # (there is a slight race condition here, but it's not very
12156 # probable, and we have no other way to check)
12157 # TODO: Describe race condition
12158 current_mem = 0
12159 #TODO(dynmem): do the appropriate check involving MINMEM
12160 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12161 pnhvinfo["memory_free"])
12162 if miss_mem > 0:
12163 raise errors.OpPrereqError("This change will prevent the instance"
12164 " from starting, due to %d MB of memory"
12165 " missing on its primary node" %
12167 errors.ECODE_NORES)
12169 if be_new[constants.BE_AUTO_BALANCE]:
12170 for node, nres in nodeinfo.items():
12171 if node not in instance.secondary_nodes:
12172 continue
12173 nres.Raise("Can't get info from secondary node %s" % node,
12174 prereq=True, ecode=errors.ECODE_STATE)
12175 (_, _, (nhvinfo, )) = nres.payload
12176 if not isinstance(nhvinfo.get("memory_free", None), int):
12177 raise errors.OpPrereqError("Secondary node %s didn't return free"
12178 " memory information" % node,
12179 errors.ECODE_STATE)
12180 #TODO(dynmem): do the appropriate check involving MINMEM
12181 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12182 raise errors.OpPrereqError("This change will prevent the instance"
12183 " from failover to its secondary node"
12184 " %s, due to not enough memory" % node,
12185 errors.ECODE_STATE)
12187 if self.op.runtime_mem:
12188 remote_info = self.rpc.call_instance_info(instance.primary_node,
12189 instance.name,
12190 instance.hypervisor)
12191 remote_info.Raise("Error checking node %s" % instance.primary_node)
12192 if not remote_info.payload: # not running already
12193 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12194 errors.ECODE_STATE)
12196 current_memory = remote_info.payload["memory"]
12197 if (not self.op.force and
12198 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12199 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12200 raise errors.OpPrereqError("Instance %s must have memory between %d"
12201 " and %d MB of memory unless --force is"
12202 " given" % (instance.name,
12203 self.be_proposed[constants.BE_MINMEM],
12204 self.be_proposed[constants.BE_MAXMEM]),
12205 errors.ECODE_INVAL)
12207 if self.op.runtime_mem > current_memory:
12208 _CheckNodeFreeMemory(self, instance.primary_node,
12209 "ballooning memory for instance %s" %
12211 self.op.memory - current_memory,
12212 instance.hypervisor)
12214 # NIC processing
12215 self.nic_pnew = {}
12216 self.nic_pinst = {}
12217 for nic_op, nic_dict in self.op.nics:
12218 if nic_op == constants.DDM_REMOVE:
12219 if not instance.nics:
12220 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
12221 errors.ECODE_INVAL)
12222 continue
12223 if nic_op != constants.DDM_ADD:
12225 if not instance.nics:
12226 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
12227 " no NICs" % nic_op,
12228 errors.ECODE_INVAL)
12229 if nic_op < 0 or nic_op >= len(instance.nics):
12230 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
12231 " are 0 to %s" %
12232 (nic_op, len(instance.nics) - 1),
12233 errors.ECODE_INVAL)
12234 old_nic_params = instance.nics[nic_op].nicparams
12235 old_nic_ip = instance.nics[nic_op].ip
12236 else:
12237 old_nic_params = {}
12238 old_nic_ip = None
12240 update_params_dict = dict([(key, nic_dict[key])
12241 for key in constants.NICS_PARAMETERS
12242 if key in nic_dict])
12244 if "bridge" in nic_dict:
12245 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12247 new_nic_params = _GetUpdatedParams(old_nic_params,
12248 update_params_dict)
12249 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12250 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12251 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12252 self.nic_pinst[nic_op] = new_nic_params
12253 self.nic_pnew[nic_op] = new_filled_nic_params
12254 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12256 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12257 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12258 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12259 if msg:
12260 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12261 if self.op.force:
12262 self.warn.append(msg)
12263 else:
12264 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12265 if new_nic_mode == constants.NIC_MODE_ROUTED:
12266 if constants.INIC_IP in nic_dict:
12267 nic_ip = nic_dict[constants.INIC_IP]
12268 else:
12269 nic_ip = old_nic_ip
12270 if nic_ip is None:
12271 raise errors.OpPrereqError("Cannot set the nic ip to None"
12272 " on a routed nic", errors.ECODE_INVAL)
12273 if constants.INIC_MAC in nic_dict:
12274 nic_mac = nic_dict[constants.INIC_MAC]
12275 if nic_mac is None:
12276 raise errors.OpPrereqError("Cannot set the nic mac to None",
12277 errors.ECODE_INVAL)
12278 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12279 # otherwise generate the mac
12280 nic_dict[constants.INIC_MAC] = \
12281 self.cfg.GenerateMAC(self.proc.GetECId())
12282 else:
12283 # or validate/reserve the current one
12284 try:
12285 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12286 except errors.ReservationError:
12287 raise errors.OpPrereqError("MAC address %s already in use"
12288 " in cluster" % nic_mac,
12289 errors.ECODE_NOTUNIQUE)
12292 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12293 raise errors.OpPrereqError("Disk operations not supported for"
12294 " diskless instances",
12295 errors.ECODE_INVAL)
12296 for disk_op, _ in self.op.disks:
12297 if disk_op == constants.DDM_REMOVE:
12298 if len(instance.disks) == 1:
12299 raise errors.OpPrereqError("Cannot remove the last disk of"
12300 " an instance", errors.ECODE_INVAL)
12301 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12302 msg="cannot remove disks")
12304 if (disk_op == constants.DDM_ADD and
12305 len(instance.disks) >= constants.MAX_DISKS):
12306 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12307 " add more" % constants.MAX_DISKS,
12308 errors.ECODE_STATE)
12309 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12311 if disk_op < 0 or disk_op >= len(instance.disks):
12312 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12313 " are 0 to %s" %
12314 (disk_op, len(instance.disks)),
12315 errors.ECODE_INVAL)
12317 # disabling the instance
12318 if self.op.offline_inst:
12319 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12320 msg="cannot change instance state to offline")
12322 # enabling the instance
12323 if self.op.online_inst:
12324 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12325 msg="cannot make instance go online")
12327 def _ConvertPlainToDrbd(self, feedback_fn):
12328 """Converts an instance from plain to drbd.
12331 feedback_fn("Converting template to drbd")
12332 instance = self.instance
12333 pnode = instance.primary_node
12334 snode = self.op.remote_node
12336 assert instance.disk_template == constants.DT_PLAIN
12338 # create a fake disk info for _GenerateDiskTemplate
12339 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12340 constants.IDISK_VG: d.logical_id[0]}
12341 for d in instance.disks]
12342 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12343 instance.name, pnode, [snode],
12344 disk_info, None, None, 0, feedback_fn,
12345 self.diskparams)
12346 info = _GetInstanceInfoText(instance)
12347 feedback_fn("Creating additional volumes...")
12348 # first, create the missing data and meta devices
12349 for disk in new_disks:
12350 # unfortunately this is... not too nice
12351 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12352 info, True)
12353 for child in disk.children:
12354 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12355 # at this stage, all new LVs have been created, we can rename the
12356 # old ones
12357 feedback_fn("Renaming original volumes...")
12358 rename_list = [(o, n.children[0].logical_id)
12359 for (o, n) in zip(instance.disks, new_disks)]
12360 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12361 result.Raise("Failed to rename original LVs")
12363 feedback_fn("Initializing DRBD devices...")
12364 # all child devices are in place, we can now create the DRBD devices
12365 for disk in new_disks:
12366 for node in [pnode, snode]:
12367 f_create = node == pnode
12368 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12370 # at this point, the instance has been modified
12371 instance.disk_template = constants.DT_DRBD8
12372 instance.disks = new_disks
12373 self.cfg.Update(instance, feedback_fn)
12375 # Release node locks while waiting for sync
12376 _ReleaseLocks(self, locking.LEVEL_NODE)
12378 # disks are created, waiting for sync
12379 disk_abort = not _WaitForSync(self, instance,
12380 oneshot=not self.op.wait_for_sync)
12381 if disk_abort:
12382 raise errors.OpExecError("There are some degraded disks for"
12383 " this instance, please cleanup manually")
12385 # Node resource locks will be released by caller
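# For reference (not part of the original code): this conversion typically runs
# behind a command such as
#   gnt-instance modify -t drbd -n node2.example.com inst1.example.com
# (hypothetical names), where -n supplies the new secondary; the reverse
# conversion below uses -t plain and needs no secondary node.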
12387 def _ConvertDrbdToPlain(self, feedback_fn):
12388 """Converts an instance from drbd to plain.
12391 instance = self.instance
12393 assert len(instance.secondary_nodes) == 1
12394 assert instance.disk_template == constants.DT_DRBD8
12396 pnode = instance.primary_node
12397 snode = instance.secondary_nodes[0]
12398 feedback_fn("Converting template to plain")
12400 old_disks = instance.disks
12401 new_disks = [d.children[0] for d in old_disks]
12403 # copy over size and mode
12404 for parent, child in zip(old_disks, new_disks):
12405 child.size = parent.size
12406 child.mode = parent.mode
12408 # update instance structure
12409 instance.disks = new_disks
12410 instance.disk_template = constants.DT_PLAIN
12411 self.cfg.Update(instance, feedback_fn)
12413 # Release locks in case removing disks takes a while
12414 _ReleaseLocks(self, locking.LEVEL_NODE)
12416 feedback_fn("Removing volumes on the secondary node...")
12417 for disk in old_disks:
12418 self.cfg.SetDiskID(disk, snode)
12419 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12420 if msg:
12421 self.LogWarning("Could not remove block device %s on node %s,"
12422 " continuing anyway: %s", disk.iv_name, snode, msg)
12424 feedback_fn("Removing unneeded volumes on the primary node...")
12425 for idx, disk in enumerate(old_disks):
12426 meta = disk.children[1]
12427 self.cfg.SetDiskID(meta, pnode)
12428 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12429 if msg:
12430 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12431 " continuing anyway: %s", idx, pnode, msg)
12433 # this is a DRBD disk, return its port to the pool
12434 for disk in old_disks:
12435 tcp_port = disk.logical_id[2]
12436 self.cfg.AddTcpUdpPort(tcp_port)
12438 # Node resource locks will be released by caller
12440 def Exec(self, feedback_fn):
12441 """Modifies an instance.
12443 All parameters take effect only at the next restart of the instance.
12445 """
12446 # Process here the warnings from CheckPrereq, as we don't have a
12447 # feedback_fn there.
12448 for warn in self.warn:
12449 feedback_fn("WARNING: %s" % warn)
12451 assert ((self.op.disk_template is None) ^
12452 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12453 "Not owning any node resource locks"
12455 result = []
12456 instance = self.instance
12458 # runtime memory
12459 if self.op.runtime_mem:
12460 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12461 instance,
12462 self.op.runtime_mem)
12463 rpcres.Raise("Cannot modify instance runtime memory")
12464 result.append(("runtime_memory", self.op.runtime_mem))
12467 for disk_op, disk_dict in self.op.disks:
12468 if disk_op == constants.DDM_REMOVE:
12469 # remove the last disk
12470 device = instance.disks.pop()
12471 device_idx = len(instance.disks)
12472 for node, disk in device.ComputeNodeTree(instance.primary_node):
12473 self.cfg.SetDiskID(disk, node)
12474 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12475 if msg:
12476 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12477 " continuing anyway", device_idx, node, msg)
12478 result.append(("disk/%d" % device_idx, "remove"))
12480 # if this is a DRBD disk, return its port to the pool
12481 if device.dev_type in constants.LDS_DRBD:
12482 tcp_port = device.logical_id[2]
12483 self.cfg.AddTcpUdpPort(tcp_port)
12484 elif disk_op == constants.DDM_ADD:
12486 if instance.disk_template in (constants.DT_FILE,
12487 constants.DT_SHARED_FILE):
12488 file_driver, file_path = instance.disks[0].logical_id
12489 file_path = os.path.dirname(file_path)
12490 else:
12491 file_driver = file_path = None
12492 disk_idx_base = len(instance.disks)
12493 new_disk = _GenerateDiskTemplate(self,
12494 instance.disk_template,
12495 instance.name, instance.primary_node,
12496 instance.secondary_nodes,
12497 [disk_dict],
12498 file_path,
12499 file_driver,
12500 disk_idx_base,
12501 feedback_fn,
12502 self.diskparams)[0]
12503 instance.disks.append(new_disk)
12504 info = _GetInstanceInfoText(instance)
12506 logging.info("Creating volume %s for instance %s",
12507 new_disk.iv_name, instance.name)
12508 # Note: this needs to be kept in sync with _CreateDisks
12510 for node in instance.all_nodes:
12511 f_create = node == instance.primary_node
12512 try:
12513 _CreateBlockDev(self, node, instance, new_disk,
12514 f_create, info, f_create)
12515 except errors.OpExecError, err:
12516 self.LogWarning("Failed to create volume %s (%s) on"
12517 " node %s: %s",
12518 new_disk.iv_name, new_disk, node, err)
12519 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12520 (new_disk.size, new_disk.mode)))
12521 else:
12522 # change a given disk
12523 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12524 result.append(("disk.mode/%d" % disk_op,
12525 disk_dict[constants.IDISK_MODE]))
12527 if self.op.disk_template:
12528 if __debug__:
12529 check_nodes = set(instance.all_nodes)
12530 if self.op.remote_node:
12531 check_nodes.add(self.op.remote_node)
12532 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12533 owned = self.owned_locks(level)
12534 assert not (check_nodes - owned), \
12535 ("Not owning the correct locks, owning %r, expected at least %r" %
12536 (owned, check_nodes))
12538 r_shut = _ShutdownInstanceDisks(self, instance)
12539 if not r_shut:
12540 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12541 " proceed with disk template conversion")
12542 mode = (instance.disk_template, self.op.disk_template)
12543 try:
12544 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12545 except:
12546 self.cfg.ReleaseDRBDMinors(instance.name)
12547 raise
12548 result.append(("disk_template", self.op.disk_template))
12550 assert instance.disk_template == self.op.disk_template, \
12551 ("Expected disk template '%s', found '%s'" %
12552 (self.op.disk_template, instance.disk_template))
12554 # Release node and resource locks if there are any (they might already have
12555 # been released during disk conversion)
12556 _ReleaseLocks(self, locking.LEVEL_NODE)
12557 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12560 for nic_op, nic_dict in self.op.nics:
12561 if nic_op == constants.DDM_REMOVE:
12562 # remove the last nic
12563 del instance.nics[-1]
12564 result.append(("nic.%d" % len(instance.nics), "remove"))
12565 elif nic_op == constants.DDM_ADD:
12566 # mac and bridge should be set, by now
12567 mac = nic_dict[constants.INIC_MAC]
12568 ip = nic_dict.get(constants.INIC_IP, None)
12569 nicparams = self.nic_pinst[constants.DDM_ADD]
12570 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12571 instance.nics.append(new_nic)
12572 result.append(("nic.%d" % (len(instance.nics) - 1),
12573 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12574 (new_nic.mac, new_nic.ip,
12575 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12576 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12577 )))
12578 else:
12579 for key in (constants.INIC_MAC, constants.INIC_IP):
12580 if key in nic_dict:
12581 setattr(instance.nics[nic_op], key, nic_dict[key])
12582 if nic_op in self.nic_pinst:
12583 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12584 for key, val in nic_dict.iteritems():
12585 result.append(("nic.%s/%d" % (key, nic_op), val))
12588 if self.op.hvparams:
12589 instance.hvparams = self.hv_inst
12590 for key, val in self.op.hvparams.iteritems():
12591 result.append(("hv/%s" % key, val))
12594 if self.op.beparams:
12595 instance.beparams = self.be_inst
12596 for key, val in self.op.beparams.iteritems():
12597 result.append(("be/%s" % key, val))
12600 if self.op.os_name:
12601 instance.os = self.op.os_name
12604 if self.op.osparams:
12605 instance.osparams = self.os_inst
12606 for key, val in self.op.osparams.iteritems():
12607 result.append(("os/%s" % key, val))
12609 # online/offline instance
12610 if self.op.online_inst:
12611 self.cfg.MarkInstanceDown(instance.name)
12612 result.append(("admin_state", constants.ADMINST_DOWN))
12613 if self.op.offline_inst:
12614 self.cfg.MarkInstanceOffline(instance.name)
12615 result.append(("admin_state", constants.ADMINST_OFFLINE))
12617 self.cfg.Update(instance, feedback_fn)
12619 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12620 self.owned_locks(locking.LEVEL_NODE)), \
12621 "All node locks should have been released by now"
12625 _DISK_CONVERSIONS = {
12626 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12627 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12628 }
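# Dispatch sketch: Exec() above looks the conversion handler up by the
# (old template, new template) pair, e.g.
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)   # -> _ConvertPlainToDrbd
# and CheckPrereq rejects any pair that is not a key of this table.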
12631 class LUInstanceChangeGroup(LogicalUnit):
12632 HPATH = "instance-change-group"
12633 HTYPE = constants.HTYPE_INSTANCE
12635 REQ_BGL = False
12636 def ExpandNames(self):
12637 self.share_locks = _ShareAll()
12638 self.needed_locks = {
12639 locking.LEVEL_NODEGROUP: [],
12640 locking.LEVEL_NODE: [],
12641 }
12643 self._ExpandAndLockInstance()
12645 if self.op.target_groups:
12646 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12647 self.op.target_groups)
12648 else:
12649 self.req_target_uuids = None
12651 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12653 def DeclareLocks(self, level):
12654 if level == locking.LEVEL_NODEGROUP:
12655 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12657 if self.req_target_uuids:
12658 lock_groups = set(self.req_target_uuids)
12660 # Lock all groups used by instance optimistically; this requires going
12661 # via the node before it's locked, requiring verification later on
12662 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12663 lock_groups.update(instance_groups)
12664 else:
12665 # No target groups, need to lock all of them
12666 lock_groups = locking.ALL_SET
12668 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12670 elif level == locking.LEVEL_NODE:
12671 if self.req_target_uuids:
12672 # Lock all nodes used by instances
12673 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12674 self._LockInstancesNodes()
12676 # Lock all nodes in all potential target groups
12677 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12678 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12679 member_nodes = [node_name
12680 for group in lock_groups
12681 for node_name in self.cfg.GetNodeGroup(group).members]
12682 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12683 else:
12684 # Lock all nodes as all groups are potential targets
12685 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12687 def CheckPrereq(self):
12688 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12689 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12690 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12692 assert (self.req_target_uuids is None or
12693 owned_groups.issuperset(self.req_target_uuids))
12694 assert owned_instances == set([self.op.instance_name])
12696 # Get instance information
12697 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12699 # Check if node groups for locked instance are still correct
12700 assert owned_nodes.issuperset(self.instance.all_nodes), \
12701 ("Instance %s's nodes changed while we kept the lock" %
12702 self.op.instance_name)
12704 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12705 owned_groups)
12707 if self.req_target_uuids:
12708 # User requested specific target groups
12709 self.target_uuids = self.req_target_uuids
12710 else:
12711 # All groups except those used by the instance are potential targets
12712 self.target_uuids = owned_groups - inst_groups
12714 conflicting_groups = self.target_uuids & inst_groups
12715 if conflicting_groups:
12716 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12717 " used by the instance '%s'" %
12718 (utils.CommaJoin(conflicting_groups),
12719 self.op.instance_name),
12720 errors.ECODE_INVAL)
12722 if not self.target_uuids:
12723 raise errors.OpPrereqError("There are no possible target groups",
12724 errors.ECODE_INVAL)
12726 def BuildHooksEnv(self):
12727 """Build hooks env.
12730 assert self.target_uuids
12733 "TARGET_GROUPS": " ".join(self.target_uuids),
12736 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12740 def BuildHooksNodes(self):
12741 """Build hooks nodes.
12744 mn = self.cfg.GetMasterNode()
12745 return ([mn], [mn])
12747 def Exec(self, feedback_fn):
12748 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12750 assert instances == [self.op.instance_name], "Instance not locked"
12752 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12753 instances=instances, target_groups=list(self.target_uuids))
12755 ial.Run(self.op.iallocator)
12757 if not ial.success:
12758 raise errors.OpPrereqError("Can't compute solution for changing group of"
12759 " instance '%s' using iallocator '%s': %s" %
12760 (self.op.instance_name, self.op.iallocator,
12761 ial.info),
12762 errors.ECODE_NORES)
12764 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12766 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12767 " instance '%s'", len(jobs), self.op.instance_name)
12769 return ResultWithJobs(jobs)
12772 class LUBackupQuery(NoHooksLU):
12773 """Query the exports list
12778 def ExpandNames(self):
12779 self.needed_locks = {}
12780 self.share_locks[locking.LEVEL_NODE] = 1
12781 if not self.op.nodes:
12782 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12783 else:
12784 self.needed_locks[locking.LEVEL_NODE] = \
12785 _GetWantedNodes(self, self.op.nodes)
12787 def Exec(self, feedback_fn):
12788 """Compute the list of all the exported system images.
12791 @return: a dictionary with the structure node->(export-list)
12792 where export-list is a list of the instances exported on
12793 that node.
12795 """
12796 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12797 rpcresult = self.rpc.call_export_list(self.nodes)
12798 result = {}
12799 for node in rpcresult:
12800 if rpcresult[node].fail_msg:
12801 result[node] = False
12802 else:
12803 result[node] = rpcresult[node].payload
12805 return result
12808 class LUBackupPrepare(NoHooksLU):
12809 """Prepares an instance for an export and returns useful information.
12814 def ExpandNames(self):
12815 self._ExpandAndLockInstance()
12817 def CheckPrereq(self):
12818 """Check prerequisites.
12821 instance_name = self.op.instance_name
12823 self.instance = self.cfg.GetInstanceInfo(instance_name)
12824 assert self.instance is not None, \
12825 "Cannot retrieve locked instance %s" % self.op.instance_name
12826 _CheckNodeOnline(self, self.instance.primary_node)
12828 self._cds = _GetClusterDomainSecret()
12830 def Exec(self, feedback_fn):
12831 """Prepares an instance for an export.
12834 instance = self.instance
12836 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12837 salt = utils.GenerateSecret(8)
12839 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12840 result = self.rpc.call_x509_cert_create(instance.primary_node,
12841 constants.RIE_CERT_VALIDITY)
12842 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12844 (name, cert_pem) = result.payload
12846 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12847 cert_pem)
12849 return {
12850 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12851 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12852 salt),
12853 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12854 }
12859 class LUBackupExport(LogicalUnit):
12860 """Export an instance to an image in the cluster.
12863 HPATH = "instance-export"
12864 HTYPE = constants.HTYPE_INSTANCE
12867 def CheckArguments(self):
12868 """Check the arguments.
12871 self.x509_key_name = self.op.x509_key_name
12872 self.dest_x509_ca_pem = self.op.destination_x509_ca
12874 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12875 if not self.x509_key_name:
12876 raise errors.OpPrereqError("Missing X509 key name for encryption",
12877 errors.ECODE_INVAL)
12879 if not self.dest_x509_ca_pem:
12880 raise errors.OpPrereqError("Missing destination X509 CA",
12881 errors.ECODE_INVAL)
12883 def ExpandNames(self):
12884 self._ExpandAndLockInstance()
12886 # Lock all nodes for local exports
12887 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12888 # FIXME: lock only instance primary and destination node
12890 # Sad but true, for now we have to lock all nodes, as we don't know where
12891 # the previous export might be, and in this LU we search for it and
12892 # remove it from its current node. In the future we could fix this by:
12893 # - making a tasklet to search (share-lock all), then create the
12894 # new one, then one to remove, after
12895 # - removing the removal operation altogether
12896 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12898 def DeclareLocks(self, level):
12899 """Last minute lock declaration."""
12900 # All nodes are locked anyway, so nothing to do here.
12902 def BuildHooksEnv(self):
12903 """Build hooks env.
12905 This will run on the master, primary node and target node.
12907 """
12908 env = {
12909 "EXPORT_MODE": self.op.mode,
12910 "EXPORT_NODE": self.op.target_node,
12911 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12912 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12913 # TODO: Generic function for boolean env variables
12914 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12917 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12921 def BuildHooksNodes(self):
12922 """Build hooks nodes.
12925 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12927 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12928 nl.append(self.op.target_node)
12930 return (nl, nl)
12932 def CheckPrereq(self):
12933 """Check prerequisites.
12935 This checks that the instance and node names are valid.
12938 instance_name = self.op.instance_name
12940 self.instance = self.cfg.GetInstanceInfo(instance_name)
12941 assert self.instance is not None, \
12942 "Cannot retrieve locked instance %s" % self.op.instance_name
12943 _CheckNodeOnline(self, self.instance.primary_node)
12945 if (self.op.remove_instance and
12946 self.instance.admin_state == constants.ADMINST_UP and
12947 not self.op.shutdown):
12948 raise errors.OpPrereqError("Can not remove instance without shutting it"
12949 " down before", errors.ECODE_STATE)
12951 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12952 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12953 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12954 assert self.dst_node is not None
12956 _CheckNodeOnline(self, self.dst_node.name)
12957 _CheckNodeNotDrained(self, self.dst_node.name)
12959 self._cds = None
12960 self.dest_disk_info = None
12961 self.dest_x509_ca = None
12963 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12964 self.dst_node = None
12966 if len(self.op.target_node) != len(self.instance.disks):
12967 raise errors.OpPrereqError(("Received destination information for %s"
12968 " disks, but instance %s has %s disks") %
12969 (len(self.op.target_node), instance_name,
12970 len(self.instance.disks)),
12971 errors.ECODE_INVAL)
12973 cds = _GetClusterDomainSecret()
12975 # Check X509 key name
12976 try:
12977 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12978 except (TypeError, ValueError), err:
12979 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12981 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12982 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12983 errors.ECODE_INVAL)
12985 # Load and verify CA
12986 try:
12987 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12988 except OpenSSL.crypto.Error, err:
12989 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12990 (err, ), errors.ECODE_INVAL)
12992 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12993 if errcode is not None:
12994 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12995 (msg, ), errors.ECODE_INVAL)
12997 self.dest_x509_ca = cert
12999 # Verify target information
13000 disk_info = []
13001 for idx, disk_data in enumerate(self.op.target_node):
13002 try:
13003 (host, port, magic) = \
13004 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13005 except errors.GenericError, err:
13006 raise errors.OpPrereqError("Target info for disk %s: %s" %
13007 (idx, err), errors.ECODE_INVAL)
13009 disk_info.append((host, port, magic))
13011 assert len(disk_info) == len(self.op.target_node)
13012 self.dest_disk_info = disk_info
13014 else:
13015 raise errors.ProgrammerError("Unhandled export mode %r" %
13016 self.op.mode)
13018 # instance disk type verification
13019 # TODO: Implement export support for file-based disks
13020 for disk in self.instance.disks:
13021 if disk.dev_type == constants.LD_FILE:
13022 raise errors.OpPrereqError("Export not supported for instances with"
13023 " file-based disks", errors.ECODE_INVAL)
13025 def _CleanupExports(self, feedback_fn):
13026 """Removes exports of current instance from all other nodes.
13028 If an instance in a cluster with nodes A..D was exported to node C, its
13029 exports will be removed from the nodes A, B and D.
13032 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13034 nodelist = self.cfg.GetNodeList()
13035 nodelist.remove(self.dst_node.name)
13037 # on one-node clusters nodelist will be empty after the removal
13038 # if we proceed the backup would be removed because OpBackupQuery
13039 # substitutes an empty list with the full cluster node list.
13040 iname = self.instance.name
13041 if nodelist:
13042 feedback_fn("Removing old exports for instance %s" % iname)
13043 exportlist = self.rpc.call_export_list(nodelist)
13044 for node in exportlist:
13045 if exportlist[node].fail_msg:
13046 continue
13047 if iname in exportlist[node].payload:
13048 msg = self.rpc.call_export_remove(node, iname).fail_msg
13049 if msg:
13050 self.LogWarning("Could not remove older export for instance %s"
13051 " on node %s: %s", iname, node, msg)
13053 def Exec(self, feedback_fn):
13054 """Export an instance to an image in the cluster.
13057 assert self.op.mode in constants.EXPORT_MODES
13059 instance = self.instance
13060 src_node = instance.primary_node
13062 if self.op.shutdown:
13063 # shutdown the instance, but not the disks
13064 feedback_fn("Shutting down instance %s" % instance.name)
13065 result = self.rpc.call_instance_shutdown(src_node, instance,
13066 self.op.shutdown_timeout)
13067 # TODO: Maybe ignore failures if ignore_remove_failures is set
13068 result.Raise("Could not shutdown instance %s on"
13069 " node %s" % (instance.name, src_node))
13071 # set the disks ID correctly since call_instance_start needs the
13072 # correct drbd minor to create the symlinks
13073 for disk in instance.disks:
13074 self.cfg.SetDiskID(disk, src_node)
13076 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13078 if activate_disks:
13079 # Activate the instance disks if we're exporting a stopped instance
13081 _StartInstanceDisks(self, instance, None)
13083 try:
13084 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13085 instance)
13087 helper.CreateSnapshots()
13088 try:
13089 if (self.op.shutdown and
13090 instance.admin_state == constants.ADMINST_UP and
13091 not self.op.remove_instance):
13092 assert not activate_disks
13093 feedback_fn("Starting instance %s" % instance.name)
13094 result = self.rpc.call_instance_start(src_node,
13095 (instance, None, None), False)
13096 msg = result.fail_msg
13097 if msg:
13098 feedback_fn("Failed to start instance: %s" % msg)
13099 _ShutdownInstanceDisks(self, instance)
13100 raise errors.OpExecError("Could not start instance: %s" % msg)
13102 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13103 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13104 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13105 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13106 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13108 (key_name, _, _) = self.x509_key_name
13110 dest_ca_pem = \
13111 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13112 self.dest_x509_ca)
13114 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13115 key_name, dest_ca_pem,
13116 timeouts)
13117 finally:
13118 helper.Cleanup()
13120 # Check for backwards compatibility
13121 assert len(dresults) == len(instance.disks)
13122 assert compat.all(isinstance(i, bool) for i in dresults), \
13123 "Not all results are boolean: %r" % dresults
13125 finally:
13126 if activate_disks:
13127 feedback_fn("Deactivating disks for %s" % instance.name)
13128 _ShutdownInstanceDisks(self, instance)
13130 if not (compat.all(dresults) and fin_resu):
13131 failures = []
13132 if not fin_resu:
13133 failures.append("export finalization")
13134 if not compat.all(dresults):
13135 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13136 if not dsk)
13137 failures.append("disk export: disk(s) %s" % fdsk)
13139 raise errors.OpExecError("Export failed, errors in %s" %
13140 utils.CommaJoin(failures))
13142 # At this point, the export was successful, we can cleanup/finish
13144 # Remove instance if requested
13145 if self.op.remove_instance:
13146 feedback_fn("Removing instance %s" % instance.name)
13147 _RemoveInstance(self, feedback_fn, instance,
13148 self.op.ignore_remove_failures)
13150 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13151 self._CleanupExports(feedback_fn)
13153 return fin_resu, dresults
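# Added commentary (not part of the original source): Exec() returns a pair of
# (overall finalization status, per-disk booleans), one boolean per instance
# disk as guaranteed by the asserts above. For example, a hypothetical result
# of (True, [True, False]) would mean the export finalized but the second disk
# failed, which the failure handling above reports as "disk export: disk(s) 1".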
13156 class LUBackupRemove(NoHooksLU):
13157 """Remove exports related to the named instance.
13162 def ExpandNames(self):
13163 self.needed_locks = {}
13164 # We need all nodes to be locked in order for RemoveExport to work, but we
13165 # don't need to lock the instance itself, as nothing will happen to it (and
13166 # we can remove exports also for a removed instance)
13167 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13169 def Exec(self, feedback_fn):
13170 """Remove any export.
13173 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13174 # If the instance was not found we'll try with the name that was passed in.
13175 # This will only work if it was an FQDN, though.
13176 fqdn_warn = False
13177 if not instance_name:
13178 fqdn_warn = True
13179 instance_name = self.op.instance_name
13181 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13182 exportlist = self.rpc.call_export_list(locked_nodes)
13183 found = False
13184 for node in exportlist:
13185 msg = exportlist[node].fail_msg
13186 if msg:
13187 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13188 continue
13189 if instance_name in exportlist[node].payload:
13190 found = True
13191 result = self.rpc.call_export_remove(node, instance_name)
13192 msg = result.fail_msg
13193 if msg:
13194 logging.error("Could not remove export for instance %s"
13195 " on node %s: %s", instance_name, node, msg)
13197 if fqdn_warn and not found:
13198 feedback_fn("Export not found. If trying to remove an export belonging"
13199 " to a deleted instance please use its Fully Qualified"
13203 class LUGroupAdd(LogicalUnit):
13204 """Logical unit for creating node groups.
13207 HPATH = "group-add"
13208 HTYPE = constants.HTYPE_GROUP
13211 def ExpandNames(self):
13212 # We need the new group's UUID here so that we can create and acquire the
13213 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13214 # that it should not check whether the UUID exists in the configuration.
13215 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13216 self.needed_locks = {}
13217 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13219 def CheckPrereq(self):
13220 """Check prerequisites.
13222 This checks that the given group name is not an existing node group
13223 already.
13225 """
13226 try:
13227 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13228 except errors.OpPrereqError:
13229 pass
13230 else:
13231 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13232 " node group (UUID: %s)" %
13233 (self.op.group_name, existing_uuid),
13234 errors.ECODE_EXISTS)
13236 if self.op.ndparams:
13237 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13239 if self.op.hv_state:
13240 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13241 else:
13242 self.new_hv_state = None
13244 if self.op.disk_state:
13245 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13246 else:
13247 self.new_disk_state = None
13249 if self.op.diskparams:
13250 for templ in constants.DISK_TEMPLATES:
13251 if templ not in self.op.diskparams:
13252 self.op.diskparams[templ] = {}
13253 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13254 else:
13255 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13257 if self.op.ipolicy:
13258 cluster = self.cfg.GetClusterInfo()
13259 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13260 try:
13261 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13262 except errors.ConfigurationError, err:
13263 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13264 errors.ECODE_INVAL)
13266 def BuildHooksEnv(self):
13267 """Build hooks env.
13271 "GROUP_NAME": self.op.group_name,
13274 def BuildHooksNodes(self):
13275 """Build hooks nodes.
13278 mn = self.cfg.GetMasterNode()
13279 return ([mn], [mn])
13281 def Exec(self, feedback_fn):
13282 """Add the node group to the cluster.
13285 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13286 uuid=self.group_uuid,
13287 alloc_policy=self.op.alloc_policy,
13288 ndparams=self.op.ndparams,
13289 diskparams=self.op.diskparams,
13290 ipolicy=self.op.ipolicy,
13291 hv_state_static=self.new_hv_state,
13292 disk_state_static=self.new_disk_state)
13294 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13295 del self.remove_locks[locking.LEVEL_NODEGROUP]
13298 class LUGroupAssignNodes(NoHooksLU):
13299 """Logical unit for assigning nodes to groups.
13304 def ExpandNames(self):
13305 # These raise errors.OpPrereqError on their own:
13306 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13307 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13309 # We want to lock all the affected nodes and groups. We have readily
13310 # available the list of nodes, and the *destination* group. To gather the
13311 # list of "source" groups, we need to fetch node information later on.
13312 self.needed_locks = {
13313 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13314 locking.LEVEL_NODE: self.op.nodes,
13315 }
13317 def DeclareLocks(self, level):
13318 if level == locking.LEVEL_NODEGROUP:
13319 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13321 # Try to get all affected nodes' groups without having the group or node
13322 # lock yet. Needs verification later in the code flow.
13323 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13325 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13327 def CheckPrereq(self):
13328 """Check prerequisites.
13331 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13332 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13333 frozenset(self.op.nodes))
13335 expected_locks = (set([self.group_uuid]) |
13336 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13337 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13338 if actual_locks != expected_locks:
13339 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13340 " current groups are '%s', used to be '%s'" %
13341 (utils.CommaJoin(expected_locks),
13342 utils.CommaJoin(actual_locks)))
13344 self.node_data = self.cfg.GetAllNodesInfo()
13345 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13346 instance_data = self.cfg.GetAllInstancesInfo()
13348 if self.group is None:
13349 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13350 (self.op.group_name, self.group_uuid))
13352 (new_splits, previous_splits) = \
13353 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13354 for node in self.op.nodes],
13355 self.node_data, instance_data)
13357 if new_splits:
13358 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13360 if not self.op.force:
13361 raise errors.OpExecError("The following instances get split by this"
13362 " change and --force was not given: %s" %
13363 fmt_new_splits)
13364 else:
13365 self.LogWarning("This operation will split the following instances: %s",
13366 fmt_new_splits)
13368 if previous_splits:
13369 self.LogWarning("In addition, these already-split instances continue"
13370 " to be split across groups: %s",
13371 utils.CommaJoin(utils.NiceSort(previous_splits)))
13373 def Exec(self, feedback_fn):
13374 """Assign nodes to a new group.
13377 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13379 self.cfg.AssignGroupNodes(mods)
13381 @staticmethod
13382 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13383 """Check for split instances after a node assignment.
13385 This method considers a series of node assignments as an atomic operation,
13386 and returns information about split instances after applying the set of
13389 In particular, it returns information about newly split instances, and
13390 instances that were already split, and remain so after the change.
13392 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13393 considered.
13395 @type changes: list of (node_name, new_group_uuid) pairs.
13396 @param changes: list of node assignments to consider.
13397 @param node_data: a dict with data for all nodes
13398 @param instance_data: a dict with all instances to consider
13399 @rtype: a two-tuple
13400 @return: a list of instances that were previously okay and result split as a
13401 consequence of this change, and a list of instances that were previously
13402 split and this change does not fix.
13405 changed_nodes = dict((node, group) for node, group in changes
13406 if node_data[node].group != group)
13408 all_split_instances = set()
13409 previously_split_instances = set()
13411 def InstanceNodes(instance):
13412 return [instance.primary_node] + list(instance.secondary_nodes)
13414 for inst in instance_data.values():
13415 if inst.disk_template not in constants.DTS_INT_MIRROR:
13416 continue
13418 instance_nodes = InstanceNodes(inst)
13420 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13421 previously_split_instances.add(inst.name)
13423 if len(set(changed_nodes.get(node, node_data[node].group)
13424 for node in instance_nodes)) > 1:
13425 all_split_instances.add(inst.name)
13427 return (list(all_split_instances - previously_split_instances),
13428 list(previously_split_instances & all_split_instances))
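# Illustrative example (added commentary, hypothetical names): take a DRBD
# instance with primary "node1" in group "g1" and secondary "node2" in group
# "g2", plus a second DRBD instance living entirely inside "g1". The first is
# already split, so it ends up in the second returned list (still split after
# the change); if the proposed assignment moves one node of the second
# instance into "g2", that instance becomes newly split and ends up in the
# first returned list.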
13431 class _GroupQuery(_QueryBase):
13432 FIELDS = query.GROUP_FIELDS
13434 def ExpandNames(self, lu):
13435 lu.needed_locks = {}
13437 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13438 self._cluster = lu.cfg.GetClusterInfo()
13439 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13441 if not self.names:
13442 self.wanted = [name_to_uuid[name]
13443 for name in utils.NiceSort(name_to_uuid.keys())]
13444 else:
13445 # Accept names to be either names or UUIDs.
13446 missing = []
13447 self.wanted = []
13448 all_uuid = frozenset(self._all_groups.keys())
13450 for name in self.names:
13451 if name in all_uuid:
13452 self.wanted.append(name)
13453 elif name in name_to_uuid:
13454 self.wanted.append(name_to_uuid[name])
13455 else:
13456 missing.append(name)
13458 if missing:
13459 raise errors.OpPrereqError("Some groups do not exist: %s" %
13460 utils.CommaJoin(missing),
13461 errors.ECODE_NOENT)
13463 def DeclareLocks(self, lu, level):
13464 pass
13466 def _GetQueryData(self, lu):
13467 """Computes the list of node groups and their attributes.
13470 do_nodes = query.GQ_NODE in self.requested_data
13471 do_instances = query.GQ_INST in self.requested_data
13473 group_to_nodes = None
13474 group_to_instances = None
13476 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13477 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13478 # latter GetAllInstancesInfo() is not enough, for we have to go through
13479 # instance->node. Hence, we will need to process nodes even if we only need
13480 # instance information.
13481 if do_nodes or do_instances:
13482 all_nodes = lu.cfg.GetAllNodesInfo()
13483 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13484 node_to_group = {}
13486 for node in all_nodes.values():
13487 if node.group in group_to_nodes:
13488 group_to_nodes[node.group].append(node.name)
13489 node_to_group[node.name] = node.group
13491 if do_instances:
13492 all_instances = lu.cfg.GetAllInstancesInfo()
13493 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13495 for instance in all_instances.values():
13496 node = instance.primary_node
13497 if node in node_to_group:
13498 group_to_instances[node_to_group[node]].append(instance.name)
13500 if not do_nodes:
13501 # Do not pass on node information if it was not requested.
13502 group_to_nodes = None
13504 return query.GroupQueryData(self._cluster,
13505 [self._all_groups[uuid]
13506 for uuid in self.wanted],
13507 group_to_nodes, group_to_instances)
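# Illustrative shape of the mappings built above (added commentary, made-up
# names): group_to_nodes could look like
#   {"uuid-g1": ["node1", "node2"], "uuid-g2": ["node3"]}
# while group_to_instances, keyed by the same group UUIDs, lists the instances
# whose primary node belongs to that group. Either mapping stays None when the
# corresponding GQ_NODE/GQ_INST data was not requested.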
13510 class LUGroupQuery(NoHooksLU):
13511 """Logical unit for querying node groups.
13516 def CheckArguments(self):
13517 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13518 self.op.output_fields, False)
13520 def ExpandNames(self):
13521 self.gq.ExpandNames(self)
13523 def DeclareLocks(self, level):
13524 self.gq.DeclareLocks(self, level)
13526 def Exec(self, feedback_fn):
13527 return self.gq.OldStyleQuery(self)
13530 class LUGroupSetParams(LogicalUnit):
13531 """Modifies the parameters of a node group.
13534 HPATH = "group-modify"
13535 HTYPE = constants.HTYPE_GROUP
13538 def CheckArguments(self):
13539 all_changes = [
13540 self.op.ndparams,
13541 self.op.diskparams,
13542 self.op.alloc_policy,
13543 self.op.hv_state,
13544 self.op.disk_state,
13545 self.op.ipolicy,
13546 ]
13548 if all_changes.count(None) == len(all_changes):
13549 raise errors.OpPrereqError("Please pass at least one modification",
13550 errors.ECODE_INVAL)
13552 def ExpandNames(self):
13553 # This raises errors.OpPrereqError on its own:
13554 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13556 self.needed_locks = {
13557 locking.LEVEL_INSTANCE: [],
13558 locking.LEVEL_NODEGROUP: [self.group_uuid],
13559 }
13561 self.share_locks[locking.LEVEL_INSTANCE] = 1
13563 def DeclareLocks(self, level):
13564 if level == locking.LEVEL_INSTANCE:
13565 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13567 # Lock instances optimistically, needs verification once group lock has
13568 # been acquired
13569 self.needed_locks[locking.LEVEL_INSTANCE] = \
13570 self.cfg.GetNodeGroupInstances(self.group_uuid)
13572 def CheckPrereq(self):
13573 """Check prerequisites.
13576 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13578 # Check if locked instances are still correct
13579 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13581 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13582 cluster = self.cfg.GetClusterInfo()
13584 if self.group is None:
13585 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13586 (self.op.group_name, self.group_uuid))
13588 if self.op.ndparams:
13589 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13590 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13591 self.new_ndparams = new_ndparams
13593 if self.op.diskparams:
13594 self.new_diskparams = dict()
13595 for templ in constants.DISK_TEMPLATES:
13596 if templ not in self.op.diskparams:
13597 self.op.diskparams[templ] = {}
13598 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13599 self.op.diskparams[templ])
13600 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13601 self.new_diskparams[templ] = new_templ_params
13603 if self.op.hv_state:
13604 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13605 self.group.hv_state_static)
13607 if self.op.disk_state:
13608 self.new_disk_state = \
13609 _MergeAndVerifyDiskState(self.op.disk_state,
13610 self.group.disk_state_static)
13612 if self.op.ipolicy:
13613 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13614 self.op.ipolicy,
13615 group_policy=True)
13617 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13618 inst_filter = lambda inst: inst.name in owned_instances
13619 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13620 violations = \
13621 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13622 self.group),
13623 new_ipolicy, instances)
13625 if violations:
13626 self.LogWarning("After the ipolicy change the following instances"
13627 " violate them: %s",
13628 utils.CommaJoin(violations))
13630 def BuildHooksEnv(self):
13631 """Build hooks env.
13635 "GROUP_NAME": self.op.group_name,
13636 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13639 def BuildHooksNodes(self):
13640 """Build hooks nodes.
13643 mn = self.cfg.GetMasterNode()
13644 return ([mn], [mn])
13646 def Exec(self, feedback_fn):
13647 """Modifies the node group.
13652 if self.op.ndparams:
13653 self.group.ndparams = self.new_ndparams
13654 result.append(("ndparams", str(self.group.ndparams)))
13656 if self.op.diskparams:
13657 self.group.diskparams = self.new_diskparams
13658 result.append(("diskparams", str(self.group.diskparams)))
13660 if self.op.alloc_policy:
13661 self.group.alloc_policy = self.op.alloc_policy
13663 if self.op.hv_state:
13664 self.group.hv_state_static = self.new_hv_state
13666 if self.op.disk_state:
13667 self.group.disk_state_static = self.new_disk_state
13669 if self.op.ipolicy:
13670 self.group.ipolicy = self.new_ipolicy
13672 self.cfg.Update(self.group, feedback_fn)
13674 return result
13676 class LUGroupRemove(LogicalUnit):
13677 HPATH = "group-remove"
13678 HTYPE = constants.HTYPE_GROUP
13681 def ExpandNames(self):
13682 # This raises errors.OpPrereqError on its own:
13683 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13684 self.needed_locks = {
13685 locking.LEVEL_NODEGROUP: [self.group_uuid],
13686 }
13688 def CheckPrereq(self):
13689 """Check prerequisites.
13691 This checks that the given group name exists as a node group, that it is
13692 empty (i.e., contains no nodes), and that it is not the last group of the
13693 cluster.
13695 """
13696 # Verify that the group is empty.
13697 group_nodes = [node.name
13698 for node in self.cfg.GetAllNodesInfo().values()
13699 if node.group == self.group_uuid]
13701 if group_nodes:
13702 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13703 " nodes: %s" %
13704 (self.op.group_name,
13705 utils.CommaJoin(utils.NiceSort(group_nodes))),
13706 errors.ECODE_STATE)
13708 # Verify the cluster would not be left group-less.
13709 if len(self.cfg.GetNodeGroupList()) == 1:
13710 raise errors.OpPrereqError("Group '%s' is the only group,"
13711 " cannot be removed" %
13712 self.op.group_name,
13713 errors.ECODE_STATE)
13715 def BuildHooksEnv(self):
13716 """Build hooks env.
13720 "GROUP_NAME": self.op.group_name,
13723 def BuildHooksNodes(self):
13724 """Build hooks nodes.
13727 mn = self.cfg.GetMasterNode()
13728 return ([mn], [mn])
13730 def Exec(self, feedback_fn):
13731 """Remove the node group.
13735 self.cfg.RemoveNodeGroup(self.group_uuid)
13736 except errors.ConfigurationError:
13737 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13738 (self.op.group_name, self.group_uuid))
13740 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13743 class LUGroupRename(LogicalUnit):
13744 HPATH = "group-rename"
13745 HTYPE = constants.HTYPE_GROUP
13748 def ExpandNames(self):
13749 # This raises errors.OpPrereqError on its own:
13750 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13752 self.needed_locks = {
13753 locking.LEVEL_NODEGROUP: [self.group_uuid],
13754 }
13756 def CheckPrereq(self):
13757 """Check prerequisites.
13759 Ensures requested new name is not yet used.
13761 """
13762 try:
13763 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13764 except errors.OpPrereqError:
13765 pass
13766 else:
13767 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13768 " node group (UUID: %s)" %
13769 (self.op.new_name, new_name_uuid),
13770 errors.ECODE_EXISTS)
13772 def BuildHooksEnv(self):
13773 """Build hooks env.
13777 "OLD_NAME": self.op.group_name,
13778 "NEW_NAME": self.op.new_name,
13781 def BuildHooksNodes(self):
13782 """Build hooks nodes.
13785 mn = self.cfg.GetMasterNode()
13787 all_nodes = self.cfg.GetAllNodesInfo()
13788 all_nodes.pop(mn, None)
13790 run_nodes = [mn]
13791 run_nodes.extend(node.name for node in all_nodes.values()
13792 if node.group == self.group_uuid)
13794 return (run_nodes, run_nodes)
13796 def Exec(self, feedback_fn):
13797 """Rename the node group.
13800 group = self.cfg.GetNodeGroup(self.group_uuid)
13802 if group is None:
13803 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13804 (self.op.group_name, self.group_uuid))
13806 group.name = self.op.new_name
13807 self.cfg.Update(group, feedback_fn)
13809 return self.op.new_name
13812 class LUGroupEvacuate(LogicalUnit):
13813 HPATH = "group-evacuate"
13814 HTYPE = constants.HTYPE_GROUP
13817 def ExpandNames(self):
13818 # This raises errors.OpPrereqError on its own:
13819 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13821 if self.op.target_groups:
13822 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13823 self.op.target_groups)
13824 else:
13825 self.req_target_uuids = []
13827 if self.group_uuid in self.req_target_uuids:
13828 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13829 " as a target group (targets are %s)" %
13830 (self.group_uuid,
13831 utils.CommaJoin(self.req_target_uuids)),
13832 errors.ECODE_INVAL)
13834 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13836 self.share_locks = _ShareAll()
13837 self.needed_locks = {
13838 locking.LEVEL_INSTANCE: [],
13839 locking.LEVEL_NODEGROUP: [],
13840 locking.LEVEL_NODE: [],
13841 }
13843 def DeclareLocks(self, level):
13844 if level == locking.LEVEL_INSTANCE:
13845 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13847 # Lock instances optimistically, needs verification once node and group
13848 # locks have been acquired
13849 self.needed_locks[locking.LEVEL_INSTANCE] = \
13850 self.cfg.GetNodeGroupInstances(self.group_uuid)
13852 elif level == locking.LEVEL_NODEGROUP:
13853 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13855 if self.req_target_uuids:
13856 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13858 # Lock all groups used by instances optimistically; this requires going
13859 # via the node before it's locked, requiring verification later on
13860 lock_groups.update(group_uuid
13861 for instance_name in
13862 self.owned_locks(locking.LEVEL_INSTANCE)
13863 for group_uuid in
13864 self.cfg.GetInstanceNodeGroups(instance_name))
13865 else:
13866 # No target groups, need to lock all of them
13867 lock_groups = locking.ALL_SET
13869 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13871 elif level == locking.LEVEL_NODE:
13872 # This will only lock the nodes in the group to be evacuated which
13873 # contain actual instances
13874 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13875 self._LockInstancesNodes()
13877 # Lock all nodes in group to be evacuated and target groups
13878 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13879 assert self.group_uuid in owned_groups
13880 member_nodes = [node_name
13881 for group in owned_groups
13882 for node_name in self.cfg.GetNodeGroup(group).members]
13883 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13885 def CheckPrereq(self):
13886 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13887 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13888 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13890 assert owned_groups.issuperset(self.req_target_uuids)
13891 assert self.group_uuid in owned_groups
13893 # Check if locked instances are still correct
13894 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13896 # Get instance information
13897 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13899 # Check if node groups for locked instances are still correct
13900 for instance_name in owned_instances:
13901 inst = self.instances[instance_name]
13902 assert owned_nodes.issuperset(inst.all_nodes), \
13903 "Instance %s's nodes changed while we kept the lock" % instance_name
13905 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13906 owned_groups)
13908 assert self.group_uuid in inst_groups, \
13909 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13911 if self.req_target_uuids:
13912 # User requested specific target groups
13913 self.target_uuids = self.req_target_uuids
13914 else:
13915 # All groups except the one to be evacuated are potential targets
13916 self.target_uuids = [group_uuid for group_uuid in owned_groups
13917 if group_uuid != self.group_uuid]
13919 if not self.target_uuids:
13920 raise errors.OpPrereqError("There are no possible target groups",
13921 errors.ECODE_INVAL)
13923 def BuildHooksEnv(self):
13924 """Build hooks env.
13928 "GROUP_NAME": self.op.group_name,
13929 "TARGET_GROUPS": " ".join(self.target_uuids),
13932 def BuildHooksNodes(self):
13933 """Build hooks nodes.
13936 mn = self.cfg.GetMasterNode()
13938 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13940 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13942 return (run_nodes, run_nodes)
13944 def Exec(self, feedback_fn):
13945 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13947 assert self.group_uuid not in self.target_uuids
13949 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13950 instances=instances, target_groups=self.target_uuids)
13952 ial.Run(self.op.iallocator)
13954 if not ial.success:
13955 raise errors.OpPrereqError("Can't compute group evacuation using"
13956 " iallocator '%s': %s" %
13957 (self.op.iallocator, ial.info),
13958 errors.ECODE_NORES)
13960 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13962 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13963 len(jobs), self.op.group_name)
13965 return ResultWithJobs(jobs)
13968 class TagsLU(NoHooksLU): # pylint: disable=W0223
13969 """Generic tags LU.
13971 This is an abstract class which is the parent of all the other tags LUs.
13974 def ExpandNames(self):
13975 self.group_uuid = None
13976 self.needed_locks = {}
13977 if self.op.kind == constants.TAG_NODE:
13978 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13979 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13980 elif self.op.kind == constants.TAG_INSTANCE:
13981 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13982 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13983 elif self.op.kind == constants.TAG_NODEGROUP:
13984 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13986 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13987 # not possible to acquire the BGL based on opcode parameters)
13989 def CheckPrereq(self):
13990 """Check prerequisites.
13993 if self.op.kind == constants.TAG_CLUSTER:
13994 self.target = self.cfg.GetClusterInfo()
13995 elif self.op.kind == constants.TAG_NODE:
13996 self.target = self.cfg.GetNodeInfo(self.op.name)
13997 elif self.op.kind == constants.TAG_INSTANCE:
13998 self.target = self.cfg.GetInstanceInfo(self.op.name)
13999 elif self.op.kind == constants.TAG_NODEGROUP:
14000 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14002 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14003 str(self.op.kind), errors.ECODE_INVAL)
14006 class LUTagsGet(TagsLU):
14007 """Returns the tags of a given object.
14012 def ExpandNames(self):
14013 TagsLU.ExpandNames(self)
14015 # Share locks as this is only a read operation
14016 self.share_locks = _ShareAll()
14018 def Exec(self, feedback_fn):
14019 """Returns the tag list.
14022 return list(self.target.GetTags())
14025 class LUTagsSearch(NoHooksLU):
14026 """Searches the tags for a given pattern.
14031 def ExpandNames(self):
14032 self.needed_locks = {}
14034 def CheckPrereq(self):
14035 """Check prerequisites.
14037 This checks the pattern passed for validity by compiling it.
14039 """
14040 try:
14041 self.re = re.compile(self.op.pattern)
14042 except re.error, err:
14043 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14044 (self.op.pattern, err), errors.ECODE_INVAL)
14046 def Exec(self, feedback_fn):
14047 """Returns the tag list.
14051 tgts = [("/cluster", cfg.GetClusterInfo())]
14052 ilist = cfg.GetAllInstancesInfo().values()
14053 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14054 nlist = cfg.GetAllNodesInfo().values()
14055 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14056 tgts.extend(("/nodegroup/%s" % n.name, n)
14057 for n in cfg.GetAllNodeGroupsInfo().values())
14058 results = []
14059 for path, target in tgts:
14060 for tag in target.GetTags():
14061 if self.re.search(tag):
14062 results.append((path, tag))
14064 return results
14066 class LUTagsSet(TagsLU):
14067 """Sets a tag on a given object.
14072 def CheckPrereq(self):
14073 """Check prerequisites.
14075 This checks the type and length of the tag name and value.
14078 TagsLU.CheckPrereq(self)
14079 for tag in self.op.tags:
14080 objects.TaggableObject.ValidateTag(tag)
14082 def Exec(self, feedback_fn):
14083 """Sets the tag.
14085 """
14086 try:
14087 for tag in self.op.tags:
14088 self.target.AddTag(tag)
14089 except errors.TagError, err:
14090 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14091 self.cfg.Update(self.target, feedback_fn)
14094 class LUTagsDel(TagsLU):
14095 """Delete a list of tags from a given object.
14100 def CheckPrereq(self):
14101 """Check prerequisites.
14103 This checks that we have the given tag.
14106 TagsLU.CheckPrereq(self)
14107 for tag in self.op.tags:
14108 objects.TaggableObject.ValidateTag(tag)
14109 del_tags = frozenset(self.op.tags)
14110 cur_tags = self.target.GetTags()
14112 diff_tags = del_tags - cur_tags
14113 if diff_tags:
14114 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14115 raise errors.OpPrereqError("Tag(s) %s not found" %
14116 (utils.CommaJoin(diff_names), ),
14117 errors.ECODE_NOENT)
14119 def Exec(self, feedback_fn):
14120 """Remove the tag from the object.
14123 for tag in self.op.tags:
14124 self.target.RemoveTag(tag)
14125 self.cfg.Update(self.target, feedback_fn)
14128 class LUTestDelay(NoHooksLU):
14129 """Sleep for a specified amount of time.
14131 This LU sleeps on the master and/or nodes for a specified amount of
14132 time.
14134 """
14137 def ExpandNames(self):
14138 """Expand names and set required locks.
14140 This expands the node list, if any.
14143 self.needed_locks = {}
14144 if self.op.on_nodes:
14145 # _GetWantedNodes can be used here, but is not always appropriate to use
14146 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14147 # more information.
14148 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14149 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14151 def _TestDelay(self):
14152 """Do the actual sleep.
14155 if self.op.on_master:
14156 if not utils.TestDelay(self.op.duration):
14157 raise errors.OpExecError("Error during master delay test")
14158 if self.op.on_nodes:
14159 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14160 for node, node_result in result.items():
14161 node_result.Raise("Failure during rpc call to node %s" % node)
14163 def Exec(self, feedback_fn):
14164 """Execute the test delay opcode, with the wanted repetitions.
14167 if self.op.repeat == 0:
14168 self._TestDelay()
14169 else:
14170 top_value = self.op.repeat - 1
14171 for i in range(self.op.repeat):
14172 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14173 self._TestDelay()
14176 class LUTestJqueue(NoHooksLU):
14177 """Utility LU to test some aspects of the job queue.
14182 # Must be lower than default timeout for WaitForJobChange to see whether it
14183 # notices changed jobs
14184 _CLIENT_CONNECT_TIMEOUT = 20.0
14185 _CLIENT_CONFIRM_TIMEOUT = 60.0
14187 @classmethod
14188 def _NotifyUsingSocket(cls, cb, errcls):
14189 """Opens a Unix socket and waits for another program to connect.
14192 @param cb: Callback to send socket name to client
14193 @type errcls: class
14194 @param errcls: Exception class to use for errors
14197 # Using a temporary directory as there's no easy way to create temporary
14198 # sockets without writing a custom loop around tempfile.mktemp and
14199 # socket.bind
14200 tmpdir = tempfile.mkdtemp()
14201 try:
14202 tmpsock = utils.PathJoin(tmpdir, "sock")
14204 logging.debug("Creating temporary socket at %s", tmpsock)
14205 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14206 try:
14207 sock.bind(tmpsock)
14208 sock.listen(1)
14210 # Send details to client
14211 cb(tmpsock)
14213 # Wait for client to connect before continuing
14214 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14215 try:
14216 (conn, _) = sock.accept()
14217 except socket.error, err:
14218 raise errcls("Client didn't connect in time (%s)" % err)
14219 finally:
14220 sock.close()
14221 finally:
14222 # Remove as soon as client is connected
14223 shutil.rmtree(tmpdir)
14225 # Wait for client to close
14226 try:
14227 try:
14228 # pylint: disable=E1101
14229 # Instance of '_socketobject' has no ... member
14230 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14231 conn.recv(1)
14232 except socket.error, err:
14233 raise errcls("Client failed to confirm notification (%s)" % err)
14234 finally:
14235 conn.close()
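# Hypothetical client-side sketch (added commentary, not part of the LU): a
# test client that received the socket path via the ELOG_JQUEUE_TEST message
# could confirm the notification roughly like this:
#
#   import socket
#   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   s.connect(sockname)  # unblocks sock.accept() above
#   ...                  # perform whatever check the test requires
#   s.close()            # conn.recv(1) above returns and the LU continues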
14237 def _SendNotification(self, test, arg, sockname):
14238 """Sends a notification to the client.
14241 @param test: Test name
14242 @param arg: Test argument (depends on test)
14243 @type sockname: string
14244 @param sockname: Socket path
14247 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14249 def _Notify(self, prereq, test, arg):
14250 """Notifies the client of a test.
14253 @param prereq: Whether this is a prereq-phase test
14255 @param test: Test name
14256 @param arg: Test argument (depends on test)
14258 """
14259 if prereq:
14260 errcls = errors.OpPrereqError
14261 else:
14262 errcls = errors.OpExecError
14264 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14265 test, arg),
14266 errcls)
14268 def CheckArguments(self):
14269 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14270 self.expandnames_calls = 0
14272 def ExpandNames(self):
14273 checkargs_calls = getattr(self, "checkargs_calls", 0)
14274 if checkargs_calls < 1:
14275 raise errors.ProgrammerError("CheckArguments was not called")
14277 self.expandnames_calls += 1
14279 if self.op.notify_waitlock:
14280 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14282 self.LogInfo("Expanding names")
14284 # Get lock on master node (just to get a lock, not for a particular reason)
14285 self.needed_locks = {
14286 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14287 }
14289 def Exec(self, feedback_fn):
14290 if self.expandnames_calls < 1:
14291 raise errors.ProgrammerError("ExpandNames was not called")
14293 if self.op.notify_exec:
14294 self._Notify(False, constants.JQT_EXEC, None)
14296 self.LogInfo("Executing")
14298 if self.op.log_messages:
14299 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14300 for idx, msg in enumerate(self.op.log_messages):
14301 self.LogInfo("Sending log message %s", idx + 1)
14302 feedback_fn(constants.JQT_MSGPREFIX + msg)
14303 # Report how many test messages have been sent
14304 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14306 if self.op.fail:
14307 raise errors.OpExecError("Opcode failure was requested")
14309 return True
14312 class IAllocator(object):
14313 """IAllocator framework.
14315 An IAllocator instance has the following sets of attributes:
14316 - cfg that is needed to query the cluster
14317 - input data (all members of the _KEYS class attribute are required)
14318 - four buffer attributes (in|out_data|text), that represent the
14319 input (to the external script) in text and data structure format,
14320 and the output from it, again in two formats
14321 - the result variables from the script (success, info, nodes) for
14322 easy usage
14324 """
14325 # pylint: disable=R0902
14326 # lots of instance attributes
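# Illustrative usage sketch (added commentary, not part of the original
# class): the LUs in this module drive the allocator roughly as follows,
# shown here with made-up placeholder values:
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=["inst1.example.com"],
#                    target_groups=["some-group-uuid"])
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     # ial.info carries the script's error message
#     raise errors.OpPrereqError(...)
#   jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)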
14328 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14329 self.cfg = cfg
14330 self.rpc = rpc_runner
14331 # init buffer variables
14332 self.in_text = self.out_text = self.in_data = self.out_data = None
14333 # init all input fields so that pylint is happy
14334 self.mode = mode
14335 self.memory = self.disks = self.disk_template = None
14336 self.os = self.tags = self.nics = self.vcpus = None
14337 self.hypervisor = None
14338 self.relocate_from = None
14339 self.name = None
14340 self.instances = None
14341 self.evac_mode = None
14342 self.target_groups = []
14344 self.required_nodes = None
14345 # init result fields
14346 self.success = self.info = self.result = None
14348 try:
14349 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14350 except KeyError:
14351 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14352 " IAllocator" % self.mode)
14354 keyset = [n for (n, _) in keydata]
14356 for key in kwargs:
14357 if key not in keyset:
14358 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14359 " IAllocator" % key)
14360 setattr(self, key, kwargs[key])
14362 for key in keyset:
14363 if key not in kwargs:
14364 raise errors.ProgrammerError("Missing input parameter '%s' to"
14365 " IAllocator" % key)
14366 self._BuildInputData(compat.partial(fn, self), keydata)
14368 def _ComputeClusterData(self):
14369 """Compute the generic allocator input data.
14371 This is the data that is independent of the actual operation.
14373 """
14374 cfg = self.cfg
14375 cluster_info = cfg.GetClusterInfo()
14377 data = {
14378 "version": constants.IALLOCATOR_VERSION,
14379 "cluster_name": cfg.GetClusterName(),
14380 "cluster_tags": list(cluster_info.GetTags()),
14381 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14382 "ipolicy": cluster_info.ipolicy,
14384 ninfo = cfg.GetAllNodesInfo()
14385 iinfo = cfg.GetAllInstancesInfo().values()
14386 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14389 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14391 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14392 hypervisor_name = self.hypervisor
14393 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14394 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14395 else:
14396 hypervisor_name = cluster_info.primary_hypervisor
14398 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14399 [hypervisor_name])
14400 node_iinfo = \
14401 self.rpc.call_all_instances_info(node_list,
14402 cluster_info.enabled_hypervisors)
14404 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14406 config_ndata = self._ComputeBasicNodeData(ninfo)
14407 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14408 i_list, config_ndata)
14409 assert len(data["nodes"]) == len(ninfo), \
14410 "Incomplete node data computed"
14412 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14414 self.in_data = data
14416 @staticmethod
14417 def _ComputeNodeGroupData(cfg):
14418 """Compute node groups data.
14421 cluster = cfg.GetClusterInfo()
14422 ng = dict((guuid, {
14423 "name": gdata.name,
14424 "alloc_policy": gdata.alloc_policy,
14425 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14426 })
14427 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14429 return ng
14431 @staticmethod
14432 def _ComputeBasicNodeData(node_cfg):
14433 """Compute global node data.
14436 @returns: a dict of name: (node dict, node config)
14439 # fill in static (config-based) values
14440 node_results = dict((ninfo.name, {
14441 "tags": list(ninfo.GetTags()),
14442 "primary_ip": ninfo.primary_ip,
14443 "secondary_ip": ninfo.secondary_ip,
14444 "offline": ninfo.offline,
14445 "drained": ninfo.drained,
14446 "master_candidate": ninfo.master_candidate,
14447 "group": ninfo.group,
14448 "master_capable": ninfo.master_capable,
14449 "vm_capable": ninfo.vm_capable,
14451 for ninfo in node_cfg.values())
14453 return node_results
14455 @staticmethod
14456 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14457 node_results):
14458 """Compute global node data.
14460 @param node_results: the basic node structures as filled from the config
14463 #TODO(dynmem): compute the right data on MAX and MIN memory
14464 # make a copy of the current dict
14465 node_results = dict(node_results)
14466 for nname, nresult in node_data.items():
14467 assert nname in node_results, "Missing basic data for node %s" % nname
14468 ninfo = node_cfg[nname]
14470 if not (ninfo.offline or ninfo.drained):
14471 nresult.Raise("Can't get data for node %s" % nname)
14472 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14473 nname)
14474 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14476 for attr in ["memory_total", "memory_free", "memory_dom0",
14477 "vg_size", "vg_free", "cpu_total"]:
14478 if attr not in remote_info:
14479 raise errors.OpExecError("Node '%s' didn't return attribute"
14480 " '%s'" % (nname, attr))
14481 if not isinstance(remote_info[attr], int):
14482 raise errors.OpExecError("Node '%s' returned invalid value"
14483 " for '%s': %s" %
14484 (nname, attr, remote_info[attr]))
14485 # compute memory used by primary instances
14486 i_p_mem = i_p_up_mem = 0
14487 for iinfo, beinfo in i_list:
14488 if iinfo.primary_node == nname:
14489 i_p_mem += beinfo[constants.BE_MAXMEM]
14490 if iinfo.name not in node_iinfo[nname].payload:
14491 i_used_mem = 0
14492 else:
14493 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14494 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14495 remote_info["memory_free"] -= max(0, i_mem_diff)
14497 if iinfo.admin_state == constants.ADMINST_UP:
14498 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14500 # compute memory used by instances
14502 "total_memory": remote_info["memory_total"],
14503 "reserved_memory": remote_info["memory_dom0"],
14504 "free_memory": remote_info["memory_free"],
14505 "total_disk": remote_info["vg_size"],
14506 "free_disk": remote_info["vg_free"],
14507 "total_cpus": remote_info["cpu_total"],
14508 "i_pri_memory": i_p_mem,
14509 "i_pri_up_memory": i_p_up_mem,
14510 }
14511 pnr_dyn.update(node_results[nname])
14512 node_results[nname] = pnr_dyn
14514 return node_results
14516 @staticmethod
14517 def _ComputeInstanceData(cluster_info, i_list):
14518 """Compute global instance data.
14520 """
14521 instance_data = {}
14522 for iinfo, beinfo in i_list:
14523 nic_data = []
14524 for nic in iinfo.nics:
14525 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14526 nic_dict = {
14527 "mac": nic.mac,
14528 "ip": nic.ip,
14529 "mode": filled_params[constants.NIC_MODE],
14530 "link": filled_params[constants.NIC_LINK],
14531 }
14532 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14533 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14534 nic_data.append(nic_dict)
14536 "tags": list(iinfo.GetTags()),
14537 "admin_state": iinfo.admin_state,
14538 "vcpus": beinfo[constants.BE_VCPUS],
14539 "memory": beinfo[constants.BE_MAXMEM],
14541 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14543 "disks": [{constants.IDISK_SIZE: dsk.size,
14544 constants.IDISK_MODE: dsk.mode}
14545 for dsk in iinfo.disks],
14546 "disk_template": iinfo.disk_template,
14547 "hypervisor": iinfo.hypervisor,
14549 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14551 instance_data[iinfo.name] = pir
14553 return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
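
  # Illustrative sketch of the serialized "request" section for an allocation
  # run; the keys mirror the ones built by _AddNewInstance and validated via
  # _MODE_DATA below, the values are made-up examples:
  #
  #   "request": {
  #     "type": "allocate",              # constants.IALLOCATOR_MODE_ALLOC
  #     "name": "instance1.example.com",
  #     "memory": 1024,
  #     "vcpus": 1,
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_space_total": 10368,
  #     "required_nodes": 2,
  #     ...
  #   }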

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
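
  # Request metadata per iallocator mode: each value appears to be a tuple of
  # (request-building method, (key, validator) pairs consumed by
  # _BuildInputData, and a validator applied to the script's "result" field
  # via _result_check in _ValidateResult).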
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
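
  # Note: callers that only need the raw script output (for example
  # LUTestAllocator below, for direction-out tests) pass validate=False and
  # read out_text directly.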

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
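
  # Example with made-up values: given node2group={"node1": "uuid-a"} and
  # groups={"uuid-a": {"name": "default"}}, _NodesToGroups(node2group, groups,
  # ["node1", "unknown"]) returns ["default"]; unknown nodes are skipped and a
  # missing group falls back to its UUID.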


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
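
# Illustrative use: _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class registered above; an unknown resource name raises
# OpPrereqError.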