4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
72 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
74 #: Instance status in which an instance can be marked as offline/online
75 CAN_CHANGE_INSTANCE_OFFLINE = [
76 constants.ADMINST_DOWN,
77 constants.ADMINST_OFFLINE,
78 ]

81 class ResultWithJobs(object):
82   """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode result.
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
98   """
99   self.jobs = jobs
100   self.other = kwargs
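# Illustrative sketch, not part of the original module: a hypothetical LU can
# hand follow-up work back to the job queue by returning ResultWithJobs from
# its Exec method; mcpu.Processor._ProcessResult then submits the jobs.  The
# opcode and instance name below are examples only.
#
#   def Exec(self, feedback_fn):
#     jobs = [
#       [opcodes.OpInstanceStartup(instance_name="instance1.example.com")],
#       ]
#     return ResultWithJobs(jobs)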
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op validity.
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176   - ExpandNames is left as a purely lock-related function
177   - CheckPrereq is run after we have acquired locks (and possibly
178     waited for them)
180 The function is allowed to change the self.op attribute so that
181 later methods no longer need to worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys and lists of needed lock names (possibly empty) as values; the following rules apply:
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that level
200 - don't put anything for the BGL level
201 - if you want all locks at a level use locking.ALL_SET as a value
203 If you need to share locks (rather than acquire them exclusively) at one
204 level you can modify self.share_locks, setting a true value (usually 1) for
205 that level. By default locks are not shared.
207 This function can also define a list of tasklets, which then will be
208 executed in order instead of the usual LU-level CheckPrereq and Exec
209 functions, if those are not defined by the LU.
211 Examples::
213   # Acquire all nodes and one instance
214   self.needed_locks = {
215     locking.LEVEL_NODE: locking.ALL_SET,
216     locking.LEVEL_INSTANCE: ['instance1.example.com'],
217   }
218   # Acquire just two nodes
219   self.needed_locks = {
220     locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
221   }
222   # Acquire no locks
223   self.needed_locks = {} # No, you can't leave it to the default value None
225 """
226 # The implementation of this method is mandatory only if the new LU is
227 # concurrent, so that old LUs don't need to be changed all at the same
228 # time.
230 self.needed_locks = {} # Exclusive LUs don't need locks.
232 raise NotImplementedError
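# Illustrative sketch, not part of the original module: a minimal ExpandNames
# for a hypothetical single-instance LU ("LUExampleNoop" is a made-up name),
# combining the rules described in the docstring above.
#
#   class LUExampleNoop(NoHooksLU):
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       # Node locks are computed later, in DeclareLocks, from the instance
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#       self.share_locks[locking.LEVEL_NODE] = 1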
234 def DeclareLocks(self, level):
235 """Declare LU locking needs for a level
237 While most LUs can just declare their locking needs at ExpandNames time,
238 sometimes there's the need to calculate some locks after having acquired
239 the ones before. This function is called just before acquiring locks at a
240 particular level, but after acquiring the ones at lower levels, and permits
241 such calculations. It can be used to modify self.needed_locks, and by
242 default it does nothing.
244 This function is only called if you have something already set in
245 self.needed_locks for the level.
247 @param level: Locking level which is going to be locked
248 @type level: member of ganeti.locking.LEVELS
252 def CheckPrereq(self):
253 """Check prerequisites for this LU.
255 This method should check that the prerequisites for the execution
256 of this LU are fulfilled. It can do internode communication, but
257 it should be idempotent - no cluster or system changes are
260 The method should raise errors.OpPrereqError in case something is
261 not fulfilled. Its return value is ignored.
263 This method should also update all the parameters of the opcode to
264 their canonical form if it hasn't been done by ExpandNames before.
267 if self.tasklets is not None:
268 for (idx, tl) in enumerate(self.tasklets):
269 logging.debug("Checking prerequisites for tasklet %s/%s",
270 idx + 1, len(self.tasklets))
275 def Exec(self, feedback_fn):
278 This method should implement the actual work. It should raise
279 errors.OpExecError for failures that are somewhat dealt with in
283 if self.tasklets is not None:
284 for (idx, tl) in enumerate(self.tasklets):
285 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
288 raise NotImplementedError
290 def BuildHooksEnv(self):
291 """Build hooks environment for this LU.
294 @return: Dictionary containing the environment that will be used for
295 running the hooks for this LU. The keys of the dict must not be prefixed
296 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
297 will extend the environment with additional variables. If no environment
298 should be defined, an empty dictionary should be returned (not C{None}).
299 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
303 raise NotImplementedError
305 def BuildHooksNodes(self):
306 """Build list of nodes to run LU's hooks.
308 @rtype: tuple; (list, list)
309 @return: Tuple containing a list of node names on which the hook
310 should run before the execution and a list of node names on which the
311 hook should run after the execution. No nodes should be returned as an
312 empty list (and not None).
313 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
317 raise NotImplementedError
319 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
320 """Notify the LU about the results of its hooks.
322 This method is called every time a hooks phase is executed, and notifies
323 the Logical Unit about the hooks' result. The LU can then use it to alter
324 its result based on the hooks. By default the method does nothing and the
325 previous result is passed back unchanged but any LU can define it if it
326 wants to use the local cluster hook-scripts somehow.
328 @param phase: one of L{constants.HOOKS_PHASE_POST} or
329 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
330 @param hook_results: the results of the multi-node hooks rpc call
331 @param feedback_fn: function used to send feedback back to the caller
332 @param lu_result: the previous Exec result this LU had, or None
334 @return: the new Exec result, based on the previous result
338 # API must be kept, thus we ignore the "unused argument" and "could
339 # be a function" warnings
340 # pylint: disable=W0613,R0201
343 def _ExpandAndLockInstance(self):
344 """Helper function to expand and lock an instance.
346 Many LUs that work on an instance take its name in self.op.instance_name
347 and need to expand it and then declare the expanded name for locking. This
348 function does it, and then updates self.op.instance_name to the expanded
349 name. It also initializes needed_locks as a dict, if this hasn't been done before.
353 if self.needed_locks is None:
354 self.needed_locks = {}
356 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
357 "_ExpandAndLockInstance called with instance-level locks set"
358 self.op.instance_name = _ExpandInstanceName(self.cfg,
359 self.op.instance_name)
360 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
362 def _LockInstancesNodes(self, primary_only=False,
363 level=locking.LEVEL_NODE):
364 """Helper function to declare instances' nodes for locking.
366 This function should be called after locking one or more instances to lock
367 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
368 with all primary or secondary nodes for instances already locked and
369 present in self.needed_locks[locking.LEVEL_INSTANCE].
371 It should be called from DeclareLocks, and for safety only works if
372 self.recalculate_locks[locking.LEVEL_NODE] is set.
374 In the future it may grow parameters to just lock some instance's nodes, or
375 to just lock primaries or secondary nodes, if needed.
377 It should be called in DeclareLocks in a way similar to::
379 if level == locking.LEVEL_NODE:
380 self._LockInstancesNodes()
382 @type primary_only: boolean
383 @param primary_only: only lock primary nodes of locked instances
384 @param level: Which lock level to use for locking nodes
387 assert level in self.recalculate_locks, \
388 "_LockInstancesNodes helper function called with no nodes to recalculate"
390 # TODO: check if we've really been called with the instance locks held
392 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
393 # future we might want to have different behaviors depending on the value
394 # of self.recalculate_locks[locking.LEVEL_NODE]
395 wanted_nodes = []
396 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
397 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
398   wanted_nodes.append(instance.primary_node)
399   if not primary_only:
400     wanted_nodes.extend(instance.secondary_nodes)
402 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
403 self.needed_locks[level] = wanted_nodes
404 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
405 self.needed_locks[level].extend(wanted_nodes)
407 raise errors.ProgrammerError("Unknown recalculation mode")
409 del self.recalculate_locks[level]
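# Illustrative sketch, not part of the original module: the usual pairing of
# DeclareLocks with the helper above in a hypothetical LU; mcpu calls
# DeclareLocks once the instance locks are held, so node names can be derived
# from the locked instances.
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes(primary_only=True)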
412 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
413 """Simple LU which runs no hooks.
415 This LU is intended as a parent for other LogicalUnits which will
416 run no hooks, in order to reduce duplicate code.
422 def BuildHooksEnv(self):
423 """Empty BuildHooksEnv for NoHooksLu.
425 This just raises an error.
428 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
430 def BuildHooksNodes(self):
431 """Empty BuildHooksNodes for NoHooksLU.
434 raise AssertionError("BuildHooksNodes called for NoHooksLU")
438 """Tasklet base class.
440 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
441 they can mix legacy code with tasklets. Locking needs to be done in the LU,
442 tasklets know nothing about locks.
444 Subclasses must follow these rules:
445 - Implement CheckPrereq
449 def __init__(self, lu):
456 def CheckPrereq(self):
457 """Check prerequisites for this tasklets.
459 This method should check whether the prerequisites for the execution of
460 this tasklet are fulfilled. It can do internode communication, but it
461 should be idempotent - no cluster or system changes are allowed.
463 The method should raise errors.OpPrereqError in case something is not
464 fulfilled. Its return value is ignored.
466 This method should also update all parameters to their canonical form if it
467 hasn't been done before.
472 def Exec(self, feedback_fn):
473 """Execute the tasklet.
475 This method should implement the actual work. It should raise
476 errors.OpExecError for failures that are somewhat dealt with in code, or
480 raise NotImplementedError
484 """Base for query utility classes.
487 #: Attribute holding field definitions
488 FIELDS = None
490 def __init__(self, qfilter, fields, use_locking):
491 """Initializes this class.
494 self.use_locking = use_locking
496 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
497                          namefield="name")
498 self.requested_data = self.query.RequestedData()
499 self.names = self.query.RequestedNames()
501 # Sort only if no names were requested
502 self.sort_by_name = not self.names
504 self.do_locking = None
507 def _GetNames(self, lu, all_names, lock_level):
508 """Helper function to determine names asked for in the query.
511 if self.do_locking:
512   names = lu.owned_locks(lock_level)
513 else:
514   names = all_names
516 if self.wanted == locking.ALL_SET:
517 assert not self.names
518 # caller didn't specify names, so ordering is not important
519 return utils.NiceSort(names)
521 # caller specified names and we must keep the same order
523 assert not self.do_locking or lu.glm.is_owned(lock_level)
525 missing = set(self.wanted).difference(names)
526 if missing:
527   raise errors.OpExecError("Some items were removed before retrieving"
528                            " their data: %s" % missing)
530 # Return expanded names
531 return self.wanted
533 def ExpandNames(self, lu):
534 """Expand names for this query.
536 See L{LogicalUnit.ExpandNames}.
539 raise NotImplementedError()
541 def DeclareLocks(self, lu, level):
542 """Declare locks for this query.
544 See L{LogicalUnit.DeclareLocks}.
547 raise NotImplementedError()
549 def _GetQueryData(self, lu):
550 """Collects all data for this query.
552 @return: Query data object
555 raise NotImplementedError()
557 def NewStyleQuery(self, lu):
558 """Collect data and execute query.
561 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
562 sort_by_name=self.sort_by_name)
564 def OldStyleQuery(self, lu):
565 """Collect data and execute query.
568 return self.query.OldStyleQuery(self._GetQueryData(lu),
569 sort_by_name=self.sort_by_name)
573 """Returns a dict declaring all lock levels shared.
576 return dict.fromkeys(locking.LEVELS, 1)
579 def _MakeLegacyNodeInfo(data):
580 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
582 Converts the data into a single dictionary. This is fine for most use cases,
583 but some require information from more than one volume group or hypervisor.
586 (bootid, (vg_info, ), (hv_info, )) = data
588 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
589   "bootid": bootid,
590   })
593 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
594 """Checks if the owned node groups are still correct for an instance.
596 @type cfg: L{config.ConfigWriter}
597 @param cfg: The cluster configuration
598 @type instance_name: string
599 @param instance_name: Instance name
600 @type owned_groups: set or frozenset
601 @param owned_groups: List of currently owned node groups
604 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
606 if not owned_groups.issuperset(inst_groups):
607 raise errors.OpPrereqError("Instance %s's node groups changed since"
608 " locks were acquired, current groups are"
609 " '%s', owning groups '%s'; retry the"
612 utils.CommaJoin(inst_groups),
613 utils.CommaJoin(owned_groups)),
619 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
620 """Checks if the instances in a node group are still correct.
622 @type cfg: L{config.ConfigWriter}
623 @param cfg: The cluster configuration
624 @type group_uuid: string
625 @param group_uuid: Node group UUID
626 @type owned_instances: set or frozenset
627 @param owned_instances: List of currently owned instances
630 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
631 if owned_instances != wanted_instances:
632 raise errors.OpPrereqError("Instances in node group '%s' changed since"
633 " locks were acquired, wanted '%s', have '%s';"
634 " retry the operation" %
636 utils.CommaJoin(wanted_instances),
637 utils.CommaJoin(owned_instances)),
640 return wanted_instances
643 def _SupportsOob(cfg, node):
644 """Tells if node supports OOB.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type node: L{objects.Node}
649 @param node: The node
650 @return: The OOB script if supported or an empty string otherwise
653 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
656 def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names.
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
662 @param nodes: list of node names or None for all nodes
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
669 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
671 return utils.NiceSort(lu.cfg.GetNodeList())
674 def _GetWantedInstances(lu, instances):
675 """Returns list of checked and expanded instance names.
677 @type lu: L{LogicalUnit}
678 @param lu: the logical unit on whose behalf we execute
679 @type instances: list
680 @param instances: list of instance names or None for all instances
682 @return: the list of instances, sorted
683 @raise errors.OpPrereqError: if the instances parameter is wrong type
684 @raise errors.OpPrereqError: if any of the passed instances is not found
688 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
690 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
694 def _GetUpdatedParams(old_params, update_dict,
695 use_default=True, use_none=False):
696 """Return the new version of a parameter dictionary.
698 @type old_params: dict
699 @param old_params: old parameters
700 @type update_dict: dict
701 @param update_dict: dict containing new parameter values, or
702 constants.VALUE_DEFAULT to reset the parameter to its default
704 @type use_default: boolean
705 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
706     values as 'to be deleted' values
707 @type use_none: boolean
708 @param use_none: whether to recognise C{None} values as 'to be
709     deleted' values
711 @return: the new parameter dictionary
714 params_copy = copy.deepcopy(old_params)
715 for key, val in update_dict.iteritems():
716 if ((use_default and val == constants.VALUE_DEFAULT) or
717     (use_none and val is None)):
718   try:
719     del params_copy[key]
720   except KeyError:
721     pass
722 else:
723   params_copy[key] = val
724 return params_copy
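# Illustrative example, not part of the original module, showing the intended
# semantics of _GetUpdatedParams; the parameter names ("kernel_path", "pae",
# "acpi") are hypothetical hypervisor parameters used only for this sketch.
def _ExampleGetUpdatedParams():
  old = {"kernel_path": "/vmlinuz", "acpi": True}
  update = {"kernel_path": constants.VALUE_DEFAULT, "pae": True}
  # With use_default=True, VALUE_DEFAULT removes "kernel_path", while "pae"
  # is added and "acpi" is kept, giving {"acpi": True, "pae": True}
  return _GetUpdatedParams(old, update)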
727 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
728 """Return the new version of a instance policy.
730 @param group_policy: whether this policy applies to a group and thus
731 we should support removal of policy entries
734 use_none = use_default = group_policy
735 ipolicy = copy.deepcopy(old_ipolicy)
736 for key, value in new_ipolicy.items():
737 if key not in constants.IPOLICY_ALL_KEYS:
738 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
740 if key in constants.IPOLICY_ISPECS:
741 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
742 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
744 use_default=use_default)
746 if not value or value == [constants.VALUE_DEFAULT]:
750 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
751                            " on the cluster" % key,
754 if key in constants.IPOLICY_PARAMETERS:
755 # FIXME: we assume all such values are float
757 ipolicy[key] = float(value)
758 except (TypeError, ValueError), err:
759 raise errors.OpPrereqError("Invalid value for attribute"
760 " '%s': '%s', error: %s" %
761 (key, value, err), errors.ECODE_INVAL)
763 # FIXME: we assume all others are lists; this should be redone
765 ipolicy[key] = list(value)
767 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
768 except errors.ConfigurationError, err:
769 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
774 def _UpdateAndVerifySubDict(base, updates, type_check):
775 """Updates and verifies a dict with sub dicts of the same type.
777 @param base: The dict with the old data
778 @param updates: The dict with the new data
779 @param type_check: Dict suitable to ForceDictType to verify correct types
780 @returns: A new dict with updated and verified values
783 def fn(old, value):
784   new = _GetUpdatedParams(old, value)
785   utils.ForceDictType(new, type_check)
786   return new
788 ret = copy.deepcopy(base)
789 ret.update(dict((key, fn(base.get(key, {}), value))
790 for key, value in updates.items()))
794 def _MergeAndVerifyHvState(op_input, obj_input):
795 """Combines the hv state from an opcode with the one of the object
797 @param op_input: The input dict from the opcode
798 @param obj_input: The input dict from the objects
799 @return: The verified and updated dict
803 invalid_hvs = set(op_input) - constants.HYPER_TYPES
805 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
806 " %s" % utils.CommaJoin(invalid_hvs),
808 if obj_input is None:
810 type_check = constants.HVSTS_PARAMETER_TYPES
811 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
816 def _MergeAndVerifyDiskState(op_input, obj_input):
817 """Combines the disk state from an opcode with the one of the object
819 @param op_input: The input dict from the opcode
820 @param obj_input: The input dict from the objects
821 @return: The verified and updated dict
824 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
826 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
827 utils.CommaJoin(invalid_dst),
829 type_check = constants.DSS_PARAMETER_TYPES
830 if obj_input is None:
832 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
834 for key, value in op_input.items())
839 def _ReleaseLocks(lu, level, names=None, keep=None):
840 """Releases locks owned by an LU.
842 @type lu: L{LogicalUnit}
843 @param level: Lock level
844 @type names: list or None
845 @param names: Names of locks to release
846 @type keep: list or None
847 @param keep: Names of locks to retain
850 assert not (keep is not None and names is not None), \
851 "Only one of the 'names' and the 'keep' parameters can be given"
853 if names is not None:
854 should_release = names.__contains__
855 elif keep is not None:
856   should_release = lambda name: name not in keep
857 else:
858   should_release = None
860 owned = lu.owned_locks(level)
862 # Not owning any lock at this level, do nothing
866 retain = []
867 release = []
869 # Determine which locks to release
870 for name in owned:
871   if should_release(name):
872     release.append(name)
873   else:
874     retain.append(name)
876 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
878 # Release just some locks
879 lu.glm.release(level, names=release)
881 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
884 lu.glm.release(level)
886 assert not lu.glm.is_owned(level), "No locks should be owned"
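# Illustrative sketch, not part of the original module: after an LU has
# narrowed down the nodes it actually works on, it can shed the remaining
# node locks; "still_needed" is a hypothetical list of node names.
def _ExampleReleaseNodeLocks(lu, still_needed):
  # Keep only the locks named in still_needed ...
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=still_needed)
  # ... or, alternatively, drop every lock at this level:
  # _ReleaseLocks(lu, locking.LEVEL_NODE)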
889 def _MapInstanceDisksToNodes(instances):
890 """Creates a map from (node, volume) to instance name.
892 @type instances: list of L{objects.Instance}
893 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
896 return dict(((node, vol), inst.name)
897 for inst in instances
898 for (node, vols) in inst.MapLVsByNode().items()
902 def _RunPostHook(lu, node_name):
903 """Runs the post-hook for an opcode on a single node.
906 hm = lu.proc.BuildHooksManager(lu)
908 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
910 # pylint: disable=W0702
911 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
914 def _CheckOutputFields(static, dynamic, selected):
915 """Checks whether all selected fields are valid.
917 @type static: L{utils.FieldSet}
918 @param static: static fields set
919 @type dynamic: L{utils.FieldSet}
920 @param dynamic: dynamic fields set
927 delta = f.NonMatching(selected)
929 raise errors.OpPrereqError("Unknown output fields selected: %s"
930 % ",".join(delta), errors.ECODE_INVAL)
933 def _CheckGlobalHvParams(params):
934 """Validates that given hypervisor params are not global ones.
936 This will ensure that instances don't get customised versions of global parameters.
940 used_globals = constants.HVC_GLOBALS.intersection(params)
942 msg = ("The following hypervisor parameters are global and cannot"
943 " be customized at instance level, please modify them at"
944 " cluster level: %s" % utils.CommaJoin(used_globals))
945 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
948 def _CheckNodeOnline(lu, node, msg=None):
949 """Ensure that a given node is online.
951 @param lu: the LU on behalf of which we make the check
952 @param node: the node to check
953 @param msg: if passed, should be a message to replace the default one
954 @raise errors.OpPrereqError: if the node is offline
958 msg = "Can't use offline node"
959 if lu.cfg.GetNodeInfo(node).offline:
960 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
963 def _CheckNodeNotDrained(lu, node):
964 """Ensure that a given node is not drained.
966 @param lu: the LU on behalf of which we make the check
967 @param node: the node to check
968 @raise errors.OpPrereqError: if the node is drained
971 if lu.cfg.GetNodeInfo(node).drained:
972 raise errors.OpPrereqError("Can't use drained node %s" % node,
976 def _CheckNodeVmCapable(lu, node):
977 """Ensure that a given node is vm capable.
979 @param lu: the LU on behalf of which we make the check
980 @param node: the node to check
981 @raise errors.OpPrereqError: if the node is not vm capable
984 if not lu.cfg.GetNodeInfo(node).vm_capable:
985 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
989 def _CheckNodeHasOS(lu, node, os_name, force_variant):
990 """Ensure that a node supports a given OS.
992 @param lu: the LU on behalf of which we make the check
993 @param node: the node to check
994 @param os_name: the OS to query about
995 @param force_variant: whether to ignore variant errors
996 @raise errors.OpPrereqError: if the node is not supporting the OS
999 result = lu.rpc.call_os_get(node, os_name)
1000 result.Raise("OS '%s' not in supported OS list for node %s" %
1001              (os_name, node),
1002              prereq=True, ecode=errors.ECODE_INVAL)
1003 if not force_variant:
1004 _CheckOSVariant(result.payload, os_name)
1007 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1008 """Ensure that a node has the given secondary ip.
1010 @type lu: L{LogicalUnit}
1011 @param lu: the LU on behalf of which we make the check
1013 @param node: the node to check
1014 @type secondary_ip: string
1015 @param secondary_ip: the ip to check
1016 @type prereq: boolean
1017 @param prereq: whether to throw a prerequisite or an execute error
1018 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1019 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1022 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1023 result.Raise("Failure checking secondary ip on node %s" % node,
1024 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1025 if not result.payload:
1026 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1027 " please fix and re-run this command" % secondary_ip)
1028 if prereq:
1029   raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1030 else:
1031   raise errors.OpExecError(msg)
1034 def _GetClusterDomainSecret():
1035 """Reads the cluster domain secret.
1038 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1042 def _CheckInstanceState(lu, instance, req_states, msg=None):
1043 """Ensure that an instance is in one of the required states.
1045 @param lu: the LU on behalf of which we make the check
1046 @param instance: the instance to check
1047 @param msg: if passed, should be a message to replace the default one
1048 @raise errors.OpPrereqError: if the instance is not in the required state
1051 if msg is None:
1052   msg = "can't use instance from outside %s states" % ", ".join(req_states)
1053 if instance.admin_state not in req_states:
1054 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1055 (instance.name, instance.admin_state, msg),
1058 if constants.ADMINST_UP not in req_states:
1059 pnode = instance.primary_node
1060 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1061 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1062 prereq=True, ecode=errors.ECODE_ENVIRON)
1064 if instance.name in ins_l.payload:
1065 raise errors.OpPrereqError("Instance %s is running, %s" %
1066 (instance.name, msg), errors.ECODE_STATE)
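# Illustrative sketch, not part of the original module: callers typically pass
# one of the state lists defined near the top of this module; the helper name
# and message below are examples only.
def _ExampleRequireInstanceOnline(lu, instance):
  _CheckInstanceState(lu, instance, INSTANCE_ONLINE,
                      msg="cannot work on an offline instance")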
1069 def _ComputeMinMaxSpec(name, ipolicy, value):
1070 """Computes if value is in the desired range.
1072 @param name: name of the parameter for which we perform the check
1073 @param ipolicy: dictionary containing min, max and std values
1074 @param value: actual value that we want to use
1075 @return: None or element not meeting the criteria
1079 if value in [None, constants.VALUE_AUTO]:
1080   return None
1081 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1082 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1083 if value > max_v or min_v > value:
1084 return ("%s value %s is not in range [%s, %s]" %
1085 (name, value, min_v, max_v))
1089 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1090 nic_count, disk_sizes,
1091 _compute_fn=_ComputeMinMaxSpec):
1092 """Verifies ipolicy against provided specs.
1095 @param ipolicy: The ipolicy
1097 @param mem_size: The memory size
1098 @type cpu_count: int
1099 @param cpu_count: Used cpu cores
1100 @type disk_count: int
1101 @param disk_count: Number of disks used
1102 @type nic_count: int
1103 @param nic_count: Number of nics used
1104 @type disk_sizes: list of ints
1105 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1106 @param _compute_fn: The compute function (unittest only)
1107 @return: A list of violations, or an empty list if no violations are found
1110 assert disk_count == len(disk_sizes)
1112 test_settings = [
1113   (constants.ISPEC_MEM_SIZE, mem_size),
1114 (constants.ISPEC_CPU_COUNT, cpu_count),
1115 (constants.ISPEC_DISK_COUNT, disk_count),
1116 (constants.ISPEC_NIC_COUNT, nic_count),
1117 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1119 return filter(None,
1120               (_compute_fn(name, ipolicy, value)
1121 for (name, value) in test_settings))
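# Illustrative example, not part of the original module: checking one set of
# specs against a group's effective policy; the numbers are made up and
# _CalculateGroupIPolicy is defined further below in this module.
def _ExampleCheckSpecsAgainstGroupPolicy(cluster, group):
  ipolicy = _CalculateGroupIPolicy(cluster, group)
  violations = _ComputeIPolicySpecViolation(ipolicy, mem_size=4096,
                                            cpu_count=2, disk_count=1,
                                            nic_count=1, disk_sizes=[20480])
  if violations:
    raise errors.OpPrereqError("Specs violate the group policy: %s" %
                               utils.CommaJoin(violations),
                               errors.ECODE_INVAL)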
1124 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1125 _compute_fn=_ComputeIPolicySpecViolation):
1126 """Compute if instance meets the specs of ipolicy.
1129 @param ipolicy: The ipolicy to verify against
1130 @type instance: L{objects.Instance}
1131 @param instance: The instance to verify
1132 @param _compute_fn: The function to verify ipolicy (unittest only)
1133 @see: L{_ComputeIPolicySpecViolation}
1136 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1137 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1138 disk_count = len(instance.disks)
1139 disk_sizes = [disk.size for disk in instance.disks]
1140 nic_count = len(instance.nics)
1142 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1143                    disk_sizes)
1146 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1147 _compute_fn=_ComputeIPolicySpecViolation):
1148 """Compute if instance specs meets the specs of ipolicy.
1151 @param ipolicy: The ipolicy to verify against
1152 @param instance_spec: dict
1153 @param instance_spec: The instance spec to verify
1154 @param _compute_fn: The function to verify ipolicy (unittest only)
1155 @see: L{_ComputeIPolicySpecViolation}
1158 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1159 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1160 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1161 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1162 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1164 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1165                    disk_sizes)
1168 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1170 _compute_fn=_ComputeIPolicyInstanceViolation):
1171 """Compute if instance meets the specs of the new target group.
1173 @param ipolicy: The ipolicy to verify
1174 @param instance: The instance object to verify
1175 @param current_group: The current group of the instance
1176 @param target_group: The new group of the instance
1177 @param _compute_fn: The function to verify ipolicy (unittest only)
1178 @see: L{_ComputeIPolicySpecViolation}
1181 if current_group == target_group:
1182   return []
1184 return _compute_fn(ipolicy, instance)
1187 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1188 _compute_fn=_ComputeIPolicyNodeViolation):
1189 """Checks that the target node is correct in terms of instance policy.
1191 @param ipolicy: The ipolicy to verify
1192 @param instance: The instance object to verify
1193 @param node: The new node to relocate
1194 @param ignore: Ignore violations of the ipolicy
1195 @param _compute_fn: The function to verify ipolicy (unittest only)
1196 @see: L{_ComputeIPolicySpecViolation}
1199 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1200 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1202 if res:
1203   msg = ("Instance does not meet target node group's (%s) instance"
1204          " policy: %s") % (node.group, utils.CommaJoin(res))
1205   if ignore:
1206     lu.LogWarning(msg)
1207   else:
1208     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1211 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1212 """Computes a set of any instances that would violate the new ipolicy.
1214 @param old_ipolicy: The current (still in-place) ipolicy
1215 @param new_ipolicy: The new (to become) ipolicy
1216 @param instances: List of instances to verify
1217 @return: A list of instances which violate the new ipolicy but did not before
1220 return (_ComputeViolatingInstances(old_ipolicy, instances) -
1221 _ComputeViolatingInstances(new_ipolicy, instances))
1224 def _ExpandItemName(fn, name, kind):
1225 """Expand an item name.
1227 @param fn: the function to use for expansion
1228 @param name: requested item name
1229 @param kind: text description ('Node' or 'Instance')
1230 @return: the resolved (full) name
1231 @raise errors.OpPrereqError: if the item is not found
1234 full_name = fn(name)
1235 if full_name is None:
1236 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1241 def _ExpandNodeName(cfg, name):
1242 """Wrapper over L{_ExpandItemName} for nodes."""
1243 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1246 def _ExpandInstanceName(cfg, name):
1247 """Wrapper over L{_ExpandItemName} for instance."""
1248 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1251 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1252 minmem, maxmem, vcpus, nics, disk_template, disks,
1253 bep, hvp, hypervisor_name, tags):
1254 """Builds instance related env variables for hooks
1256 This builds the hook environment from individual variables.
1259 @param name: the name of the instance
1260 @type primary_node: string
1261 @param primary_node: the name of the instance's primary node
1262 @type secondary_nodes: list
1263 @param secondary_nodes: list of secondary nodes as strings
1264 @type os_type: string
1265 @param os_type: the name of the instance's OS
1266 @type status: string
1267 @param status: the desired status of the instance
1268 @type minmem: string
1269 @param minmem: the minimum memory size of the instance
1270 @type maxmem: string
1271 @param maxmem: the maximum memory size of the instance
1273 @param vcpus: the count of VCPUs the instance has
1275 @param nics: list of tuples (ip, mac, mode, link) representing
1276 the NICs the instance has
1277 @type disk_template: string
1278 @param disk_template: the disk template of the instance
1280 @param disks: the list of (size, mode) pairs
1282 @param bep: the backend parameters for the instance
1284 @param hvp: the hypervisor parameters for the instance
1285 @type hypervisor_name: string
1286 @param hypervisor_name: the hypervisor for the instance
1288 @param tags: list of instance tags as strings
1290 @return: the hook environment for this instance
1295 "INSTANCE_NAME": name,
1296 "INSTANCE_PRIMARY": primary_node,
1297 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1298 "INSTANCE_OS_TYPE": os_type,
1299 "INSTANCE_STATUS": status,
1300 "INSTANCE_MINMEM": minmem,
1301 "INSTANCE_MAXMEM": maxmem,
1302 # TODO(2.7) remove deprecated "memory" value
1303 "INSTANCE_MEMORY": maxmem,
1304 "INSTANCE_VCPUS": vcpus,
1305 "INSTANCE_DISK_TEMPLATE": disk_template,
1306 "INSTANCE_HYPERVISOR": hypervisor_name,
1309 nic_count = len(nics)
1310 for idx, (ip, mac, mode, link) in enumerate(nics):
1313 env["INSTANCE_NIC%d_IP" % idx] = ip
1314 env["INSTANCE_NIC%d_MAC" % idx] = mac
1315 env["INSTANCE_NIC%d_MODE" % idx] = mode
1316 env["INSTANCE_NIC%d_LINK" % idx] = link
1317 if mode == constants.NIC_MODE_BRIDGED:
1318 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1322 env["INSTANCE_NIC_COUNT"] = nic_count
1325 disk_count = len(disks)
1326 for idx, (size, mode) in enumerate(disks):
1327 env["INSTANCE_DISK%d_SIZE" % idx] = size
1328 env["INSTANCE_DISK%d_MODE" % idx] = mode
1332 env["INSTANCE_DISK_COUNT"] = disk_count
1337 env["INSTANCE_TAGS"] = " ".join(tags)
1339 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1340 for key, value in source.items():
1341 env["INSTANCE_%s_%s" % (kind, key)] = value
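# Illustrative example, not part of the original module: for a hypothetical
# instance with one bridged NIC and one 20 GiB disk, the environment built
# above would contain (before the hooks runner adds the GANETI_ prefix):
#
#   INSTANCE_NAME=instance1.example.com
#   INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_NIC_COUNT=1        INSTANCE_NIC0_MODE=bridged
#   INSTANCE_DISK_COUNT=1       INSTANCE_DISK0_SIZE=20480
#   INSTANCE_BE_maxmem=...      INSTANCE_HV_...=...   (from bep/hvp)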
1346 def _NICListToTuple(lu, nics):
1347 """Build a list of nic information tuples.
1349 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1350 value in LUInstanceQueryData.
1352 @type lu: L{LogicalUnit}
1353 @param lu: the logical unit on whose behalf we execute
1354 @type nics: list of L{objects.NIC}
1355 @param nics: list of nics to convert to hooks tuples
1359 cluster = lu.cfg.GetClusterInfo()
1363 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1364 mode = filled_params[constants.NIC_MODE]
1365 link = filled_params[constants.NIC_LINK]
1366 hooks_nics.append((ip, mac, mode, link))
1370 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1371 """Builds instance related env variables for hooks from an object.
1373 @type lu: L{LogicalUnit}
1374 @param lu: the logical unit on whose behalf we execute
1375 @type instance: L{objects.Instance}
1376 @param instance: the instance for which we should build the
1378 @type override: dict
1379 @param override: dictionary with key/values that will override
1382 @return: the hook environment dictionary
1385 cluster = lu.cfg.GetClusterInfo()
1386 bep = cluster.FillBE(instance)
1387 hvp = cluster.FillHV(instance)
1389 "name": instance.name,
1390 "primary_node": instance.primary_node,
1391 "secondary_nodes": instance.secondary_nodes,
1392 "os_type": instance.os,
1393 "status": instance.admin_state,
1394 "maxmem": bep[constants.BE_MAXMEM],
1395 "minmem": bep[constants.BE_MINMEM],
1396 "vcpus": bep[constants.BE_VCPUS],
1397 "nics": _NICListToTuple(lu, instance.nics),
1398 "disk_template": instance.disk_template,
1399 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1402 "hypervisor_name": instance.hypervisor,
1403 "tags": instance.tags,
1406 args.update(override)
1407 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1410 def _AdjustCandidatePool(lu, exceptions):
1411 """Adjust the candidate pool after node operations.
1414 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1416 lu.LogInfo("Promoted nodes to master candidate role: %s",
1417 utils.CommaJoin(node.name for node in mod_list))
1418 for name in mod_list:
1419 lu.context.ReaddNode(name)
1420 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1422 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1426 def _DecideSelfPromotion(lu, exceptions=None):
1427 """Decide whether I should promote myself as a master candidate.
1430 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1431 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1432 # the new node will increase mc_max with one, so:
1433 mc_should = min(mc_should + 1, cp_size)
1434 return mc_now < mc_should
1437 def _CalculateGroupIPolicy(cluster, group):
1438 """Calculate instance policy for group.
1441 return cluster.SimpleFillIPolicy(group.ipolicy)
1444 def _ComputeViolatingInstances(ipolicy, instances):
1445 """Computes a set of instances who violates given ipolicy.
1447 @param ipolicy: The ipolicy to verify
1448 @type instances: list of L{objects.Instance}
1449 @param instances: List of instances to verify
1450 @return: A frozenset of instance names violating the ipolicy
1453 return frozenset([inst.name for inst in instances
1454 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1457 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1458 """Check that the brigdes needed by a list of nics exist.
1461 cluster = lu.cfg.GetClusterInfo()
1462 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1463 brlist = [params[constants.NIC_LINK] for params in paramslist
1464 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1466 result = lu.rpc.call_bridges_exist(target_node, brlist)
1467 result.Raise("Error checking bridges on destination node '%s'" %
1468 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1471 def _CheckInstanceBridgesExist(lu, instance, node=None):
1472 """Check that the brigdes needed by an instance exist.
1476 node = instance.primary_node
1477 _CheckNicsBridgesExist(lu, instance.nics, node)
1480 def _CheckOSVariant(os_obj, name):
1481 """Check whether an OS name conforms to the os variants specification.
1483 @type os_obj: L{objects.OS}
1484 @param os_obj: OS object to check
1486 @param name: OS name passed by the user, to check for validity
1489 variant = objects.OS.GetVariant(name)
1490 if not os_obj.supported_variants:
1492 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1493 " passed)" % (os_obj.name, variant),
1497 raise errors.OpPrereqError("OS name must include a variant",
1500 if variant not in os_obj.supported_variants:
1501 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1504 def _GetNodeInstancesInner(cfg, fn):
1505 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1508 def _GetNodeInstances(cfg, node_name):
1509 """Returns a list of all primary and secondary instances on a node.
1513 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1516 def _GetNodePrimaryInstances(cfg, node_name):
1517 """Returns primary instances on a node.
1520 return _GetNodeInstancesInner(cfg,
1521 lambda inst: node_name == inst.primary_node)
1524 def _GetNodeSecondaryInstances(cfg, node_name):
1525 """Returns secondary instances on a node.
1528 return _GetNodeInstancesInner(cfg,
1529 lambda inst: node_name in inst.secondary_nodes)
1532 def _GetStorageTypeArgs(cfg, storage_type):
1533 """Returns the arguments for a storage type.
1536 # Special case for file storage
1537 if storage_type == constants.ST_FILE:
1538 # storage.FileStorage wants a list of storage directories
1539 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1544 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1547 for dev in instance.disks:
1548 cfg.SetDiskID(dev, node_name)
1550 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1551 result.Raise("Failed to get disk status from node %s" % node_name,
1552 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1554 for idx, bdev_status in enumerate(result.payload):
1555 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1561 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1562 """Check the sanity of iallocator and node arguments and use the
1563 cluster-wide iallocator if appropriate.
1565 Check that at most one of (iallocator, node) is specified. If none is
1566 specified, then the LU's opcode's iallocator slot is filled with the
1567 cluster-wide default iallocator.
1569 @type iallocator_slot: string
1570 @param iallocator_slot: the name of the opcode iallocator slot
1571 @type node_slot: string
1572 @param node_slot: the name of the opcode target node slot
1575 node = getattr(lu.op, node_slot, None)
1576 iallocator = getattr(lu.op, iallocator_slot, None)
1578 if node is not None and iallocator is not None:
1579 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1581 elif node is None and iallocator is None:
1582 default_iallocator = lu.cfg.GetDefaultIAllocator()
1583 if default_iallocator:
1584 setattr(lu.op, iallocator_slot, default_iallocator)
1586 raise errors.OpPrereqError("No iallocator or node given and no"
1587 " cluster-wide default iallocator found;"
1588 " please specify either an iallocator or a"
1589 " node, or set a cluster-wide default"
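# Illustrative sketch, not part of the original module: typical use from a
# hypothetical LU's CheckArguments, filling self.op.iallocator from the
# cluster default when neither an iallocator nor a node was given; the slot
# names used here are examples only.
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")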
1593 def _GetDefaultIAllocator(cfg, iallocator):
1594 """Decides on which iallocator to use.
1596 @type cfg: L{config.ConfigWriter}
1597 @param cfg: Cluster configuration object
1598 @type iallocator: string or None
1599 @param iallocator: Iallocator specified in opcode
1601 @return: Iallocator name
1605 # Use default iallocator
1606 iallocator = cfg.GetDefaultIAllocator()
1609 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1610 " opcode nor as a cluster-wide default",
1616 class LUClusterPostInit(LogicalUnit):
1617 """Logical unit for running hooks after cluster initialization.
1620 HPATH = "cluster-init"
1621 HTYPE = constants.HTYPE_CLUSTER
1623 def BuildHooksEnv(self):
1628 "OP_TARGET": self.cfg.GetClusterName(),
1631 def BuildHooksNodes(self):
1632 """Build hooks nodes.
1635 return ([], [self.cfg.GetMasterNode()])
1637 def Exec(self, feedback_fn):
1644 class LUClusterDestroy(LogicalUnit):
1645 """Logical unit for destroying the cluster.
1648 HPATH = "cluster-destroy"
1649 HTYPE = constants.HTYPE_CLUSTER
1651 def BuildHooksEnv(self):
1656 "OP_TARGET": self.cfg.GetClusterName(),
1659 def BuildHooksNodes(self):
1660 """Build hooks nodes.
1665 def CheckPrereq(self):
1666 """Check prerequisites.
1668 This checks whether the cluster is empty.
1670 Any errors are signaled by raising errors.OpPrereqError.
1673 master = self.cfg.GetMasterNode()
1675 nodelist = self.cfg.GetNodeList()
1676 if len(nodelist) != 1 or nodelist[0] != master:
1677 raise errors.OpPrereqError("There are still %d node(s) in"
1678 " this cluster." % (len(nodelist) - 1),
1680 instancelist = self.cfg.GetInstanceList()
1682 raise errors.OpPrereqError("There are still %d instance(s) in"
1683 " this cluster." % len(instancelist),
1686 def Exec(self, feedback_fn):
1687 """Destroys the cluster.
1690 master_params = self.cfg.GetMasterNetworkParameters()
1692 # Run post hooks on master node before it's removed
1693 _RunPostHook(self, master_params.name)
1695 ems = self.cfg.GetUseExternalMipScript()
1696 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1699 self.LogWarning("Error disabling the master IP address: %s",
1702 return master_params.name
1705 def _VerifyCertificate(filename):
1706 """Verifies a certificate for L{LUClusterVerifyConfig}.
1708 @type filename: string
1709 @param filename: Path to PEM file
1713 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1714 utils.ReadFile(filename))
1715 except Exception, err: # pylint: disable=W0703
1716 return (LUClusterVerifyConfig.ETYPE_ERROR,
1717 "Failed to load X509 certificate %s: %s" % (filename, err))
1720 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1721 constants.SSL_CERT_EXPIRATION_ERROR)
1724 fnamemsg = "While verifying %s: %s" % (filename, msg)
1729 return (None, fnamemsg)
1730 elif errcode == utils.CERT_WARNING:
1731 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1732 elif errcode == utils.CERT_ERROR:
1733 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1735 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1738 def _GetAllHypervisorParameters(cluster, instances):
1739 """Compute the set of all hypervisor parameters.
1741 @type cluster: L{objects.Cluster}
1742 @param cluster: the cluster object
1743 @param instances: list of L{objects.Instance}
1744 @param instances: additional instances from which to obtain parameters
1745 @rtype: list of (origin, hypervisor, parameters)
1746 @return: a list with all parameters found, indicating the hypervisor they
1747 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1752 for hv_name in cluster.enabled_hypervisors:
1753 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1755 for os_name, os_hvp in cluster.os_hvp.items():
1756 for hv_name, hv_params in os_hvp.items():
1758 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1759 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1761 # TODO: collapse identical parameter values in a single one
1762 for instance in instances:
1763 if instance.hvparams:
1764 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1765 cluster.FillHV(instance)))
1770 class _VerifyErrors(object):
1771 """Mix-in for cluster/group verify LUs.
1773 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1774 self.op and self._feedback_fn to be available.)
1778 ETYPE_FIELD = "code"
1779 ETYPE_ERROR = "ERROR"
1780 ETYPE_WARNING = "WARNING"
1782 def _Error(self, ecode, item, msg, *args, **kwargs):
1783 """Format an error message.
1785 Based on the opcode's error_codes parameter, either format a
1786 parseable error code, or a simpler error string.
1788 This must be called only from Exec and functions called from Exec.
1791 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1792 itype, etxt, _ = ecode
1793 # first complete the msg
1796 # then format the whole message
1797 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1798 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1804 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1805 # and finally report it via the feedback_fn
1806 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1808 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1809 """Log an error message if the passed condition is True.
1813 or self.op.debug_simulate_errors) # pylint: disable=E1101
1815 # If the error code is in the list of ignored errors, demote the error to a
1817 (_, etxt, _) = ecode
1818 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1819 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1822 self._Error(ecode, *args, **kwargs)
1824 # do not mark the operation as failed for WARN cases only
1825 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1826 self.bad = self.bad or cond
1829 class LUClusterVerify(NoHooksLU):
1830 """Submits all jobs necessary to verify the cluster.
1835 def ExpandNames(self):
1836 self.needed_locks = {}
1838 def Exec(self, feedback_fn):
1841 if self.op.group_name:
1842 groups = [self.op.group_name]
1843 depends_fn = lambda: None
1845 groups = self.cfg.GetNodeGroupList()
1847 # Verify global configuration
1849 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1852 # Always depend on global verification
1853 depends_fn = lambda: [(-len(jobs), [])]
1855 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1856 ignore_errors=self.op.ignore_errors,
1857 depends=depends_fn())]
1858 for group in groups)
1860 # Fix up all parameters
1861 for op in itertools.chain(*jobs): # pylint: disable=W0142
1862 op.debug_simulate_errors = self.op.debug_simulate_errors
1863 op.verbose = self.op.verbose
1864 op.error_codes = self.op.error_codes
1865 try:
1866   op.skip_checks = self.op.skip_checks
1867 except AttributeError:
1868 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1870 return ResultWithJobs(jobs)
1873 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1874 """Verifies the cluster config.
1879 def _VerifyHVP(self, hvp_data):
1880 """Verifies locally the syntax of the hypervisor parameters.
1883 for item, hv_name, hv_params in hvp_data:
1884 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1887 hv_class = hypervisor.GetHypervisor(hv_name)
1888 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1889 hv_class.CheckParameterSyntax(hv_params)
1890 except errors.GenericError, err:
1891 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1893 def ExpandNames(self):
1894 # Information can be safely retrieved as the BGL is acquired in exclusive
1896 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1897 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1898 self.all_node_info = self.cfg.GetAllNodesInfo()
1899 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1900 self.needed_locks = {}
1902 def Exec(self, feedback_fn):
1903 """Verify integrity of cluster, performing various test on nodes.
1907 self._feedback_fn = feedback_fn
1909 feedback_fn("* Verifying cluster config")
1911 for msg in self.cfg.VerifyConfig():
1912 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1914 feedback_fn("* Verifying cluster certificate files")
1916 for cert_filename in constants.ALL_CERT_FILES:
1917 (errcode, msg) = _VerifyCertificate(cert_filename)
1918 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1920 feedback_fn("* Verifying hypervisor parameters")
1922 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1923 self.all_inst_info.values()))
1925 feedback_fn("* Verifying all nodes belong to an existing group")
1927 # We do this verification here because, should this bogus circumstance
1928 # occur, it would never be caught by VerifyGroup, which only acts on
1929 # nodes/instances reachable from existing node groups.
1931 dangling_nodes = set(node.name for node in self.all_node_info.values()
1932 if node.group not in self.all_group_info)
1934 dangling_instances = {}
1935 no_node_instances = []
1937 for inst in self.all_inst_info.values():
1938 if inst.primary_node in dangling_nodes:
1939 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1940 elif inst.primary_node not in self.all_node_info:
1941 no_node_instances.append(inst.name)
1946 utils.CommaJoin(dangling_instances.get(node.name,
1948 for node in dangling_nodes]
1950 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1952 "the following nodes (and their instances) belong to a non"
1953 " existing group: %s", utils.CommaJoin(pretty_dangling))
1955 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1957 "the following instances have a non-existing primary-node:"
1958 " %s", utils.CommaJoin(no_node_instances))
1963 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1964 """Verifies the status of a node group.
1967 HPATH = "cluster-verify"
1968 HTYPE = constants.HTYPE_CLUSTER
1971 _HOOKS_INDENT_RE = re.compile("^", re.M)
1973 class NodeImage(object):
1974 """A class representing the logical and physical status of a node.
1977 @ivar name: the node name to which this object refers
1978 @ivar volumes: a structure as returned from
1979 L{ganeti.backend.GetVolumeList} (runtime)
1980 @ivar instances: a list of running instances (runtime)
1981 @ivar pinst: list of configured primary instances (config)
1982 @ivar sinst: list of configured secondary instances (config)
1983 @ivar sbp: dictionary of {primary-node: list of instances} for all
1984 instances for which this node is secondary (config)
1985 @ivar mfree: free memory, as reported by hypervisor (runtime)
1986 @ivar dfree: free disk, as reported by the node (runtime)
1987 @ivar offline: the offline status (config)
1988 @type rpc_fail: boolean
1989 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1990 not whether the individual keys were correct) (runtime)
1991 @type lvm_fail: boolean
1992 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1993 @type hyp_fail: boolean
1994 @ivar hyp_fail: whether the RPC call didn't return the instance list
1995 @type ghost: boolean
1996 @ivar ghost: whether this is a known node or not (config)
1997 @type os_fail: boolean
1998 @ivar os_fail: whether the RPC call didn't return valid OS data
2000 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2001 @type vm_capable: boolean
2002 @ivar vm_capable: whether the node can host instances
2005 def __init__(self, offline=False, name=None, vm_capable=True):
2014 self.offline = offline
2015 self.vm_capable = vm_capable
2016 self.rpc_fail = False
2017 self.lvm_fail = False
2018 self.hyp_fail = False
2020 self.os_fail = False
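# Runtime fields such as volumes, instances, mfree, dfree and oslist are
# filled in later by the _UpdateNode* helpers once the node_verify RPC
# results have been collected.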
2023 def ExpandNames(self):
2024 # This raises errors.OpPrereqError on its own:
2025 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2027 # Get instances in node group; this is unsafe and needs verification later
2028 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2030 self.needed_locks = {
2031 locking.LEVEL_INSTANCE: inst_names,
2032 locking.LEVEL_NODEGROUP: [self.group_uuid],
2033 locking.LEVEL_NODE: [],
2036 self.share_locks = _ShareAll()
2038 def DeclareLocks(self, level):
2039 if level == locking.LEVEL_NODE:
2040 # Get members of node group; this is unsafe and needs verification later
2041 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2043 all_inst_info = self.cfg.GetAllInstancesInfo()
2045 # In Exec(), we warn about mirrored instances that have primary and
2046 # secondary living in separate node groups. To fully verify that
2047 # volumes for these instances are healthy, we will need to do an
2048 # extra call to their secondaries. We ensure here those nodes will be locked.
2050 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2051 # Important: access only the instances whose lock is owned
2052 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2053 nodes.update(all_inst_info[inst].secondary_nodes)
2055 self.needed_locks[locking.LEVEL_NODE] = nodes
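# Example (illustrative): a DRBD instance whose primary node is in this
# group but whose secondary lives in another group causes that secondary
# node to be added to the set above.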
2057 def CheckPrereq(self):
2058 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2059 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2061 group_nodes = set(self.group_info.members)
2062 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2065 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2067 unlocked_instances = \
2068 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2071 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2072 utils.CommaJoin(unlocked_nodes))
2074 if unlocked_instances:
2075 raise errors.OpPrereqError("Missing lock for instances: %s" %
2076 utils.CommaJoin(unlocked_instances))
2078 self.all_node_info = self.cfg.GetAllNodesInfo()
2079 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2081 self.my_node_names = utils.NiceSort(group_nodes)
2082 self.my_inst_names = utils.NiceSort(group_instances)
2084 self.my_node_info = dict((name, self.all_node_info[name])
2085 for name in self.my_node_names)
2087 self.my_inst_info = dict((name, self.all_inst_info[name])
2088 for name in self.my_inst_names)
2090 # We detect here the nodes that will need the extra RPC calls for verifying
2091 # split LV volumes; they should be locked.
2092 extra_lv_nodes = set()
2094 for inst in self.my_inst_info.values():
2095 if inst.disk_template in constants.DTS_INT_MIRROR:
2096 group = self.my_node_info[inst.primary_node].group
2097 for nname in inst.secondary_nodes:
2098 if self.all_node_info[nname].group != group:
2099 extra_lv_nodes.add(nname)
2101 unlocked_lv_nodes = \
2102 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2104 if unlocked_lv_nodes:
2105 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2106 utils.CommaJoin(unlocked_lv_nodes))
2107 self.extra_lv_nodes = list(extra_lv_nodes)
2109 def _VerifyNode(self, ninfo, nresult):
2110 """Perform some basic validation on data returned from a node.
2112 - check the result data structure is well formed and has all the mandatory fields
2114 - check ganeti version
2116 @type ninfo: L{objects.Node}
2117 @param ninfo: the node to check
2118 @param nresult: the results from the node
2120 @return: whether overall this call was successful (and we can expect
2121 reasonable values in the response)
2125 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2127 # main result, nresult should be a non-empty dict
2128 test = not nresult or not isinstance(nresult, dict)
2129 _ErrorIf(test, constants.CV_ENODERPC, node,
2130 "unable to verify node: no data returned")
2134 # compares ganeti version
2135 local_version = constants.PROTOCOL_VERSION
2136 remote_version = nresult.get("version", None)
2137 test = not (remote_version and
2138 isinstance(remote_version, (list, tuple)) and
2139 len(remote_version) == 2)
2140 _ErrorIf(test, constants.CV_ENODERPC, node,
2141 "connection to node returned invalid data")
2145 test = local_version != remote_version[0]
2146 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2147 "incompatible protocol versions: master %s,"
2148 " node %s", local_version, remote_version[0])
2152 # node seems compatible, we can actually try to look into its results
2154 # full package version
2155 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2156 constants.CV_ENODEVERSION, node,
2157 "software version mismatch: master %s, node %s",
2158 constants.RELEASE_VERSION, remote_version[1],
2159 code=self.ETYPE_WARNING)
2161 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2162 if ninfo.vm_capable and isinstance(hyp_result, dict):
2163 for hv_name, hv_result in hyp_result.iteritems():
2164 test = hv_result is not None
2165 _ErrorIf(test, constants.CV_ENODEHV, node,
2166 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2168 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2169 if ninfo.vm_capable and isinstance(hvp_result, list):
2170 for item, hv_name, hv_result in hvp_result:
2171 _ErrorIf(True, constants.CV_ENODEHV, node,
2172 "hypervisor %s parameter verify failure (source %s): %s",
2173 hv_name, item, hv_result)
2175 test = nresult.get(constants.NV_NODESETUP,
2176 ["Missing NODESETUP results"])
2177 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2182 def _VerifyNodeTime(self, ninfo, nresult,
2183 nvinfo_starttime, nvinfo_endtime):
2184 """Check the node time.
2186 @type ninfo: L{objects.Node}
2187 @param ninfo: the node to check
2188 @param nresult: the remote results for the node
2189 @param nvinfo_starttime: the start time of the RPC call
2190 @param nvinfo_endtime: the end time of the RPC call
2194 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2196 ntime = nresult.get(constants.NV_TIME, None)
2198 ntime_merged = utils.MergeTime(ntime)
2199 except (ValueError, TypeError):
2200 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2203 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2204 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2205 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2206 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
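# The check below only fires when ntime_diff was set, i.e. the node clock
# lies outside the allowed skew window around the RPC start/end times.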
2210 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2211 "Node time diverges by at least %s from master node time",
2214 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2215 """Check the node LVM results.
2217 @type ninfo: L{objects.Node}
2218 @param ninfo: the node to check
2219 @param nresult: the remote results for the node
2220 @param vg_name: the configured VG name
2227 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 # checks vg existence and size > 20G
2230 vglist = nresult.get(constants.NV_VGLIST, None)
2232 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2234 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2235 constants.MIN_VG_SIZE)
2236 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2239 pvlist = nresult.get(constants.NV_PVLIST, None)
2240 test = pvlist is None
2241 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2243 # check that ':' is not present in PV names, since it's a
2244 # special character for lvcreate (denotes the range of PEs to allocate)
2246 for _, pvname, owner_vg in pvlist:
2247 test = ":" in pvname
2248 _ErrorIf(test, constants.CV_ENODELVM, node,
2249 "Invalid character ':' in PV '%s' of VG '%s'",
2252 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2253 """Check the node bridges.
2255 @type ninfo: L{objects.Node}
2256 @param ninfo: the node to check
2257 @param nresult: the remote results for the node
2258 @param bridges: the expected list of bridges
2265 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2267 missing = nresult.get(constants.NV_BRIDGES, None)
2268 test = not isinstance(missing, list)
2269 _ErrorIf(test, constants.CV_ENODENET, node,
2270 "did not return valid bridge information")
2272 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2273 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2275 def _VerifyNodeUserScripts(self, ninfo, nresult):
2276 """Check the results of user script presence and executability on the node
2278 @type ninfo: L{objects.Node}
2279 @param ninfo: the node to check
2280 @param nresult: the remote results for the node
2285 test = constants.NV_USERSCRIPTS not in nresult
2286 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2287 "did not return user scripts information")
2289 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2291 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2292 "user scripts not present or not executable: %s" %
2293 utils.CommaJoin(sorted(broken_scripts)))
2295 def _VerifyNodeNetwork(self, ninfo, nresult):
2296 """Check the node network connectivity results.
2298 @type ninfo: L{objects.Node}
2299 @param ninfo: the node to check
2300 @param nresult: the remote results for the node
2304 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2306 test = constants.NV_NODELIST not in nresult
2307 _ErrorIf(test, constants.CV_ENODESSH, node,
2308 "node hasn't returned node ssh connectivity data")
2310 if nresult[constants.NV_NODELIST]:
2311 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2312 _ErrorIf(True, constants.CV_ENODESSH, node,
2313 "ssh communication with node '%s': %s", a_node, a_msg)
2315 test = constants.NV_NODENETTEST not in nresult
2316 _ErrorIf(test, constants.CV_ENODENET, node,
2317 "node hasn't returned node tcp connectivity data")
2319 if nresult[constants.NV_NODENETTEST]:
2320 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2322 _ErrorIf(True, constants.CV_ENODENET, node,
2323 "tcp communication with node '%s': %s",
2324 anode, nresult[constants.NV_NODENETTEST][anode])
2326 test = constants.NV_MASTERIP not in nresult
2327 _ErrorIf(test, constants.CV_ENODENET, node,
2328 "node hasn't returned node master IP reachability data")
2330 if not nresult[constants.NV_MASTERIP]:
2331 if node == self.master_node:
2332 msg = "the master node cannot reach the master IP (not configured?)"
2334 msg = "cannot reach the master IP"
2335 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2337 def _VerifyInstance(self, instance, instanceconfig, node_image,
2339 """Verify an instance.
2341 This function checks to see if the required block devices are
2342 available on the instance's node.
2345 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2346 node_current = instanceconfig.primary_node
2348 node_vol_should = {}
2349 instanceconfig.MapLVsByNode(node_vol_should)
2351 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2352 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2353 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2355 for node in node_vol_should:
2356 n_img = node_image[node]
2357 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2358 # ignore missing volumes on offline or broken nodes
2360 for volume in node_vol_should[node]:
2361 test = volume not in n_img.volumes
2362 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2363 "volume %s missing on node %s", volume, node)
2365 if instanceconfig.admin_state == constants.ADMINST_UP:
2366 pri_img = node_image[node_current]
2367 test = instance not in pri_img.instances and not pri_img.offline
2368 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2369 "instance not running on its primary node %s",
2372 diskdata = [(nname, success, status, idx)
2373 for (nname, disks) in diskstatus.items()
2374 for idx, (success, status) in enumerate(disks)]
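# diskdata flattens the per-node results into (node, success, status,
# disk_index) tuples, one entry per disk of this instance.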
2376 for nname, success, bdev_status, idx in diskdata:
2377 # the 'ghost node' construction in Exec() ensures that node_image has an entry for every node referenced here
2379 snode = node_image[nname]
2380 bad_snode = snode.ghost or snode.offline
2381 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2382 not success and not bad_snode,
2383 constants.CV_EINSTANCEFAULTYDISK, instance,
2384 "couldn't retrieve status for disk/%s on %s: %s",
2385 idx, nname, bdev_status)
2386 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2387 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2388 constants.CV_EINSTANCEFAULTYDISK, instance,
2389 "disk/%s on %s is faulty", idx, nname)
2391 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2392 """Verify if there are any unknown volumes in the cluster.
2394 The .os, .swap and backup volumes are ignored. All other volumes are
2395 reported as unknown.
2397 @type reserved: L{ganeti.utils.FieldSet}
2398 @param reserved: a FieldSet of reserved volume names
2401 for node, n_img in node_image.items():
2402 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2403 # skip non-healthy nodes
2405 for volume in n_img.volumes:
2406 test = ((node not in node_vol_should or
2407 volume not in node_vol_should[node]) and
2408 not reserved.Matches(volume))
2409 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2410 "volume %s is unknown", volume)
2412 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2413 """Verify N+1 Memory Resilience.
2415 Check that if one single node dies we can still start all the
2416 instances it was primary for.
2419 cluster_info = self.cfg.GetClusterInfo()
2420 for node, n_img in node_image.items():
2421 # This code checks that every node which is now listed as
2422 # secondary has enough memory to host all instances it is
2423 # supposed to host, should a single other node in the cluster fail.
2424 # FIXME: not ready for failover to an arbitrary node
2425 # FIXME: does not support file-backed instances
2426 # WARNING: we currently take into account down instances as well
2427 # as up ones, considering that even if they're down someone
2428 # might want to start them even in the event of a node failure.
2430 # we're skipping offline nodes from the N+1 warning, since
2431 # most likely we don't have good memory information from them;
2432 # we already list instances living on such nodes, and that's warning enough.
2435 #TODO(dynmem): also consider ballooning out other instances
2436 for prinode, instances in n_img.sbp.items():
2438 for instance in instances:
2439 bep = cluster_info.FillBE(instance_cfg[instance])
2440 if bep[constants.BE_AUTO_BALANCE]:
2441 needed_mem += bep[constants.BE_MINMEM]
2442 test = n_img.mfree < needed_mem
2443 self._ErrorIf(test, constants.CV_ENODEN1, node,
2444 "not enough memory to accommodate instance failovers"
2445 " should node %s fail (%dMiB needed, %dMiB available)",
2446 prinode, needed_mem, n_img.mfree)
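# Illustrative example: if this node is secondary for two auto-balanced
# instances sharing the same primary node, each with a 512 MiB minimum,
# mfree must be at least 1024 MiB or CV_ENODEN1 is reported.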
2449 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2450 (files_all, files_opt, files_mc, files_vm)):
2451 """Verifies file checksums collected from all nodes.
2453 @param errorif: Callback for reporting errors
2454 @param nodeinfo: List of L{objects.Node} objects
2455 @param master_node: Name of master node
2456 @param all_nvinfo: RPC results
2459 # Define functions determining which nodes to consider for a file
2462 (files_mc, lambda node: (node.master_candidate or
2463 node.name == master_node)),
2464 (files_vm, lambda node: node.vm_capable),
2467 # Build mapping from filename to list of nodes which should have the file
2469 for (files, fn) in files2nodefn:
2471 filenodes = nodeinfo
2473 filenodes = filter(fn, nodeinfo)
2474 nodefiles.update((filename,
2475 frozenset(map(operator.attrgetter("name"), filenodes)))
2476 for filename in files)
2478 assert set(nodefiles) == (files_all | files_mc | files_vm)
2480 fileinfo = dict((filename, {}) for filename in nodefiles)
2481 ignore_nodes = set()
2483 for node in nodeinfo:
2485 ignore_nodes.add(node.name)
2488 nresult = all_nvinfo[node.name]
2490 if nresult.fail_msg or not nresult.payload:
2493 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2495 test = not (node_files and isinstance(node_files, dict))
2496 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2497 "Node did not return file checksum data")
2499 ignore_nodes.add(node.name)
2502 # Build per-checksum mapping from filename to nodes having it
2503 for (filename, checksum) in node_files.items():
2504 assert filename in nodefiles
2505 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2507 for (filename, checksums) in fileinfo.items():
2508 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2510 # Nodes having the file
2511 with_file = frozenset(node_name
2512 for nodes in fileinfo[filename].values()
2513 for node_name in nodes) - ignore_nodes
2515 expected_nodes = nodefiles[filename] - ignore_nodes
2517 # Nodes missing file
2518 missing_file = expected_nodes - with_file
2520 if filename in files_opt:
2522 errorif(missing_file and missing_file != expected_nodes,
2523 constants.CV_ECLUSTERFILECHECK, None,
2524 "File %s is optional, but it must exist on all or no"
2525 " nodes (not found on %s)",
2526 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2528 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2529 "File %s is missing from node(s) %s", filename,
2530 utils.CommaJoin(utils.NiceSort(missing_file)))
2532 # Warn if a node has a file it shouldn't
2533 unexpected = with_file - expected_nodes
2535 constants.CV_ECLUSTERFILECHECK, None,
2536 "File %s should not exist on node(s) %s",
2537 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2539 # See if there are multiple versions of the file
2540 test = len(checksums) > 1
2542 variants = ["variant %s on %s" %
2543 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2544 for (idx, (checksum, nodes)) in
2545 enumerate(sorted(checksums.items()))]
2549 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2550 "File %s found with %s different checksums (%s)",
2551 filename, len(checksums), "; ".join(variants))
2553 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2555 """Verifies the node DRBD status.
2557 @type ninfo: L{objects.Node}
2558 @param ninfo: the node to check
2559 @param nresult: the remote results for the node
2560 @param instanceinfo: the dict of instances
2561 @param drbd_helper: the configured DRBD usermode helper
2562 @param drbd_map: the DRBD map as returned by
2563 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2567 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2570 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2571 test = (helper_result is None)
2572 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2573 "no drbd usermode helper returned")
2575 status, payload = helper_result
2577 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2578 "drbd usermode helper check unsuccessful: %s", payload)
2579 test = status and (payload != drbd_helper)
2580 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2581 "wrong drbd usermode helper: %s", payload)
2583 # compute the DRBD minors
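# drbd_map has the form {node: {minor: instance name}}, as documented for
# the drbd_map parameter above.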
2585 for minor, instance in drbd_map[node].items():
2586 test = instance not in instanceinfo
2587 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2588 "ghost instance '%s' in temporary DRBD map", instance)
2589 # ghost instance should not be running, but otherwise we
2590 # don't give double warnings (both ghost instance and
2591 # unallocated minor in use)
2593 node_drbd[minor] = (instance, False)
2595 instance = instanceinfo[instance]
2596 node_drbd[minor] = (instance.name,
2597 instance.admin_state == constants.ADMINST_UP)
2599 # and now check them
2600 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2601 test = not isinstance(used_minors, (tuple, list))
2602 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2603 "cannot parse drbd status file: %s", str(used_minors))
2605 # we cannot check drbd status
2608 for minor, (iname, must_exist) in node_drbd.items():
2609 test = minor not in used_minors and must_exist
2610 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2611 "drbd minor %d of instance %s is not active", minor, iname)
2612 for minor in used_minors:
2613 test = minor not in node_drbd
2614 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2615 "unallocated drbd minor %d is in use", minor)
2617 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2618 """Builds the node OS structures.
2620 @type ninfo: L{objects.Node}
2621 @param ninfo: the node to check
2622 @param nresult: the remote results for the node
2623 @param nimg: the node image object
2627 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2629 remote_os = nresult.get(constants.NV_OSLIST, None)
2630 test = (not isinstance(remote_os, list) or
2631 not compat.all(isinstance(v, list) and len(v) == 7
2632 for v in remote_os))
2634 _ErrorIf(test, constants.CV_ENODEOS, node,
2635 "node hasn't returned valid OS data")
2644 for (name, os_path, status, diagnose,
2645 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2647 if name not in os_dict:
2650 # parameters is a list of lists instead of list of tuples due to
2651 # JSON lacking a real tuple type, fix it:
2652 parameters = [tuple(v) for v in parameters]
2653 os_dict[name].append((os_path, status, diagnose,
2654 set(variants), set(parameters), set(api_ver)))
2656 nimg.oslist = os_dict
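# nimg.oslist now maps each OS name to a list of (path, status, diagnose,
# variants, parameters, api_versions) tuples, one per occurrence of that
# OS on the node; _VerifyNodeOS below compares it against a reference node.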
2658 def _VerifyNodeOS(self, ninfo, nimg, base):
2659 """Verifies the node OS list.
2661 @type ninfo: L{objects.Node}
2662 @param ninfo: the node to check
2663 @param nimg: the node image object
2664 @param base: the 'template' node we match against (e.g. from the master)
2668 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2670 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2672 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2673 for os_name, os_data in nimg.oslist.items():
2674 assert os_data, "Empty OS status for OS %s?!" % os_name
2675 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2676 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2677 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2678 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2679 "OS '%s' has multiple entries (first one shadows the rest): %s",
2680 os_name, utils.CommaJoin([v[0] for v in os_data]))
2681 # comparisons with the 'base' image
2682 test = os_name not in base.oslist
2683 _ErrorIf(test, constants.CV_ENODEOS, node,
2684 "Extra OS %s not present on reference node (%s)",
2688 assert base.oslist[os_name], "Base node has empty OS status?"
2689 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2691 # base OS is invalid, skipping
2693 for kind, a, b in [("API version", f_api, b_api),
2694 ("variants list", f_var, b_var),
2695 ("parameters", beautify_params(f_param),
2696 beautify_params(b_param))]:
2697 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2698 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2699 kind, os_name, base.name,
2700 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2702 # check any missing OSes
2703 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2704 _ErrorIf(missing, constants.CV_ENODEOS, node,
2705 "OSes present on reference node %s but missing on this node: %s",
2706 base.name, utils.CommaJoin(missing))
2708 def _VerifyOob(self, ninfo, nresult):
2709 """Verifies out of band functionality of a node.
2711 @type ninfo: L{objects.Node}
2712 @param ninfo: the node to check
2713 @param nresult: the remote results for the node
2717 # We just have to verify the paths on master and/or master candidates
2718 # as the oob helper is invoked on the master
2719 if ((ninfo.master_candidate or ninfo.master_capable) and
2720 constants.NV_OOB_PATHS in nresult):
2721 for path_result in nresult[constants.NV_OOB_PATHS]:
2722 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2724 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2725 """Verifies and updates the node volume data.
2727 This function will update a L{NodeImage}'s internal structures
2728 with data from the remote call.
2730 @type ninfo: L{objects.Node}
2731 @param ninfo: the node to check
2732 @param nresult: the remote results for the node
2733 @param nimg: the node image object
2734 @param vg_name: the configured VG name
2738 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2740 nimg.lvm_fail = True
2741 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2744 elif isinstance(lvdata, basestring):
2745 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2746 utils.SafeEncode(lvdata))
2747 elif not isinstance(lvdata, dict):
2748 _ErrorIf(True, constants.CV_ENODELVM, node,
2749 "rpc call to node failed (lvlist)")
2751 nimg.volumes = lvdata
2752 nimg.lvm_fail = False
2754 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2755 """Verifies and updates the node instance list.
2757 If the listing was successful, then updates this node's instance
2758 list. Otherwise, it marks the RPC call as failed for the instance
2761 @type ninfo: L{objects.Node}
2762 @param ninfo: the node to check
2763 @param nresult: the remote results for the node
2764 @param nimg: the node image object
2767 idata = nresult.get(constants.NV_INSTANCELIST, None)
2768 test = not isinstance(idata, list)
2769 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2770 "rpc call to node failed (instancelist): %s",
2771 utils.SafeEncode(str(idata)))
2773 nimg.hyp_fail = True
2775 nimg.instances = idata
2777 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2778 """Verifies and computes a node information map
2780 @type ninfo: L{objects.Node}
2781 @param ninfo: the node to check
2782 @param nresult: the remote results for the node
2783 @param nimg: the node image object
2784 @param vg_name: the configured VG name
2788 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2790 # try to read free memory (from the hypervisor)
2791 hv_info = nresult.get(constants.NV_HVINFO, None)
2792 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2793 _ErrorIf(test, constants.CV_ENODEHV, node,
2794 "rpc call to node failed (hvinfo)")
2797 nimg.mfree = int(hv_info["memory_free"])
2798 except (ValueError, TypeError):
2799 _ErrorIf(True, constants.CV_ENODERPC, node,
2800 "node returned invalid nodeinfo, check hypervisor")
2802 # FIXME: devise a free space model for file based instances as well
2803 if vg_name is not None:
2804 test = (constants.NV_VGLIST not in nresult or
2805 vg_name not in nresult[constants.NV_VGLIST])
2806 _ErrorIf(test, constants.CV_ENODELVM, node,
2807 "node didn't return data for the volume group '%s'"
2808 " - it is either missing or broken", vg_name)
2811 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2812 except (ValueError, TypeError):
2813 _ErrorIf(True, constants.CV_ENODERPC, node,
2814 "node returned invalid LVM info, check LVM status")
2816 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2817 """Gets per-disk status information for all instances.
2819 @type nodelist: list of strings
2820 @param nodelist: Node names
2821 @type node_image: dict of (name, L{objects.Node})
2822 @param node_image: Node objects
2823 @type instanceinfo: dict of (name, L{objects.Instance})
2824 @param instanceinfo: Instance objects
2825 @rtype: {instance: {node: [(success, payload)]}}
2826 @return: a dictionary of per-instance dictionaries with nodes as
2827 keys and disk information as values; the disk information is a
2828 list of tuples (success, payload)
2831 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2834 node_disks_devonly = {}
2835 diskless_instances = set()
2836 diskless = constants.DT_DISKLESS
2838 for nname in nodelist:
2839 node_instances = list(itertools.chain(node_image[nname].pinst,
2840 node_image[nname].sinst))
2841 diskless_instances.update(inst for inst in node_instances
2842 if instanceinfo[inst].disk_template == diskless)
2843 disks = [(inst, disk)
2844 for inst in node_instances
2845 for disk in instanceinfo[inst].disks]
2848 # No need to collect data
2851 node_disks[nname] = disks
2853 # Creating copies as SetDiskID below will modify the objects and that can
2854 # lead to incorrect data returned from nodes
2855 devonly = [dev.Copy() for (_, dev) in disks]
2858 self.cfg.SetDiskID(dev, nname)
2860 node_disks_devonly[nname] = devonly
2862 assert len(node_disks) == len(node_disks_devonly)
2864 # Collect data from all nodes with disks
2865 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2868 assert len(result) == len(node_disks)
2872 for (nname, nres) in result.items():
2873 disks = node_disks[nname]
2876 # No data from this node
2877 data = len(disks) * [(False, "node offline")]
2880 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2881 "while getting disk information: %s", msg)
2883 # No data from this node
2884 data = len(disks) * [(False, msg)]
2887 for idx, i in enumerate(nres.payload):
2888 if isinstance(i, (tuple, list)) and len(i) == 2:
2891 logging.warning("Invalid result from node %s, entry %d: %s",
2893 data.append((False, "Invalid result from the remote node"))
2895 for ((inst, _), status) in zip(disks, data):
2896 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
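# instdisk ends up as {instance: {node: [(success, payload), ...]}},
# matching the structure documented in this method's docstring.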
2898 # Add empty entries for diskless instances.
2899 for inst in diskless_instances:
2900 assert inst not in instdisk
2903 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2904 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2905 compat.all(isinstance(s, (tuple, list)) and
2906 len(s) == 2 for s in statuses)
2907 for inst, nnames in instdisk.items()
2908 for nname, statuses in nnames.items())
2909 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2914 def _SshNodeSelector(group_uuid, all_nodes):
2915 """Create endless iterators for all potential SSH check hosts.
2918 nodes = [node for node in all_nodes
2919 if (node.group != group_uuid and
2921 keyfunc = operator.attrgetter("group")
2923 return map(itertools.cycle,
2924 [sorted(map(operator.attrgetter("name"), names))
2925 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2929 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2930 """Choose which nodes should talk to which other nodes.
2932 We will make nodes contact all nodes in their group, and one node from every other group.
2935 @warning: This algorithm has a known issue if one node group is much
2936 smaller than others (e.g. just one node). In such a case all other
2937 nodes will talk to the single node.
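Illustrative example: with groups A = {a1, a2} and B = {b1}, node a1 will
be told to contact a2 (all nodes of its own group) plus one node chosen
from group B (here b1).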
2940 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2941 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2943 return (online_nodes,
2944 dict((name, sorted([i.next() for i in sel]))
2945 for name in online_nodes))
2947 def BuildHooksEnv(self):
2950 Cluster-Verify hooks are run only in the post phase; if they fail, their
2951 output is logged in the verify output and the verification fails.
2955 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2958 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2959 for node in self.my_node_info.values())
2963 def BuildHooksNodes(self):
2964 """Build hooks nodes.
2967 return ([], self.my_node_names)
2969 def Exec(self, feedback_fn):
2970 """Verify integrity of the node group, performing various tests on nodes.
2973 # This method has too many local variables. pylint: disable=R0914
2974 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2976 if not self.my_node_names:
2978 feedback_fn("* Empty node group, skipping verification")
2982 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2983 verbose = self.op.verbose
2984 self._feedback_fn = feedback_fn
2986 vg_name = self.cfg.GetVGName()
2987 drbd_helper = self.cfg.GetDRBDHelper()
2988 cluster = self.cfg.GetClusterInfo()
2989 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2990 hypervisors = cluster.enabled_hypervisors
2991 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2993 i_non_redundant = [] # Non redundant instances
2994 i_non_a_balanced = [] # Non auto-balanced instances
2995 i_offline = 0 # Count of offline instances
2996 n_offline = 0 # Count of offline nodes
2997 n_drained = 0 # Count of nodes being drained
2998 node_vol_should = {}
3000 # FIXME: verify OS list
3003 filemap = _ComputeAncillaryFiles(cluster, False)
3005 # do local checksums
3006 master_node = self.master_node = self.cfg.GetMasterNode()
3007 master_ip = self.cfg.GetMasterIP()
3009 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3012 if self.cfg.GetUseExternalMipScript():
3013 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3015 node_verify_param = {
3016 constants.NV_FILELIST:
3017 utils.UniqueSequence(filename
3018 for files in filemap
3019 for filename in files),
3020 constants.NV_NODELIST:
3021 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3022 self.all_node_info.values()),
3023 constants.NV_HYPERVISOR: hypervisors,
3024 constants.NV_HVPARAMS:
3025 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3026 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3027 for node in node_data_list
3028 if not node.offline],
3029 constants.NV_INSTANCELIST: hypervisors,
3030 constants.NV_VERSION: None,
3031 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3032 constants.NV_NODESETUP: None,
3033 constants.NV_TIME: None,
3034 constants.NV_MASTERIP: (master_node, master_ip),
3035 constants.NV_OSLIST: None,
3036 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3037 constants.NV_USERSCRIPTS: user_scripts,
3040 if vg_name is not None:
3041 node_verify_param[constants.NV_VGLIST] = None
3042 node_verify_param[constants.NV_LVLIST] = vg_name
3043 node_verify_param[constants.NV_PVLIST] = [vg_name]
3044 node_verify_param[constants.NV_DRBDLIST] = None
3047 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3050 # FIXME: this needs to be changed per node-group, not cluster-wide
3052 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3053 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3054 bridges.add(default_nicpp[constants.NIC_LINK])
3055 for instance in self.my_inst_info.values():
3056 for nic in instance.nics:
3057 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3058 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3059 bridges.add(full_nic[constants.NIC_LINK])
3062 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3064 # Build our expected cluster state
3065 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3067 vm_capable=node.vm_capable))
3068 for node in node_data_list)
3072 for node in self.all_node_info.values():
3073 path = _SupportsOob(self.cfg, node)
3074 if path and path not in oob_paths:
3075 oob_paths.append(path)
3078 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3080 for instance in self.my_inst_names:
3081 inst_config = self.my_inst_info[instance]
3083 for nname in inst_config.all_nodes:
3084 if nname not in node_image:
3085 gnode = self.NodeImage(name=nname)
3086 gnode.ghost = (nname not in self.all_node_info)
3087 node_image[nname] = gnode
3089 inst_config.MapLVsByNode(node_vol_should)
3091 pnode = inst_config.primary_node
3092 node_image[pnode].pinst.append(instance)
3094 for snode in inst_config.secondary_nodes:
3095 nimg = node_image[snode]
3096 nimg.sinst.append(instance)
3097 if pnode not in nimg.sbp:
3098 nimg.sbp[pnode] = []
3099 nimg.sbp[pnode].append(instance)
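# sbp on the secondary's node image maps the instance's primary node to
# the instances this node is secondary for; it is used later by the N+1
# memory check.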
3101 # At this point, we have the in-memory data structures complete,
3102 # except for the runtime information, which we'll gather next
3104 # Due to the way our RPC system works, exact response times cannot be
3105 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3106 # time before and after executing the request, we can at least have a time window.
3108 nvinfo_starttime = time.time()
3109 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3111 self.cfg.GetClusterName())
3112 nvinfo_endtime = time.time()
3114 if self.extra_lv_nodes and vg_name is not None:
3116 self.rpc.call_node_verify(self.extra_lv_nodes,
3117 {constants.NV_LVLIST: vg_name},
3118 self.cfg.GetClusterName())
3120 extra_lv_nvinfo = {}
3122 all_drbd_map = self.cfg.ComputeDRBDMap()
3124 feedback_fn("* Gathering disk information (%s nodes)" %
3125 len(self.my_node_names))
3126 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3129 feedback_fn("* Verifying configuration file consistency")
3131 # If not all nodes are being checked, we need to make sure the master node
3132 # and a non-checked vm_capable node are in the list.
3133 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3135 vf_nvinfo = all_nvinfo.copy()
3136 vf_node_info = list(self.my_node_info.values())
3137 additional_nodes = []
3138 if master_node not in self.my_node_info:
3139 additional_nodes.append(master_node)
3140 vf_node_info.append(self.all_node_info[master_node])
3141 # Add the first vm_capable node we find which is not included
3142 for node in absent_nodes:
3143 nodeinfo = self.all_node_info[node]
3144 if nodeinfo.vm_capable and not nodeinfo.offline:
3145 additional_nodes.append(node)
3146 vf_node_info.append(self.all_node_info[node])
3148 key = constants.NV_FILELIST
3149 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3150 {key: node_verify_param[key]},
3151 self.cfg.GetClusterName()))
3153 vf_nvinfo = all_nvinfo
3154 vf_node_info = self.my_node_info.values()
3156 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3158 feedback_fn("* Verifying node status")
3162 for node_i in node_data_list:
3164 nimg = node_image[node]
3168 feedback_fn("* Skipping offline node %s" % (node,))
3172 if node == master_node:
3174 elif node_i.master_candidate:
3175 ntype = "master candidate"
3176 elif node_i.drained:
3182 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3184 msg = all_nvinfo[node].fail_msg
3185 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3188 nimg.rpc_fail = True
3191 nresult = all_nvinfo[node].payload
3193 nimg.call_ok = self._VerifyNode(node_i, nresult)
3194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3195 self._VerifyNodeNetwork(node_i, nresult)
3196 self._VerifyNodeUserScripts(node_i, nresult)
3197 self._VerifyOob(node_i, nresult)
3200 self._VerifyNodeLVM(node_i, nresult, vg_name)
3201 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3204 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3205 self._UpdateNodeInstances(node_i, nresult, nimg)
3206 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3207 self._UpdateNodeOS(node_i, nresult, nimg)
3209 if not nimg.os_fail:
3210 if refos_img is None:
3212 self._VerifyNodeOS(node_i, nimg, refos_img)
3213 self._VerifyNodeBridges(node_i, nresult, bridges)
3215 # Check whether all running instances are primary for the node. (This
3216 # can no longer be done from _VerifyInstance below, since some of the
3217 # wrong instances could be from other node groups.)
3218 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3220 for inst in non_primary_inst:
3221 # FIXME: investigate best way to handle offline insts
3222 if inst.admin_state == constants.ADMINST_OFFLINE:
3224 feedback_fn("* Skipping offline instance %s" % inst.name)
3227 test = inst in self.all_inst_info
3228 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3229 "instance should not run on node %s", node_i.name)
3230 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3231 "node is running unknown instance %s", inst)
3233 for node, result in extra_lv_nvinfo.items():
3234 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3235 node_image[node], vg_name)
3237 feedback_fn("* Verifying instance status")
3238 for instance in self.my_inst_names:
3240 feedback_fn("* Verifying instance %s" % instance)
3241 inst_config = self.my_inst_info[instance]
3242 self._VerifyInstance(instance, inst_config, node_image,
3244 inst_nodes_offline = []
3246 pnode = inst_config.primary_node
3247 pnode_img = node_image[pnode]
3248 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3249 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3250 " primary node failed", instance)
3252 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3254 constants.CV_EINSTANCEBADNODE, instance,
3255 "instance is marked as running and lives on offline node %s",
3256 inst_config.primary_node)
3258 # If the instance is non-redundant we cannot survive losing its primary
3259 # node, so we are not N+1 compliant. On the other hand we have no disk
3260 # templates with more than one secondary so that situation is not well supported either.
3262 # FIXME: does not support file-backed instances
3263 if not inst_config.secondary_nodes:
3264 i_non_redundant.append(instance)
3266 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3267 constants.CV_EINSTANCELAYOUT,
3268 instance, "instance has multiple secondary nodes: %s",
3269 utils.CommaJoin(inst_config.secondary_nodes),
3270 code=self.ETYPE_WARNING)
3272 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3273 pnode = inst_config.primary_node
3274 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3275 instance_groups = {}
3277 for node in instance_nodes:
3278 instance_groups.setdefault(self.all_node_info[node].group,
3282 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3283 # Sort so that we always list the primary node first.
3284 for group, nodes in sorted(instance_groups.items(),
3285 key=lambda (_, nodes): pnode in nodes,
3288 self._ErrorIf(len(instance_groups) > 1,
3289 constants.CV_EINSTANCESPLITGROUPS,
3290 instance, "instance has primary and secondary nodes in"
3291 " different groups: %s", utils.CommaJoin(pretty_list),
3292 code=self.ETYPE_WARNING)
3294 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3295 i_non_a_balanced.append(instance)
3297 for snode in inst_config.secondary_nodes:
3298 s_img = node_image[snode]
3299 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3300 snode, "instance %s, connection to secondary node failed",
3304 inst_nodes_offline.append(snode)
3306 # warn that the instance lives on offline nodes
3307 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3308 "instance has offline secondary node(s) %s",
3309 utils.CommaJoin(inst_nodes_offline))
3310 # ... or ghost/non-vm_capable nodes
3311 for node in inst_config.all_nodes:
3312 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3313 instance, "instance lives on ghost node %s", node)
3314 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3315 instance, "instance lives on non-vm_capable node %s", node)
3317 feedback_fn("* Verifying orphan volumes")
3318 reserved = utils.FieldSet(*cluster.reserved_lvs)
3320 # We will get spurious "unknown volume" warnings if any node of this group
3321 # is secondary for an instance whose primary is in another group. To avoid
3322 # them, we find these instances and add their volumes to node_vol_should.
3323 for inst in self.all_inst_info.values():
3324 for secondary in inst.secondary_nodes:
3325 if (secondary in self.my_node_info
3326 and inst.name not in self.my_inst_info):
3327 inst.MapLVsByNode(node_vol_should)
3330 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3332 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3333 feedback_fn("* Verifying N+1 Memory redundancy")
3334 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3336 feedback_fn("* Other Notes")
3338 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3339 % len(i_non_redundant))
3341 if i_non_a_balanced:
3342 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3343 % len(i_non_a_balanced))
3346 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3349 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3352 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3356 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3357 """Analyze the post-hooks' result
3359 This method analyses the hook result, handles it, and sends some
3360 nicely-formatted feedback back to the user.
3362 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3363 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3364 @param hooks_results: the results of the multi-node hooks rpc call
3365 @param feedback_fn: function used to send feedback back to the caller
3366 @param lu_result: previous Exec result
3367 @return: the new Exec result, based on the previous result
3371 # We only really run POST phase hooks, only for non-empty groups,
3372 # and are only interested in their results
3373 if not self.my_node_names:
3376 elif phase == constants.HOOKS_PHASE_POST:
3377 # Used to change hooks' output to proper indentation
3378 feedback_fn("* Hooks Results")
3379 assert hooks_results, "invalid result from hooks"
3381 for node_name in hooks_results:
3382 res = hooks_results[node_name]
3384 test = msg and not res.offline
3385 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3386 "Communication failure in hooks execution: %s", msg)
3387 if res.offline or msg:
3388 # No need to investigate payload if node is offline or gave an error
3391 for script, hkr, output in res.payload:
3392 test = hkr == constants.HKR_FAIL
3393 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3394 "Script %s failed, output:", script)
3396 output = self._HOOKS_INDENT_RE.sub(" ", output)
3397 feedback_fn("%s" % output)
3403 class LUClusterVerifyDisks(NoHooksLU):
3404 """Verifies the cluster disks' status.
3409 def ExpandNames(self):
3410 self.share_locks = _ShareAll()
3411 self.needed_locks = {
3412 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3415 def Exec(self, feedback_fn):
3416 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3418 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3419 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3420 for group in group_names])
3423 class LUGroupVerifyDisks(NoHooksLU):
3424 """Verifies the status of all disks in a node group.
3429 def ExpandNames(self):
3430 # Raises errors.OpPrereqError on its own if group can't be found
3431 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3433 self.share_locks = _ShareAll()
3434 self.needed_locks = {
3435 locking.LEVEL_INSTANCE: [],
3436 locking.LEVEL_NODEGROUP: [],
3437 locking.LEVEL_NODE: [],
3440 def DeclareLocks(self, level):
3441 if level == locking.LEVEL_INSTANCE:
3442 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3444 # Lock instances optimistically, needs verification once node and group
3445 # locks have been acquired
3446 self.needed_locks[locking.LEVEL_INSTANCE] = \
3447 self.cfg.GetNodeGroupInstances(self.group_uuid)
3449 elif level == locking.LEVEL_NODEGROUP:
3450 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3452 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3453 set([self.group_uuid] +
3454 # Lock all groups used by instances optimistically; this requires
3455 # going via the node before it's locked, requiring verification
3458 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3459 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3461 elif level == locking.LEVEL_NODE:
3462 # This will only lock the nodes in the group to be verified which contain actual instances
3464 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3465 self._LockInstancesNodes()
3467 # Lock all nodes in group to be verified
3468 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3469 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3470 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3472 def CheckPrereq(self):
3473 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3474 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3475 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3477 assert self.group_uuid in owned_groups
3479 # Check if locked instances are still correct
3480 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3482 # Get instance information
3483 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3485 # Check if node groups for locked instances are still correct
3486 for (instance_name, inst) in self.instances.items():
3487 assert owned_nodes.issuperset(inst.all_nodes), \
3488 "Instance %s's nodes changed while we kept the lock" % instance_name
3490 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3493 assert self.group_uuid in inst_groups, \
3494 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3496 def Exec(self, feedback_fn):
3497 """Verify integrity of cluster disks.
3499 @rtype: tuple of three items
3500 @return: a tuple of (dict of node-to-node_error, list of instances
3501 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3506 res_instances = set()
3509 nv_dict = _MapInstanceDisksToNodes([inst
3510 for inst in self.instances.values()
3511 if inst.admin_state == constants.ADMINST_UP])
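# nv_dict maps (node, lv_name) -> instance for every disk of the running
# instances; entries are popped below as LVs are reported back, so the
# leftovers are the missing volumes.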
3514 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3515 set(self.cfg.GetVmCapableNodeList()))
3517 node_lvs = self.rpc.call_lv_list(nodes, [])
3519 for (node, node_res) in node_lvs.items():
3520 if node_res.offline:
3523 msg = node_res.fail_msg
3525 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3526 res_nodes[node] = msg
3529 for lv_name, (_, _, lv_online) in node_res.payload.items():
3530 inst = nv_dict.pop((node, lv_name), None)
3531 if not (lv_online or inst is None):
3532 res_instances.add(inst)
3534 # any leftover items in nv_dict are missing LVs, so let's arrange the data for reporting
3536 for key, inst in nv_dict.iteritems():
3537 res_missing.setdefault(inst, []).append(list(key))
3539 return (res_nodes, list(res_instances), res_missing)
3542 class LUClusterRepairDiskSizes(NoHooksLU):
3543 """Verifies the cluster disks' sizes.
3548 def ExpandNames(self):
3549 if self.op.instances:
3550 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3551 self.needed_locks = {
3552 locking.LEVEL_NODE_RES: [],
3553 locking.LEVEL_INSTANCE: self.wanted_names,
3555 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3557 self.wanted_names = None
3558 self.needed_locks = {
3559 locking.LEVEL_NODE_RES: locking.ALL_SET,
3560 locking.LEVEL_INSTANCE: locking.ALL_SET,
3562 self.share_locks = {
3563 locking.LEVEL_NODE_RES: 1,
3564 locking.LEVEL_INSTANCE: 0,
3567 def DeclareLocks(self, level):
3568 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3569 self._LockInstancesNodes(primary_only=True, level=level)
3571 def CheckPrereq(self):
3572 """Check prerequisites.
3574 This only checks the optional instance list against the existing names.
3577 if self.wanted_names is None:
3578 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3580 self.wanted_instances = \
3581 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3583 def _EnsureChildSizes(self, disk):
3584 """Ensure children of the disk have the needed disk size.
3586 This is valid mainly for DRBD8 and fixes an issue where the
3587 children have smaller disk size.
3589 @param disk: an L{ganeti.objects.Disk} object
3592 if disk.dev_type == constants.LD_DRBD8:
3593 assert disk.children, "Empty children for DRBD8?"
3594 fchild = disk.children[0]
3595 mismatch = fchild.size < disk.size
3597 self.LogInfo("Child disk has size %d, parent %d, fixing",
3598 fchild.size, disk.size)
3599 fchild.size = disk.size
3601 # and we recurse on this child only, not on the metadev
3602 return self._EnsureChildSizes(fchild) or mismatch
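# A truthy return value means some child size was adjusted along the way,
# telling the caller (Exec below) that the configuration must be written
# back via cfg.Update.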
3606 def Exec(self, feedback_fn):
3607 """Verify the size of cluster disks.
3610 # TODO: check child disks too
3611 # TODO: check differences in size between primary/secondary nodes
3613 for instance in self.wanted_instances:
3614 pnode = instance.primary_node
3615 if pnode not in per_node_disks:
3616 per_node_disks[pnode] = []
3617 for idx, disk in enumerate(instance.disks):
3618 per_node_disks[pnode].append((instance, idx, disk))
3620 assert not (frozenset(per_node_disks.keys()) -
3621 self.owned_locks(locking.LEVEL_NODE_RES)), \
3622 "Not owning correct locks"
3623 assert not self.owned_locks(locking.LEVEL_NODE)
3626 for node, dskl in per_node_disks.items():
3627 newl = [v[2].Copy() for v in dskl]
3629 self.cfg.SetDiskID(dsk, node)
3630 result = self.rpc.call_blockdev_getsize(node, newl)
3632 self.LogWarning("Failure in blockdev_getsize call to node"
3633 " %s, ignoring", node)
3635 if len(result.payload) != len(dskl):
3636 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3637 " result.payload=%s", node, len(dskl), result.payload)
3638 self.LogWarning("Invalid result from node %s, ignoring node results",
3641 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3643 self.LogWarning("Disk %d of instance %s did not return size"
3644 " information, ignoring", idx, instance.name)
3646 if not isinstance(size, (int, long)):
3647 self.LogWarning("Disk %d of instance %s did not return valid"
3648 " size information, ignoring", idx, instance.name)
3651 if size != disk.size:
3652 self.LogInfo("Disk %d of instance %s has mismatched size,"
3653 " correcting: recorded %d, actual %d", idx,
3654 instance.name, disk.size, size)
3656 self.cfg.Update(instance, feedback_fn)
3657 changed.append((instance.name, idx, size))
3658 if self._EnsureChildSizes(disk):
3659 self.cfg.Update(instance, feedback_fn)
3660 changed.append((instance.name, idx, disk.size))
3664 class LUClusterRename(LogicalUnit):
3665 """Rename the cluster.
3668 HPATH = "cluster-rename"
3669 HTYPE = constants.HTYPE_CLUSTER
3671 def BuildHooksEnv(self):
3676 "OP_TARGET": self.cfg.GetClusterName(),
3677 "NEW_NAME": self.op.name,
3680 def BuildHooksNodes(self):
3681 """Build hooks nodes.
3684 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3686 def CheckPrereq(self):
3687 """Verify that the passed name is a valid one.
3690 hostname = netutils.GetHostname(name=self.op.name,
3691 family=self.cfg.GetPrimaryIPFamily())
3693 new_name = hostname.name
3694 self.ip = new_ip = hostname.ip
3695 old_name = self.cfg.GetClusterName()
3696 old_ip = self.cfg.GetMasterIP()
3697 if new_name == old_name and new_ip == old_ip:
3698 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3699 " cluster has changed",
3701 if new_ip != old_ip:
3702 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3703 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3704 " reachable on the network" %
3705 new_ip, errors.ECODE_NOTUNIQUE)
3707 self.op.name = new_name
3709 def Exec(self, feedback_fn):
3710 """Rename the cluster.
3713 clustername = self.op.name
3716 # shutdown the master IP
3717 master_params = self.cfg.GetMasterNetworkParameters()
3718 ems = self.cfg.GetUseExternalMipScript()
3719 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3721 result.Raise("Could not disable the master role")
3724 cluster = self.cfg.GetClusterInfo()
3725 cluster.cluster_name = clustername
3726 cluster.master_ip = new_ip
3727 self.cfg.Update(cluster, feedback_fn)
3729 # update the known hosts file
3730 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3731 node_list = self.cfg.GetOnlineNodeList()
3733 node_list.remove(master_params.name)
3736 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3738 master_params.ip = new_ip
3739 result = self.rpc.call_node_activate_master_ip(master_params.name,
3741 msg = result.fail_msg
3743 self.LogWarning("Could not re-enable the master role on"
3744 " the master, please restart manually: %s", msg)
3749 def _ValidateNetmask(cfg, netmask):
3750 """Checks if a netmask is valid.
3752 @type cfg: L{config.ConfigWriter}
3753 @param cfg: The cluster configuration
3755 @param netmask: the netmask to be verified
3756 @raise errors.OpPrereqError: if the validation fails
3759 ip_family = cfg.GetPrimaryIPFamily()
3761 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3762 except errors.ProgrammerError:
3763 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3765 if not ipcls.ValidateNetmask(netmask):
3766 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3770 class LUClusterSetParams(LogicalUnit):
3771 """Change the parameters of the cluster.
3774 HPATH = "cluster-modify"
3775 HTYPE = constants.HTYPE_CLUSTER
3778 def CheckArguments(self):
3782 if self.op.uid_pool:
3783 uidpool.CheckUidPool(self.op.uid_pool)
3785 if self.op.add_uids:
3786 uidpool.CheckUidPool(self.op.add_uids)
3788 if self.op.remove_uids:
3789 uidpool.CheckUidPool(self.op.remove_uids)
3791 if self.op.master_netmask is not None:
3792 _ValidateNetmask(self.cfg, self.op.master_netmask)
3794 if self.op.diskparams:
3795 for dt_params in self.op.diskparams.values():
3796 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3798 def ExpandNames(self):
3799 # FIXME: in the future maybe other cluster params won't require checking on
3800 # all nodes to be modified.
3801 self.needed_locks = {
3802 locking.LEVEL_NODE: locking.ALL_SET,
3803 locking.LEVEL_INSTANCE: locking.ALL_SET,
3804 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3806 self.share_locks = {
3807 locking.LEVEL_NODE: 1,
3808 locking.LEVEL_INSTANCE: 1,
3809 locking.LEVEL_NODEGROUP: 1,
3812 def BuildHooksEnv(self):
3817 "OP_TARGET": self.cfg.GetClusterName(),
3818 "NEW_VG_NAME": self.op.vg_name,
3821 def BuildHooksNodes(self):
3822 """Build hooks nodes.
3825 mn = self.cfg.GetMasterNode()
3828 def CheckPrereq(self):
3829 """Check prerequisites.
3831 This checks whether the given parameters don't conflict and
3832 whether the given volume group is valid.
3835 if self.op.vg_name is not None and not self.op.vg_name:
3836 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3837 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3838 " instances exist", errors.ECODE_INVAL)
3840 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3841 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3842 raise errors.OpPrereqError("Cannot disable drbd helper while"
3843 " drbd-based instances exist",
3846 node_list = self.owned_locks(locking.LEVEL_NODE)
3848 # if vg_name not None, checks given volume group on all nodes
3850 vglist = self.rpc.call_vg_list(node_list)
3851 for node in node_list:
3852 msg = vglist[node].fail_msg
3854 # ignoring down node
3855 self.LogWarning("Error while gathering data on node %s"
3856 " (ignoring node): %s", node, msg)
3858 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3860 constants.MIN_VG_SIZE)
3862 raise errors.OpPrereqError("Error on node '%s': %s" %
3863 (node, vgstatus), errors.ECODE_ENVIRON)
3865 if self.op.drbd_helper:
3866 # checks given drbd helper on all nodes
3867 helpers = self.rpc.call_drbd_helper(node_list)
3868 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3870 self.LogInfo("Not checking drbd helper on offline node %s", node)
3872 msg = helpers[node].fail_msg
3874 raise errors.OpPrereqError("Error checking drbd helper on node"
3875 " '%s': %s" % (node, msg),
3876 errors.ECODE_ENVIRON)
3877 node_helper = helpers[node].payload
3878 if node_helper != self.op.drbd_helper:
3879 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3880 (node, node_helper), errors.ECODE_ENVIRON)
3882 self.cluster = cluster = self.cfg.GetClusterInfo()
3883 # validate params changes
3884 if self.op.beparams:
3885 objects.UpgradeBeParams(self.op.beparams)
3886 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3887 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3889 if self.op.ndparams:
3890 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3891 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3893 # TODO: we need a more general way to handle resetting
3894 # cluster-level parameters to default values
3895 if self.new_ndparams["oob_program"] == "":
3896 self.new_ndparams["oob_program"] = \
3897 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
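# an empty oob_program value submitted by the user is therefore translated
# back to the compiled-in default instead of being stored verbatim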
3899 if self.op.hv_state:
3900 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3901 self.cluster.hv_state_static)
3902 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3903 for hv, values in new_hv_state.items())
3905 if self.op.disk_state:
3906 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3907 self.cluster.disk_state_static)
3908 self.new_disk_state = \
3909 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3910 for name, values in svalues.items()))
3911 for storage, svalues in new_disk_state.items())
3914 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3917 all_instances = self.cfg.GetAllInstancesInfo().values()
3919 for group in self.cfg.GetAllNodeGroupsInfo().values():
3920 instances = frozenset([inst for inst in all_instances
3921 if compat.any(node in group.members
3922 for node in inst.all_nodes)])
3923 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3924 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3926 new_ipolicy, instances)
3928 violations.update(new)
3931 self.LogWarning("After the ipolicy change the following instances"
3932 " violate them: %s",
3933 utils.CommaJoin(violations))
3935 if self.op.nicparams:
3936 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3937 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3938 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3941 # check all instances for consistency
3942 for instance in self.cfg.GetAllInstancesInfo().values():
3943 for nic_idx, nic in enumerate(instance.nics):
3944 params_copy = copy.deepcopy(nic.nicparams)
3945 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3947 # check parameter syntax
3949 objects.NIC.CheckParameterSyntax(params_filled)
3950 except errors.ConfigurationError, err:
3951 nic_errors.append("Instance %s, nic/%d: %s" %
3952 (instance.name, nic_idx, err))
3954 # if we're moving instances to routed, check that they have an ip
3955 target_mode = params_filled[constants.NIC_MODE]
3956 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3957 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3958 " address" % (instance.name, nic_idx))
3960 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3961 "\n".join(nic_errors))
3963 # hypervisor list/parameters
3964 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3965 if self.op.hvparams:
3966 for hv_name, hv_dict in self.op.hvparams.items():
3967 if hv_name not in self.new_hvparams:
3968 self.new_hvparams[hv_name] = hv_dict
3970 self.new_hvparams[hv_name].update(hv_dict)
3972 # disk template parameters
3973 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3974 if self.op.diskparams:
3975 for dt_name, dt_params in self.op.diskparams.items():
3976 if dt_name not in self.new_diskparams:
3977 self.new_diskparams[dt_name] = dt_params
3979 self.new_diskparams[dt_name].update(dt_params)
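# Illustrative note (hypothetical values): if cluster.diskparams maps "drbd"
# to {"resync-rate": 1024} and self.op.diskparams supplies
# {"drbd": {"metavg": "othervg"}}, the code above leaves
# new_diskparams["drbd"] holding both keys, i.e. submitted values are merged
# on top of the existing ones rather than replacing them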
3981 # os hypervisor parameters
3982 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3984 for os_name, hvs in self.op.os_hvp.items():
3985 if os_name not in self.new_os_hvp:
3986 self.new_os_hvp[os_name] = hvs
3988 for hv_name, hv_dict in hvs.items():
3989 if hv_name not in self.new_os_hvp[os_name]:
3990 self.new_os_hvp[os_name][hv_name] = hv_dict
3992 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3995 self.new_osp = objects.FillDict(cluster.osparams, {})
3996 if self.op.osparams:
3997 for os_name, osp in self.op.osparams.items():
3998 if os_name not in self.new_osp:
3999 self.new_osp[os_name] = {}
4001 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4004 if not self.new_osp[os_name]:
4005 # we removed all parameters
4006 del self.new_osp[os_name]
4008 # check the parameter validity (remote check)
4009 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4010 os_name, self.new_osp[os_name])
4012 # changes to the hypervisor list
4013 if self.op.enabled_hypervisors is not None:
4014 self.hv_list = self.op.enabled_hypervisors
4015 for hv in self.hv_list:
4016 # if the hypervisor doesn't already exist in the cluster
4017 # hvparams, we initialize it to empty, and then (in both
4018 # cases) we make sure to fill the defaults, as we might not
4019 # have a complete defaults list if the hypervisor wasn't
4020 # enabled before
4021 if hv not in new_hvp:
4022 new_hvp[hv] = {}
4023 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4024 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4026 self.hv_list = cluster.enabled_hypervisors
4028 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4029 # either the enabled list has changed, or the parameters have, validate
4030 for hv_name, hv_params in self.new_hvparams.items():
4031 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4032 (self.op.enabled_hypervisors and
4033 hv_name in self.op.enabled_hypervisors)):
4034 # either this is a new hypervisor, or its parameters have changed
4035 hv_class = hypervisor.GetHypervisor(hv_name)
4036 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4037 hv_class.CheckParameterSyntax(hv_params)
4038 _CheckHVParams(self, node_list, hv_name, hv_params)
4041 # no need to check any newly-enabled hypervisors, since the
4042 # defaults have already been checked in the above code-block
4043 for os_name, os_hvp in self.new_os_hvp.items():
4044 for hv_name, hv_params in os_hvp.items():
4045 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4046 # we need to fill in the new os_hvp on top of the actual hv_p
4047 cluster_defaults = self.new_hvparams.get(hv_name, {})
4048 new_osp = objects.FillDict(cluster_defaults, hv_params)
4049 hv_class = hypervisor.GetHypervisor(hv_name)
4050 hv_class.CheckParameterSyntax(new_osp)
4051 _CheckHVParams(self, node_list, hv_name, new_osp)
4053 if self.op.default_iallocator:
4054 alloc_script = utils.FindFile(self.op.default_iallocator,
4055 constants.IALLOCATOR_SEARCH_PATH,
4057 if alloc_script is None:
4058 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4059 " specified" % self.op.default_iallocator,
4062 def Exec(self, feedback_fn):
4063 """Change the parameters of the cluster.
4066 if self.op.vg_name is not None:
4067 new_volume = self.op.vg_name
4070 if new_volume != self.cfg.GetVGName():
4071 self.cfg.SetVGName(new_volume)
4073 feedback_fn("Cluster LVM configuration already in desired"
4074 " state, not changing")
4075 if self.op.drbd_helper is not None:
4076 new_helper = self.op.drbd_helper
4079 if new_helper != self.cfg.GetDRBDHelper():
4080 self.cfg.SetDRBDHelper(new_helper)
4082 feedback_fn("Cluster DRBD helper already in desired state,"
4084 if self.op.hvparams:
4085 self.cluster.hvparams = self.new_hvparams
4087 self.cluster.os_hvp = self.new_os_hvp
4088 if self.op.enabled_hypervisors is not None:
4089 self.cluster.hvparams = self.new_hvparams
4090 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4091 if self.op.beparams:
4092 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4093 if self.op.nicparams:
4094 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4096 self.cluster.ipolicy = self.new_ipolicy
4097 if self.op.osparams:
4098 self.cluster.osparams = self.new_osp
4099 if self.op.ndparams:
4100 self.cluster.ndparams = self.new_ndparams
4101 if self.op.diskparams:
4102 self.cluster.diskparams = self.new_diskparams
4103 if self.op.hv_state:
4104 self.cluster.hv_state_static = self.new_hv_state
4105 if self.op.disk_state:
4106 self.cluster.disk_state_static = self.new_disk_state
4108 if self.op.candidate_pool_size is not None:
4109 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4110 # we need to update the pool size here, otherwise the save will fail
4111 _AdjustCandidatePool(self, [])
4113 if self.op.maintain_node_health is not None:
4114 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4115 feedback_fn("Note: CONFD was disabled at build time, node health"
4116 " maintenance is not useful (still enabling it)")
4117 self.cluster.maintain_node_health = self.op.maintain_node_health
4119 if self.op.prealloc_wipe_disks is not None:
4120 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4122 if self.op.add_uids is not None:
4123 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4125 if self.op.remove_uids is not None:
4126 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4128 if self.op.uid_pool is not None:
4129 self.cluster.uid_pool = self.op.uid_pool
4131 if self.op.default_iallocator is not None:
4132 self.cluster.default_iallocator = self.op.default_iallocator
4134 if self.op.reserved_lvs is not None:
4135 self.cluster.reserved_lvs = self.op.reserved_lvs
4137 if self.op.use_external_mip_script is not None:
4138 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4140 def helper_os(aname, mods, desc):
4142 lst = getattr(self.cluster, aname)
4143 for key, val in mods:
4144 if key == constants.DDM_ADD:
4146 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4149 elif key == constants.DDM_REMOVE:
4153 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4155 raise errors.ProgrammerError("Invalid modification '%s'" % key)
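# Illustrative example (hypothetical OS name): hidden_os set to
# [(constants.DDM_ADD, "debian-image")] appends "debian-image" to
# cluster.hidden_os, while a constants.DDM_REMOVE entry drops it again;
# any other modification key is a programmer error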
4157 if self.op.hidden_os:
4158 helper_os("hidden_os", self.op.hidden_os, "hidden")
4160 if self.op.blacklisted_os:
4161 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4163 if self.op.master_netdev:
4164 master_params = self.cfg.GetMasterNetworkParameters()
4165 ems = self.cfg.GetUseExternalMipScript()
4166 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4167 self.cluster.master_netdev)
4168 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4170 result.Raise("Could not disable the master ip")
4171 feedback_fn("Changing master_netdev from %s to %s" %
4172 (master_params.netdev, self.op.master_netdev))
4173 self.cluster.master_netdev = self.op.master_netdev
4175 if self.op.master_netmask:
4176 master_params = self.cfg.GetMasterNetworkParameters()
4177 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4178 result = self.rpc.call_node_change_master_netmask(master_params.name,
4179 master_params.netmask,
4180 self.op.master_netmask,
4182 master_params.netdev)
4184 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4187 self.cluster.master_netmask = self.op.master_netmask
4189 self.cfg.Update(self.cluster, feedback_fn)
4191 if self.op.master_netdev:
4192 master_params = self.cfg.GetMasterNetworkParameters()
4193 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4194 self.op.master_netdev)
4195 ems = self.cfg.GetUseExternalMipScript()
4196 result = self.rpc.call_node_activate_master_ip(master_params.name,
4199 self.LogWarning("Could not re-enable the master ip on"
4200 " the master, please restart manually: %s",
4204 def _UploadHelper(lu, nodes, fname):
4205 """Helper for uploading a file and showing warnings.
4208 if os.path.exists(fname):
4209 result = lu.rpc.call_upload_file(nodes, fname)
4210 for to_node, to_result in result.items():
4211 msg = to_result.fail_msg
4213 msg = ("Copy of file %s to node %s failed: %s" %
4214 (fname, to_node, msg))
4215 lu.proc.LogWarning(msg)
4218 def _ComputeAncillaryFiles(cluster, redist):
4219 """Compute files external to Ganeti which need to be consistent.
4221 @type redist: boolean
4222 @param redist: Whether to include files which need to be redistributed
4225 # Compute files for all nodes
4227 constants.SSH_KNOWN_HOSTS_FILE,
4228 constants.CONFD_HMAC_KEY,
4229 constants.CLUSTER_DOMAIN_SECRET_FILE,
4230 constants.SPICE_CERT_FILE,
4231 constants.SPICE_CACERT_FILE,
4232 constants.RAPI_USERS_FILE,
4236 files_all.update(constants.ALL_CERT_FILES)
4237 files_all.update(ssconf.SimpleStore().GetFileList())
4239 # we need to ship at least the RAPI certificate
4240 files_all.add(constants.RAPI_CERT_FILE)
4242 if cluster.modify_etc_hosts:
4243 files_all.add(constants.ETC_HOSTS)
4245 # Files which are optional; these must:
4246 # - be present in one other category as well
4247 # - either exist or not exist on all nodes of that category (mc, vm all)
4249 constants.RAPI_USERS_FILE,
4252 # Files which should only be on master candidates
4256 files_mc.add(constants.CLUSTER_CONF_FILE)
4258 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4260 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4262 # Files which should only be on VM-capable nodes
4263 files_vm = set(filename
4264 for hv_name in cluster.enabled_hypervisors
4265 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4267 files_opt |= set(filename
4268 for hv_name in cluster.enabled_hypervisors
4269 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4271 # Filenames in each category must be unique
4272 all_files_set = files_all | files_mc | files_vm
4273 assert (len(all_files_set) ==
4274 sum(map(len, [files_all, files_mc, files_vm]))), \
4275 "Found file listed in more than one file list"
4277 # Optional files must be present in one other category
4278 assert all_files_set.issuperset(files_opt), \
4279 "Optional file not in a different required list"
4281 return (files_all, files_opt, files_mc, files_vm)
4284 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4285 """Distribute additional files which are part of the cluster configuration.
4287 ConfigWriter takes care of distributing the config and ssconf files, but
4288 there are more files which should be distributed to all nodes. This function
4289 makes sure those are copied.
4291 @param lu: calling logical unit
4292 @param additional_nodes: list of nodes not in the config to distribute to
4293 @type additional_vm: boolean
4294 @param additional_vm: whether the additional nodes are vm-capable or not
4297 # Gather target nodes
4298 cluster = lu.cfg.GetClusterInfo()
4299 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4301 online_nodes = lu.cfg.GetOnlineNodeList()
4302 vm_nodes = lu.cfg.GetVmCapableNodeList()
4304 if additional_nodes is not None:
4305 online_nodes.extend(additional_nodes)
4307 vm_nodes.extend(additional_nodes)
4309 # Never distribute to master node
4310 for nodelist in [online_nodes, vm_nodes]:
4311 if master_info.name in nodelist:
4312 nodelist.remove(master_info.name)
4315 (files_all, _, files_mc, files_vm) = \
4316 _ComputeAncillaryFiles(cluster, True)
4318 # Never re-distribute configuration file from here
4319 assert not (constants.CLUSTER_CONF_FILE in files_all or
4320 constants.CLUSTER_CONF_FILE in files_vm)
4321 assert not files_mc, "Master candidates not handled in this function"
4324 (online_nodes, files_all),
4325 (vm_nodes, files_vm),
4329 for (node_list, files) in filemap:
4331 _UploadHelper(lu, node_list, fname)
4334 class LUClusterRedistConf(NoHooksLU):
4335 """Force the redistribution of cluster configuration.
4337 This is a very simple LU.
4342 def ExpandNames(self):
4343 self.needed_locks = {
4344 locking.LEVEL_NODE: locking.ALL_SET,
4346 self.share_locks[locking.LEVEL_NODE] = 1
4348 def Exec(self, feedback_fn):
4349 """Redistribute the configuration.
4352 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4353 _RedistributeAncillaryFiles(self)
4356 class LUClusterActivateMasterIp(NoHooksLU):
4357 """Activate the master IP on the master node.
4360 def Exec(self, feedback_fn):
4361 """Activate the master IP.
4364 master_params = self.cfg.GetMasterNetworkParameters()
4365 ems = self.cfg.GetUseExternalMipScript()
4366 result = self.rpc.call_node_activate_master_ip(master_params.name,
4368 result.Raise("Could not activate the master IP")
4371 class LUClusterDeactivateMasterIp(NoHooksLU):
4372 """Deactivate the master IP on the master node.
4375 def Exec(self, feedback_fn):
4376 """Deactivate the master IP.
4379 master_params = self.cfg.GetMasterNetworkParameters()
4380 ems = self.cfg.GetUseExternalMipScript()
4381 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4383 result.Raise("Could not deactivate the master IP")
4386 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4387 """Sleep and poll for an instance's disk to sync.
4390 if not instance.disks or (disks is not None and not disks):
4393 disks = _ExpandCheckDisks(instance, disks)
4396 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4398 node = instance.primary_node
4401 lu.cfg.SetDiskID(dev, node)
4403 # TODO: Convert to utils.Retry
4406 degr_retries = 10 # in seconds, as we sleep 1 second each time
4410 cumul_degraded = False
4411 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4412 msg = rstats.fail_msg
4414 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4417 raise errors.RemoteError("Can't contact node %s for mirror data,"
4418 " aborting." % node)
4421 rstats = rstats.payload
4423 for i, mstat in enumerate(rstats):
4425 lu.LogWarning("Can't compute data for node %s/%s",
4426 node, disks[i].iv_name)
4429 cumul_degraded = (cumul_degraded or
4430 (mstat.is_degraded and mstat.sync_percent is None))
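# a device that reports itself degraded but shows no resync progress keeps
# the overall result flagged as degraded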
4431 if mstat.sync_percent is not None:
4433 if mstat.estimated_time is not None:
4434 rem_time = ("%s remaining (estimated)" %
4435 utils.FormatSeconds(mstat.estimated_time))
4436 max_time = mstat.estimated_time
4438 rem_time = "no time estimate"
4439 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4440 (disks[i].iv_name, mstat.sync_percent, rem_time))
4442 # if we're done but degraded, let's do a few small retries, to
4443 # make sure we see a stable and not transient situation; therefore
4444 # we force restart of the loop
4445 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4446 logging.info("Degraded disks found, %d retries left", degr_retries)
4454 time.sleep(min(60, max_time))
4457 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4458 return not cumul_degraded
4461 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4462 """Check that mirrors are not degraded.
4464 The ldisk parameter, if True, will change the test from the
4465 is_degraded attribute (which represents overall non-ok status for
4466 the device(s)) to the ldisk (representing the local storage status).
4469 lu.cfg.SetDiskID(dev, node)
4473 if on_primary or dev.AssembleOnSecondary():
4474 rstats = lu.rpc.call_blockdev_find(node, dev)
4475 msg = rstats.fail_msg
4477 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4479 elif not rstats.payload:
4480 lu.LogWarning("Can't find disk on node %s", node)
4484 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4486 result = result and not rstats.payload.is_degraded
4489 for child in dev.children:
4490 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
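# the device counts as consistent only if all of its children pass the same
# check on this node as well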
4495 class LUOobCommand(NoHooksLU):
4496 """Logical unit for OOB handling.
4500 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4502 def ExpandNames(self):
4503 """Gather locks we need.
4506 if self.op.node_names:
4507 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4508 lock_names = self.op.node_names
4510 lock_names = locking.ALL_SET
4512 self.needed_locks = {
4513 locking.LEVEL_NODE: lock_names,
4516 def CheckPrereq(self):
4517 """Check prerequisites.
4520 - the node exists in the configuration
4523 Any errors are signaled by raising errors.OpPrereqError.
4527 self.master_node = self.cfg.GetMasterNode()
4529 assert self.op.power_delay >= 0.0
4531 if self.op.node_names:
4532 if (self.op.command in self._SKIP_MASTER and
4533 self.master_node in self.op.node_names):
4534 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4535 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4537 if master_oob_handler:
4538 additional_text = ("run '%s %s %s' if you want to operate on the"
4539 " master regardless") % (master_oob_handler,
4543 additional_text = "it does not support out-of-band operations"
4545 raise errors.OpPrereqError(("Operating on the master node %s is not"
4546 " allowed for %s; %s") %
4547 (self.master_node, self.op.command,
4548 additional_text), errors.ECODE_INVAL)
4550 self.op.node_names = self.cfg.GetNodeList()
4551 if self.op.command in self._SKIP_MASTER:
4552 self.op.node_names.remove(self.master_node)
4554 if self.op.command in self._SKIP_MASTER:
4555 assert self.master_node not in self.op.node_names
4557 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4559 raise errors.OpPrereqError("Node %s not found" % node_name,
4562 self.nodes.append(node)
4564 if (not self.op.ignore_status and
4565 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4566 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4567 " not marked offline") % node_name,
4570 def Exec(self, feedback_fn):
4571 """Execute OOB and return result if we expect any.
4574 master_node = self.master_node
4577 for idx, node in enumerate(utils.NiceSort(self.nodes,
4578 key=lambda node: node.name)):
4579 node_entry = [(constants.RS_NORMAL, node.name)]
4580 ret.append(node_entry)
4582 oob_program = _SupportsOob(self.cfg, node)
4585 node_entry.append((constants.RS_UNAVAIL, None))
4588 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4589 self.op.command, oob_program, node.name)
4590 result = self.rpc.call_run_oob(master_node, oob_program,
4591 self.op.command, node.name,
4595 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4596 node.name, result.fail_msg)
4597 node_entry.append((constants.RS_NODATA, None))
4600 self._CheckPayload(result)
4601 except errors.OpExecError, err:
4602 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4604 node_entry.append((constants.RS_NODATA, None))
4606 if self.op.command == constants.OOB_HEALTH:
4607 # For health we should log important events
4608 for item, status in result.payload:
4609 if status in [constants.OOB_STATUS_WARNING,
4610 constants.OOB_STATUS_CRITICAL]:
4611 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4612 item, node.name, status)
4614 if self.op.command == constants.OOB_POWER_ON:
4615 node.powered = True
4616 elif self.op.command == constants.OOB_POWER_OFF:
4617 node.powered = False
4618 elif self.op.command == constants.OOB_POWER_STATUS:
4619 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4620 if powered != node.powered:
4621 logging.warning(("Recorded power state (%s) of node '%s' does not"
4622 " match actual power state (%s)"), node.powered,
4625 # For configuration changing commands we should update the node
4626 if self.op.command in (constants.OOB_POWER_ON,
4627 constants.OOB_POWER_OFF):
4628 self.cfg.Update(node, feedback_fn)
4630 node_entry.append((constants.RS_NORMAL, result.payload))
4632 if (self.op.command == constants.OOB_POWER_ON and
4633 idx < len(self.nodes) - 1):
4634 time.sleep(self.op.power_delay)
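# the optional power_delay only separates consecutive power-on calls; it is
# not applied after the last node in the list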
4638 def _CheckPayload(self, result):
4639 """Checks if the payload is valid.
4641 @param result: RPC result
4642 @raises errors.OpExecError: If payload is not valid
4646 if self.op.command == constants.OOB_HEALTH:
4647 if not isinstance(result.payload, list):
4648 errs.append("command 'health' is expected to return a list but got %s" %
4649 type(result.payload))
4651 for item, status in result.payload:
4652 if status not in constants.OOB_STATUSES:
4653 errs.append("health item '%s' has invalid status '%s'" %
4656 if self.op.command == constants.OOB_POWER_STATUS:
4657 if not isinstance(result.payload, dict):
4658 errs.append("power-status is expected to return a dict but got %s" %
4659 type(result.payload))
4661 if self.op.command in [
4662 constants.OOB_POWER_ON,
4663 constants.OOB_POWER_OFF,
4664 constants.OOB_POWER_CYCLE,
4666 if result.payload is not None:
4667 errs.append("%s is expected to not return payload but got '%s'" %
4668 (self.op.command, result.payload))
4671 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4672 utils.CommaJoin(errs))
4675 class _OsQuery(_QueryBase):
4676 FIELDS = query.OS_FIELDS
4678 def ExpandNames(self, lu):
4679 # Lock all nodes in shared mode
4680 # Temporary removal of locks, should be reverted later
4681 # TODO: reintroduce locks when they are lighter-weight
4682 lu.needed_locks = {}
4683 #self.share_locks[locking.LEVEL_NODE] = 1
4684 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4686 # The following variables interact with _QueryBase._GetNames
4688 self.wanted = self.names
4690 self.wanted = locking.ALL_SET
4692 self.do_locking = self.use_locking
4694 def DeclareLocks(self, lu, level):
4698 def _DiagnoseByOS(rlist):
4699 """Remaps a per-node return list into an a per-os per-node dictionary
4701 @param rlist: a map with node names as keys and OS objects as values
4704 @return: a dictionary with osnames as keys and as value another
4705 map, with nodes as keys and tuples of (path, status, diagnose,
4706 variants, parameters, api_versions) as values, eg::
4708 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4709 (/srv/..., False, "invalid api")],
4710 "node2": [(/srv/..., True, "", [], [])]}
4715 # we build here the list of nodes that didn't fail the RPC (at RPC
4716 # level), so that nodes with a non-responding node daemon don't
4717 # make all OSes invalid
4718 good_nodes = [node_name for node_name in rlist
4719 if not rlist[node_name].fail_msg]
4720 for node_name, nr in rlist.items():
4721 if nr.fail_msg or not nr.payload:
4723 for (name, path, status, diagnose, variants,
4724 params, api_versions) in nr.payload:
4725 if name not in all_os:
4726 # build a list of nodes for this os containing empty lists
4727 # for each node in node_list
4729 for nname in good_nodes:
4730 all_os[name][nname] = []
4731 # convert params from [name, help] to (name, help)
4732 params = [tuple(v) for v in params]
4733 all_os[name][node_name].append((path, status, diagnose,
4734 variants, params, api_versions))
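# at this point all_os maps os_name -> {node_name: [(path, status, diagnose,
# variants, params, api_versions), ...]}, with an (initially empty) entry for
# every node that answered the RPC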
4737 def _GetQueryData(self, lu):
4738 """Computes the list of nodes and their attributes.
4741 # Locking is not used
4742 assert not (compat.any(lu.glm.is_owned(level)
4743 for level in locking.LEVELS
4744 if level != locking.LEVEL_CLUSTER) or
4745 self.do_locking or self.use_locking)
4747 valid_nodes = [node.name
4748 for node in lu.cfg.GetAllNodesInfo().values()
4749 if not node.offline and node.vm_capable]
4750 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4751 cluster = lu.cfg.GetClusterInfo()
4755 for (os_name, os_data) in pol.items():
4756 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4757 hidden=(os_name in cluster.hidden_os),
4758 blacklisted=(os_name in cluster.blacklisted_os))
4762 api_versions = set()
4764 for idx, osl in enumerate(os_data.values()):
4765 info.valid = bool(info.valid and osl and osl[0][1])
4769 (node_variants, node_params, node_api) = osl[0][3:6]
4772 variants.update(node_variants)
4773 parameters.update(node_params)
4774 api_versions.update(node_api)
4776 # Filter out inconsistent values
4777 variants.intersection_update(node_variants)
4778 parameters.intersection_update(node_params)
4779 api_versions.intersection_update(node_api)
4781 info.variants = list(variants)
4782 info.parameters = list(parameters)
4783 info.api_versions = list(api_versions)
4785 data[os_name] = info
4787 # Prepare data in requested order
4788 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4792 class LUOsDiagnose(NoHooksLU):
4793 """Logical unit for OS diagnose/query.
4799 def _BuildFilter(fields, names):
4800 """Builds a filter for querying OSes.
4803 name_filter = qlang.MakeSimpleFilter("name", names)
4805 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4806 # respective field is not requested
4807 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4808 for fname in ["hidden", "blacklisted"]
4809 if fname not in fields]
4810 if "valid" not in fields:
4811 status_filter.append([qlang.OP_TRUE, "valid"])
4814 status_filter.insert(0, qlang.OP_AND)
4816 status_filter = None
4818 if name_filter and status_filter:
4819 return [qlang.OP_AND, name_filter, status_filter]
4823 return status_filter
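# Illustrative example (hypothetical arguments): _BuildFilter(["name"],
# ["lenny-image"]) combines the name filter produced by
# qlang.MakeSimpleFilter with a status filter roughly equivalent to
# [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]], [OP_NOT, [OP_TRUE, "blacklisted"]],
# [OP_TRUE, "valid"]]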
4825 def CheckArguments(self):
4826 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4827 self.op.output_fields, False)
4829 def ExpandNames(self):
4830 self.oq.ExpandNames(self)
4832 def Exec(self, feedback_fn):
4833 return self.oq.OldStyleQuery(self)
4836 class LUNodeRemove(LogicalUnit):
4837 """Logical unit for removing a node.
4840 HPATH = "node-remove"
4841 HTYPE = constants.HTYPE_NODE
4843 def BuildHooksEnv(self):
4846 This doesn't run on the target node in the pre phase as a failed
4847 node would then be impossible to remove.
4851 "OP_TARGET": self.op.node_name,
4852 "NODE_NAME": self.op.node_name,
4855 def BuildHooksNodes(self):
4856 """Build hooks nodes.
4859 all_nodes = self.cfg.GetNodeList()
4861 all_nodes.remove(self.op.node_name)
4863 logging.warning("Node '%s', which is about to be removed, was not found"
4864 " in the list of all nodes", self.op.node_name)
4865 return (all_nodes, all_nodes)
4867 def CheckPrereq(self):
4868 """Check prerequisites.
4871 - the node exists in the configuration
4872 - it does not have primary or secondary instances
4873 - it's not the master
4875 Any errors are signaled by raising errors.OpPrereqError.
4878 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4879 node = self.cfg.GetNodeInfo(self.op.node_name)
4880 assert node is not None
4882 masternode = self.cfg.GetMasterNode()
4883 if node.name == masternode:
4884 raise errors.OpPrereqError("Node is the master node, failover to another"
4885 " node is required", errors.ECODE_INVAL)
4887 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4888 if node.name in instance.all_nodes:
4889 raise errors.OpPrereqError("Instance %s is still running on the node,"
4890 " please remove first" % instance_name,
4892 self.op.node_name = node.name
4895 def Exec(self, feedback_fn):
4896 """Removes the node from the cluster.
4900 logging.info("Stopping the node daemon and removing configs from node %s",
4903 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4905 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4908 # Promote nodes to master candidate as needed
4909 _AdjustCandidatePool(self, exceptions=[node.name])
4910 self.context.RemoveNode(node.name)
4912 # Run post hooks on the node before it's removed
4913 _RunPostHook(self, node.name)
4915 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4916 msg = result.fail_msg
4918 self.LogWarning("Errors encountered on the remote node while leaving"
4919 " the cluster: %s", msg)
4921 # Remove node from our /etc/hosts
4922 if self.cfg.GetClusterInfo().modify_etc_hosts:
4923 master_node = self.cfg.GetMasterNode()
4924 result = self.rpc.call_etc_hosts_modify(master_node,
4925 constants.ETC_HOSTS_REMOVE,
4927 result.Raise("Can't update hosts file with new host data")
4928 _RedistributeAncillaryFiles(self)
4931 class _NodeQuery(_QueryBase):
4932 FIELDS = query.NODE_FIELDS
4934 def ExpandNames(self, lu):
4935 lu.needed_locks = {}
4936 lu.share_locks = _ShareAll()
4939 self.wanted = _GetWantedNodes(lu, self.names)
4941 self.wanted = locking.ALL_SET
4943 self.do_locking = (self.use_locking and
4944 query.NQ_LIVE in self.requested_data)
4947 # If any non-static field is requested we need to lock the nodes
4948 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4950 def DeclareLocks(self, lu, level):
4953 def _GetQueryData(self, lu):
4954 """Computes the list of nodes and their attributes.
4957 all_info = lu.cfg.GetAllNodesInfo()
4959 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4961 # Gather data as requested
4962 if query.NQ_LIVE in self.requested_data:
4963 # filter out non-vm_capable nodes
4964 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4966 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4967 [lu.cfg.GetHypervisorType()])
4968 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4969 for (name, nresult) in node_data.items()
4970 if not nresult.fail_msg and nresult.payload)
4974 if query.NQ_INST in self.requested_data:
4975 node_to_primary = dict([(name, set()) for name in nodenames])
4976 node_to_secondary = dict([(name, set()) for name in nodenames])
4978 inst_data = lu.cfg.GetAllInstancesInfo()
4980 for inst in inst_data.values():
4981 if inst.primary_node in node_to_primary:
4982 node_to_primary[inst.primary_node].add(inst.name)
4983 for secnode in inst.secondary_nodes:
4984 if secnode in node_to_secondary:
4985 node_to_secondary[secnode].add(inst.name)
4987 node_to_primary = None
4988 node_to_secondary = None
4990 if query.NQ_OOB in self.requested_data:
4991 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4992 for name, node in all_info.iteritems())
4996 if query.NQ_GROUP in self.requested_data:
4997 groups = lu.cfg.GetAllNodeGroupsInfo()
5001 return query.NodeQueryData([all_info[name] for name in nodenames],
5002 live_data, lu.cfg.GetMasterNode(),
5003 node_to_primary, node_to_secondary, groups,
5004 oob_support, lu.cfg.GetClusterInfo())
5007 class LUNodeQuery(NoHooksLU):
5008 """Logical unit for querying nodes.
5011 # pylint: disable=W0142
5014 def CheckArguments(self):
5015 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5016 self.op.output_fields, self.op.use_locking)
5018 def ExpandNames(self):
5019 self.nq.ExpandNames(self)
5021 def DeclareLocks(self, level):
5022 self.nq.DeclareLocks(self, level)
5024 def Exec(self, feedback_fn):
5025 return self.nq.OldStyleQuery(self)
5028 class LUNodeQueryvols(NoHooksLU):
5029 """Logical unit for getting volumes on node(s).
5033 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5034 _FIELDS_STATIC = utils.FieldSet("node")
5036 def CheckArguments(self):
5037 _CheckOutputFields(static=self._FIELDS_STATIC,
5038 dynamic=self._FIELDS_DYNAMIC,
5039 selected=self.op.output_fields)
5041 def ExpandNames(self):
5042 self.share_locks = _ShareAll()
5043 self.needed_locks = {}
5045 if not self.op.nodes:
5046 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5048 self.needed_locks[locking.LEVEL_NODE] = \
5049 _GetWantedNodes(self, self.op.nodes)
5051 def Exec(self, feedback_fn):
5052 """Computes the list of nodes and their attributes.
5055 nodenames = self.owned_locks(locking.LEVEL_NODE)
5056 volumes = self.rpc.call_node_volumes(nodenames)
5058 ilist = self.cfg.GetAllInstancesInfo()
5059 vol2inst = _MapInstanceDisksToNodes(ilist.values())
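# vol2inst maps (node_name, "<vg>/<lv_name>") pairs to the owning instance
# name; it backs the "instance" output field below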
5062 for node in nodenames:
5063 nresult = volumes[node]
5066 msg = nresult.fail_msg
5068 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5071 node_vols = sorted(nresult.payload,
5072 key=operator.itemgetter("dev"))
5074 for vol in node_vols:
5076 for field in self.op.output_fields:
5079 elif field == "phys":
5083 elif field == "name":
5085 elif field == "size":
5086 val = int(float(vol["size"]))
5087 elif field == "instance":
5088 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5090 raise errors.ParameterError(field)
5091 node_output.append(str(val))
5093 output.append(node_output)
5098 class LUNodeQueryStorage(NoHooksLU):
5099 """Logical unit for getting information on storage units on node(s).
5102 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5105 def CheckArguments(self):
5106 _CheckOutputFields(static=self._FIELDS_STATIC,
5107 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5108 selected=self.op.output_fields)
5110 def ExpandNames(self):
5111 self.share_locks = _ShareAll()
5112 self.needed_locks = {}
5115 self.needed_locks[locking.LEVEL_NODE] = \
5116 _GetWantedNodes(self, self.op.nodes)
5118 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5120 def Exec(self, feedback_fn):
5121 """Computes the list of nodes and their attributes.
5124 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5126 # Always get name to sort by
5127 if constants.SF_NAME in self.op.output_fields:
5128 fields = self.op.output_fields[:]
5130 fields = [constants.SF_NAME] + self.op.output_fields
5132 # Never ask for node or type as it's only known to the LU
5133 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5134 while extra in fields:
5135 fields.remove(extra)
5137 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5138 name_idx = field_idx[constants.SF_NAME]
5140 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5141 data = self.rpc.call_storage_list(self.nodes,
5142 self.op.storage_type, st_args,
5143 self.op.name, fields)
5147 for node in utils.NiceSort(self.nodes):
5148 nresult = data[node]
5152 msg = nresult.fail_msg
5154 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5157 rows = dict([(row[name_idx], row) for row in nresult.payload])
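# index the returned rows by storage unit name so they can be emitted in
# NiceSort order below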
5159 for name in utils.NiceSort(rows.keys()):
5164 for field in self.op.output_fields:
5165 if field == constants.SF_NODE:
5167 elif field == constants.SF_TYPE:
5168 val = self.op.storage_type
5169 elif field in field_idx:
5170 val = row[field_idx[field]]
5172 raise errors.ParameterError(field)
5181 class _InstanceQuery(_QueryBase):
5182 FIELDS = query.INSTANCE_FIELDS
5184 def ExpandNames(self, lu):
5185 lu.needed_locks = {}
5186 lu.share_locks = _ShareAll()
5189 self.wanted = _GetWantedInstances(lu, self.names)
5191 self.wanted = locking.ALL_SET
5193 self.do_locking = (self.use_locking and
5194 query.IQ_LIVE in self.requested_data)
5196 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5197 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5198 lu.needed_locks[locking.LEVEL_NODE] = []
5199 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5201 self.do_grouplocks = (self.do_locking and
5202 query.IQ_NODES in self.requested_data)
5204 def DeclareLocks(self, lu, level):
5206 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5207 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5209 # Lock all groups used by instances optimistically; this requires going
5210 # via the node before it's locked, requiring verification later on
5211 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5213 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5214 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5215 elif level == locking.LEVEL_NODE:
5216 lu._LockInstancesNodes() # pylint: disable=W0212
5219 def _CheckGroupLocks(lu):
5220 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5221 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5223 # Check if node groups for locked instances are still correct
5224 for instance_name in owned_instances:
5225 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5227 def _GetQueryData(self, lu):
5228 """Computes the list of instances and their attributes.
5231 if self.do_grouplocks:
5232 self._CheckGroupLocks(lu)
5234 cluster = lu.cfg.GetClusterInfo()
5235 all_info = lu.cfg.GetAllInstancesInfo()
5237 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5239 instance_list = [all_info[name] for name in instance_names]
5240 nodes = frozenset(itertools.chain(*(inst.all_nodes
5241 for inst in instance_list)))
5242 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5245 wrongnode_inst = set()
5247 # Gather data as requested
5248 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5250 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5252 result = node_data[name]
5254 # offline nodes will be in both lists
5255 assert result.fail_msg
5256 offline_nodes.append(name)
5258 bad_nodes.append(name)
5259 elif result.payload:
5260 for inst in result.payload:
5261 if inst in all_info:
5262 if all_info[inst].primary_node == name:
5263 live_data.update(result.payload)
5265 wrongnode_inst.add(inst)
5267 # orphan instance; we don't list it here as we don't
5268 # handle this case yet in the output of instance listing
5269 logging.warning("Orphan instance '%s' found on node %s",
5271 # else no instance is alive
5275 if query.IQ_DISKUSAGE in self.requested_data:
5276 disk_usage = dict((inst.name,
5277 _ComputeDiskSize(inst.disk_template,
5278 [{constants.IDISK_SIZE: disk.size}
5279 for disk in inst.disks]))
5280 for inst in instance_list)
5284 if query.IQ_CONSOLE in self.requested_data:
5286 for inst in instance_list:
5287 if inst.name in live_data:
5288 # Instance is running
5289 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5291 consinfo[inst.name] = None
5292 assert set(consinfo.keys()) == set(instance_names)
5296 if query.IQ_NODES in self.requested_data:
5297 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5299 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5300 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5301 for uuid in set(map(operator.attrgetter("group"),
5307 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5308 disk_usage, offline_nodes, bad_nodes,
5309 live_data, wrongnode_inst, consinfo,
5313 class LUQuery(NoHooksLU):
5314 """Query for resources/items of a certain kind.
5317 # pylint: disable=W0142
5320 def CheckArguments(self):
5321 qcls = _GetQueryImplementation(self.op.what)
5323 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5325 def ExpandNames(self):
5326 self.impl.ExpandNames(self)
5328 def DeclareLocks(self, level):
5329 self.impl.DeclareLocks(self, level)
5331 def Exec(self, feedback_fn):
5332 return self.impl.NewStyleQuery(self)
5335 class LUQueryFields(NoHooksLU):
5336 """Query for resources/items of a certain kind.
5339 # pylint: disable=W0142
5342 def CheckArguments(self):
5343 self.qcls = _GetQueryImplementation(self.op.what)
5345 def ExpandNames(self):
5346 self.needed_locks = {}
5348 def Exec(self, feedback_fn):
5349 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5352 class LUNodeModifyStorage(NoHooksLU):
5353 """Logical unit for modifying a storage volume on a node.
5358 def CheckArguments(self):
5359 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5361 storage_type = self.op.storage_type
5364 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5366 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5367 " modified" % storage_type,
5370 diff = set(self.op.changes.keys()) - modifiable
5372 raise errors.OpPrereqError("The following fields can not be modified for"
5373 " storage units of type '%s': %r" %
5374 (storage_type, list(diff)),
5377 def ExpandNames(self):
5378 self.needed_locks = {
5379 locking.LEVEL_NODE: self.op.node_name,
5382 def Exec(self, feedback_fn):
5383 """Computes the list of nodes and their attributes.
5386 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5387 result = self.rpc.call_storage_modify(self.op.node_name,
5388 self.op.storage_type, st_args,
5389 self.op.name, self.op.changes)
5390 result.Raise("Failed to modify storage unit '%s' on %s" %
5391 (self.op.name, self.op.node_name))
5394 class LUNodeAdd(LogicalUnit):
5395 """Logical unit for adding node to the cluster.
5399 HTYPE = constants.HTYPE_NODE
5400 _NFLAGS = ["master_capable", "vm_capable"]
5402 def CheckArguments(self):
5403 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5404 # validate/normalize the node name
5405 self.hostname = netutils.GetHostname(name=self.op.node_name,
5406 family=self.primary_ip_family)
5407 self.op.node_name = self.hostname.name
5409 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5410 raise errors.OpPrereqError("Cannot readd the master node",
5413 if self.op.readd and self.op.group:
5414 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5415 " being readded", errors.ECODE_INVAL)
5417 def BuildHooksEnv(self):
5420 This will run on all nodes before, and on all nodes + the new node after.
5424 "OP_TARGET": self.op.node_name,
5425 "NODE_NAME": self.op.node_name,
5426 "NODE_PIP": self.op.primary_ip,
5427 "NODE_SIP": self.op.secondary_ip,
5428 "MASTER_CAPABLE": str(self.op.master_capable),
5429 "VM_CAPABLE": str(self.op.vm_capable),
5432 def BuildHooksNodes(self):
5433 """Build hooks nodes.
5436 # Exclude added node
5437 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5438 post_nodes = pre_nodes + [self.op.node_name, ]
5440 return (pre_nodes, post_nodes)
5442 def CheckPrereq(self):
5443 """Check prerequisites.
5446 - the new node is not already in the config
5448 - its parameters (single/dual homed) matches the cluster
5450 Any errors are signaled by raising errors.OpPrereqError.
5454 hostname = self.hostname
5455 node = hostname.name
5456 primary_ip = self.op.primary_ip = hostname.ip
5457 if self.op.secondary_ip is None:
5458 if self.primary_ip_family == netutils.IP6Address.family:
5459 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5460 " IPv4 address must be given as secondary",
5462 self.op.secondary_ip = primary_ip
5464 secondary_ip = self.op.secondary_ip
5465 if not netutils.IP4Address.IsValid(secondary_ip):
5466 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5467 " address" % secondary_ip, errors.ECODE_INVAL)
5469 node_list = cfg.GetNodeList()
5470 if not self.op.readd and node in node_list:
5471 raise errors.OpPrereqError("Node %s is already in the configuration" %
5472 node, errors.ECODE_EXISTS)
5473 elif self.op.readd and node not in node_list:
5474 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5477 self.changed_primary_ip = False
5479 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5480 if self.op.readd and node == existing_node_name:
5481 if existing_node.secondary_ip != secondary_ip:
5482 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5483 " address configuration as before",
5485 if existing_node.primary_ip != primary_ip:
5486 self.changed_primary_ip = True
5490 if (existing_node.primary_ip == primary_ip or
5491 existing_node.secondary_ip == primary_ip or
5492 existing_node.primary_ip == secondary_ip or
5493 existing_node.secondary_ip == secondary_ip):
5494 raise errors.OpPrereqError("New node ip address(es) conflict with"
5495 " existing node %s" % existing_node.name,
5496 errors.ECODE_NOTUNIQUE)
5498 # After this 'if' block, None is no longer a valid value for the
5499 # _capable op attributes
5501 old_node = self.cfg.GetNodeInfo(node)
5502 assert old_node is not None, "Can't retrieve locked node %s" % node
5503 for attr in self._NFLAGS:
5504 if getattr(self.op, attr) is None:
5505 setattr(self.op, attr, getattr(old_node, attr))
5507 for attr in self._NFLAGS:
5508 if getattr(self.op, attr) is None:
5509 setattr(self.op, attr, True)
5511 if self.op.readd and not self.op.vm_capable:
5512 pri, sec = cfg.GetNodeInstances(node)
5514 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5515 " flag set to false, but it already holds"
5516 " instances" % node,
5519 # check that the type of the node (single versus dual homed) is the
5520 # same as for the master
5521 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5522 master_singlehomed = myself.secondary_ip == myself.primary_ip
5523 newbie_singlehomed = secondary_ip == primary_ip
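# "single homed" means the node uses the same address for both its primary
# and its secondary (replication) network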
5524 if master_singlehomed != newbie_singlehomed:
5525 if master_singlehomed:
5526 raise errors.OpPrereqError("The master has no secondary ip but the"
5527 " new node has one",
5530 raise errors.OpPrereqError("The master has a secondary ip but the"
5531 " new node doesn't have one",
5534 # checks reachability
5535 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5536 raise errors.OpPrereqError("Node not reachable by ping",
5537 errors.ECODE_ENVIRON)
5539 if not newbie_singlehomed:
5540 # check reachability from my secondary ip to newbie's secondary ip
5541 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5542 source=myself.secondary_ip):
5543 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5544 " based ping to node daemon port",
5545 errors.ECODE_ENVIRON)
5552 if self.op.master_capable:
5553 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5555 self.master_candidate = False
5558 self.new_node = old_node
5560 node_group = cfg.LookupNodeGroup(self.op.group)
5561 self.new_node = objects.Node(name=node,
5562 primary_ip=primary_ip,
5563 secondary_ip=secondary_ip,
5564 master_candidate=self.master_candidate,
5565 offline=False, drained=False,
5568 if self.op.ndparams:
5569 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5571 if self.op.hv_state:
5572 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5574 if self.op.disk_state:
5575 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5577 def Exec(self, feedback_fn):
5578 """Adds the new node to the cluster.
5581 new_node = self.new_node
5582 node = new_node.name
5584 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5588 # We are adding a new node, so we assume it is powered
5588 new_node.powered = True
5590 # for re-adds, reset the offline/drained/master-candidate flags;
5591 # we need to reset here, otherwise offline would prevent RPC calls
5592 # later in the procedure; this also means that if the re-add
5593 # fails, we are left with a non-offlined, broken node
5595 new_node.drained = new_node.offline = False # pylint: disable=W0201
5596 self.LogInfo("Readding a node, the offline/drained flags were reset")
5597 # if we demote the node, we do cleanup later in the procedure
5598 new_node.master_candidate = self.master_candidate
5599 if self.changed_primary_ip:
5600 new_node.primary_ip = self.op.primary_ip
5602 # copy the master/vm_capable flags
5603 for attr in self._NFLAGS:
5604 setattr(new_node, attr, getattr(self.op, attr))
5606 # notify the user about any possible mc promotion
5607 if new_node.master_candidate:
5608 self.LogInfo("Node will be a master candidate")
5610 if self.op.ndparams:
5611 new_node.ndparams = self.op.ndparams
5613 new_node.ndparams = {}
5615 if self.op.hv_state:
5616 new_node.hv_state_static = self.new_hv_state
5618 if self.op.disk_state:
5619 new_node.disk_state_static = self.new_disk_state
5621 # check connectivity
5622 result = self.rpc.call_version([node])[node]
5623 result.Raise("Can't get version information from node %s" % node)
5624 if constants.PROTOCOL_VERSION == result.payload:
5625 logging.info("Communication to node %s fine, sw version %s match",
5626 node, result.payload)
5628 raise errors.OpExecError("Version mismatch master version %s,"
5629 " node version %s" %
5630 (constants.PROTOCOL_VERSION, result.payload))
5632 # Add node to our /etc/hosts, and add key to known_hosts
5633 if self.cfg.GetClusterInfo().modify_etc_hosts:
5634 master_node = self.cfg.GetMasterNode()
5635 result = self.rpc.call_etc_hosts_modify(master_node,
5636 constants.ETC_HOSTS_ADD,
5639 result.Raise("Can't update hosts file with new host data")
5641 if new_node.secondary_ip != new_node.primary_ip:
5642 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5645 node_verify_list = [self.cfg.GetMasterNode()]
5646 node_verify_param = {
5647 constants.NV_NODELIST: ([node], {}),
5648 # TODO: do a node-net-test as well?
5651 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5652 self.cfg.GetClusterName())
5653 for verifier in node_verify_list:
5654 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5655 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5657 for failed in nl_payload:
5658 feedback_fn("ssh/hostname verification failed"
5659 " (checking from %s): %s" %
5660 (verifier, nl_payload[failed]))
5661 raise errors.OpExecError("ssh/hostname verification failed")
5664 _RedistributeAncillaryFiles(self)
5665 self.context.ReaddNode(new_node)
5666 # make sure we redistribute the config
5667 self.cfg.Update(new_node, feedback_fn)
5668 # and make sure the new node will not have old files around
5669 if not new_node.master_candidate:
5670 result = self.rpc.call_node_demote_from_mc(new_node.name)
5671 msg = result.fail_msg
5672 if msg:
5673 self.LogWarning("Node failed to demote itself from master"
5674 " candidate status: %s" % msg)
5675 else:
5676 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5677 additional_vm=self.op.vm_capable)
5678 self.context.AddNode(new_node, self.proc.GetECId())
5681 class LUNodeSetParams(LogicalUnit):
5682 """Modifies the parameters of a node.
5684 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5685 to the node role (as _ROLE_*)
5686 @cvar _R2F: a dictionary from node role to tuples of flags
5687 @cvar _FLAGS: a list of attribute names corresponding to the flags
5690 HPATH = "node-modify"
5691 HTYPE = constants.HTYPE_NODE
5693 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5694 _F2R = {
5695 (True, False, False): _ROLE_CANDIDATE,
5696 (False, True, False): _ROLE_DRAINED,
5697 (False, False, True): _ROLE_OFFLINE,
5698 (False, False, False): _ROLE_REGULAR,
5699 }
5700 _R2F = dict((v, k) for k, v in _F2R.items())
5701 _FLAGS = ["master_candidate", "drained", "offline"]
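# The three flags form a one-hot encoding of the node role: _F2R resolves a
# (master_candidate, drained, offline) tuple to exactly one role, and _R2F
# maps a role back to the flag values that have to be written to the node
# object.  For example, _F2R[(True, False, False)] is _ROLE_CANDIDATE and
# _R2F[_ROLE_REGULAR] is (False, False, False).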
5703 def CheckArguments(self):
5704 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5705 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5706 self.op.master_capable, self.op.vm_capable,
5707 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5708 self.op.disk_state]
5709 if all_mods.count(None) == len(all_mods):
5710 raise errors.OpPrereqError("Please pass at least one modification",
5712 if all_mods.count(True) > 1:
5713 raise errors.OpPrereqError("Can't set the node into more than one"
5714 " state at the same time",
5717 # Boolean value that tells us whether we might be demoting from MC
5718 self.might_demote = (self.op.master_candidate == False or
5719 self.op.offline == True or
5720 self.op.drained == True or
5721 self.op.master_capable == False)
5723 if self.op.secondary_ip:
5724 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5725 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5726 " address" % self.op.secondary_ip,
5729 self.lock_all = self.op.auto_promote and self.might_demote
5730 self.lock_instances = self.op.secondary_ip is not None
5732 def _InstanceFilter(self, instance):
5733 """Filter for getting affected instances.
5736 return (instance.disk_template in constants.DTS_INT_MIRROR and
5737 self.op.node_name in instance.all_nodes)
5739 def ExpandNames(self):
5740 if self.lock_all:
5741 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5742 else:
5743 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5745 # Since modifying a node can have severe effects on currently running
5746 # operations, the resource lock is at least acquired in shared mode
5747 self.needed_locks[locking.LEVEL_NODE_RES] = \
5748 self.needed_locks[locking.LEVEL_NODE]
5750 # Get node resource and instance locks in shared mode; they are not used
5751 # for anything but read-only access
5752 self.share_locks[locking.LEVEL_NODE_RES] = 1
5753 self.share_locks[locking.LEVEL_INSTANCE] = 1
5755 if self.lock_instances:
5756 self.needed_locks[locking.LEVEL_INSTANCE] = \
5757 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
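# Locking summary for this LU: all node locks are taken only when a possible
# demotion may require auto-promoting other nodes (lock_all), while instance
# locks are needed only when the secondary IP changes, because the internally
# mirrored (DRBD) instances using this node are re-verified in CheckPrereq.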
5759 def BuildHooksEnv(self):
5762 This runs on the master node.
5766 "OP_TARGET": self.op.node_name,
5767 "MASTER_CANDIDATE": str(self.op.master_candidate),
5768 "OFFLINE": str(self.op.offline),
5769 "DRAINED": str(self.op.drained),
5770 "MASTER_CAPABLE": str(self.op.master_capable),
5771 "VM_CAPABLE": str(self.op.vm_capable),
5774 def BuildHooksNodes(self):
5775 """Build hooks nodes.
5778 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5781 def CheckPrereq(self):
5782 """Check prerequisites.
5784 This only checks the instance list against the existing names.
5787 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5789 if self.lock_instances:
5790 affected_instances = \
5791 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5793 # Verify instance locks
5794 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5795 wanted_instances = frozenset(affected_instances.keys())
5796 if wanted_instances - owned_instances:
5797 raise errors.OpPrereqError("Instances affected by changing node %s's"
5798 " secondary IP address have changed since"
5799 " locks were acquired, wanted '%s', have"
5800 " '%s'; retry the operation" %
5802 utils.CommaJoin(wanted_instances),
5803 utils.CommaJoin(owned_instances)),
5806 affected_instances = None
5808 if (self.op.master_candidate is not None or
5809 self.op.drained is not None or
5810 self.op.offline is not None):
5811 # we can't change the master's node flags
5812 if self.op.node_name == self.cfg.GetMasterNode():
5813 raise errors.OpPrereqError("The master role can be changed"
5814 " only via master-failover",
5817 if self.op.master_candidate and not node.master_capable:
5818 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5819 " it a master candidate" % node.name,
5822 if self.op.vm_capable == False:
5823 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5825 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5826 " the vm_capable flag" % node.name,
5829 if node.master_candidate and self.might_demote and not self.lock_all:
5830 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5831 # check if after removing the current node, we're missing master
5832 # candidates
5833 (mc_remaining, mc_should, _) = \
5834 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5835 if mc_remaining < mc_should:
5836 raise errors.OpPrereqError("Not enough master candidates, please"
5837 " pass auto promote option to allow"
5838 " promotion", errors.ECODE_STATE)
5840 self.old_flags = old_flags = (node.master_candidate,
5841 node.drained, node.offline)
5842 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5843 self.old_role = old_role = self._F2R[old_flags]
5845 # Check for ineffective changes
5846 for attr in self._FLAGS:
5847 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5848 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5849 setattr(self.op, attr, None)
5851 # Past this point, any flag change to False means a transition
5852 # away from the respective state, as only real changes are kept
5854 # TODO: We might query the real power state if it supports OOB
5855 if _SupportsOob(self.cfg, node):
5856 if self.op.offline is False and not (node.powered or
5857 self.op.powered == True):
5858 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5859 " offline status can be reset") %
5861 elif self.op.powered is not None:
5862 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5863 " as it does not support out-of-band"
5864 " handling") % self.op.node_name)
5866 # If we're being de-offlined or un-drained, we'll promote ourselves to MC if needed
5867 if (self.op.drained == False or self.op.offline == False or
5868 (self.op.master_capable and not node.master_capable)):
5869 if _DecideSelfPromotion(self):
5870 self.op.master_candidate = True
5871 self.LogInfo("Auto-promoting node to master candidate")
5873 # If we're no longer master capable, we'll demote ourselves from MC
5874 if self.op.master_capable == False and node.master_candidate:
5875 self.LogInfo("Demoting from master candidate")
5876 self.op.master_candidate = False
5879 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5880 if self.op.master_candidate:
5881 new_role = self._ROLE_CANDIDATE
5882 elif self.op.drained:
5883 new_role = self._ROLE_DRAINED
5884 elif self.op.offline:
5885 new_role = self._ROLE_OFFLINE
5886 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5887 # False is still in new flags, which means we're un-setting (the
5888 # offline/drained/master-candidate) flag
5889 new_role = self._ROLE_REGULAR
5890 else: # no new flags, nothing, keep old role
5891 new_role = old_role
5893 self.new_role = new_role
5895 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5896 # Trying to transition out of offline status
5897 # TODO: Use standard RPC runner, but make sure it works when the node is
5898 # still marked offline
5899 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5900 if result.fail_msg:
5901 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5902 " to report its version: %s" %
5903 (node.name, result.fail_msg),
5906 self.LogWarning("Transitioning node from offline to online state"
5907 " without using re-add. Please make sure the node"
5910 if self.op.secondary_ip:
5911 # Ok even without locking, because this can't be changed by any LU
5912 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5913 master_singlehomed = master.secondary_ip == master.primary_ip
5914 if master_singlehomed and self.op.secondary_ip:
5915 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5916 " homed cluster", errors.ECODE_INVAL)
5918 assert not (frozenset(affected_instances) -
5919 self.owned_locks(locking.LEVEL_INSTANCE))
5921 if node.offline:
5922 if affected_instances:
5923 raise errors.OpPrereqError("Cannot change secondary IP address:"
5924 " offline node has instances (%s)"
5925 " configured to use it" %
5926 utils.CommaJoin(affected_instances.keys()))
5927 else:
5928 # On online nodes, check that no instances are running, and that
5929 # the node has the new ip and we can reach it.
5930 for instance in affected_instances.values():
5931 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5932 msg="cannot change secondary ip")
5934 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5935 if master.name != node.name:
5936 # check reachability from master secondary ip to new secondary ip
5937 if not netutils.TcpPing(self.op.secondary_ip,
5938 constants.DEFAULT_NODED_PORT,
5939 source=master.secondary_ip):
5940 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5941 " based ping to node daemon port",
5942 errors.ECODE_ENVIRON)
5944 if self.op.ndparams:
5945 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5946 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5947 self.new_ndparams = new_ndparams
5949 if self.op.hv_state:
5950 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5951 self.node.hv_state_static)
5953 if self.op.disk_state:
5954 self.new_disk_state = \
5955 _MergeAndVerifyDiskState(self.op.disk_state,
5956 self.node.disk_state_static)
5958 def Exec(self, feedback_fn):
5962 node = self.node
5963 old_role = self.old_role
5964 new_role = self.new_role
5966 result = []
5968 if self.op.ndparams:
5969 node.ndparams = self.new_ndparams
5971 if self.op.powered is not None:
5972 node.powered = self.op.powered
5974 if self.op.hv_state:
5975 node.hv_state_static = self.new_hv_state
5977 if self.op.disk_state:
5978 node.disk_state_static = self.new_disk_state
5980 for attr in ["master_capable", "vm_capable"]:
5981 val = getattr(self.op, attr)
5982 if val is not None:
5983 setattr(node, attr, val)
5984 result.append((attr, str(val)))
5986 if new_role != old_role:
5987 # Tell the node to demote itself, if no longer MC and not offline
5988 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5989 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5990 if msg:
5991 self.LogWarning("Node failed to demote itself: %s", msg)
5993 new_flags = self._R2F[new_role]
5994 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5995 if of != nf:
5996 result.append((desc, str(nf)))
5997 (node.master_candidate, node.drained, node.offline) = new_flags
5999 # we locked all nodes, we adjust the CP before updating this node
6000 if self.lock_all:
6001 _AdjustCandidatePool(self, [node.name])
6003 if self.op.secondary_ip:
6004 node.secondary_ip = self.op.secondary_ip
6005 result.append(("secondary_ip", self.op.secondary_ip))
6007 # this will trigger configuration file update, if needed
6008 self.cfg.Update(node, feedback_fn)
6010 # this will trigger job queue propagation or cleanup if the mc
6011 # flag changed
6012 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6013 self.context.ReaddNode(node)
6015 return result
6018 class LUNodePowercycle(NoHooksLU):
6019 """Powercycles a node.
6024 def CheckArguments(self):
6025 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6026 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6027 raise errors.OpPrereqError("The node is the master and the force"
6028 " parameter was not set",
6031 def ExpandNames(self):
6032 """Locking for PowercycleNode.
6034 This is a last-resort option and shouldn't block on other
6035 jobs. Therefore, we grab no locks.
6038 self.needed_locks = {}
6040 def Exec(self, feedback_fn):
6044 result = self.rpc.call_node_powercycle(self.op.node_name,
6045 self.cfg.GetHypervisorType())
6046 result.Raise("Failed to schedule the reboot")
6047 return result.payload
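# Illustrative sketch, not part of this module: a client would normally reach
# this LU by submitting the matching opcode, along the lines of
#   op = opcodes.OpNodePowercycle(node_name="node1.example.com", force=True)
# (opcode name and fields assumed here); the payload returned above is the
# node daemon's message for the scheduled reboot.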
6050 class LUClusterQuery(NoHooksLU):
6051 """Query cluster configuration.
6056 def ExpandNames(self):
6057 self.needed_locks = {}
6059 def Exec(self, feedback_fn):
6060 """Return cluster config.
6063 cluster = self.cfg.GetClusterInfo()
6065 os_hvp = {}
6066 # Filter just for enabled hypervisors
6067 for os_name, hv_dict in cluster.os_hvp.items():
6068 os_hvp[os_name] = {}
6069 for hv_name, hv_params in hv_dict.items():
6070 if hv_name in cluster.enabled_hypervisors:
6071 os_hvp[os_name][hv_name] = hv_params
6073 # Convert ip_family to ip_version
6074 primary_ip_version = constants.IP4_VERSION
6075 if cluster.primary_ip_family == netutils.IP6Address.family:
6076 primary_ip_version = constants.IP6_VERSION
6079 "software_version": constants.RELEASE_VERSION,
6080 "protocol_version": constants.PROTOCOL_VERSION,
6081 "config_version": constants.CONFIG_VERSION,
6082 "os_api_version": max(constants.OS_API_VERSIONS),
6083 "export_version": constants.EXPORT_VERSION,
6084 "architecture": (platform.architecture()[0], platform.machine()),
6085 "name": cluster.cluster_name,
6086 "master": cluster.master_node,
6087 "default_hypervisor": cluster.primary_hypervisor,
6088 "enabled_hypervisors": cluster.enabled_hypervisors,
6089 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6090 for hypervisor_name in cluster.enabled_hypervisors]),
6092 "beparams": cluster.beparams,
6093 "osparams": cluster.osparams,
6094 "ipolicy": cluster.ipolicy,
6095 "nicparams": cluster.nicparams,
6096 "ndparams": cluster.ndparams,
6097 "candidate_pool_size": cluster.candidate_pool_size,
6098 "master_netdev": cluster.master_netdev,
6099 "master_netmask": cluster.master_netmask,
6100 "use_external_mip_script": cluster.use_external_mip_script,
6101 "volume_group_name": cluster.volume_group_name,
6102 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6103 "file_storage_dir": cluster.file_storage_dir,
6104 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6105 "maintain_node_health": cluster.maintain_node_health,
6106 "ctime": cluster.ctime,
6107 "mtime": cluster.mtime,
6108 "uuid": cluster.uuid,
6109 "tags": list(cluster.GetTags()),
6110 "uid_pool": cluster.uid_pool,
6111 "default_iallocator": cluster.default_iallocator,
6112 "reserved_lvs": cluster.reserved_lvs,
6113 "primary_ip_version": primary_ip_version,
6114 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6115 "hidden_os": cluster.hidden_os,
6116 "blacklisted_os": cluster.blacklisted_os,
6122 class LUClusterConfigQuery(NoHooksLU):
6123 """Return configuration values.
6127 _FIELDS_DYNAMIC = utils.FieldSet()
6128 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6129 "watcher_pause", "volume_group_name")
6131 def CheckArguments(self):
6132 _CheckOutputFields(static=self._FIELDS_STATIC,
6133 dynamic=self._FIELDS_DYNAMIC,
6134 selected=self.op.output_fields)
6136 def ExpandNames(self):
6137 self.needed_locks = {}
6139 def Exec(self, feedback_fn):
6140 """Dump a representation of the cluster config to the standard output.
6143 values = []
6144 for field in self.op.output_fields:
6145 if field == "cluster_name":
6146 entry = self.cfg.GetClusterName()
6147 elif field == "master_node":
6148 entry = self.cfg.GetMasterNode()
6149 elif field == "drain_flag":
6150 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6151 elif field == "watcher_pause":
6152 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6153 elif field == "volume_group_name":
6154 entry = self.cfg.GetVGName()
6155 else:
6156 raise errors.ParameterError(field)
6157 values.append(entry)
6159 return values
6161 class LUInstanceActivateDisks(NoHooksLU):
6162 """Bring up an instance's disks.
6167 def ExpandNames(self):
6168 self._ExpandAndLockInstance()
6169 self.needed_locks[locking.LEVEL_NODE] = []
6170 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6172 def DeclareLocks(self, level):
6173 if level == locking.LEVEL_NODE:
6174 self._LockInstancesNodes()
6176 def CheckPrereq(self):
6177 """Check prerequisites.
6179 This checks that the instance is in the cluster.
6182 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6183 assert self.instance is not None, \
6184 "Cannot retrieve locked instance %s" % self.op.instance_name
6185 _CheckNodeOnline(self, self.instance.primary_node)
6187 def Exec(self, feedback_fn):
6188 """Activate the disks.
6191 disks_ok, disks_info = \
6192 _AssembleInstanceDisks(self, self.instance,
6193 ignore_size=self.op.ignore_size)
6194 if not disks_ok:
6195 raise errors.OpExecError("Cannot activate block devices")
6197 return disks_info
6200 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6201 ignore_size=False):
6202 """Prepare the block devices for an instance.
6204 This sets up the block devices on all nodes.
6206 @type lu: L{LogicalUnit}
6207 @param lu: the logical unit on whose behalf we execute
6208 @type instance: L{objects.Instance}
6209 @param instance: the instance for whose disks we assemble
6210 @type disks: list of L{objects.Disk} or None
6211 @param disks: which disks to assemble (or all, if None)
6212 @type ignore_secondaries: boolean
6213 @param ignore_secondaries: if true, errors on secondary nodes
6214 won't result in an error return from the function
6215 @type ignore_size: boolean
6216 @param ignore_size: if true, the current known size of the disk
6217 will not be used during the disk activation, useful for cases
6218 when the size is wrong
6219 @return: False if the operation failed, otherwise a list of
6220 (host, instance_visible_name, node_visible_name)
6221 with the mapping from node devices to instance devices
6224 device_info = []
6225 disks_ok = True
6226 iname = instance.name
6227 disks = _ExpandCheckDisks(instance, disks)
6229 # With the two-pass mechanism we try to reduce the window of
6230 # opportunity for the race condition of switching DRBD to primary
6231 # before handshaking occurred, but we do not eliminate it
6233 # The proper fix would be to wait (with some limits) until the
6234 # connection has been made and drbd transitions from WFConnection
6235 # into any other network-connected state (Connected, SyncTarget,
6236 # SyncSource, etc.)
6238 # 1st pass, assemble on all nodes in secondary mode
6239 for idx, inst_disk in enumerate(disks):
6240 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6241 if ignore_size:
6242 node_disk = node_disk.Copy()
6243 node_disk.UnsetSize()
6244 lu.cfg.SetDiskID(node_disk, node)
6245 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6246 msg = result.fail_msg
6247 if msg:
6248 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6249 " (is_primary=False, pass=1): %s",
6250 inst_disk.iv_name, node, msg)
6251 if not ignore_secondaries:
6252 disks_ok = False
6254 # FIXME: race condition on drbd migration to primary
6256 # 2nd pass, do only the primary node
6257 for idx, inst_disk in enumerate(disks):
6258 dev_path = None
6260 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6261 if node != instance.primary_node:
6262 continue
6263 if ignore_size:
6264 node_disk = node_disk.Copy()
6265 node_disk.UnsetSize()
6266 lu.cfg.SetDiskID(node_disk, node)
6267 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6268 msg = result.fail_msg
6269 if msg:
6270 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6271 " (is_primary=True, pass=2): %s",
6272 inst_disk.iv_name, node, msg)
6273 disks_ok = False
6274 else:
6275 dev_path = result.payload
6277 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6279 # leave the disks configured for the primary node
6280 # this is a workaround that would be fixed better by
6281 # improving the logical/physical id handling
6282 for disk in disks:
6283 lu.cfg.SetDiskID(disk, instance.primary_node)
6285 return disks_ok, device_info
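# The (disks_ok, device_info) pair returned here is used in two ways:
# _StartInstanceDisks below only looks at the boolean and tears the disks
# down again on failure, while LUInstanceActivateDisks passes the
# (node, iv_name, device path) triples back to the caller so the assembled
# devices can be located on the primary node.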
6288 def _StartInstanceDisks(lu, instance, force):
6289 """Start the disks of an instance.
6292 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6293 ignore_secondaries=force)
6294 if not disks_ok:
6295 _ShutdownInstanceDisks(lu, instance)
6296 if force is not None and not force:
6297 lu.proc.LogWarning("", hint="If the message above refers to a"
6299 " you can retry the operation using '--force'.")
6300 raise errors.OpExecError("Disk consistency error")
6303 class LUInstanceDeactivateDisks(NoHooksLU):
6304 """Shutdown an instance's disks.
6309 def ExpandNames(self):
6310 self._ExpandAndLockInstance()
6311 self.needed_locks[locking.LEVEL_NODE] = []
6312 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6314 def DeclareLocks(self, level):
6315 if level == locking.LEVEL_NODE:
6316 self._LockInstancesNodes()
6318 def CheckPrereq(self):
6319 """Check prerequisites.
6321 This checks that the instance is in the cluster.
6324 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6325 assert self.instance is not None, \
6326 "Cannot retrieve locked instance %s" % self.op.instance_name
6328 def Exec(self, feedback_fn):
6329 """Deactivate the disks
6332 instance = self.instance
6333 if self.op.force:
6334 _ShutdownInstanceDisks(self, instance)
6335 else:
6336 _SafeShutdownInstanceDisks(self, instance)
6339 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6340 """Shutdown block devices of an instance.
6342 This function checks if an instance is running, before calling
6343 _ShutdownInstanceDisks.
6346 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6347 _ShutdownInstanceDisks(lu, instance, disks=disks)
6350 def _ExpandCheckDisks(instance, disks):
6351 """Return the instance disks selected by the disks list
6353 @type disks: list of L{objects.Disk} or None
6354 @param disks: selected disks
6355 @rtype: list of L{objects.Disk}
6356 @return: selected instance disks to act on
6359 if disks is None:
6360 return instance.disks
6361 else:
6362 if not set(disks).issubset(instance.disks):
6363 raise errors.ProgrammerError("Can only act on disks belonging to the"
6364 " target instance")
6365 return disks
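# For example, _SafeShutdownInstanceDisks passes disks=None and therefore
# acts on every disk of the instance, while callers passing an explicit
# subset get a ProgrammerError if any of those disks does not belong to the
# instance.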
6368 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6369 """Shutdown block devices of an instance.
6371 This does the shutdown on all nodes of the instance.
6373 If ignore_primary is false, errors on the primary node make the shutdown
6374 be reported as failed; otherwise they are only logged as warnings.
6378 disks = _ExpandCheckDisks(instance, disks)
6379 all_result = True
6380 for disk in disks:
6381 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6382 lu.cfg.SetDiskID(top_disk, node)
6383 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6384 msg = result.fail_msg
6385 if msg:
6386 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6387 disk.iv_name, node, msg)
6388 if ((node == instance.primary_node and not ignore_primary) or
6389 (node != instance.primary_node and not result.offline)):
6390 all_result = False
6392 return all_result
6394 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6395 """Checks if a node has enough free memory.
6397 This function checks if a given node has the needed amount of free
6398 memory. In case the node has less memory or we cannot get the
6399 information from the node, this function raises an OpPrereqError
6400 exception.
6402 @type lu: C{LogicalUnit}
6403 @param lu: a logical unit from which we get configuration data
6405 @param node: the node to check
6406 @type reason: C{str}
6407 @param reason: string to use in the error message
6408 @type requested: C{int}
6409 @param requested: the amount of memory in MiB to check for
6410 @type hypervisor_name: C{str}
6411 @param hypervisor_name: the hypervisor to ask for memory stats
6413 @return: node current free memory
6414 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6415 we cannot check the node
6418 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6419 nodeinfo[node].Raise("Can't get data from node %s" % node,
6420 prereq=True, ecode=errors.ECODE_ENVIRON)
6421 (_, _, (hv_info, )) = nodeinfo[node].payload
6423 free_mem = hv_info.get("memory_free", None)
6424 if not isinstance(free_mem, int):
6425 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6426 " was '%s'" % (node, free_mem),
6427 errors.ECODE_ENVIRON)
6428 if requested > free_mem:
6429 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6430 " needed %s MiB, available %s MiB" %
6431 (node, reason, requested, free_mem),
6432 errors.ECODE_NORES)
6433 return free_mem
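# Typical call site (see LUInstanceStartup.CheckPrereq below): the primary
# node is checked for the instance's minimum memory before starting it, e.g.
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
# and the raised OpPrereqError carries the node name, the reason and both
# the requested and the available amount in MiB.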
6436 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6437 """Checks if nodes have enough free disk space in the all VGs.
6439 This function check if all given nodes have the needed amount of
6440 free disk. In case any node has less disk or we cannot get the
6441 information from the node, this function raise an OpPrereqError
6444 @type lu: C{LogicalUnit}
6445 @param lu: a logical unit from which we get configuration data
6446 @type nodenames: C{list}
6447 @param nodenames: the list of node names to check
6448 @type req_sizes: C{dict}
6449 @param req_sizes: the hash of vg and corresponding amount of disk in
6450 MiB to check for
6451 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6452 or we cannot check the node
6455 for vg, req_size in req_sizes.items():
6456 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
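# req_sizes maps each volume group to the total space the operation needs in
# that group, e.g. {"xenvg": 10240} for a single 10 GiB allocation (values
# here are only illustrative); every entry is checked independently against
# the vg_free figure reported by each node.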
6459 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6460 """Checks if nodes have enough free disk space in the specified VG.
6462 This function checks if all given nodes have the needed amount of
6463 free disk. In case any node has less disk or we cannot get the
6464 information from the node, this function raises an OpPrereqError
6465 exception.
6467 @type lu: C{LogicalUnit}
6468 @param lu: a logical unit from which we get configuration data
6469 @type nodenames: C{list}
6470 @param nodenames: the list of node names to check
6472 @param vg: the volume group to check
6473 @type requested: C{int}
6474 @param requested: the amount of disk in MiB to check for
6475 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6476 or we cannot check the node
6479 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6480 for node in nodenames:
6481 info = nodeinfo[node]
6482 info.Raise("Cannot get current information from node %s" % node,
6483 prereq=True, ecode=errors.ECODE_ENVIRON)
6484 (_, (vg_info, ), _) = info.payload
6485 vg_free = vg_info.get("vg_free", None)
6486 if not isinstance(vg_free, int):
6487 raise errors.OpPrereqError("Can't compute free disk space on node"
6488 " %s for vg %s, result was '%s'" %
6489 (node, vg, vg_free), errors.ECODE_ENVIRON)
6490 if requested > vg_free:
6491 raise errors.OpPrereqError("Not enough disk space on target node %s"
6492 " vg %s: required %d MiB, available %d MiB" %
6493 (node, vg, requested, vg_free),
6497 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6498 """Checks if nodes have enough physical CPUs
6500 This function checks if all given nodes have the needed number of
6501 physical CPUs. In case any node has fewer CPUs or we cannot get the
6502 information from the node, this function raises an OpPrereqError
6503 exception.
6505 @type lu: C{LogicalUnit}
6506 @param lu: a logical unit from which we get configuration data
6507 @type nodenames: C{list}
6508 @param nodenames: the list of node names to check
6509 @type requested: C{int}
6510 @param requested: the minimum acceptable number of physical CPUs
6511 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6512 or we cannot check the node
6515 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6516 for node in nodenames:
6517 info = nodeinfo[node]
6518 info.Raise("Cannot get current information from node %s" % node,
6519 prereq=True, ecode=errors.ECODE_ENVIRON)
6520 (_, _, (hv_info, )) = info.payload
6521 num_cpus = hv_info.get("cpu_total", None)
6522 if not isinstance(num_cpus, int):
6523 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6524 " on node %s, result was '%s'" %
6525 (node, num_cpus), errors.ECODE_ENVIRON)
6526 if requested > num_cpus:
6527 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6528 "required" % (node, num_cpus, requested),
6532 class LUInstanceStartup(LogicalUnit):
6533 """Starts an instance.
6536 HPATH = "instance-start"
6537 HTYPE = constants.HTYPE_INSTANCE
6540 def CheckArguments(self):
6542 if self.op.beparams:
6543 # fill the beparams dict
6544 objects.UpgradeBeParams(self.op.beparams)
6545 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6547 def ExpandNames(self):
6548 self._ExpandAndLockInstance()
6549 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6551 def DeclareLocks(self, level):
6552 if level == locking.LEVEL_NODE_RES:
6553 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6555 def BuildHooksEnv(self):
6558 This runs on master, primary and secondary nodes of the instance.
6562 "FORCE": self.op.force,
6565 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6569 def BuildHooksNodes(self):
6570 """Build hooks nodes.
6573 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6576 def CheckPrereq(self):
6577 """Check prerequisites.
6579 This checks that the instance is in the cluster.
6582 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6583 assert self.instance is not None, \
6584 "Cannot retrieve locked instance %s" % self.op.instance_name
6587 if self.op.hvparams:
6588 # check hypervisor parameter syntax (locally)
6589 cluster = self.cfg.GetClusterInfo()
6590 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6591 filled_hvp = cluster.FillHV(instance)
6592 filled_hvp.update(self.op.hvparams)
6593 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6594 hv_type.CheckParameterSyntax(filled_hvp)
6595 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6597 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6599 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6601 if self.primary_offline and self.op.ignore_offline_nodes:
6602 self.proc.LogWarning("Ignoring offline primary node")
6604 if self.op.hvparams or self.op.beparams:
6605 self.proc.LogWarning("Overridden parameters are ignored")
6607 _CheckNodeOnline(self, instance.primary_node)
6609 bep = self.cfg.GetClusterInfo().FillBE(instance)
6610 bep.update(self.op.beparams)
6612 # check bridges existence
6613 _CheckInstanceBridgesExist(self, instance)
6615 remote_info = self.rpc.call_instance_info(instance.primary_node,
6616 instance.name,
6617 instance.hypervisor)
6618 remote_info.Raise("Error checking node %s" % instance.primary_node,
6619 prereq=True, ecode=errors.ECODE_ENVIRON)
6620 if not remote_info.payload: # not running already
6621 _CheckNodeFreeMemory(self, instance.primary_node,
6622 "starting instance %s" % instance.name,
6623 bep[constants.BE_MINMEM], instance.hypervisor)
6625 def Exec(self, feedback_fn):
6626 """Start the instance.
6629 instance = self.instance
6630 force = self.op.force
6632 if not self.op.no_remember:
6633 self.cfg.MarkInstanceUp(instance.name)
6635 if self.primary_offline:
6636 assert self.op.ignore_offline_nodes
6637 self.proc.LogInfo("Primary node offline, marked instance as started")
6638 else:
6639 node_current = instance.primary_node
6641 _StartInstanceDisks(self, instance, force)
6643 result = \
6644 self.rpc.call_instance_start(node_current,
6645 (instance, self.op.hvparams,
6646 self.op.beparams),
6647 self.op.startup_paused)
6648 msg = result.fail_msg
6649 if msg:
6650 _ShutdownInstanceDisks(self, instance)
6651 raise errors.OpExecError("Could not start instance: %s" % msg)
6654 class LUInstanceReboot(LogicalUnit):
6655 """Reboot an instance.
6658 HPATH = "instance-reboot"
6659 HTYPE = constants.HTYPE_INSTANCE
6662 def ExpandNames(self):
6663 self._ExpandAndLockInstance()
6665 def BuildHooksEnv(self):
6668 This runs on master, primary and secondary nodes of the instance.
6672 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6673 "REBOOT_TYPE": self.op.reboot_type,
6674 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6677 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6681 def BuildHooksNodes(self):
6682 """Build hooks nodes.
6685 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6688 def CheckPrereq(self):
6689 """Check prerequisites.
6691 This checks that the instance is in the cluster.
6694 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6695 assert self.instance is not None, \
6696 "Cannot retrieve locked instance %s" % self.op.instance_name
6697 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6698 _CheckNodeOnline(self, instance.primary_node)
6700 # check bridges existence
6701 _CheckInstanceBridgesExist(self, instance)
6703 def Exec(self, feedback_fn):
6704 """Reboot the instance.
6707 instance = self.instance
6708 ignore_secondaries = self.op.ignore_secondaries
6709 reboot_type = self.op.reboot_type
6711 remote_info = self.rpc.call_instance_info(instance.primary_node,
6712 instance.name,
6713 instance.hypervisor)
6714 remote_info.Raise("Error checking node %s" % instance.primary_node)
6715 instance_running = bool(remote_info.payload)
6717 node_current = instance.primary_node
6719 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6720 constants.INSTANCE_REBOOT_HARD]:
6721 for disk in instance.disks:
6722 self.cfg.SetDiskID(disk, node_current)
6723 result = self.rpc.call_instance_reboot(node_current, instance,
6724 reboot_type,
6725 self.op.shutdown_timeout)
6726 result.Raise("Could not reboot instance")
6727 else:
6728 if instance_running:
6729 result = self.rpc.call_instance_shutdown(node_current, instance,
6730 self.op.shutdown_timeout)
6731 result.Raise("Could not shutdown instance for full reboot")
6732 _ShutdownInstanceDisks(self, instance)
6733 else:
6734 self.LogInfo("Instance %s was already stopped, starting now",
6735 instance.name)
6736 _StartInstanceDisks(self, instance, ignore_secondaries)
6737 result = self.rpc.call_instance_start(node_current,
6738 (instance, None, None), False)
6739 msg = result.fail_msg
6740 if msg:
6741 _ShutdownInstanceDisks(self, instance)
6742 raise errors.OpExecError("Could not start instance for"
6743 " full reboot: %s" % msg)
6745 self.cfg.MarkInstanceUp(instance.name)
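# Summary of the two paths above: soft and hard reboots are delegated to the
# hypervisor on the primary node while the instance keeps running, whereas a
# full reboot (or rebooting an instance that was already stopped) shuts the
# instance and its disks down first and then goes through the normal
# disk-activation and instance-start sequence before marking it up again.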
6748 class LUInstanceShutdown(LogicalUnit):
6749 """Shutdown an instance.
6752 HPATH = "instance-stop"
6753 HTYPE = constants.HTYPE_INSTANCE
6756 def ExpandNames(self):
6757 self._ExpandAndLockInstance()
6759 def BuildHooksEnv(self):
6762 This runs on master, primary and secondary nodes of the instance.
6765 env = _BuildInstanceHookEnvByObject(self, self.instance)
6766 env["TIMEOUT"] = self.op.timeout
6769 def BuildHooksNodes(self):
6770 """Build hooks nodes.
6773 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6776 def CheckPrereq(self):
6777 """Check prerequisites.
6779 This checks that the instance is in the cluster.
6782 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6783 assert self.instance is not None, \
6784 "Cannot retrieve locked instance %s" % self.op.instance_name
6786 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6788 self.primary_offline = \
6789 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6791 if self.primary_offline and self.op.ignore_offline_nodes:
6792 self.proc.LogWarning("Ignoring offline primary node")
6794 _CheckNodeOnline(self, self.instance.primary_node)
6796 def Exec(self, feedback_fn):
6797 """Shutdown the instance.
6800 instance = self.instance
6801 node_current = instance.primary_node
6802 timeout = self.op.timeout
6804 if not self.op.no_remember:
6805 self.cfg.MarkInstanceDown(instance.name)
6807 if self.primary_offline:
6808 assert self.op.ignore_offline_nodes
6809 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6810 else:
6811 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6812 msg = result.fail_msg
6813 if msg:
6814 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6816 _ShutdownInstanceDisks(self, instance)
6819 class LUInstanceReinstall(LogicalUnit):
6820 """Reinstall an instance.
6823 HPATH = "instance-reinstall"
6824 HTYPE = constants.HTYPE_INSTANCE
6827 def ExpandNames(self):
6828 self._ExpandAndLockInstance()
6830 def BuildHooksEnv(self):
6833 This runs on master, primary and secondary nodes of the instance.
6836 return _BuildInstanceHookEnvByObject(self, self.instance)
6838 def BuildHooksNodes(self):
6839 """Build hooks nodes.
6842 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6845 def CheckPrereq(self):
6846 """Check prerequisites.
6848 This checks that the instance is in the cluster and is not running.
6851 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6852 assert instance is not None, \
6853 "Cannot retrieve locked instance %s" % self.op.instance_name
6854 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6855 " offline, cannot reinstall")
6856 for node in instance.secondary_nodes:
6857 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6858 " cannot reinstall")
6860 if instance.disk_template == constants.DT_DISKLESS:
6861 raise errors.OpPrereqError("Instance '%s' has no disks" %
6862 self.op.instance_name,
6864 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6866 if self.op.os_type is not None:
6868 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6869 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6870 instance_os = self.op.os_type
6871 else:
6872 instance_os = instance.os
6874 nodelist = list(instance.all_nodes)
6876 if self.op.osparams:
6877 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6878 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6879 self.os_inst = i_osdict # the new dict (without defaults)
6880 else:
6881 self.os_inst = {}
6883 self.instance = instance
6885 def Exec(self, feedback_fn):
6886 """Reinstall the instance.
6889 inst = self.instance
6891 if self.op.os_type is not None:
6892 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6893 inst.os = self.op.os_type
6894 # Write to configuration
6895 self.cfg.Update(inst, feedback_fn)
6897 _StartInstanceDisks(self, inst, None)
6898 try:
6899 feedback_fn("Running the instance OS create scripts...")
6900 # FIXME: pass debug option from opcode to backend
6901 result = self.rpc.call_instance_os_add(inst.primary_node,
6902 (inst, self.os_inst), True,
6903 self.op.debug_level)
6904 result.Raise("Could not install OS for instance %s on node %s" %
6905 (inst.name, inst.primary_node))
6906 finally:
6907 _ShutdownInstanceDisks(self, inst)
6910 class LUInstanceRecreateDisks(LogicalUnit):
6911 """Recreate an instance's missing disks.
6914 HPATH = "instance-recreate-disks"
6915 HTYPE = constants.HTYPE_INSTANCE
6918 _MODIFYABLE = frozenset([
6919 constants.IDISK_SIZE,
6920 constants.IDISK_MODE,
6921 ])
6923 # New or changed disk parameters may have different semantics
6924 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6925 constants.IDISK_ADOPT,
6927 # TODO: Implement support changing VG while recreating
6928 constants.IDISK_VG,
6929 constants.IDISK_METAVG,
6930 ]))
6932 def CheckArguments(self):
6933 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6934 # Normalize and convert deprecated list of disk indices
6935 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
6937 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6938 if duplicates:
6939 raise errors.OpPrereqError("Some disks have been specified more than"
6940 " once: %s" % utils.CommaJoin(duplicates),
6943 for (idx, params) in self.op.disks:
6944 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6945 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6946 if unsupported:
6947 raise errors.OpPrereqError("Parameters for disk %s try to change"
6948 " unmodifyable parameter(s): %s" %
6949 (idx, utils.CommaJoin(unsupported)),
6952 def ExpandNames(self):
6953 self._ExpandAndLockInstance()
6954 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6955 if self.op.nodes:
6956 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6957 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6958 else:
6959 self.needed_locks[locking.LEVEL_NODE] = []
6960 self.needed_locks[locking.LEVEL_NODE_RES] = []
6962 def DeclareLocks(self, level):
6963 if level == locking.LEVEL_NODE:
6964 # if we replace the nodes, we only need to lock the old primary,
6965 # otherwise we need to lock all nodes for disk re-creation
6966 primary_only = bool(self.op.nodes)
6967 self._LockInstancesNodes(primary_only=primary_only)
6968 elif level == locking.LEVEL_NODE_RES:
6970 self.needed_locks[locking.LEVEL_NODE_RES] = \
6971 self.needed_locks[locking.LEVEL_NODE][:]
6973 def BuildHooksEnv(self):
6976 This runs on master, primary and secondary nodes of the instance.
6979 return _BuildInstanceHookEnvByObject(self, self.instance)
6981 def BuildHooksNodes(self):
6982 """Build hooks nodes.
6985 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6988 def CheckPrereq(self):
6989 """Check prerequisites.
6991 This checks that the instance is in the cluster and is not running.
6994 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6995 assert instance is not None, \
6996 "Cannot retrieve locked instance %s" % self.op.instance_name
6997 if self.op.nodes:
6998 if len(self.op.nodes) != len(instance.all_nodes):
6999 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7000 " %d replacement nodes were specified" %
7001 (instance.name, len(instance.all_nodes),
7002 len(self.op.nodes)),
7004 assert instance.disk_template != constants.DT_DRBD8 or \
7005 len(self.op.nodes) == 2
7006 assert instance.disk_template != constants.DT_PLAIN or \
7007 len(self.op.nodes) == 1
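# The asserts above encode the per-template node requirements: a DRBD8
# instance always spans exactly one primary and one secondary node, while a
# plain (LVM) instance lives on its primary node only, so a replacement node
# list must match that length.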
7008 primary_node = self.op.nodes[0]
7009 else:
7010 primary_node = instance.primary_node
7011 _CheckNodeOnline(self, primary_node)
7013 if instance.disk_template == constants.DT_DISKLESS:
7014 raise errors.OpPrereqError("Instance '%s' has no disks" %
7015 self.op.instance_name, errors.ECODE_INVAL)
7017 # if we replace nodes *and* the old primary is offline, we don't
7018 # check the instance state
7019 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7020 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7021 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7022 if not (self.op.nodes and old_pnode.offline):
7023 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7024 msg="cannot recreate disks")
7026 if self.op.disks:
7027 self.disks = dict(self.op.disks)
7028 else:
7029 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7030 if self.disks:
7031 maxidx = max(self.disks.keys())
7032 if maxidx >= len(instance.disks):
7033 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7036 if (self.op.nodes and
7037 sorted(self.disks.keys()) != range(len(instance.disks))):
7038 raise errors.OpPrereqError("Can't recreate disks partially and"
7039 " change the nodes at the same time",
7042 self.instance = instance
7044 def Exec(self, feedback_fn):
7045 """Recreate the disks.
7048 instance = self.instance
7050 assert (self.owned_locks(locking.LEVEL_NODE) ==
7051 self.owned_locks(locking.LEVEL_NODE_RES))
7053 to_skip = []
7054 mods = [] # keeps track of needed changes
7056 for idx, disk in enumerate(instance.disks):
7057 try:
7058 changes = self.disks[idx]
7059 except KeyError:
7060 # Disk should not be recreated
7061 to_skip.append(idx)
7062 continue
7064 # update secondaries for disks, if needed
7065 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7066 # need to update the nodes and minors
7067 assert len(self.op.nodes) == 2
7068 assert len(disk.logical_id) == 6 # otherwise disk internals
7069 # have changed
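# For DRBD8 disks the logical_id is the 6-tuple (node_a, node_b, port,
# minor_a, minor_b, shared_secret); only the nodes and the freshly allocated
# minors change here, the TCP port and the shared secret are carried over
# from the old disk.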
7070 (_, _, old_port, _, _, old_secret) = disk.logical_id
7071 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7072 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7073 new_minors[0], new_minors[1], old_secret)
7074 assert len(disk.logical_id) == len(new_id)
7075 else:
7076 new_id = None
7078 mods.append((idx, new_id, changes))
7080 # now that we have passed all asserts above, we can apply the mods
7081 # in a single run (to avoid partial changes)
7082 for idx, new_id, changes in mods:
7083 disk = instance.disks[idx]
7084 if new_id is not None:
7085 assert disk.dev_type == constants.LD_DRBD8
7086 disk.logical_id = new_id
7087 if changes:
7088 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7089 mode=changes.get(constants.IDISK_MODE, None))
7091 # change primary node, if needed
7092 if self.op.nodes:
7093 instance.primary_node = self.op.nodes[0]
7094 self.LogWarning("Changing the instance's nodes, you will have to"
7095 " remove any disks left on the older nodes manually")
7098 self.cfg.Update(instance, feedback_fn)
7100 _CreateDisks(self, instance, to_skip=to_skip)
7103 class LUInstanceRename(LogicalUnit):
7104 """Rename an instance.
7107 HPATH = "instance-rename"
7108 HTYPE = constants.HTYPE_INSTANCE
7110 def CheckArguments(self):
7114 if self.op.ip_check and not self.op.name_check:
7115 # TODO: make the ip check more flexible and not depend on the name check
7116 raise errors.OpPrereqError("IP address check requires a name check",
7119 def BuildHooksEnv(self):
7122 This runs on master, primary and secondary nodes of the instance.
7125 env = _BuildInstanceHookEnvByObject(self, self.instance)
7126 env["INSTANCE_NEW_NAME"] = self.op.new_name
7129 def BuildHooksNodes(self):
7130 """Build hooks nodes.
7133 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7136 def CheckPrereq(self):
7137 """Check prerequisites.
7139 This checks that the instance is in the cluster and is not running.
7142 self.op.instance_name = _ExpandInstanceName(self.cfg,
7143 self.op.instance_name)
7144 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7145 assert instance is not None
7146 _CheckNodeOnline(self, instance.primary_node)
7147 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7148 msg="cannot rename")
7149 self.instance = instance
7151 new_name = self.op.new_name
7152 if self.op.name_check:
7153 hostname = netutils.GetHostname(name=new_name)
7154 if hostname.name != new_name:
7155 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7156 hostname.name)
7157 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7158 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7159 " same as given hostname '%s'") %
7160 (hostname.name, self.op.new_name),
7162 new_name = self.op.new_name = hostname.name
7163 if (self.op.ip_check and
7164 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7165 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7166 (hostname.ip, new_name),
7167 errors.ECODE_NOTUNIQUE)
7169 instance_list = self.cfg.GetInstanceList()
7170 if new_name in instance_list and new_name != instance.name:
7171 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7172 new_name, errors.ECODE_EXISTS)
7174 def Exec(self, feedback_fn):
7175 """Rename the instance.
7178 inst = self.instance
7179 old_name = inst.name
7181 rename_file_storage = False
7182 if (inst.disk_template in constants.DTS_FILEBASED and
7183 self.op.new_name != inst.name):
7184 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7185 rename_file_storage = True
7187 self.cfg.RenameInstance(inst.name, self.op.new_name)
7188 # Change the instance lock. This is definitely safe while we hold the BGL.
7189 # Otherwise the new lock would have to be added in acquired mode.
7191 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7192 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7194 # re-read the instance from the configuration after rename
7195 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7197 if rename_file_storage:
7198 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7199 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7200 old_file_storage_dir,
7201 new_file_storage_dir)
7202 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7203 " (but the instance has been renamed in Ganeti)" %
7204 (inst.primary_node, old_file_storage_dir,
7205 new_file_storage_dir))
7207 _StartInstanceDisks(self, inst, None)
7208 try:
7209 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7210 old_name, self.op.debug_level)
7211 msg = result.fail_msg
7212 if msg:
7213 msg = ("Could not run OS rename script for instance %s on node %s"
7214 " (but the instance has been renamed in Ganeti): %s" %
7215 (inst.name, inst.primary_node, msg))
7216 self.proc.LogWarning(msg)
7217 finally:
7218 _ShutdownInstanceDisks(self, inst)
7220 return inst.name
7223 class LUInstanceRemove(LogicalUnit):
7224 """Remove an instance.
7227 HPATH = "instance-remove"
7228 HTYPE = constants.HTYPE_INSTANCE
7231 def ExpandNames(self):
7232 self._ExpandAndLockInstance()
7233 self.needed_locks[locking.LEVEL_NODE] = []
7234 self.needed_locks[locking.LEVEL_NODE_RES] = []
7235 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7237 def DeclareLocks(self, level):
7238 if level == locking.LEVEL_NODE:
7239 self._LockInstancesNodes()
7240 elif level == locking.LEVEL_NODE_RES:
7242 self.needed_locks[locking.LEVEL_NODE_RES] = \
7243 self.needed_locks[locking.LEVEL_NODE][:]
7245 def BuildHooksEnv(self):
7248 This runs on master, primary and secondary nodes of the instance.
7251 env = _BuildInstanceHookEnvByObject(self, self.instance)
7252 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7255 def BuildHooksNodes(self):
7256 """Build hooks nodes.
7259 nl = [self.cfg.GetMasterNode()]
7260 nl_post = list(self.instance.all_nodes) + nl
7261 return (nl, nl_post)
7263 def CheckPrereq(self):
7264 """Check prerequisites.
7266 This checks that the instance is in the cluster.
7269 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7270 assert self.instance is not None, \
7271 "Cannot retrieve locked instance %s" % self.op.instance_name
7273 def Exec(self, feedback_fn):
7274 """Remove the instance.
7277 instance = self.instance
7278 logging.info("Shutting down instance %s on node %s",
7279 instance.name, instance.primary_node)
7281 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7282 self.op.shutdown_timeout)
7283 msg = result.fail_msg
7284 if msg:
7285 if self.op.ignore_failures:
7286 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7287 else:
7288 raise errors.OpExecError("Could not shutdown instance %s on"
7289 " node %s: %s" %
7290 (instance.name, instance.primary_node, msg))
7292 assert (self.owned_locks(locking.LEVEL_NODE) ==
7293 self.owned_locks(locking.LEVEL_NODE_RES))
7294 assert not (set(instance.all_nodes) -
7295 self.owned_locks(locking.LEVEL_NODE)), \
7296 "Not owning correct locks"
7298 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7301 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7302 """Utility function to remove an instance.
7305 logging.info("Removing block devices for instance %s", instance.name)
7307 if not _RemoveDisks(lu, instance):
7308 if not ignore_failures:
7309 raise errors.OpExecError("Can't remove instance's disks")
7310 feedback_fn("Warning: can't remove instance's disks")
7312 logging.info("Removing instance %s out of cluster config", instance.name)
7314 lu.cfg.RemoveInstance(instance.name)
7316 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7317 "Instance lock removal conflict"
7319 # Remove lock for the instance
7320 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7323 class LUInstanceQuery(NoHooksLU):
7324 """Logical unit for querying instances.
7327 # pylint: disable=W0142
7330 def CheckArguments(self):
7331 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7332 self.op.output_fields, self.op.use_locking)
7334 def ExpandNames(self):
7335 self.iq.ExpandNames(self)
7337 def DeclareLocks(self, level):
7338 self.iq.DeclareLocks(self, level)
7340 def Exec(self, feedback_fn):
7341 return self.iq.OldStyleQuery(self)
7344 class LUInstanceFailover(LogicalUnit):
7345 """Failover an instance.
7348 HPATH = "instance-failover"
7349 HTYPE = constants.HTYPE_INSTANCE
7352 def CheckArguments(self):
7353 """Check the arguments.
7356 self.iallocator = getattr(self.op, "iallocator", None)
7357 self.target_node = getattr(self.op, "target_node", None)
7359 def ExpandNames(self):
7360 self._ExpandAndLockInstance()
7362 if self.op.target_node is not None:
7363 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7365 self.needed_locks[locking.LEVEL_NODE] = []
7366 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7368 self.needed_locks[locking.LEVEL_NODE_RES] = []
7369 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7371 ignore_consistency = self.op.ignore_consistency
7372 shutdown_timeout = self.op.shutdown_timeout
7373 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7376 ignore_consistency=ignore_consistency,
7377 shutdown_timeout=shutdown_timeout,
7378 ignore_ipolicy=self.op.ignore_ipolicy)
7379 self.tasklets = [self._migrater]
7381 def DeclareLocks(self, level):
7382 if level == locking.LEVEL_NODE:
7383 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7384 if instance.disk_template in constants.DTS_EXT_MIRROR:
7385 if self.op.target_node is None:
7386 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7388 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7389 self.op.target_node]
7390 del self.recalculate_locks[locking.LEVEL_NODE]
7392 self._LockInstancesNodes()
7393 elif level == locking.LEVEL_NODE_RES:
7395 self.needed_locks[locking.LEVEL_NODE_RES] = \
7396 self.needed_locks[locking.LEVEL_NODE][:]
7398 def BuildHooksEnv(self):
7401 This runs on master, primary and secondary nodes of the instance.
7404 instance = self._migrater.instance
7405 source_node = instance.primary_node
7406 target_node = self.op.target_node
7408 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7409 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7410 "OLD_PRIMARY": source_node,
7411 "NEW_PRIMARY": target_node,
7414 if instance.disk_template in constants.DTS_INT_MIRROR:
7415 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7416 env["NEW_SECONDARY"] = source_node
7418 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7420 env.update(_BuildInstanceHookEnvByObject(self, instance))
7424 def BuildHooksNodes(self):
7425 """Build hooks nodes.
7428 instance = self._migrater.instance
7429 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7430 return (nl, nl + [instance.primary_node])
7433 class LUInstanceMigrate(LogicalUnit):
7434 """Migrate an instance.
7436 This is migration without shutting down, compared to the failover,
7437 which is done with shutdown.
7440 HPATH = "instance-migrate"
7441 HTYPE = constants.HTYPE_INSTANCE
7444 def ExpandNames(self):
7445 self._ExpandAndLockInstance()
7447 if self.op.target_node is not None:
7448 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7450 self.needed_locks[locking.LEVEL_NODE] = []
7451 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7453 self.needed_locks[locking.LEVEL_NODE_RES] = []
7454 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7457 TLMigrateInstance(self, self.op.instance_name,
7458 cleanup=self.op.cleanup,
7460 fallback=self.op.allow_failover,
7461 allow_runtime_changes=self.op.allow_runtime_changes,
7462 ignore_ipolicy=self.op.ignore_ipolicy)
7463 self.tasklets = [self._migrater]
7465 def DeclareLocks(self, level):
7466 if level == locking.LEVEL_NODE:
7467 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7468 if instance.disk_template in constants.DTS_EXT_MIRROR:
7469 if self.op.target_node is None:
7470 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7472 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7473 self.op.target_node]
7474 del self.recalculate_locks[locking.LEVEL_NODE]
7476 self._LockInstancesNodes()
7477 elif level == locking.LEVEL_NODE_RES:
7479 self.needed_locks[locking.LEVEL_NODE_RES] = \
7480 self.needed_locks[locking.LEVEL_NODE][:]
7482 def BuildHooksEnv(self):
7485 This runs on master, primary and secondary nodes of the instance.
7488 instance = self._migrater.instance
7489 source_node = instance.primary_node
7490 target_node = self.op.target_node
7491 env = _BuildInstanceHookEnvByObject(self, instance)
7493 "MIGRATE_LIVE": self._migrater.live,
7494 "MIGRATE_CLEANUP": self.op.cleanup,
7495 "OLD_PRIMARY": source_node,
7496 "NEW_PRIMARY": target_node,
7497 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7500 if instance.disk_template in constants.DTS_INT_MIRROR:
7501 env["OLD_SECONDARY"] = target_node
7502 env["NEW_SECONDARY"] = source_node
7504 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7508 def BuildHooksNodes(self):
7509 """Build hooks nodes.
7512 instance = self._migrater.instance
7513 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7514 return (nl, nl + [instance.primary_node])
7517 class LUInstanceMove(LogicalUnit):
7518 """Move an instance by data-copying.
7521 HPATH = "instance-move"
7522 HTYPE = constants.HTYPE_INSTANCE
7525 def ExpandNames(self):
7526 self._ExpandAndLockInstance()
7527 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7528 self.op.target_node = target_node
7529 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7530 self.needed_locks[locking.LEVEL_NODE_RES] = []
7531 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7533 def DeclareLocks(self, level):
7534 if level == locking.LEVEL_NODE:
7535 self._LockInstancesNodes(primary_only=True)
7536 elif level == locking.LEVEL_NODE_RES:
7538 self.needed_locks[locking.LEVEL_NODE_RES] = \
7539 self.needed_locks[locking.LEVEL_NODE][:]
7541 def BuildHooksEnv(self):
7544 This runs on master, primary and secondary nodes of the instance.
7548 "TARGET_NODE": self.op.target_node,
7549 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7551 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7554 def BuildHooksNodes(self):
7555 """Build hooks nodes.
7559 self.cfg.GetMasterNode(),
7560 self.instance.primary_node,
7561 self.op.target_node,
7565 def CheckPrereq(self):
7566 """Check prerequisites.
7568 This checks that the instance is in the cluster.
7571 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7572 assert self.instance is not None, \
7573 "Cannot retrieve locked instance %s" % self.op.instance_name
7575 node = self.cfg.GetNodeInfo(self.op.target_node)
7576 assert node is not None, \
7577 "Cannot retrieve locked node %s" % self.op.target_node
7579 self.target_node = target_node = node.name
7581 if target_node == instance.primary_node:
7582 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7583 (instance.name, target_node),
7586 bep = self.cfg.GetClusterInfo().FillBE(instance)
7588 for idx, dsk in enumerate(instance.disks):
7589 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7590 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7591 " cannot copy" % idx, errors.ECODE_STATE)
7593 _CheckNodeOnline(self, target_node)
7594 _CheckNodeNotDrained(self, target_node)
7595 _CheckNodeVmCapable(self, target_node)
7596 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7597 self.cfg.GetNodeGroup(node.group))
7598 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7599 ignore=self.op.ignore_ipolicy)
7601 if instance.admin_state == constants.ADMINST_UP:
7602 # check memory requirements on the secondary node
7603 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7604 instance.name, bep[constants.BE_MAXMEM],
7605 instance.hypervisor)
else:
7607 self.LogInfo("Not checking memory on the secondary node as"
7608 " instance will not be started")
7610 # check bridge existence
7611 _CheckInstanceBridgesExist(self, instance, node=target_node)
7613 def Exec(self, feedback_fn):
7614 """Move an instance.
7616 The move is done by shutting it down on its present node, copying
7617 the data over (slow) and starting it on the new node.
7620 instance = self.instance
7622 source_node = instance.primary_node
7623 target_node = self.target_node
7625 self.LogInfo("Shutting down instance %s on source node %s",
7626 instance.name, source_node)
7628 assert (self.owned_locks(locking.LEVEL_NODE) ==
7629 self.owned_locks(locking.LEVEL_NODE_RES))
7631 result = self.rpc.call_instance_shutdown(source_node, instance,
7632 self.op.shutdown_timeout)
7633 msg = result.fail_msg
if msg:
7635 if self.op.ignore_consistency:
7636 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7637 " Proceeding anyway. Please make sure node"
7638 " %s is down. Error details: %s",
7639 instance.name, source_node, source_node, msg)
else:
7641 raise errors.OpExecError("Could not shutdown instance %s on"
" node %s: %s" %
7643 (instance.name, source_node, msg))
7645 # create the target disks
try:
7647 _CreateDisks(self, instance, target_node=target_node)
7648 except errors.OpExecError:
7649 self.LogWarning("Device creation failed, reverting...")
try:
7651 _RemoveDisks(self, instance, target_node=target_node)
finally:
7653 self.cfg.ReleaseDRBDMinors(instance.name)
raise
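# The data copy below is deliberately simple: each newly created disk is
# assembled on the target node and then filled from the source node via the
# blockdev_export RPC, one disk at a time.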
7656 cluster_name = self.cfg.GetClusterInfo().cluster_name
7659 # activate, get path, copy the data over
7660 for idx, disk in enumerate(instance.disks):
7661 self.LogInfo("Copying data for disk %d", idx)
7662 result = self.rpc.call_blockdev_assemble(target_node, disk,
7663 instance.name, True, idx)
7665 self.LogWarning("Can't assemble newly created disk %d: %s",
7666 idx, result.fail_msg)
7667 errs.append(result.fail_msg)
7669 dev_path = result.payload
7670 result = self.rpc.call_blockdev_export(source_node, disk,
7671 target_node, dev_path,
7674 self.LogWarning("Can't copy data over for disk %d: %s",
7675 idx, result.fail_msg)
7676 errs.append(result.fail_msg)
7680 self.LogWarning("Some disks failed to copy, aborting")
7682 _RemoveDisks(self, instance, target_node=target_node)
7684 self.cfg.ReleaseDRBDMinors(instance.name)
7685 raise errors.OpExecError("Errors during disk copy: %s" %
7688 instance.primary_node = target_node
7689 self.cfg.Update(instance, feedback_fn)
7691 self.LogInfo("Removing the disks on the original node")
7692 _RemoveDisks(self, instance, target_node=source_node)
7694 # Only start the instance if it's marked as up
7695 if instance.admin_state == constants.ADMINST_UP:
7696 self.LogInfo("Starting instance %s on node %s",
7697 instance.name, target_node)
7699 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7700 ignore_secondaries=True)
7702 _ShutdownInstanceDisks(self, instance)
7703 raise errors.OpExecError("Can't activate the instance's disks")
7705 result = self.rpc.call_instance_start(target_node,
7706 (instance, None, None), False)
7707 msg = result.fail_msg
7709 _ShutdownInstanceDisks(self, instance)
7710 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7711 (instance.name, target_node, msg))
7714 class LUNodeMigrate(LogicalUnit):
7715 """Migrate all instances from a node.
7718 HPATH = "node-migrate"
7719 HTYPE = constants.HTYPE_NODE
7722 def CheckArguments(self):
7725 def ExpandNames(self):
7726 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7728 self.share_locks = _ShareAll()
7729 self.needed_locks = {
7730 locking.LEVEL_NODE: [self.op.node_name],
7733 def BuildHooksEnv(self):
7736 This runs on the master, the primary and all the secondaries.
7740 "NODE_NAME": self.op.node_name,
7741 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7744 def BuildHooksNodes(self):
7745 """Build hooks nodes.
7748 nl = [self.cfg.GetMasterNode()]
7751 def CheckPrereq(self):
7754 def Exec(self, feedback_fn):
7755 # Prepare jobs for migration instances
7756 allow_runtime_changes = self.op.allow_runtime_changes
7758 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7761 iallocator=self.op.iallocator,
7762 target_node=self.op.target_node,
7763 allow_runtime_changes=allow_runtime_changes,
7764 ignore_ipolicy=self.op.ignore_ipolicy)]
7765 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7768 # TODO: Run iallocator in this opcode and pass correct placement options to
7769 # OpInstanceMigrate. Since other jobs can modify the cluster between
7770 # running the iallocator and the actual migration, a good consistency model
7771 # will have to be found.
7773 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7774 frozenset([self.op.node_name]))
7776 return ResultWithJobs(jobs)
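# "jobs" holds one single-opcode job definition per primary instance on this
# node; handing them back via ResultWithJobs lets the migrations run as
# separate jobs instead of being executed inside this LU (see the TODO above
# about running the iallocator here).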
7779 class TLMigrateInstance(Tasklet):
7780 """Tasklet class for instance migration.
7783 @ivar live: whether the migration will be done live or non-live;
7784 this variable is initialized only after CheckPrereq has run
7785 @type cleanup: boolean
7786 @ivar cleanup: Whether we are cleaning up after a failed migration
7787 @type iallocator: string
7788 @ivar iallocator: The iallocator used to determine target_node
7789 @type target_node: string
7790 @ivar target_node: If given, the target_node to reallocate the instance to
7791 @type failover: boolean
7792 @ivar failover: Whether operation results in failover or migration
7793 @type fallback: boolean
7794 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7796 @type ignore_consistency: boolean
7797 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
7799 @type shutdown_timeout: int
7800 @ivar shutdown_timeout: In case of failover, the timeout for the instance shutdown
7801 @type ignore_ipolicy: bool
7802 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7807 _MIGRATION_POLL_INTERVAL = 1 # seconds
7808 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7810 def __init__(self, lu, instance_name, cleanup=False,
7811 failover=False, fallback=False,
7812 ignore_consistency=False,
7813 allow_runtime_changes=True,
7814 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7815 ignore_ipolicy=False):
7816 """Initializes this class.
7819 Tasklet.__init__(self, lu)
7822 self.instance_name = instance_name
7823 self.cleanup = cleanup
7824 self.live = False # will be overridden later
7825 self.failover = failover
7826 self.fallback = fallback
7827 self.ignore_consistency = ignore_consistency
7828 self.shutdown_timeout = shutdown_timeout
7829 self.ignore_ipolicy = ignore_ipolicy
7830 self.allow_runtime_changes = allow_runtime_changes
7832 def CheckPrereq(self):
7833 """Check prerequisites.
7835 This checks that the instance is in the cluster.
7838 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7839 instance = self.cfg.GetInstanceInfo(instance_name)
7840 assert instance is not None
7841 self.instance = instance
7842 cluster = self.cfg.GetClusterInfo()
7844 if (not self.cleanup and
7845 not instance.admin_state == constants.ADMINST_UP and
7846 not self.failover and self.fallback):
7847 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7848 " switching to failover")
7849 self.failover = True
7851 if instance.disk_template not in constants.DTS_MIRRORED:
if self.failover:
text = "failovers"
else:
text = "migrations"
7856 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7857 " %s" % (instance.disk_template, text),
errors.ECODE_STATE)
7860 if instance.disk_template in constants.DTS_EXT_MIRROR:
7861 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7863 if self.lu.op.iallocator:
7864 self._RunAllocator()
else:
7866 # We set self.target_node as it is required by
# _WaitUntilSync
7868 self.target_node = self.lu.op.target_node
7870 # Check that the target node is correct in terms of instance policy
7871 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7872 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7873 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7874 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7875 ignore=self.ignore_ipolicy)
7877 # self.target_node is already populated, either directly or by the
7879 target_node = self.target_node
7880 if self.target_node == instance.primary_node:
7881 raise errors.OpPrereqError("Cannot migrate instance %s"
7882 " to its primary (%s)" %
7883 (instance.name, instance.primary_node))
7885 if len(self.lu.tasklets) == 1:
7886 # It is safe to release locks only when we're the only tasklet
7888 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7889 keep=[instance.primary_node, self.target_node])
7892 secondary_nodes = instance.secondary_nodes
7893 if not secondary_nodes:
7894 raise errors.ConfigurationError("No secondary node but using"
7895 " %s disk template" %
7896 instance.disk_template)
7897 target_node = secondary_nodes[0]
7898 if self.lu.op.iallocator or (self.lu.op.target_node and
7899 self.lu.op.target_node != target_node):
7901 text = "failed over"
7904 raise errors.OpPrereqError("Instances with disk template %s cannot"
7905 " be %s to arbitrary nodes"
7906 " (neither an iallocator nor a target"
7907 " node can be passed)" %
7908 (instance.disk_template, text),
7910 nodeinfo = self.cfg.GetNodeInfo(target_node)
7911 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7912 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7913 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7914 ignore=self.ignore_ipolicy)
7916 i_be = cluster.FillBE(instance)
7918 # check memory requirements on the secondary node
7919 if (not self.cleanup and
7920 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7921 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7922 "migrating instance %s" %
7924 i_be[constants.BE_MINMEM],
7925 instance.hypervisor)
7927 self.lu.LogInfo("Not checking memory on the secondary node as"
7928 " instance will not be started")
7930 # check if failover must be forced instead of migration
7931 if (not self.cleanup and not self.failover and
7932 i_be[constants.BE_ALWAYS_FAILOVER]):
if self.fallback:
7934 self.lu.LogInfo("Instance configured to always failover; fallback"
" to failover")
7936 self.failover = True
else:
7938 raise errors.OpPrereqError("This instance has been configured to"
7939 " always failover, please allow failover",
errors.ECODE_STATE)
7942 # check bridge existence
7943 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7945 if not self.cleanup:
7946 _CheckNodeNotDrained(self.lu, target_node)
7947 if not self.failover:
7948 result = self.rpc.call_instance_migratable(instance.primary_node,
7950 if result.fail_msg and self.fallback:
7951 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7953 self.failover = True
7955 result.Raise("Can't migrate, please use failover",
7956 prereq=True, ecode=errors.ECODE_STATE)
7958 assert not (self.failover and self.cleanup)
7960 if not self.failover:
7961 if self.lu.op.live is not None and self.lu.op.mode is not None:
7962 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7963 " parameters are accepted",
7965 if self.lu.op.live is not None:
if self.lu.op.live:
7967 self.lu.op.mode = constants.HT_MIGRATION_LIVE
else:
7969 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7970 # reset the 'live' parameter to None so that repeated
7971 # invocations of CheckPrereq do not raise an exception
7972 self.lu.op.live = None
7973 elif self.lu.op.mode is None:
7974 # read the default value from the hypervisor
7975 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7976 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7978 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
else:
7980 # Failover is never live
self.live = False
7983 if not (self.failover or self.cleanup):
7984 remote_info = self.rpc.call_instance_info(instance.primary_node,
7986 instance.hypervisor)
7987 remote_info.Raise("Error checking instance on node %s" %
7988 instance.primary_node)
7989 instance_running = bool(remote_info.payload)
7990 if instance_running:
7991 self.current_mem = int(remote_info.payload["memory"])
7993 def _RunAllocator(self):
7994 """Run the allocator based on input opcode.
7997 # FIXME: add a self.ignore_ipolicy option
7998 ial = IAllocator(self.cfg, self.rpc,
7999 mode=constants.IALLOCATOR_MODE_RELOC,
8000 name=self.instance_name,
8001 # TODO See why hail breaks with a single node below
8002 relocate_from=[self.instance.primary_node,
8003 self.instance.primary_node],
8006 ial.Run(self.lu.op.iallocator)
if not ial.success:
8009 raise errors.OpPrereqError("Can't compute nodes using"
8010 " iallocator '%s': %s" %
8011 (self.lu.op.iallocator, ial.info),
8013 if len(ial.result) != ial.required_nodes:
8014 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8015 " of nodes (%s), required %s" %
8016 (self.lu.op.iallocator, len(ial.result),
8017 ial.required_nodes), errors.ECODE_FAULT)
8018 self.target_node = ial.result[0]
8019 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8020 self.instance_name, self.lu.op.iallocator,
8021 utils.CommaJoin(ial.result))
8023 def _WaitUntilSync(self):
8024 """Poll with custom rpc for disk sync.
8026 This uses our own step-based rpc call.
8029 self.feedback_fn("* wait until resync is done")
8033 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8035 self.instance.disks)
8037 for node, nres in result.items():
8038 nres.Raise("Cannot resync disks on node %s" % node)
8039 node_done, node_percent = nres.payload
8040 all_done = all_done and node_done
8041 if node_percent is not None:
8042 min_percent = min(min_percent, node_percent)
8044 if min_percent < 100:
8045 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8048 def _EnsureSecondary(self, node):
8049 """Demote a node to secondary.
8052 self.feedback_fn("* switching node %s to secondary mode" % node)
8054 for dev in self.instance.disks:
8055 self.cfg.SetDiskID(dev, node)
8057 result = self.rpc.call_blockdev_close(node, self.instance.name,
8058 self.instance.disks)
8059 result.Raise("Cannot change disk to secondary on node %s" % node)
8061 def _GoStandalone(self):
8062 """Disconnect from the network.
8065 self.feedback_fn("* changing into standalone mode")
8066 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8067 self.instance.disks)
8068 for node, nres in result.items():
8069 nres.Raise("Cannot disconnect disks node %s" % node)
8071 def _GoReconnect(self, multimaster):
8072 """Reconnect to the network.
8078 msg = "single-master"
8079 self.feedback_fn("* changing disks into %s mode" % msg)
8080 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8081 self.instance.disks,
8082 self.instance.name, multimaster)
8083 for node, nres in result.items():
8084 nres.Raise("Cannot change disks config on node %s" % node)
8086 def _ExecCleanup(self):
8087 """Try to cleanup after a failed migration.
8089 The cleanup is done by:
8090 - check that the instance is running only on one node
8091 (and update the config if needed)
8092 - change disks on its secondary node to secondary
8093 - wait until disks are fully synchronized
8094 - disconnect from the network
8095 - change disks into single-master mode
8096 - wait again until disks are fully synchronized
8099 instance = self.instance
8100 target_node = self.target_node
8101 source_node = self.source_node
8103 # check running on only one node
8104 self.feedback_fn("* checking where the instance actually runs"
8105 " (if this hangs, the hypervisor might be in"
8107 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8108 for node, result in ins_l.items():
8109 result.Raise("Can't contact node %s" % node)
8111 runningon_source = instance.name in ins_l[source_node].payload
8112 runningon_target = instance.name in ins_l[target_node].payload
8114 if runningon_source and runningon_target:
8115 raise errors.OpExecError("Instance seems to be running on two nodes,"
8116 " or the hypervisor is confused; you will have"
8117 " to ensure manually that it runs only on one"
8118 " and restart this operation")
8120 if not (runningon_source or runningon_target):
8121 raise errors.OpExecError("Instance does not seem to be running at all;"
8122 " in this case it's safer to repair by"
8123 " running 'gnt-instance stop' to ensure disk"
8124 " shutdown, and then restarting it")
8126 if runningon_target:
8127 # the migration has actually succeeded, we need to update the config
8128 self.feedback_fn("* instance running on secondary node (%s),"
8129 " updating config" % target_node)
8130 instance.primary_node = target_node
8131 self.cfg.Update(instance, self.feedback_fn)
8132 demoted_node = source_node
8134 self.feedback_fn("* instance confirmed to be running on its"
8135 " primary node (%s)" % source_node)
8136 demoted_node = target_node
8138 if instance.disk_template in constants.DTS_INT_MIRROR:
8139 self._EnsureSecondary(demoted_node)
8141 self._WaitUntilSync()
8142 except errors.OpExecError:
8143 # we ignore errors here, since if the device is standalone, it
8144 # won't be able to sync
8146 self._GoStandalone()
8147 self._GoReconnect(False)
8148 self._WaitUntilSync()
8150 self.feedback_fn("* done")
8152 def _RevertDiskStatus(self):
8153 """Try to revert the disk status after a failed migration.
8156 target_node = self.target_node
8157 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8161 self._EnsureSecondary(target_node)
8162 self._GoStandalone()
8163 self._GoReconnect(False)
8164 self._WaitUntilSync()
8165 except errors.OpExecError, err:
8166 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8167 " please try to recover the instance manually;"
8168 " error '%s'" % str(err))
8170 def _AbortMigration(self):
8171 """Call the hypervisor code to abort a started migration.
8174 instance = self.instance
8175 target_node = self.target_node
8176 source_node = self.source_node
8177 migration_info = self.migration_info
8179 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8183 abort_msg = abort_result.fail_msg
8185 logging.error("Aborting migration failed on target node %s: %s",
8186 target_node, abort_msg)
8187 # Don't raise an exception here, as we still have to try to revert the
8188 # disk status, even if this step failed.
8190 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8191 instance, False, self.live)
8192 abort_msg = abort_result.fail_msg
8194 logging.error("Aborting migration failed on source node %s: %s",
8195 source_node, abort_msg)
8197 def _ExecMigration(self):
8198 """Migrate an instance.
8200 The migrate is done by:
8201 - change the disks into dual-master mode
8202 - wait until disks are fully synchronized again
8203 - migrate the instance
8204 - change disks on the new secondary node (the old primary) to secondary
8205 - wait until disks are fully synchronized
8206 - change disks into single-master mode
8209 instance = self.instance
8210 target_node = self.target_node
8211 source_node = self.source_node
8213 # Check for hypervisor version mismatch and warn the user.
8214 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8215 None, [self.instance.hypervisor])
8216 for ninfo in nodeinfo.values():
8217 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8219 (_, _, (src_info, )) = nodeinfo[source_node].payload
8220 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8222 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8223 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8224 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8225 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8226 if src_version != dst_version:
8227 self.feedback_fn("* warning: hypervisor version mismatch between"
8228 " source (%s) and target (%s) node" %
8229 (src_version, dst_version))
8231 self.feedback_fn("* checking disk consistency between source and target")
8232 for dev in instance.disks:
8233 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8234 raise errors.OpExecError("Disk %s is degraded or not fully"
8235 " synchronized on target node,"
8236 " aborting migration" % dev.iv_name)
8238 if self.current_mem > self.tgt_free_mem:
8239 if not self.allow_runtime_changes:
8240 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8241 " free memory to fit instance %s on target"
8242 " node %s (have %dMB, need %dMB)" %
8243 (instance.name, target_node,
8244 self.tgt_free_mem, self.current_mem))
8245 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8246 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8249 rpcres.Raise("Cannot modify instance runtime memory")
8251 # First get the migration information from the remote node
8252 result = self.rpc.call_migration_info(source_node, instance)
8253 msg = result.fail_msg
8255 log_err = ("Failed fetching source migration information from %s: %s" %
8257 logging.error(log_err)
8258 raise errors.OpExecError(log_err)
8260 self.migration_info = migration_info = result.payload
8262 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8263 # Then switch the disks to master/master mode
8264 self._EnsureSecondary(target_node)
8265 self._GoStandalone()
8266 self._GoReconnect(True)
8267 self._WaitUntilSync()
8269 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8270 result = self.rpc.call_accept_instance(target_node,
8273 self.nodes_ip[target_node])
8275 msg = result.fail_msg
8277 logging.error("Instance pre-migration failed, trying to revert"
8278 " disk status: %s", msg)
8279 self.feedback_fn("Pre-migration failed, aborting")
8280 self._AbortMigration()
8281 self._RevertDiskStatus()
8282 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8283 (instance.name, msg))
8285 self.feedback_fn("* migrating instance to %s" % target_node)
8286 result = self.rpc.call_instance_migrate(source_node, instance,
8287 self.nodes_ip[target_node],
8289 msg = result.fail_msg
8291 logging.error("Instance migration failed, trying to revert"
8292 " disk status: %s", msg)
8293 self.feedback_fn("Migration failed, aborting")
8294 self._AbortMigration()
8295 self._RevertDiskStatus()
8296 raise errors.OpExecError("Could not migrate instance %s: %s" %
8297 (instance.name, msg))
8299 self.feedback_fn("* starting memory transfer")
8300 last_feedback = time.time()
8302 result = self.rpc.call_instance_get_migration_status(source_node,
8304 msg = result.fail_msg
8305 ms = result.payload # MigrationStatus instance
8306 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8307 logging.error("Instance migration failed, trying to revert"
8308 " disk status: %s", msg)
8309 self.feedback_fn("Migration failed, aborting")
8310 self._AbortMigration()
8311 self._RevertDiskStatus()
8312 raise errors.OpExecError("Could not migrate instance %s: %s" %
8313 (instance.name, msg))
8315 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8316 self.feedback_fn("* memory transfer complete")
8319 if (utils.TimeoutExpired(last_feedback,
8320 self._MIGRATION_FEEDBACK_INTERVAL) and
8321 ms.transferred_ram is not None):
8322 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8323 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8324 last_feedback = time.time()
8326 time.sleep(self._MIGRATION_POLL_INTERVAL)
8328 result = self.rpc.call_instance_finalize_migration_src(source_node,
8332 msg = result.fail_msg
8334 logging.error("Instance migration succeeded, but finalization failed"
8335 " on the source node: %s", msg)
8336 raise errors.OpExecError("Could not finalize instance migration: %s" %
8339 instance.primary_node = target_node
8341 # distribute new instance config to the other nodes
8342 self.cfg.Update(instance, self.feedback_fn)
8344 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8348 msg = result.fail_msg
8350 logging.error("Instance migration succeeded, but finalization failed"
8351 " on the target node: %s", msg)
8352 raise errors.OpExecError("Could not finalize instance migration: %s" %
8355 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8356 self._EnsureSecondary(source_node)
8357 self._WaitUntilSync()
8358 self._GoStandalone()
8359 self._GoReconnect(False)
8360 self._WaitUntilSync()
8362 # If the instance's disk template is `rbd' and there was a successful
8363 # migration, unmap the device from the source node.
8364 if self.instance.disk_template == constants.DT_RBD:
8365 disks = _ExpandCheckDisks(instance, instance.disks)
8366 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8368 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8369 msg = result.fail_msg
8371 logging.error("Migration was successful, but couldn't unmap the"
8372 " block device %s on source node %s: %s",
8373 disk.iv_name, source_node, msg)
8374 logging.error("You need to unmap the device %s manually on %s",
8375 disk.iv_name, source_node)
8377 self.feedback_fn("* done")
8379 def _ExecFailover(self):
8380 """Failover an instance.
8382 The failover is done by shutting it down on its present node and
8383 starting it on the secondary.
8386 instance = self.instance
8387 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8389 source_node = instance.primary_node
8390 target_node = self.target_node
8392 if instance.admin_state == constants.ADMINST_UP:
8393 self.feedback_fn("* checking disk consistency between source and target")
8394 for dev in instance.disks:
8395 # for drbd, these are drbd over lvm
8396 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8397 if primary_node.offline:
8398 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8400 (primary_node.name, dev.iv_name, target_node))
8401 elif not self.ignore_consistency:
8402 raise errors.OpExecError("Disk %s is degraded on target node,"
8403 " aborting failover" % dev.iv_name)
8405 self.feedback_fn("* not checking disk consistency as instance is not"
8408 self.feedback_fn("* shutting down instance on source node")
8409 logging.info("Shutting down instance %s on node %s",
8410 instance.name, source_node)
8412 result = self.rpc.call_instance_shutdown(source_node, instance,
8413 self.shutdown_timeout)
8414 msg = result.fail_msg
8416 if self.ignore_consistency or primary_node.offline:
8417 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8418 " proceeding anyway; please make sure node"
8419 " %s is down; error details: %s",
8420 instance.name, source_node, source_node, msg)
8422 raise errors.OpExecError("Could not shutdown instance %s on"
8424 (instance.name, source_node, msg))
8426 self.feedback_fn("* deactivating the instance's disks on source node")
8427 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8428 raise errors.OpExecError("Can't shut down the instance's disks")
8430 instance.primary_node = target_node
8431 # distribute new instance config to the other nodes
8432 self.cfg.Update(instance, self.feedback_fn)
8434 # Only start the instance if it's marked as up
8435 if instance.admin_state == constants.ADMINST_UP:
8436 self.feedback_fn("* activating the instance's disks on target node %s" %
8438 logging.info("Starting instance %s on node %s",
8439 instance.name, target_node)
8441 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8442 ignore_secondaries=True)
8444 _ShutdownInstanceDisks(self.lu, instance)
8445 raise errors.OpExecError("Can't activate the instance's disks")
8447 self.feedback_fn("* starting the instance on the target node %s" %
8449 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8451 msg = result.fail_msg
8453 _ShutdownInstanceDisks(self.lu, instance)
8454 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8455 (instance.name, target_node, msg))
8457 def Exec(self, feedback_fn):
8458 """Perform the migration.
8461 self.feedback_fn = feedback_fn
8462 self.source_node = self.instance.primary_node
8464 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8465 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8466 self.target_node = self.instance.secondary_nodes[0]
8467 # Otherwise self.target_node has been populated either
8468 # directly, or through an iallocator.
8470 self.all_nodes = [self.source_node, self.target_node]
8471 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8472 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8475 feedback_fn("Failover instance %s" % self.instance.name)
8476 self._ExecFailover()
8478 feedback_fn("Migrating instance %s" % self.instance.name)
8481 return self._ExecCleanup()
8483 return self._ExecMigration()
8486 def _CreateBlockDev(lu, node, instance, device, force_create,
8488 """Create a tree of block devices on a given node.
8490 If this device type has to be created on secondaries, create it and
8493 If not, just recurse to children keeping the same 'force' value.
8495 @param lu: the lu on whose behalf we execute
8496 @param node: the node on which to create the device
8497 @type instance: L{objects.Instance}
8498 @param instance: the instance which owns the device
8499 @type device: L{objects.Disk}
8500 @param device: the device to create
8501 @type force_create: boolean
8502 @param force_create: whether to force creation of this device; this
8503 will be changed to True whenever we find a device which has the
8504 CreateOnSecondary() attribute
8505 @param info: the extra 'metadata' we should attach to the device
8506 (this will be represented as a LVM tag)
8507 @type force_open: boolean
8508 @param force_open: this parameter will be passed to the
8509 L{backend.BlockdevCreate} function where it specifies
8510 whether we run on primary or not, and it affects both
8511 the child assembly and the device's own Open() execution
8514 if device.CreateOnSecondary():
force_create = True
if device.children:
8518 for child in device.children:
8519 _CreateBlockDev(lu, node, instance, child, force_create,
info, force_open)
8522 if not force_create:
return
8525 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8528 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8529 """Create a single block device on a given node.
8531 This will not recurse over children of the device, so they must be created in advance.
8534 @param lu: the lu on whose behalf we execute
8535 @param node: the node on which to create the device
8536 @type instance: L{objects.Instance}
8537 @param instance: the instance which owns the device
8538 @type device: L{objects.Disk}
8539 @param device: the device to create
8540 @param info: the extra 'metadata' we should attach to the device
8541 (this will be represented as a LVM tag)
8542 @type force_open: boolean
8543 @param force_open: this parameter will be passed to the
8544 L{backend.BlockdevCreate} function where it specifies
8545 whether we run on primary or not, and it affects both
8546 the child assembly and the device's own Open() execution
8549 lu.cfg.SetDiskID(device, node)
8550 result = lu.rpc.call_blockdev_create(node, device, device.size,
8551 instance.name, force_open, info)
8552 result.Raise("Can't create block device %s on"
8553 " node %s for instance %s" % (device, node, instance.name))
8554 if device.physical_id is None:
8555 device.physical_id = result.payload
8558 def _GenerateUniqueNames(lu, exts):
8559 """Generate a suitable LV name.
8561 This will generate a logical volume name for the given instance.
results = []
for val in exts:
8566 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8567 results.append("%s%s" % (new_id, val))
return results
8571 def _ComputeLDParams(disk_template, disk_params):
8572 """Computes Logical Disk parameters from Disk Template parameters.
8574 @type disk_template: string
8575 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8576 @type disk_params: dict
8577 @param disk_params: disk template parameters; dict(template_name -> parameters)
8579 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8580 contains the LD parameters of the node. The tree is flattened in-order.
8583 if disk_template not in constants.DISK_TEMPLATES:
8584 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8587 dt_params = disk_params[disk_template]
8588 if disk_template == constants.DT_DRBD8:
8590 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8591 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8592 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8593 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8594 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8595 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8596 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8597 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8598 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8599 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8600 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8601 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8605 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8608 result.append(drbd_params)
8612 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8615 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8617 result.append(data_params)
8621 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8624 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8626 result.append(meta_params)
8628 elif (disk_template == constants.DT_FILE or
8629 disk_template == constants.DT_SHARED_FILE):
8630 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8632 elif disk_template == constants.DT_PLAIN:
8634 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8637 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8639 result.append(params)
8641 elif disk_template == constants.DT_BLOCK:
8642 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8644 elif disk_template == constants.DT_RBD:
8646 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8649 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8651 result.append(params)
8656 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8657 iv_name, p_minor, s_minor, drbd_params, data_params,
8659 """Generate a drbd8 device complete with its children.
8662 assert len(vgnames) == len(names) == 2
8663 port = lu.cfg.AllocatePort()
8664 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
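# The device tree assembled below is a DRBD8 disk whose two children are a
# data LV of the requested size and a small metadata LV of DRBD_META_SIZE.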
8666 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8667 logical_id=(vgnames[0], names[0]),
8669 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8670 logical_id=(vgnames[1], names[1]),
8672 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8673 logical_id=(primary, secondary, port,
8676 children=[dev_data, dev_meta],
8677 iv_name=iv_name, params=drbd_params)
8681 def _GenerateDiskTemplate(lu, template_name,
8682 instance_name, primary_node,
8683 secondary_nodes, disk_info,
8684 file_storage_dir, file_driver,
8685 base_index, feedback_fn, disk_params):
8686 """Generate the entire disk layout for a given template type.
8689 #TODO: compute space requirements
8691 vgname = lu.cfg.GetVGName()
8692 disk_count = len(disk_info)
disks = []
8694 ld_params = _ComputeLDParams(template_name, disk_params)
8695 if template_name == constants.DT_DISKLESS:
pass
8697 elif template_name == constants.DT_PLAIN:
8699 raise errors.ProgrammerError("Wrong template configuration")
8701 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8702 for i in range(disk_count)])
8703 for idx, disk in enumerate(disk_info):
8704 disk_index = idx + base_index
8705 vg = disk.get(constants.IDISK_VG, vgname)
8706 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8707 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8708 size=disk[constants.IDISK_SIZE],
8709 logical_id=(vg, names[idx]),
8710 iv_name="disk/%d" % disk_index,
8711 mode=disk[constants.IDISK_MODE],
8712 params=ld_params[0])
8713 disks.append(disk_dev)
8714 elif template_name == constants.DT_DRBD8:
8715 drbd_params, data_params, meta_params = ld_params
8716 if len(secondary_nodes) != 1:
8717 raise errors.ProgrammerError("Wrong template configuration")
8718 remote_node = secondary_nodes[0]
8719 minors = lu.cfg.AllocateDRBDMinor(
8720 [primary_node, remote_node] * len(disk_info), instance_name)
8723 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8724 for i in range(disk_count)]):
8725 names.append(lv_prefix + "_data")
8726 names.append(lv_prefix + "_meta")
8727 for idx, disk in enumerate(disk_info):
8728 disk_index = idx + base_index
8729 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8730 data_vg = disk.get(constants.IDISK_VG, vgname)
8731 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8732 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8733 disk[constants.IDISK_SIZE],
8735 names[idx * 2:idx * 2 + 2],
8736 "disk/%d" % disk_index,
8737 minors[idx * 2], minors[idx * 2 + 1],
8738 drbd_params, data_params, meta_params)
8739 disk_dev.mode = disk[constants.IDISK_MODE]
8740 disks.append(disk_dev)
8741 elif template_name == constants.DT_FILE:
8743 raise errors.ProgrammerError("Wrong template configuration")
8745 opcodes.RequireFileStorage()
8747 for idx, disk in enumerate(disk_info):
8748 disk_index = idx + base_index
8749 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8750 size=disk[constants.IDISK_SIZE],
8751 iv_name="disk/%d" % disk_index,
8752 logical_id=(file_driver,
8753 "%s/disk%d" % (file_storage_dir,
8755 mode=disk[constants.IDISK_MODE],
8756 params=ld_params[0])
8757 disks.append(disk_dev)
8758 elif template_name == constants.DT_SHARED_FILE:
8760 raise errors.ProgrammerError("Wrong template configuration")
8762 opcodes.RequireSharedFileStorage()
8764 for idx, disk in enumerate(disk_info):
8765 disk_index = idx + base_index
8766 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8767 size=disk[constants.IDISK_SIZE],
8768 iv_name="disk/%d" % disk_index,
8769 logical_id=(file_driver,
8770 "%s/disk%d" % (file_storage_dir,
8772 mode=disk[constants.IDISK_MODE],
8773 params=ld_params[0])
8774 disks.append(disk_dev)
8775 elif template_name == constants.DT_BLOCK:
8777 raise errors.ProgrammerError("Wrong template configuration")
8779 for idx, disk in enumerate(disk_info):
8780 disk_index = idx + base_index
8781 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8782 size=disk[constants.IDISK_SIZE],
8783 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8784 disk[constants.IDISK_ADOPT]),
8785 iv_name="disk/%d" % disk_index,
8786 mode=disk[constants.IDISK_MODE],
8787 params=ld_params[0])
8788 disks.append(disk_dev)
8789 elif template_name == constants.DT_RBD:
8791 raise errors.ProgrammerError("Wrong template configuration")
8793 names = _GenerateUniqueNames(lu, [".rbd.disk%d" % (base_index + i)
8794 for i in range(disk_count)])
8796 for idx, disk in enumerate(disk_info):
8797 disk_index = idx + base_index
8798 disk_dev = objects.Disk(dev_type=constants.LD_RBD,
8799 size=disk[constants.IDISK_SIZE],
8800 logical_id=("rbd", names[idx]),
8801 iv_name="disk/%d" % disk_index,
8802 mode=disk[constants.IDISK_MODE],
8803 params=ld_params[0])
8804 disks.append(disk_dev)
else:
8807 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
return disks
8811 def _GetInstanceInfoText(instance):
8812 """Compute that text that should be added to the disk's metadata.
8815 return "originstname+%s" % instance.name
8818 def _CalcEta(time_taken, written, total_size):
8819 """Calculates the ETA based on size written and total size.
8821 @param time_taken: The time taken so far
8822 @param written: amount written so far
8823 @param total_size: The total size of data to be written
8824 @return: The remaining time in seconds
8827 avg_time = time_taken / float(written)
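# For example (illustrative numbers): if 1024 of 4096 units were written in
# 30 seconds, avg_time is 30/1024 seconds per unit, so the remaining
# (4096 - 1024) units give an ETA of 90 seconds.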
8828 return (total_size - written) * avg_time
8831 def _WipeDisks(lu, instance):
8832 """Wipes instance disks.
8834 @type lu: L{LogicalUnit}
8835 @param lu: the logical unit on whose behalf we execute
8836 @type instance: L{objects.Instance}
8837 @param instance: the instance whose disks we should create
8838 @return: the success of the wipe
8841 node = instance.primary_node
8843 for device in instance.disks:
8844 lu.cfg.SetDiskID(device, node)
8846 logging.info("Pause sync of instance %s disks", instance.name)
8847 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8849 for idx, success in enumerate(result.payload):
if not success:
8851 logging.warn("pause-sync of instance %s for disk %d failed",
instance.name, idx)
8855 for idx, device in enumerate(instance.disks):
8856 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8857 # MAX_WIPE_CHUNK at max
8858 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8859 constants.MIN_WIPE_CHUNK_PERCENT)
8860 # we _must_ make this an int, otherwise rounding errors will
8862 wipe_chunk_size = int(wipe_chunk_size)
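# Example with hypothetical values: for a 2048 MiB disk with
# MIN_WIPE_CHUNK_PERCENT=10 and MAX_WIPE_CHUNK=1024, the chunk size is
# min(1024, 204.8), truncated to 204 per wipe RPC.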
8864 lu.LogInfo("* Wiping disk %d", idx)
8865 logging.info("Wiping disk %d for instance %s, node %s using"
8866 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8871 start_time = time.time()
8873 while offset < size:
8874 wipe_size = min(wipe_chunk_size, size - offset)
8875 logging.debug("Wiping disk %d, offset %s, chunk %s",
8876 idx, offset, wipe_size)
8877 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8878 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8879 (idx, offset, wipe_size))
8882 if now - last_output >= 60:
8883 eta = _CalcEta(now - start_time, offset, size)
8884 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8885 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8888 logging.info("Resume sync of instance %s disks", instance.name)
8890 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8892 for idx, success in enumerate(result.payload):
if not success:
8894 lu.LogWarning("Resume sync of disk %d failed, please have a"
8895 " look at the status and troubleshoot the issue", idx)
8896 logging.warn("resume-sync of instance %s for disk %d failed",
instance.name, idx)
8900 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8901 """Create all disks for an instance.
8903 This abstracts away some work from AddInstance.
8905 @type lu: L{LogicalUnit}
8906 @param lu: the logical unit on whose behalf we execute
8907 @type instance: L{objects.Instance}
8908 @param instance: the instance whose disks we should create
8910 @param to_skip: list of indices to skip
8911 @type target_node: string
8912 @param target_node: if passed, overrides the target node for creation
8914 @return: the success of the creation
8917 info = _GetInstanceInfoText(instance)
8918 if target_node is None:
8919 pnode = instance.primary_node
8920 all_nodes = instance.all_nodes
else:
pnode = target_node
all_nodes = [pnode]
8925 if instance.disk_template in constants.DTS_FILEBASED:
8926 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8927 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8929 result.Raise("Failed to create directory '%s' on"
8930 " node %s" % (file_storage_dir, pnode))
8932 # Note: this needs to be kept in sync with adding of disks in
8933 # LUInstanceSetParams
8934 for idx, device in enumerate(instance.disks):
8935 if to_skip and idx in to_skip:
continue
8937 logging.info("Creating volume %s for instance %s",
8938 device.iv_name, instance.name)
8940 for node in all_nodes:
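# Only the primary node gets force_create/force_open; on the other nodes
# only device types that must also exist on secondaries (CreateOnSecondary)
# are created, and they are not opened for use.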
8941 f_create = node == pnode
8942 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8945 def _RemoveDisks(lu, instance, target_node=None):
8946 """Remove all disks for an instance.
8948 This abstracts away some work from `AddInstance()` and
8949 `RemoveInstance()`. Note that in case some of the devices couldn't
8950 be removed, the removal will continue with the other ones (compare
8951 with `_CreateDisks()`).
8953 @type lu: L{LogicalUnit}
8954 @param lu: the logical unit on whose behalf we execute
8955 @type instance: L{objects.Instance}
8956 @param instance: the instance whose disks we should remove
8957 @type target_node: string
8958 @param target_node: used to override the node on which to remove the disks
8960 @return: the success of the removal
8963 logging.info("Removing block devices for instance %s", instance.name)
8966 for device in instance.disks:
if target_node:
8968 edata = [(target_node, device)]
else:
8970 edata = device.ComputeNodeTree(instance.primary_node)
8971 for node, disk in edata:
8972 lu.cfg.SetDiskID(disk, node)
8973 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8975 lu.LogWarning("Could not remove block device %s on node %s,"
8976 " continuing anyway: %s", device.iv_name, node, msg)
8979 # if this is a DRBD disk, return its port to the pool
8980 if device.dev_type in constants.LDS_DRBD:
8981 tcp_port = device.logical_id[2]
8982 lu.cfg.AddTcpUdpPort(tcp_port)
8984 if instance.disk_template == constants.DT_FILE:
8985 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8989 tgt = instance.primary_node
8990 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8992 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8993 file_storage_dir, instance.primary_node, result.fail_msg)
8999 def _ComputeDiskSizePerVG(disk_template, disks):
9000 """Compute disk size requirements in the volume group
9003 def _compute(disks, payload):
9004 """Universal algorithm.
9009 vgs[disk[constants.IDISK_VG]] = \
9010 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9014 # Required free disk space as a function of disk and swap space
9016 constants.DT_DISKLESS: {},
9017 constants.DT_PLAIN: _compute(disks, 0),
9018 # 128 MB are added for drbd metadata for each disk
9019 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9020 constants.DT_FILE: {},
9021 constants.DT_SHARED_FILE: {},
9024 if disk_template not in req_size_dict:
9025 raise errors.ProgrammerError("Disk template '%s' size requirement"
9026 " is unknown" % disk_template)
9028 return req_size_dict[disk_template]
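# Worked example for _ComputeDiskSizePerVG above (hypothetical figures): two
# 1024 MiB disks in volume group "xenvg" with the drbd template would yield
# {"xenvg": 2 * (1024 + 128)}, i.e. DRBD_META_SIZE accounted per disk.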
9031 def _ComputeDiskSize(disk_template, disks):
9032 """Compute disk size requirements in the volume group
9035 # Required free disk space as a function of disk and swap space
9037 constants.DT_DISKLESS: None,
9038 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9039 # 128 MB are added for drbd metadata for each disk
9041 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9042 constants.DT_FILE: None,
9043 constants.DT_SHARED_FILE: 0,
9044 constants.DT_BLOCK: 0,
9045 constants.DT_RBD: 0,
9048 if disk_template not in req_size_dict:
9049 raise errors.ProgrammerError("Disk template '%s' size requirement"
9050 " is unknown" % disk_template)
9052 return req_size_dict[disk_template]
9055 def _FilterVmNodes(lu, nodenames):
9056 """Filters out non-vm_capable nodes from a list.
9058 @type lu: L{LogicalUnit}
9059 @param lu: the logical unit for which we check
9060 @type nodenames: list
9061 @param nodenames: the list of nodes on which we should check
9063 @return: the list of vm-capable nodes
9066 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9067 return [name for name in nodenames if name not in vm_nodes]
9070 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9071 """Hypervisor parameter validation.
9073 This function abstracts the hypervisor parameter validation so that it can be
9074 used in both instance create and instance modify.
9076 @type lu: L{LogicalUnit}
9077 @param lu: the logical unit for which we check
9078 @type nodenames: list
9079 @param nodenames: the list of nodes on which we should check
9080 @type hvname: string
9081 @param hvname: the name of the hypervisor we should use
9082 @type hvparams: dict
9083 @param hvparams: the parameters which we need to check
9084 @raise errors.OpPrereqError: if the parameters are not valid
9087 nodenames = _FilterVmNodes(lu, nodenames)
9089 cluster = lu.cfg.GetClusterInfo()
9090 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9092 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9093 for node in nodenames:
info = hvinfo[node]
if info.offline:
continue
9097 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9100 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9101 """OS parameters validation.
9103 @type lu: L{LogicalUnit}
9104 @param lu: the logical unit for which we check
9105 @type required: boolean
9106 @param required: whether the validation should fail if the OS is not found
9108 @type nodenames: list
9109 @param nodenames: the list of nodes on which we should check
9110 @type osname: string
9111 @param osname: the name of the OS we should use
9112 @type osparams: dict
9113 @param osparams: the parameters which we need to check
9114 @raise errors.OpPrereqError: if the parameters are not valid
9117 nodenames = _FilterVmNodes(lu, nodenames)
9118 result = lu.rpc.call_os_validate(nodenames, required, osname,
9119 [constants.OS_VALIDATE_PARAMETERS],
9121 for node, nres in result.items():
9122 # we don't check for offline cases since this should be run only
9123 # against the master node and/or an instance's nodes
9124 nres.Raise("OS Parameters validation failed on node %s" % node)
9125 if not nres.payload:
9126 lu.LogInfo("OS %s not found on node %s, validation skipped",
9130 class LUInstanceCreate(LogicalUnit):
9131 """Create an instance.
9134 HPATH = "instance-add"
9135 HTYPE = constants.HTYPE_INSTANCE
9138 def CheckArguments(self):
9142 # do not require name_check to ease forward/backward compatibility
9144 if self.op.no_install and self.op.start:
9145 self.LogInfo("No-installation mode selected, disabling startup")
9146 self.op.start = False
9147 # validate/normalize the instance name
9148 self.op.instance_name = \
9149 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9151 if self.op.ip_check and not self.op.name_check:
9152 # TODO: make the ip check more flexible and not depend on the name check
9153 raise errors.OpPrereqError("Cannot do IP address check without a name"
9154 " check", errors.ECODE_INVAL)
9156 # check nics' parameter names
9157 for nic in self.op.nics:
9158 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9160 # check disks. parameter names and consistent adopt/no-adopt strategy
9161 has_adopt = has_no_adopt = False
9162 for disk in self.op.disks:
9163 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9164 if constants.IDISK_ADOPT in disk:
has_adopt = True
else:
has_no_adopt = True
9168 if has_adopt and has_no_adopt:
9169 raise errors.OpPrereqError("Either all disks are adopted or none is",
9172 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9173 raise errors.OpPrereqError("Disk adoption is not supported for the"
9174 " '%s' disk template" %
9175 self.op.disk_template,
9177 if self.op.iallocator is not None:
9178 raise errors.OpPrereqError("Disk adoption not allowed with an"
9179 " iallocator script", errors.ECODE_INVAL)
9180 if self.op.mode == constants.INSTANCE_IMPORT:
9181 raise errors.OpPrereqError("Disk adoption not allowed for"
9182 " instance import", errors.ECODE_INVAL)
9184 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9185 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9186 " but no 'adopt' parameter given" %
9187 self.op.disk_template,
9190 self.adopt_disks = has_adopt
9192 # instance name verification
9193 if self.op.name_check:
9194 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9195 self.op.instance_name = self.hostname1.name
9196 # used in CheckPrereq for ip ping check
9197 self.check_ip = self.hostname1.ip
else:
9199 self.check_ip = None
9201 # file storage checks
9202 if (self.op.file_driver and
9203 not self.op.file_driver in constants.FILE_DRIVER):
9204 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9205 self.op.file_driver, errors.ECODE_INVAL)
9207 if self.op.disk_template == constants.DT_FILE:
9208 opcodes.RequireFileStorage()
9209 elif self.op.disk_template == constants.DT_SHARED_FILE:
9210 opcodes.RequireSharedFileStorage()
9212 ### Node/iallocator related checks
9213 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9215 if self.op.pnode is not None:
9216 if self.op.disk_template in constants.DTS_INT_MIRROR:
9217 if self.op.snode is None:
9218 raise errors.OpPrereqError("The networked disk templates need"
9219 " a mirror node", errors.ECODE_INVAL)
9221 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9223 self.op.snode = None
9225 self._cds = _GetClusterDomainSecret()
9227 if self.op.mode == constants.INSTANCE_IMPORT:
9228 # On import force_variant must be True, because if we forced it at
9229 # initial install, our only chance when importing it back is that it
9231 self.op.force_variant = True
9233 if self.op.no_install:
9234 self.LogInfo("No-installation mode has no effect during import")
9236 elif self.op.mode == constants.INSTANCE_CREATE:
9237 if self.op.os_type is None:
9238 raise errors.OpPrereqError("No guest OS specified",
9240 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9241 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9242 " installation" % self.op.os_type,
9244 if self.op.disk_template is None:
9245 raise errors.OpPrereqError("No disk template specified",
9248 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9249 # Check handshake to ensure both clusters have the same domain secret
9250 src_handshake = self.op.source_handshake
9251 if not src_handshake:
9252 raise errors.OpPrereqError("Missing source handshake",
9255 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9258 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9261 # Load and check source CA
9262 self.source_x509_ca_pem = self.op.source_x509_ca
9263 if not self.source_x509_ca_pem:
9264 raise errors.OpPrereqError("Missing source X509 CA",
try:
9268 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
self._cds)
9270 except OpenSSL.crypto.Error, err:
9271 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9272 (err, ), errors.ECODE_INVAL)
9274 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9275 if errcode is not None:
9276 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9279 self.source_x509_ca = cert
9281 src_instance_name = self.op.source_instance_name
9282 if not src_instance_name:
9283 raise errors.OpPrereqError("Missing source instance name",
9286 self.source_instance_name = \
9287 netutils.GetHostname(name=src_instance_name).name
else:
9290 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9291 self.op.mode, errors.ECODE_INVAL)
9293 def ExpandNames(self):
9294 """ExpandNames for CreateInstance.
9296 Figure out the right locks for instance creation.
9299 self.needed_locks = {}
9301 instance_name = self.op.instance_name
9302 # this is just a preventive check, but someone might still add this
9303 # instance in the meantime, and creation will fail at lock-add time
9304 if instance_name in self.cfg.GetInstanceList():
9305 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9306 instance_name, errors.ECODE_EXISTS)
9308 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9310 if self.op.iallocator:
9311 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9312 # specifying a group on instance creation and then selecting nodes from
9314 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9318 nodelist = [self.op.pnode]
9319 if self.op.snode is not None:
9320 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9321 nodelist.append(self.op.snode)
9322 self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
9325 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9327 # in case of import lock the source node too
9328 if self.op.mode == constants.INSTANCE_IMPORT:
9329 src_node = self.op.src_node
9330 src_path = self.op.src_path
9332 if src_path is None:
9333 self.op.src_path = src_path = self.op.instance_name
9335 if src_node is None:
9336 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9337 self.op.src_node = None
9338 if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9344 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9345 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9346 if not os.path.isabs(src_path):
9347 self.op.src_path = src_path = \
9348 utils.PathJoin(constants.EXPORT_DIR, src_path)
9350 def _RunAllocator(self):
9351 """Run the allocator based on input opcode.
9354 nics = [n.ToDict() for n in self.nics]
9355 ial = IAllocator(self.cfg, self.rpc,
9356 mode=constants.IALLOCATOR_MODE_ALLOC,
9357 name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
9376 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9377 " of nodes (%s), required %s" %
9378 (self.op.iallocator, len(ial.result),
9379 ial.required_nodes), errors.ECODE_FAULT)
9380 self.op.pnode = ial.result[0]
9381 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9382 self.op.instance_name, self.op.iallocator,
9383 utils.CommaJoin(ial.result))
9384 if ial.required_nodes == 2:
9385 self.op.snode = ial.result[1]
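
  # Editor's note (illustrative sketch, not part of the original code): a
  # successful allocator run for a DRBD-type instance is expected to look
  # roughly like the following -- the node names are invented:
  #   ial.success        -> True
  #   ial.required_nodes -> 2
  #   ial.result         -> ["node1.example.com", "node2.example.com"]
  # i.e. result[0] becomes the primary and result[1] the secondary, exactly
  # as consumed above.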
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
9396 if self.op.mode == constants.INSTANCE_IMPORT:
9397 env["SRC_NODE"] = self.op.src_node
9398 env["SRC_PATH"] = self.op.src_path
9399 env["SRC_IMAGES"] = self.src_images
9401 env.update(_BuildInstanceHookEnv(
9402 name=self.op.instance_name,
9403 primary_node=self.op.pnode,
9404 secondary_nodes=self.secondaries,
9405 status=self.op.start,
9406 os_type=self.op.os_type,
9407 minmem=self.be_full[constants.BE_MINMEM],
9408 maxmem=self.be_full[constants.BE_MAXMEM],
9409 vcpus=self.be_full[constants.BE_VCPUS],
9410 nics=_NICListToTuple(self, self.nics),
9411 disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env
9422 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl
9429 def _ReadExportInfo(self):
9430 """Reads the export information from disk.
9432 It will override the opcode source node and path with the actual
9433 information, if these two were not specified before.
9435 @return: the export information
9438 assert self.op.mode == constants.INSTANCE_IMPORT
9440 src_node = self.op.src_node
9441 src_path = self.op.src_path
    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)
9460 _CheckNodeOnline(self, src_node)
9461 result = self.rpc.call_export_info(src_node, src_path)
9462 result.Raise("No export or invalid export found in dir %s" % src_path)
9464 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9465 if not export_info.has_section(constants.INISECT_EXP):
9466 raise errors.ProgrammerError("Corrupted export config",
9467 errors.ECODE_ENVIRON)
9469 ei_version = export_info.get(constants.INISECT_EXP, "version")
9470 if (int(ei_version) != constants.EXPORT_VERSION):
9471 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info
9476 def _ReadExportParams(self, einfo):
9477 """Use export parameters as defaults.
    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9486 if self.op.disk_template is None:
9487 if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)
    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)
    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics
9525 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9526 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9528 if (self.op.hypervisor is None and
9529 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9530 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9532 if einfo.has_section(constants.INISECT_HYP):
9533 # use the export parameters but do not override the ones
9534 # specified by the user
9535 for name, value in einfo.items(constants.INISECT_HYP):
9536 if name not in self.op.hvparams:
9537 self.op.hvparams[name] = value
9539 if einfo.has_section(constants.INISECT_BEP):
9540 # use the parameters, without overriding
9541 for name, value in einfo.items(constants.INISECT_BEP):
9542 if name not in self.op.beparams:
9543 self.op.beparams[name] = value
9544 # Compatibility for the old "memory" be param
9545 if name == constants.BE_MEMORY:
9546 if constants.BE_MAXMEM not in self.op.beparams:
9547 self.op.beparams[constants.BE_MAXMEM] = value
9548 if constants.BE_MINMEM not in self.op.beparams:
9549 self.op.beparams[constants.BE_MINMEM] = value
9551 # try to read the parameters old style, from the main section
9552 for name in constants.BES_PARAMETERS:
9553 if (name not in self.op.beparams and
9554 einfo.has_option(constants.INISECT_INS, name)):
9555 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9557 if einfo.has_section(constants.INISECT_OSP):
9558 # use the parameters, without overriding
9559 for name, value in einfo.items(constants.INISECT_OSP):
9560 if name not in self.op.osparams:
9561 self.op.osparams[name] = value
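
  # Editor's note (illustrative sketch): the export data consumed by
  # _ReadExportInfo/_ReadExportParams is an INI-style file.  Assuming the
  # usual values of the INISECT_* constants ("export", "instance",
  # "hypervisor", "backend", "os"), a minimal export could look roughly
  # like the following; all concrete values are invented:
  #
  #   [export]
  #   version = 0
  #   os = debootstrap
  #
  #   [instance]
  #   name = old-inst.example.com
  #   disk_template = plain
  #   disk0_size = 10240
  #   disk0_dump = disk0.dump
  #   nic0_mac = aa:00:00:11:22:33
  #   hypervisor = xen-pvm
  #   tags = web production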
9563 def _RevertToDefaults(self, cluster):
9564 """Revert the instance parameters to the default values.
9568 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9569 for name in self.op.hvparams.keys():
9570 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9571 del self.op.hvparams[name]
9573 be_defs = cluster.SimpleFillBE({})
9574 for name in self.op.beparams.keys():
9575 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9576 del self.op.beparams[name]
9578 nic_defs = cluster.SimpleFillNIC({})
9579 for nic in self.op.nics:
9580 for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9585 for name in self.op.osparams.keys():
9586 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9587 del self.op.osparams[name]
9589 def _CalculateFileStorageDir(self):
9590 """Calculate final instance file storage dir.
9593 # file storage dir calculation/check
9594 self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir
9604 cfg_storagedir = get_fsd_fn()
9605 if not cfg_storagedir:
9606 raise errors.OpPrereqError("Cluster file storage dir not defined")
9607 joinargs.append(cfg_storagedir)
9609 if self.op.file_storage_dir is not None:
9610 joinargs.append(self.op.file_storage_dir)
9612 joinargs.append(self.op.instance_name)
9614 # pylint: disable=W0142
9615 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
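
  # Editor's note (illustrative): assuming a cluster file storage dir of
  # "/srv/ganeti/file-storage" (a made-up value), an opcode-level
  # file_storage_dir of "web" and an instance named "inst1.example.com",
  # the joinargs built above would yield
  #   /srv/ganeti/file-storage/web/inst1.example.com
  # via utils.PathJoin().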
9617 def CheckPrereq(self): # pylint: disable=R0914
9618 """Check prerequisites.
9621 self._CalculateFileStorageDir()
9623 if self.op.mode == constants.INSTANCE_IMPORT:
9624 export_info = self._ReadExportInfo()
9625 self._ReadExportParams(export_info)
9627 if (not self.cfg.GetVGName() and
9628 self.op.disk_template not in constants.DTS_NOT_LVM):
9629 raise errors.OpPrereqError("Cluster does not support lvm-based"
9630 " instances", errors.ECODE_STATE)
9632 if (self.op.hypervisor is None or
9633 self.op.hypervisor == constants.VALUE_AUTO):
9634 self.op.hypervisor = self.cfg.GetHypervisorType()
9636 cluster = self.cfg.GetClusterInfo()
9637 enabled_hvs = cluster.enabled_hypervisors
9638 if self.op.hypervisor not in enabled_hvs:
9639 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                                    ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)
9644 # Check tag validity
9645 for tag in self.op.tags:
9646 objects.TaggableObject.ValidateTag(tag)
9648 # check hypervisor parameter syntax (locally)
9649 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9653 hv_type.CheckParameterSyntax(filled_hvp)
9654 self.hv_full = filled_hvp
9655 # check that we don't specify global parameters on an instance
9656 _CheckGlobalHvParams(self.op.hvparams)
9658 # fill and remember the beparams dict
9659 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9660 for param, value in self.op.beparams.iteritems():
9661 if value == constants.VALUE_AUTO:
9662 self.op.beparams[param] = default_beparams[param]
9663 objects.UpgradeBeParams(self.op.beparams)
9664 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9665 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9667 # build os parameters
9668 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
9678 nic_mode_req = nic.get(constants.INIC_MODE, None)
9679 nic_mode = nic_mode_req
9680 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9681 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9683 # in routed mode, for the first nic, the default ip is 'auto'
9684 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE
9689 # ip validity checks
9690 ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
9694 if not self.op.name_check:
9695 raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)
9710 # MAC address verification
9711 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9712 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9717 except errors.ReservationError:
9718 raise errors.OpPrereqError("MAC address %s already in use"
9719 " in cluster" % mac,
9720 errors.ECODE_NOTUNIQUE)
9722 # Build nic parameters
9723 link = nic.get(constants.INIC_LINK, None)
9724 if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link
9732 check_params = cluster.SimpleFillNIC(nicparams)
9733 objects.NIC.CheckParameterSyntax(check_params)
9734 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9736 # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
9740 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9741 if mode not in constants.DISK_ACCESS_SET:
9742 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9743 mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        }
      if constants.IDISK_METAVG in disk:
9760 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9761 if constants.IDISK_ADOPT in disk:
9762 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9763 self.disks.append(new_disk)
    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)
9777 self.src_images = disk_images
9779 old_name = export_info.get(constants.INISECT_INS, "name")
9780 if self.op.instance_name == old_name:
9781 for idx, nic in enumerate(self.nics):
9782 if nic.mac == constants.VALUE_AUTO:
9783 nic_mac_ini = "nic%d_mac" % idx
9784 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9786 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9788 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9789 if self.op.ip_check:
9790 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9791 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9792 (self.check_ip, self.op.instance_name),
9793 errors.ECODE_NOTUNIQUE)
9795 #### mac address generation
9796 # By generating here the mac address both the allocator and the hooks get
9797 # the real final mac address rather than the 'auto' or 'generate' value.
9798 # There is a race condition between the generation and the instance object
9799 # creation, which means that we know the mac is valid now, but we're not
9800 # sure it will be when we actually add the instance. If things go bad
9801 # adding the instance will abort because of a duplicate mac, and the
9802 # creation job will fail.
9803 for nic in self.nics:
9804 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9805 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9809 if self.op.iallocator is not None:
9810 self._RunAllocator()
9812 # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
9820 #### node related checks
9822 # check primary node
9823 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9824 assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode

    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
9832 if not pnode.vm_capable:
9833 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9834 " '%s'" % pnode.name, errors.ECODE_STATE)
9836 self.secondaries = []
9838 # mirror node verification
9839 if self.op.disk_template in constants.DTS_INT_MIRROR:
9840 if self.op.snode == pnode.name:
9841 raise errors.OpPrereqError("The secondary node cannot be the"
9842 " primary node", errors.ECODE_INVAL)
9843 _CheckNodeOnline(self, self.op.snode)
9844 _CheckNodeNotDrained(self, self.op.snode)
9845 _CheckNodeVmCapable(self, self.op.snode)
9846 self.secondaries.append(self.op.snode)
9848 snode = self.cfg.GetNodeInfo(self.op.snode)
9849 if pnode.group != snode.group:
9850 self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    nodenames = [pnode.name] + self.secondaries
    # Verify instance specs
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      }
9866 group_info = self.cfg.GetNodeGroup(pnode.group)
9867 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9868 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9869 if not self.op.ignore_ipolicy and res:
9870 raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),
                                 errors.ECODE_INVAL)
9875 # disk parameters (not customizable at instance or node level)
9876 # just use the primary node parameters, ignoring the secondary.
9877 self.diskparams = group_info.diskparams
9879 if not self.adopt_disks:
9880 if self.op.disk_template == constants.DT_RBD:
9881 # _CheckRADOSFreeSpace() is just a placeholder.
9882 # Any function that checks prerequisites can be placed here.
9883 # Check if there is enough space on the RADOS cluster.
9884 _CheckRADOSFreeSpace()
9886 # Check lv size requirements, if not adopting
9887 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9888 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9890 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9891 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9892 disk[constants.IDISK_ADOPT])
9893 for disk in self.disks])
9894 if len(all_lvs) != len(self.disks):
          raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                     errors.ECODE_INVAL)
        for lv_name in all_lvs:
          try:
            # FIXME: lv_name here is "vg/lv" need to ensure that other calls
            # to ReserveLV uses the same syntax
            self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9902 except errors.ReservationError:
9903 raise errors.OpPrereqError("LV named %s used by another instance" %
9904 lv_name, errors.ECODE_NOTUNIQUE)
9906 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9907 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9909 node_lvs = self.rpc.call_lv_list([pnode.name],
9910 vg_names.payload.keys())[pnode.name]
9911 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9912 node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
9924 # update the size of disk based on what is found
9925 for dsk in self.disks:
9926 dsk[constants.IDISK_SIZE] = \
9927 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9928 dsk[constants.IDISK_ADOPT])][0]))
9930 elif self.op.disk_template == constants.DT_BLOCK:
9931 # Normalize and de-duplicate device paths
9932 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9933 for disk in self.disks])
9934 if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)
9946 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9947 list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
9956 for dsk in self.disks:
9957 dsk[constants.IDISK_SIZE] = \
9958 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9960 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9962 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9963 # check OS parameters (remotely)
9964 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9966 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9968 # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)
9976 self.dry_run_result = list(nodenames)
9978 def Exec(self, feedback_fn):
9979 """Create and add the instance to the cluster.
9982 instance = self.op.instance_name
9983 pnode_name = self.pnode.name
9985 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9986 self.owned_locks(locking.LEVEL_NODE)), \
9987 "Node locks differ from node resource locks"
9989 ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None
9995 disks = _GenerateDiskTemplate(self,
9996 self.op.disk_template,
9997 instance, pnode_name,
10000 self.instance_file_storage_dir,
10001 self.op.file_driver,
10006 iobj = objects.Instance(name=instance, os=self.op.os_type,
10007 primary_node=pnode_name,
10008 nics=self.nics, disks=disks,
10009 disk_template=self.op.disk_template,
10010 admin_state=constants.ADMINST_DOWN,
10011 network_port=network_port,
10012 beparams=self.op.beparams,
10013 hvparams=self.op.hvparams,
10014 hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    for tag in self.op.tags:
      iobj.AddTag(tag)
10022 if self.adopt_disks:
10023 if self.op.disk_template == constants.DT_PLAIN:
10024 # rename LVs to the newly-generated names; we need to construct
10025 # 'fake' LV disks with the old data, plus the new unique_id
10026 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10029 rename_to.append(t_dsk.logical_id)
10030 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10031 self.cfg.SetDiskID(t_dsk, pnode_name)
10032 result = self.rpc.call_blockdev_rename(pnode_name,
10033 zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise
10047 feedback_fn("adding instance %s to cluster config" % instance)
10049 self.cfg.AddInstance(iobj, self.proc.GetECId())
10051 # Declare that we don't want to remove the instance lock anymore, as we've
10052 # added the instance to the config
10053 del self.remove_locks[locking.LEVEL_INSTANCE]
10055 if self.op.mode == constants.INSTANCE_IMPORT:
10056 # Release unused nodes
10057 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10059 # Release all nodes
10060 _ReleaseLocks(self, locking.LEVEL_NODE)
    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
10076 disk_abort = not _WaitForSync(self, iobj)
10077 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10078 # make sure the disks are not degraded (still sync-ing is ok)
10079 feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
10086 self.cfg.RemoveInstance(iobj.name)
10087 # Make sure the instance lock gets removed
10088 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
10093 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10095 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10096 if self.op.mode == constants.INSTANCE_CREATE:
10097 if not self.op.no_install:
        pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                      not self.op.wait_for_sync)
        if pause_sync:
          feedback_fn("* pausing disk sync to install instance OS")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), True)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("pause-sync of instance %s for disk %d failed",
                           iobj.name, idx)

        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
        os_add_result = \
          self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                        self.op.debug_level)
        if pause_sync:
          feedback_fn("* resuming disk sync")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), False)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("resume-sync of instance %s for disk %d failed",
                           iobj.name, idx)
10123 os_add_result.Raise("Could not add os for instance %s"
10124 " on node %s" % (instance, pnode_name))
10126 elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)
        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
10149 self.LogWarning("Some disks for instance %s on node %s were not"
10150 " imported successfully" % (instance, pnode_name))
10152 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10153 feedback_fn("* preparing remote import...")
10154 # The source cluster will stop the instance before attempting to make a
10155 # connection. In some cases stopping an instance can take a long time,
10156 # hence the shutdown timeout is added to the connection timeout.
10157 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10158 self.op.source_shutdown_timeout)
10159 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10161 assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
10166 if not compat.all(disk_results):
10167 # TODO: Should the instance still be started, even if some disks
10168 # failed to import (valid for local imports, too)?
10169 self.LogWarning("Some disks for instance %s on node %s were not"
10170 " imported successfully" % (instance, pnode_name))
10172 # Run rename script on newly imported instance
10173 assert iobj.name == instance
10174 feedback_fn("Running rename script for %s" % instance)
10175 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10176 self.source_instance_name,
10177 self.op.debug_level)
10178 if result.fail_msg:
10179 self.LogWarning("Failed to run rename script for %s on node"
10180 " %s: %s" % (instance, pnode_name, result.fail_msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    assert not self.owned_locks(locking.LEVEL_NODE_RES)
    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")
10198 return list(iobj.all_nodes)
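
# Editor's note: the helper below is an illustrative sketch only and is not
# used anywhere in this module.  It shows the kind of opcode that drives
# LUInstanceCreate; the field names follow opcodes.OpInstanceCreate as
# assumed here and the instance/node/OS names are invented.
def _ExampleInstanceCreateOpCode():
  """Build a sample OpInstanceCreate for a plain (LVM-backed) instance.

  """
  return opcodes.OpInstanceCreate(instance_name="inst1.example.com",
                                  mode=constants.INSTANCE_CREATE,
                                  disk_template=constants.DT_PLAIN,
                                  disks=[{constants.IDISK_SIZE: 10240}],
                                  nics=[{}],
                                  os_type="debootstrap",
                                  pnode="node1.example.com")
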
10201 def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass
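
# Editor's note: purely illustrative sketch of what a concrete check could
# look like if _CheckRADOSFreeSpace were ever filled in; it is not part of
# Ganeti, and the "rados df" invocation and its interpretation are
# assumptions rather than a tested interface.
def _ExampleRADOSReachabilityCheck():
  """Best-effort probe that the RADOS cluster is reachable (sketch only).

  """
  result = utils.RunCmd(["rados", "df"])
  if result.failed:
    raise errors.OpPrereqError("Cannot query the RADOS cluster: %s" %
                               result.output, errors.ECODE_ENVIRON)
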
10209 class LUInstanceConsole(NoHooksLU):
10210 """Connect to an instance's console.
10212 This is somewhat special in that it returns the command line that
10213 you need to run on the master node in order to connect to the
10219 def ExpandNames(self):
10220 self.share_locks = _ShareAll()
10221 self._ExpandAndLockInstance()
10223 def CheckPrereq(self):
10224 """Check prerequisites.
10226 This checks that the instance is in the cluster.
10229 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10230 assert self.instance is not None, \
10231 "Cannot retrieve locked instance %s" % self.op.instance_name
10232 _CheckNodeOnline(self, self.instance.primary_node)
10234 def Exec(self, feedback_fn):
10235 """Connect to the console of an instance
10238 instance = self.instance
10239 node = instance.primary_node
10241 node_insts = self.rpc.call_instance_list([node],
10242 [instance.hypervisor])[node]
10243 node_insts.Raise("Can't get node information from %s" % node)
10245 if instance.name not in node_insts.payload:
10246 if instance.admin_state == constants.ADMINST_UP:
10247 state = constants.INSTST_ERRORDOWN
10248 elif instance.admin_state == constants.ADMINST_DOWN:
10249 state = constants.INSTST_ADMINDOWN
10251 state = constants.INSTST_ADMINOFFLINE
10252 raise errors.OpExecError("Instance %s is not running (state %s)" %
10253 (instance.name, state))
10255 logging.debug("Connecting to console of %s on %s", instance.name, node)
10257 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10260 def _GetInstanceConsole(cluster, instance):
10261 """Returns console information for an instance.
10263 @type cluster: L{objects.Cluster}
10264 @type instance: L{objects.Instance}
10268 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10269 # beparams and hvparams are passed separately, to avoid editing the
10270 # instance and then saving the defaults in the instance itself.
10271 hvparams = cluster.FillHV(instance)
10272 beparams = cluster.FillBE(instance)
10273 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10275 assert console.instance == instance.name
10276 assert console.Validate()
10278 return console.ToDict()
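
# Editor's note: small illustrative helper, not used by any LU in this
# module.  It shows how _GetInstanceConsole is meant to be consumed; the
# instance name given in the docstring is invented.
def _ExampleGetConsoleInfo(cfg, instance_name):
  """Return the serialized console description for a named instance (sketch).

  @type instance_name: string
  @param instance_name: name of an existing instance, e.g. "inst1.example.com"

  """
  instance = cfg.GetInstanceInfo(instance_name)
  if instance is None:
    raise errors.OpPrereqError("Instance '%s' not known" % instance_name,
                               errors.ECODE_NOENT)
  # Same pattern as LUInstanceConsole.Exec: cluster-level defaults are
  # filled in by _GetInstanceConsole, which then serializes the
  # hypervisor's console object to a dict.
  return _GetInstanceConsole(cfg.GetClusterInfo(), instance)
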
10281 class LUInstanceReplaceDisks(LogicalUnit):
10282 """Replace the disks of an instance.
10285 HPATH = "mirrors-replace"
10286 HTYPE = constants.HTYPE_INSTANCE
10289 def CheckArguments(self):
10290 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10291 self.op.iallocator)
10293 def ExpandNames(self):
10294 self._ExpandAndLockInstance()
10296 assert locking.LEVEL_NODE not in self.needed_locks
10297 assert locking.LEVEL_NODE_RES not in self.needed_locks
10298 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10300 assert self.op.iallocator is None or self.op.remote_node is None, \
10301 "Conflicting options"
10303 if self.op.remote_node is not None:
10304 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10306 # Warning: do not remove the locking of the new secondary here
10307 # unless DRBD8.AddChildren is changed to work in parallel;
10308 # currently it doesn't since parallel invocations of
10309 # FindUnusedMinor will conflict
10310 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
10314 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10316 if self.op.iallocator is not None:
10317 # iallocator will select a new node in the same group
10318 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10320 self.needed_locks[locking.LEVEL_NODE_RES] = []
10322 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10323 self.op.iallocator, self.op.remote_node,
10324 self.op.disks, False, self.op.early_release,
10325 self.op.ignore_ipolicy)
10327 self.tasklets = [self.replacer]
10329 def DeclareLocks(self, level):
10330 if level == locking.LEVEL_NODEGROUP:
10331 assert self.op.remote_node is None
10332 assert self.op.iallocator is not None
10333 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10335 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10336 # Lock all groups used by instance optimistically; this requires going
10337 # via the node before it's locked, requiring verification later on
10338 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10339 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10341 elif level == locking.LEVEL_NODE:
10342 if self.op.iallocator is not None:
10343 assert self.op.remote_node is None
10344 assert not self.needed_locks[locking.LEVEL_NODE]
10346 # Lock member nodes of all locked groups
10347 self.needed_locks[locking.LEVEL_NODE] = [node_name
10348 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10349 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10351 self._LockInstancesNodes()
10352 elif level == locking.LEVEL_NODE_RES:
10354 self.needed_locks[locking.LEVEL_NODE_RES] = \
10355 self.needed_locks[locking.LEVEL_NODE]
10357 def BuildHooksEnv(self):
10358 """Build hooks env.
10360 This runs on the master, the primary and all the secondaries.
10363 instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env
10372 def BuildHooksNodes(self):
10373 """Build hooks nodes.
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl
10385 def CheckPrereq(self):
10386 """Check prerequisites.
10389 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10390 self.op.iallocator is None)
10392 # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10397 return LogicalUnit.CheckPrereq(self)
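
# Editor's note: illustrative only and not referenced by the code.  It shows
# the two mutually exclusive ways of requesting a secondary change that
# TLReplaceDisks.CheckArguments accepts; field names follow
# opcodes.OpInstanceReplaceDisks as assumed here, and the instance/node
# names are invented.
def _ExampleReplaceDisksOpCodes():
  """Build sample disk-replacement opcodes (sketch).

  """
  # Change the secondary to an explicitly named node ...
  explicit = opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
                                            mode=constants.REPLACE_DISK_CHG,
                                            remote_node="node3.example.com")
  # ... or let an iallocator script (e.g. "hail") pick the new secondary;
  # specifying both remote_node and iallocator would be rejected.
  automatic = opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
                                             mode=constants.REPLACE_DISK_CHG,
                                             iallocator="hail")
  return [explicit, automatic]
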
10400 class TLReplaceDisks(Tasklet):
10401 """Replaces disks for an instance.
10403 Note: Locking is not within the scope of this class.
10406 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10407 disks, delay_iallocator, early_release, ignore_ipolicy):
10408 """Initializes this class.
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy
10424 self.instance = None
10425 self.new_node = None
10426 self.target_node = None
10427 self.other_node = None
10428 self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
10433 """Helper function for users of this class.
10436 # check for valid parameter combination
10437 if mode == constants.REPLACE_DISK_CHG:
10438 if remote_node is None and iallocator is None:
10439 raise errors.OpPrereqError("When changing the secondary either an"
10440 " iallocator script must be used or the"
10441 " new node given", errors.ECODE_INVAL)
10443 if remote_node is not None and iallocator is not None:
10444 raise errors.OpPrereqError("Give either the iallocator or the new"
10445 " secondary, not both", errors.ECODE_INVAL)
10447 elif remote_node is not None or iallocator is not None:
10448 # Not replacing the secondary
10449 raise errors.OpPrereqError("The iallocator and new node options can"
10450 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10455 """Compute a new secondary node using an IAllocator.
10458 ial = IAllocator(lu.cfg, lu.rpc,
10459 mode=constants.IALLOCATOR_MODE_RELOC,
10460 name=instance_name,
10461 relocate_from=list(relocate_from))
10463 ial.Run(iallocator_name)
10465 if not ial.success:
10466 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10467 " %s" % (iallocator_name, ial.info),
10468 errors.ECODE_NORES)
10470 if len(ial.result) != ial.required_nodes:
10471 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10472 " of nodes (%s), required %s" %
10474 len(ial.result), ial.required_nodes),
10475 errors.ECODE_FAULT)
10477 remote_node_name = ial.result[0]
10479 lu.LogInfo("Selected new secondary for instance '%s': %s",
10480 instance_name, remote_node_name)
10482 return remote_node_name
10484 def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)
10491 def _CheckDisksActivated(self, instance):
10492 """Checks if the instance disks are activated.
10494 @param instance: The instance to check disks
10495 @return: True if they are activated, False otherwise
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True
10514 def CheckPrereq(self):
10515 """Check prerequisites.
10517 This checks that the instance is in the cluster.
10520 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10521 assert instance is not None, \
10522 "Cannot retrieve locked instance %s" % self.instance_name
10524 if instance.disk_template != constants.DT_DRBD8:
10525 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10526 " instances", errors.ECODE_INVAL)
10528 if len(instance.secondary_nodes) != 1:
10529 raise errors.OpPrereqError("The instance has a strange layout,"
10530 " expected one secondary but found %d" %
10531 len(instance.secondary_nodes),
10532 errors.ECODE_FAULT)
10534 if not self.delay_iallocator:
10535 self._CheckPrereq2()
10537 def _CheckPrereq2(self):
10538 """Check prerequisites, second part.
10540 This function should always be part of CheckPrereq. It was separated and is
10541 now called from Exec because during node evacuation iallocator was only
10542 called with an unmodified cluster model, not taking planned changes into
10546 instance = self.instance
10547 secondary_node = instance.secondary_nodes[0]
10549 if self.iallocator_name is None:
10550 remote_node = self.remote_node
10552 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10553 instance.name, instance.secondary_nodes)
10555 if remote_node is None:
10556 self.remote_node_info = None
10558 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10559 "Remote node '%s' is not locked" % remote_node
10561 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10562 assert self.remote_node_info is not None, \
10563 "Cannot retrieve locked node %s" % remote_node
10565 if remote_node == self.instance.primary_node:
10566 raise errors.OpPrereqError("The specified node is the primary node of"
10567 " the instance", errors.ECODE_INVAL)
10569 if remote_node == secondary_node:
10570 raise errors.OpPrereqError("The specified node is already the"
10571 " secondary node of the instance",
10572 errors.ECODE_INVAL)
10574 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10575 constants.REPLACE_DISK_CHG):
10576 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10577 errors.ECODE_INVAL)
10579 if self.mode == constants.REPLACE_DISK_AUTO:
10580 if not self._CheckDisksActivated(instance):
10581 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10582 " first" % self.instance_name,
10583 errors.ECODE_STATE)
10584 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10585 faulty_secondary = self._FindFaultyDisks(secondary_node)
10587 if faulty_primary and faulty_secondary:
10588 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10589 " one node and can not be repaired"
10590 " automatically" % self.instance_name,
                                     errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
10595 self.target_node = instance.primary_node
10596 self.other_node = secondary_node
10597 check_nodes = [self.target_node, self.other_node]
10598 elif faulty_secondary:
10599 self.disks = faulty_secondary
10600 self.target_node = secondary_node
10601 self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
10610 self.target_node = instance.primary_node
10611 self.other_node = secondary_node
10612 check_nodes = [self.target_node, self.other_node]
10614 elif self.mode == constants.REPLACE_DISK_SEC:
10615 self.target_node = secondary_node
10616 self.other_node = instance.primary_node
10617 check_nodes = [self.target_node, self.other_node]
10619 elif self.mode == constants.REPLACE_DISK_CHG:
10620 self.new_node = remote_node
10621 self.other_node = instance.primary_node
10622 self.target_node = secondary_node
10623 check_nodes = [self.new_node, self.other_node]
10625 _CheckNodeNotDrained(self.lu, remote_node)
10626 _CheckNodeVmCapable(self.lu, remote_node)
10628 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10629 assert old_node_info is not None
10630 if old_node_info.offline and not self.early_release:
10631 # doesn't make sense to delay the release
10632 self.early_release = True
10633 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                        " early-release mode", secondary_node)

    else:
      raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                   self.mode)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))
10644 # TODO: This is ugly, but right now we can't distinguish between internal
10645 # submitted opcode and external one. We should fix that.
10646 if self.remote_node_info:
10647 # We change the node, lets verify it still meets instance policy
10648 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                       new_group_info)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)
10654 # TODO: compute disk parameters
10655 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10656 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10657 if primary_node_info.group != secondary_node_info.group:
10658 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10659 " different node groups; the disk parameters of the"
10660 " primary node's group will be applied.")
10662 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10664 for node in check_nodes:
10665 _CheckNodeOnline(self.lu, node)
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)
10672 # Release unneeded node and node resource locks
10673 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10674 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10676 # Release any owned node group
10677 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10678 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10680 # Check whether disks are valid
10681 for disk_idx in self.disks:
10682 instance.FindDisk(disk_idx)
10684 # Get secondary node IP addresses
10685 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10686 in self.cfg.GetMultiNodeInfo(touched_nodes))
10688 def Exec(self, feedback_fn):
10689 """Execute disk replacement.
10691 This dispatches the disk replacement to the appropriate handler.
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10700 assert set(owned_nodes) == set(self.node_secondary_ip), \
10701 ("Incorrect node locks, owning %s, expected %s" %
10702 (owned_nodes, self.node_secondary_ip.keys()))
10703 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10704 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10706 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10707 assert list(owned_instances) == [self.instance_name], \
10708 "Instance '%s' not locked" % self.instance_name
10710 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
      "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))
10720 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)
    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)
    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result
10753 def _CheckVolumeGroup(self, nodes):
10754 self.lu.LogInfo("Checking volume groups")
10756 vgname = self.cfg.GetVGName()
10758 # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))
10770 def _CheckDisksExistence(self, nodes):
10771 # Check disk existence
10772 for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))
10789 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10790 for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10800 " replace disks for instance %s" %
10801 (node_name, self.instance.name))
10803 def _CreateNewStorage(self, node_name):
10804 """Create new storage on the primary or secondary node.
10806 This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10818 self.cfg.SetDiskID(dev, node_name)
10820 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10821 names = _GenerateUniqueNames(self.lu, lv_names)
10823 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10825 vg_data = dev.children[0].logical_id[0]
10826 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10827 logical_id=(vg_data, names[0]), params=data_p)
10828 vg_meta = dev.children[1].logical_id[0]
10829 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10830 logical_id=(vg_meta, names[1]), params=meta_p)
10832 new_lvs = [lv_data, lv_meta]
10833 old_lvs = [child.Copy() for child in dev.children]
10834 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10836 # we pass force_create=True to force the LVM creation
10837 for new_lv in new_lvs:
10838 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names
10843 def _CheckDevices(self, node_name, iv_names):
10844 for name, (dev, _, _) in iv_names.iteritems():
10845 self.cfg.SetDiskID(dev, node_name)
10847 result = self.rpc.call_blockdev_find(node_name, dev)
10849 msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
10857 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10859 def _RemoveOldStorage(self, node_name, iv_names):
10860 for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
10871 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10872 """Replace a disk on the primary or secondary for DRBD 8.
10874 The algorithm for replace is quite complicated:
10876 1. for each disk to be replaced:
10878 1. create new LVs on the target node with unique names
10879 1. detach old LVs from the drbd device
10880 1. rename old LVs to name_replaced.<time_t>
10881 1. rename new LVs to old LVs
10882 1. attach the new LVs (with the old names now) to the drbd device
10884 1. wait for sync across all devices
10886 1. for each modified disk:
10888 1. remove old LVs (which have the name name_replaces.<time_t>)
    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
10896 self.lu.LogStep(1, steps_total, "Check device existence")
10897 self._CheckDisksExistence([self.other_node, self.target_node])
10898 self._CheckVolumeGroup([self.target_node, self.other_node])
10900 # Step: check other node consistency
10901 self.lu.LogStep(2, steps_total, "Check peer consistency")
10902 self._CheckDisksConsistency(self.other_node,
10903 self.other_node == self.instance.primary_node,
10906 # Step: create new storage
10907 self.lu.LogStep(3, steps_total, "Allocate new storage")
10908 iv_names = self._CreateNewStorage(self.target_node)
10910 # Step: for each lv, detach+rename*2+attach
10911 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10912 for dev, old_lvs, new_lvs in iv_names.itervalues():
10913 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
10920 #cfg.Update(instance)
10922 # ok, we created the new LVs, so now we know we have the needed
10923 # storage; as such, we proceed on the target node to rename
10924 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10925 # using the assumption that logical_id == physical_id (which in
10926 # turn is the unique_id on that node)
10928 # FIXME(iustin): use a better name for the replaced LVs
10929 temp_suffix = int(time.time())
10930 ren_fn = lambda d, suff: (d.physical_id[0],
10931 d.physical_id[1] + "_replaced-%s" % suff)
10933 # Build the rename list based on what LVs exist on the node
10934 rename_old_to_new = []
10935 for to_ren in old_lvs:
10936 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10937 if not result.fail_msg and result.payload:
10939 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10941 self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10946 # Now we rename the new LVs to the old LVs
10947 self.lu.LogInfo("Renaming the new LVs on the target node")
10948 rename_new_to_old = [(new, old.physical_id)
10949 for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10954 # Intermediate steps of in memory modifications
10955 for old, new in zip(old_lvs, new_lvs):
10956 new.logical_id = old.logical_id
10957 self.cfg.SetDiskID(new, self.target_node)
10959 # We need to modify old_lvs so that removal later removes the
10960 # right LVs, not the newly added ones; note that old_lvs is a
10962 for disk in old_lvs:
10963 disk.logical_id = ren_fn(disk, temp_suffix)
10964 self.cfg.SetDiskID(disk, self.target_node)
10966 # Now that the new lvs have the old name, we can add them to the device
10967 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10981 cstep = itertools.count(5)
10983 if self.early_release:
10984 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10985 self._RemoveOldStorage(self.target_node, iv_names)
10986 # TODO: Check if releasing locks early still makes sense
10987 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10989 # Release all resource locks except those used by the instance
10990 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10991 keep=self.node_secondary_ip.keys())
10993 # Release all node locks while waiting for sync
10994 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10996 # TODO: Can the instance lock be downgraded here? Take the optional disk
10997 # shutdown in the caller into consideration.
11000 # This can fail as the old devices are degraded and _WaitForSync
11001 # does a combined result over all disks, so we don't check its return value
11002 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11003 _WaitForSync(self.lu, self.instance)
11005 # Check all devices manually
11006 self._CheckDevices(self.instance.primary_node, iv_names)
11008 # Step: remove old storage
11009 if not self.early_release:
11010 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11011 self._RemoveOldStorage(self.target_node, iv_names)
11013 def _ExecDrbd8Secondary(self, feedback_fn):
11014 """Replace the secondary node for DRBD 8.
11016 The algorithm for replace is quite complicated:
11017 - for all disks of the instance:
11018 - create new LVs on the new node with same names
11019 - shutdown the drbd device on the old secondary
11020 - disconnect the drbd network on the primary
11021 - create the drbd device on the new secondary
11022 - network attach the drbd on the primary, using an artifice:
11023 the drbd code for Attach() will connect to the network if it
11024 finds a device which is connected to the good local disks but
11025 not network enabled
11026 - wait for sync across all devices
11027 - remove all disks from the old secondary
11029 Failures are not very well handled.
11034 pnode = self.instance.primary_node
11036 # Step: check device activation
11037 self.lu.LogStep(1, steps_total, "Check device existence")
11038 self._CheckDisksExistence([self.instance.primary_node])
11039 self._CheckVolumeGroup([self.instance.primary_node])
11041 # Step: check other node consistency
11042 self.lu.LogStep(2, steps_total, "Check peer consistency")
11043 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11045 # Step: create new storage
11046 self.lu.LogStep(3, steps_total, "Allocate new storage")
11047 for idx, dev in enumerate(self.instance.disks):
11048 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11049 (self.new_node, idx))
11050 # we pass force_create=True to force LVM creation
11051 for new_lv in dev.children:
11052 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11053 _GetInstanceInfoText(self.instance), False)
11055 # Step 4: drbd minors and drbd setup changes
11056 # after this, we must manually remove the drbd minors on both the
11057 # error and the success paths
11058 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11059 minors = self.cfg.AllocateDRBDMinor([self.new_node
11060 for dev in self.instance.disks],
11061 self.instance.name)
11062 logging.debug("Allocated minors %r", minors)
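# AllocateDRBDMinor is given the new secondary node once per instance disk,
# so "minors" holds one freshly reserved DRBD minor (on self.new_node) for
# every disk; they are consumed positionally by the zip() below.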
11065 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11066 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11067 (self.new_node, idx))
11068 # create new devices on new_node; note that we create two IDs:
11069 # one without port, so the drbd will be activated without
11070 # networking information on the new node at this stage, and one
11071 # with network, for the latter activation in step 4
11072 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11073 if self.instance.primary_node == o_node1:
11076 assert self.instance.primary_node == o_node2, "Three-node instance?"
11079 new_alone_id = (self.instance.primary_node, self.new_node, None,
11080 p_minor, new_minor, o_secret)
11081 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11082 p_minor, new_minor, o_secret)
11084 iv_names[idx] = (dev, dev.children, new_net_id)
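# iv_names here maps the disk index to (current drbd device, its LV children,
# the network-enabled logical_id); the networked id is only written to the
# configuration later, once the standalone devices exist on the new node.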
11085 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11087 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11088 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11089 logical_id=new_alone_id,
11090 children=dev.children,
11092 params=drbd_params)
11094 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11095 _GetInstanceInfoText(self.instance), False)
11096 except errors.GenericError:
11097 self.cfg.ReleaseDRBDMinors(self.instance.name)
11100 # We have new devices, shutdown the drbd on the old secondary
11101 for idx, dev in enumerate(self.instance.disks):
11102 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11103 self.cfg.SetDiskID(dev, self.target_node)
11104 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11106 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11107 "node: %s" % (idx, msg),
11108 hint=("Please cleanup this device manually as"
11109 " soon as possible"))
11111 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11112 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11113 self.instance.disks)[pnode]
11115 msg = result.fail_msg
11117 # detaches didn't succeed (unlikely)
11118 self.cfg.ReleaseDRBDMinors(self.instance.name)
11119 raise errors.OpExecError("Can't detach the disks from the network on"
11120 " old node: %s" % (msg,))
11122 # if we managed to detach at least one, we update all the disks of
11123 # the instance to point to the new secondary
11124 self.lu.LogInfo("Updating instance configuration")
11125 for dev, _, new_logical_id in iv_names.itervalues():
11126 dev.logical_id = new_logical_id
11127 self.cfg.SetDiskID(dev, self.instance.primary_node)
11129 self.cfg.Update(self.instance, feedback_fn)
11131 # Release all node locks (the configuration has been updated)
11132 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11134 # and now perform the drbd attach
11135 self.lu.LogInfo("Attaching primary drbds to new secondary"
11136 " (standalone => connected)")
11137 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11139 self.node_secondary_ip,
11140 self.instance.disks,
11141 self.instance.name,
11143 for to_node, to_result in result.items():
11144 msg = to_result.fail_msg
11146 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11148 hint=("please do a gnt-instance info to see the"
11149 " status of disks"))
11151 cstep = itertools.count(5)
11153 if self.early_release:
11154 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11155 self._RemoveOldStorage(self.target_node, iv_names)
11156 # TODO: Check if releasing locks early still makes sense
11157 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11159 # Release all resource locks except those used by the instance
11160 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11161 keep=self.node_secondary_ip.keys())
11163 # TODO: Can the instance lock be downgraded here? Take the optional disk
11164 # shutdown in the caller into consideration.
11167 # This can fail as the old devices are degraded and _WaitForSync
11168 # does a combined result over all disks, so we don't check its return value
11169 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11170 _WaitForSync(self.lu, self.instance)
11172 # Check all devices manually
11173 self._CheckDevices(self.instance.primary_node, iv_names)
11175 # Step: remove old storage
11176 if not self.early_release:
11177 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11178 self._RemoveOldStorage(self.target_node, iv_names)
11181 class LURepairNodeStorage(NoHooksLU):
11182 """Repairs the volume group on a node.
11187 def CheckArguments(self):
11188 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11190 storage_type = self.op.storage_type
11192 if (constants.SO_FIX_CONSISTENCY not in
11193 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11194 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11195 " repaired" % storage_type,
11196 errors.ECODE_INVAL)
11198 def ExpandNames(self):
11199 self.needed_locks = {
11200 locking.LEVEL_NODE: [self.op.node_name],
11203 def _CheckFaultyDisks(self, instance, node_name):
11204 """Ensure faulty disks abort the opcode or at least warn."""
11206 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11208 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11209 " node '%s'" % (instance.name, node_name),
11210 errors.ECODE_STATE)
11211 except errors.OpPrereqError, err:
11212 if self.op.ignore_consistency:
11213 self.proc.LogWarning(str(err.args[0]))
11217 def CheckPrereq(self):
11218 """Check prerequisites.
11221 # Check whether any instance on this node has faulty disks
11222 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11223 if inst.admin_state != constants.ADMINST_UP:
11225 check_nodes = set(inst.all_nodes)
11226 check_nodes.discard(self.op.node_name)
11227 for inst_node_name in check_nodes:
11228 self._CheckFaultyDisks(inst, inst_node_name)
11230 def Exec(self, feedback_fn):
11231 feedback_fn("Repairing storage unit '%s' on %s ..." %
11232 (self.op.name, self.op.node_name))
11234 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11235 result = self.rpc.call_storage_execute(self.op.node_name,
11236 self.op.storage_type, st_args,
11238 constants.SO_FIX_CONSISTENCY)
11239 result.Raise("Failed to repair storage unit '%s' on %s" %
11240 (self.op.name, self.op.node_name))
11243 class LUNodeEvacuate(NoHooksLU):
11244 """Evacuates instances off a list of nodes.
11249 _MODE2IALLOCATOR = {
11250 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11251 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11252 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11254 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11255 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11256 constants.IALLOCATOR_NEVAC_MODES)
11258 def CheckArguments(self):
11259 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11261 def ExpandNames(self):
11262 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11264 if self.op.remote_node is not None:
11265 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11266 assert self.op.remote_node
11268 if self.op.remote_node == self.op.node_name:
11269 raise errors.OpPrereqError("Can not use evacuated node as a new"
11270 " secondary node", errors.ECODE_INVAL)
11272 if self.op.mode != constants.NODE_EVAC_SEC:
11273 raise errors.OpPrereqError("Without the use of an iallocator only"
11274 " secondary instances can be evacuated",
11275 errors.ECODE_INVAL)
11278 self.share_locks = _ShareAll()
11279 self.needed_locks = {
11280 locking.LEVEL_INSTANCE: [],
11281 locking.LEVEL_NODEGROUP: [],
11282 locking.LEVEL_NODE: [],
11285 # Determine nodes (via group) optimistically, needs verification once locks
11286 # have been acquired
11287 self.lock_nodes = self._DetermineNodes()
11289 def _DetermineNodes(self):
11290 """Gets the list of nodes to operate on.
11293 if self.op.remote_node is None:
11294 # Iallocator will choose any node(s) in the same group
11295 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11297 group_nodes = frozenset([self.op.remote_node])
11299 # Determine nodes to be locked
11300 return set([self.op.node_name]) | group_nodes
11302 def _DetermineInstances(self):
11303 """Builds list of instances to operate on.
11306 assert self.op.mode in constants.NODE_EVAC_MODES
11308 if self.op.mode == constants.NODE_EVAC_PRI:
11309 # Primary instances only
11310 inst_fn = _GetNodePrimaryInstances
11311 assert self.op.remote_node is None, \
11312 "Evacuating primary instances requires iallocator"
11313 elif self.op.mode == constants.NODE_EVAC_SEC:
11314 # Secondary instances only
11315 inst_fn = _GetNodeSecondaryInstances
11318 assert self.op.mode == constants.NODE_EVAC_ALL
11319 inst_fn = _GetNodeInstances
11320 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11322 raise errors.OpPrereqError("Due to an issue with the iallocator"
11323 " interface it is not possible to evacuate"
11324 " all instances at once; specify explicitly"
11325 " whether to evacuate primary or secondary"
11327 errors.ECODE_INVAL)
11329 return inst_fn(self.cfg, self.op.node_name)
11331 def DeclareLocks(self, level):
11332 if level == locking.LEVEL_INSTANCE:
11333 # Lock instances optimistically, needs verification once node and group
11334 # locks have been acquired
11335 self.needed_locks[locking.LEVEL_INSTANCE] = \
11336 set(i.name for i in self._DetermineInstances())
11338 elif level == locking.LEVEL_NODEGROUP:
11339 # Lock node groups for all potential target nodes optimistically, needs
11340 # verification once nodes have been acquired
11341 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11342 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11344 elif level == locking.LEVEL_NODE:
11345 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11347 def CheckPrereq(self):
11349 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11350 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11351 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11353 need_nodes = self._DetermineNodes()
11355 if not owned_nodes.issuperset(need_nodes):
11356 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11357 " locks were acquired, current nodes are"
11358 " are '%s', used to be '%s'; retry the"
11360 (self.op.node_name,
11361 utils.CommaJoin(need_nodes),
11362 utils.CommaJoin(owned_nodes)),
11363 errors.ECODE_STATE)
11365 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11366 if owned_groups != wanted_groups:
11367 raise errors.OpExecError("Node groups changed since locks were acquired,"
11368 " current groups are '%s', used to be '%s';"
11369 " retry the operation" %
11370 (utils.CommaJoin(wanted_groups),
11371 utils.CommaJoin(owned_groups)))
11373 # Determine affected instances
11374 self.instances = self._DetermineInstances()
11375 self.instance_names = [i.name for i in self.instances]
11377 if set(self.instance_names) != owned_instances:
11378 raise errors.OpExecError("Instances on node '%s' changed since locks"
11379 " were acquired, current instances are '%s',"
11380 " used to be '%s'; retry the operation" %
11381 (self.op.node_name,
11382 utils.CommaJoin(self.instance_names),
11383 utils.CommaJoin(owned_instances)))
11385 if self.instance_names:
11386 self.LogInfo("Evacuating instances from node '%s': %s",
11388 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11390 self.LogInfo("No instances to evacuate from node '%s'",
11393 if self.op.remote_node is not None:
11394 for i in self.instances:
11395 if i.primary_node == self.op.remote_node:
11396 raise errors.OpPrereqError("Node %s is the primary node of"
11397 " instance %s, cannot use it as"
11399 (self.op.remote_node, i.name),
11400 errors.ECODE_INVAL)
11402 def Exec(self, feedback_fn):
11403 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11405 if not self.instance_names:
11406 # No instances to evacuate
11409 elif self.op.iallocator is not None:
11410 # TODO: Implement relocation to other group
11411 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11412 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11413 instances=list(self.instance_names))
11415 ial.Run(self.op.iallocator)
11417 if not ial.success:
11418 raise errors.OpPrereqError("Can't compute node evacuation using"
11419 " iallocator '%s': %s" %
11420 (self.op.iallocator, ial.info),
11421 errors.ECODE_NORES)
11423 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11425 elif self.op.remote_node is not None:
11426 assert self.op.mode == constants.NODE_EVAC_SEC
11428 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11429 remote_node=self.op.remote_node,
11431 mode=constants.REPLACE_DISK_CHG,
11432 early_release=self.op.early_release)]
11433 for instance_name in self.instance_names
11437 raise errors.ProgrammerError("No iallocator or remote node")
11439 return ResultWithJobs(jobs)
11442 def _SetOpEarlyRelease(early_release, op):
11443 """Sets C{early_release} flag on opcodes if available.
11447 op.early_release = early_release
11448 except AttributeError:
11449 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
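# Opcodes lacking an early_release slot are simply left untouched (the
# AttributeError is swallowed); the assert documents that
# OpInstanceReplaceDisks always supports the flag.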
11454 def _NodeEvacDest(use_nodes, group, nodes):
11455 """Returns group or nodes depending on caller's choice.
11459 return utils.CommaJoin(nodes)
11464 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11465 """Unpacks the result of change-group and node-evacuate iallocator requests.
11467 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11468 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11470 @type lu: L{LogicalUnit}
11471 @param lu: Logical unit instance
11472 @type alloc_result: tuple/list
11473 @param alloc_result: Result from iallocator
11474 @type early_release: bool
11475 @param early_release: Whether to release locks early if possible
11476 @type use_nodes: bool
11477 @param use_nodes: Whether to display node names instead of groups
11480 (moved, failed, jobs) = alloc_result
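# Expected shape of the iallocator result (illustrative, based on how it is
# unpacked below):
#   moved:  [(instance_name, target_group, [node_name, ...]), ...]
#   failed: [(instance_name, failure_reason), ...]
#   jobs:   [[opcode_data, ...], ...]  - each inner list becomes one job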
11483 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11484 for (name, reason) in failed)
11485 lu.LogWarning("Unable to evacuate instances %s", failreason)
11486 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11489 lu.LogInfo("Instances to be moved: %s",
11490 utils.CommaJoin("%s (to %s)" %
11491 (name, _NodeEvacDest(use_nodes, group, nodes))
11492 for (name, group, nodes) in moved))
11494 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11495 map(opcodes.OpCode.LoadOpCode, ops))
11499 class LUInstanceGrowDisk(LogicalUnit):
11500 """Grow a disk of an instance.
11503 HPATH = "disk-grow"
11504 HTYPE = constants.HTYPE_INSTANCE
11507 def ExpandNames(self):
11508 self._ExpandAndLockInstance()
11509 self.needed_locks[locking.LEVEL_NODE] = []
11510 self.needed_locks[locking.LEVEL_NODE_RES] = []
11511 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11512 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11514 def DeclareLocks(self, level):
11515 if level == locking.LEVEL_NODE:
11516 self._LockInstancesNodes()
11517 elif level == locking.LEVEL_NODE_RES:
11519 self.needed_locks[locking.LEVEL_NODE_RES] = \
11520 self.needed_locks[locking.LEVEL_NODE][:]
11522 def BuildHooksEnv(self):
11523 """Build hooks env.
11525 This runs on the master, the primary and all the secondaries.
11529 "DISK": self.op.disk,
11530 "AMOUNT": self.op.amount,
11532 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11535 def BuildHooksNodes(self):
11536 """Build hooks nodes.
11539 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11542 def CheckPrereq(self):
11543 """Check prerequisites.
11545 This checks that the instance is in the cluster.
11548 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11549 assert instance is not None, \
11550 "Cannot retrieve locked instance %s" % self.op.instance_name
11551 nodenames = list(instance.all_nodes)
11552 for node in nodenames:
11553 _CheckNodeOnline(self, node)
11555 self.instance = instance
11557 if instance.disk_template not in constants.DTS_GROWABLE:
11558 raise errors.OpPrereqError("Instance's disk layout does not support"
11559 " growing", errors.ECODE_INVAL)
11561 self.disk = instance.FindDisk(self.op.disk)
11563 if instance.disk_template not in (constants.DT_FILE,
11564 constants.DT_SHARED_FILE,
11566 # TODO: check the free disk space for file, when that feature will be
11568 _CheckNodesFreeDiskPerVG(self, nodenames,
11569 self.disk.ComputeGrowth(self.op.amount))
11571 def Exec(self, feedback_fn):
11572 """Execute disk grow.
11575 instance = self.instance
11578 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11579 assert (self.owned_locks(locking.LEVEL_NODE) ==
11580 self.owned_locks(locking.LEVEL_NODE_RES))
11582 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11584 raise errors.OpExecError("Cannot activate block device to grow")
11586 feedback_fn("Growing disk %s of instance '%s' by %s" %
11587 (self.op.disk, instance.name,
11588 utils.FormatUnit(self.op.amount, "h")))
11590 # First run all grow ops in dry-run mode
11591 for node in instance.all_nodes:
11592 self.cfg.SetDiskID(disk, node)
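# The trailing boolean of call_blockdev_grow is assumed to be the dry-run
# flag: True here only checks that the grow would succeed, while the second
# loop below repeats the call with False to actually resize.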
11593 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11594 result.Raise("Grow request failed on node %s" % node)
11596 # We know that (as far as we can test) operations across different
11597 # nodes will succeed, time to run it for real
11598 for node in instance.all_nodes:
11599 self.cfg.SetDiskID(disk, node)
11600 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11601 result.Raise("Grow request failed on node %s" % node)
11603 # TODO: Rewrite code to work properly
11604 # DRBD goes into sync mode for a short amount of time after executing the
11605 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11606 # calling "resize" in sync mode fails. Sleeping for a short amount of
11607 # time is a work-around.
11610 disk.RecordGrow(self.op.amount)
11611 self.cfg.Update(instance, feedback_fn)
11613 # Changes have been recorded, release node lock
11614 _ReleaseLocks(self, locking.LEVEL_NODE)
11616 # Downgrade lock while waiting for sync
11617 self.glm.downgrade(locking.LEVEL_INSTANCE)
11619 if self.op.wait_for_sync:
11620 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11622 self.proc.LogWarning("Disk sync-ing has not returned a good"
11623 " status; please check the instance")
11624 if instance.admin_state != constants.ADMINST_UP:
11625 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11626 elif instance.admin_state != constants.ADMINST_UP:
11627 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11628 " not supposed to be running because no wait for"
11629 " sync mode was requested")
11631 assert self.owned_locks(locking.LEVEL_NODE_RES)
11632 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11635 class LUInstanceQueryData(NoHooksLU):
11636 """Query runtime instance data.
11641 def ExpandNames(self):
11642 self.needed_locks = {}
11644 # Use locking if requested or when non-static information is wanted
11645 if not (self.op.static or self.op.use_locking):
11646 self.LogWarning("Non-static data requested, locks need to be acquired")
11647 self.op.use_locking = True
11649 if self.op.instances or not self.op.use_locking:
11650 # Expand instance names right here
11651 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11653 # Will use acquired locks
11654 self.wanted_names = None
11656 if self.op.use_locking:
11657 self.share_locks = _ShareAll()
11659 if self.wanted_names is None:
11660 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11662 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11664 self.needed_locks[locking.LEVEL_NODE] = []
11665 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11667 def DeclareLocks(self, level):
11668 if self.op.use_locking and level == locking.LEVEL_NODE:
11669 self._LockInstancesNodes()
11671 def CheckPrereq(self):
11672 """Check prerequisites.
11674 This only checks the optional instance list against the existing names.
11677 if self.wanted_names is None:
11678 assert self.op.use_locking, "Locking was not used"
11679 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11681 self.wanted_instances = \
11682 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11684 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11685 """Returns the status of a block device
11688 if self.op.static or not node:
11691 self.cfg.SetDiskID(dev, node)
11693 result = self.rpc.call_blockdev_find(node, dev)
11697 result.Raise("Can't compute disk status for %s" % instance_name)
11699 status = result.payload
11703 return (status.dev_path, status.major, status.minor,
11704 status.sync_percent, status.estimated_time,
11705 status.is_degraded, status.ldisk_status)
11707 def _ComputeDiskStatus(self, instance, snode, dev):
11708 """Compute block device status.
11711 if dev.dev_type in constants.LDS_DRBD:
11712 # we change the snode then (otherwise we use the one passed in)
11713 if dev.logical_id[0] == instance.primary_node:
11714 snode = dev.logical_id[1]
11716 snode = dev.logical_id[0]
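# For DRBD disks logical_id starts with the two peer nodes, so whichever of
# the first two entries is not the primary node is reported as the secondary.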
11718 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11719 instance.name, dev)
11720 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11723 dev_children = map(compat.partial(self._ComputeDiskStatus,
11730 "iv_name": dev.iv_name,
11731 "dev_type": dev.dev_type,
11732 "logical_id": dev.logical_id,
11733 "physical_id": dev.physical_id,
11734 "pstatus": dev_pstatus,
11735 "sstatus": dev_sstatus,
11736 "children": dev_children,
11741 def Exec(self, feedback_fn):
11742 """Gather and return data"""
11745 cluster = self.cfg.GetClusterInfo()
11747 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11748 for i in self.wanted_instances)
11749 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11750 if self.op.static or pnode.offline:
11751 remote_state = None
11753 self.LogWarning("Primary node %s is marked offline, returning static"
11754 " information only for instance %s" %
11755 (pnode.name, instance.name))
11757 remote_info = self.rpc.call_instance_info(instance.primary_node,
11759 instance.hypervisor)
11760 remote_info.Raise("Error checking node %s" % instance.primary_node)
11761 remote_info = remote_info.payload
11762 if remote_info and "state" in remote_info:
11763 remote_state = "up"
11765 if instance.admin_state == constants.ADMINST_UP:
11766 remote_state = "down"
11768 remote_state = instance.admin_state
11770 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11773 result[instance.name] = {
11774 "name": instance.name,
11775 "config_state": instance.admin_state,
11776 "run_state": remote_state,
11777 "pnode": instance.primary_node,
11778 "snodes": instance.secondary_nodes,
11780 # this happens to be the same format used for hooks
11781 "nics": _NICListToTuple(self, instance.nics),
11782 "disk_template": instance.disk_template,
11784 "hypervisor": instance.hypervisor,
11785 "network_port": instance.network_port,
11786 "hv_instance": instance.hvparams,
11787 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11788 "be_instance": instance.beparams,
11789 "be_actual": cluster.FillBE(instance),
11790 "os_instance": instance.osparams,
11791 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11792 "serial_no": instance.serial_no,
11793 "mtime": instance.mtime,
11794 "ctime": instance.ctime,
11795 "uuid": instance.uuid,
11801 class LUInstanceSetParams(LogicalUnit):
11802 """Modifies an instances's parameters.
11805 HPATH = "instance-modify"
11806 HTYPE = constants.HTYPE_INSTANCE
11809 def CheckArguments(self):
11810 if not (self.op.nics or self.op.disks or self.op.disk_template or
11811 self.op.hvparams or self.op.beparams or self.op.os_name or
11812 self.op.offline is not None or self.op.runtime_mem):
11813 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11815 if self.op.hvparams:
11816 _CheckGlobalHvParams(self.op.hvparams)
11820 for disk_op, disk_dict in self.op.disks:
11821 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11822 if disk_op == constants.DDM_REMOVE:
11823 disk_addremove += 1
11825 elif disk_op == constants.DDM_ADD:
11826 disk_addremove += 1
11828 if not isinstance(disk_op, int):
11829 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11830 if not isinstance(disk_dict, dict):
11831 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11832 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11834 if disk_op == constants.DDM_ADD:
11835 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11836 if mode not in constants.DISK_ACCESS_SET:
11837 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11838 errors.ECODE_INVAL)
11839 size = disk_dict.get(constants.IDISK_SIZE, None)
11841 raise errors.OpPrereqError("Required disk parameter size missing",
11842 errors.ECODE_INVAL)
11845 except (TypeError, ValueError), err:
11846 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11847 str(err), errors.ECODE_INVAL)
11848 disk_dict[constants.IDISK_SIZE] = size
11850 # modification of disk
11851 if constants.IDISK_SIZE in disk_dict:
11852 raise errors.OpPrereqError("Disk size change not possible, use"
11853 " grow-disk", errors.ECODE_INVAL)
11855 if disk_addremove > 1:
11856 raise errors.OpPrereqError("Only one disk add or remove operation"
11857 " supported at a time", errors.ECODE_INVAL)
11859 if self.op.disks and self.op.disk_template is not None:
11860 raise errors.OpPrereqError("Disk template conversion and other disk"
11861 " changes not supported at the same time",
11862 errors.ECODE_INVAL)
11864 if (self.op.disk_template and
11865 self.op.disk_template in constants.DTS_INT_MIRROR and
11866 self.op.remote_node is None):
11867 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11868 " one requires specifying a secondary node",
11869 errors.ECODE_INVAL)
11873 for nic_op, nic_dict in self.op.nics:
11874 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11875 if nic_op == constants.DDM_REMOVE:
11878 elif nic_op == constants.DDM_ADD:
11881 if not isinstance(nic_op, int):
11882 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11883 if not isinstance(nic_dict, dict):
11884 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11885 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11887 # nic_dict should be a dict
11888 nic_ip = nic_dict.get(constants.INIC_IP, None)
11889 if nic_ip is not None:
11890 if nic_ip.lower() == constants.VALUE_NONE:
11891 nic_dict[constants.INIC_IP] = None
11893 if not netutils.IPAddress.IsValid(nic_ip):
11894 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11895 errors.ECODE_INVAL)
11897 nic_bridge = nic_dict.get("bridge", None)
11898 nic_link = nic_dict.get(constants.INIC_LINK, None)
11899 if nic_bridge and nic_link:
11900 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11901 " at the same time", errors.ECODE_INVAL)
11902 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11903 nic_dict["bridge"] = None
11904 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11905 nic_dict[constants.INIC_LINK] = None
11907 if nic_op == constants.DDM_ADD:
11908 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11909 if nic_mac is None:
11910 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11912 if constants.INIC_MAC in nic_dict:
11913 nic_mac = nic_dict[constants.INIC_MAC]
11914 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11915 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11917 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11918 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11919 " modifying an existing nic",
11920 errors.ECODE_INVAL)
11922 if nic_addremove > 1:
11923 raise errors.OpPrereqError("Only one NIC add or remove operation"
11924 " supported at a time", errors.ECODE_INVAL)
11926 def ExpandNames(self):
11927 self._ExpandAndLockInstance()
11928 # Can't even acquire node locks in shared mode as upcoming changes in
11929 # Ganeti 2.6 will start to modify the node object on disk conversion
11930 self.needed_locks[locking.LEVEL_NODE] = []
11931 self.needed_locks[locking.LEVEL_NODE_RES] = []
11932 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11934 def DeclareLocks(self, level):
11935 if level == locking.LEVEL_NODE:
11936 self._LockInstancesNodes()
11937 if self.op.disk_template and self.op.remote_node:
11938 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11939 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11940 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11942 self.needed_locks[locking.LEVEL_NODE_RES] = \
11943 self.needed_locks[locking.LEVEL_NODE][:]
11945 def BuildHooksEnv(self):
11946 """Build hooks env.
11948 This runs on the master, primary and secondaries.
11952 if constants.BE_MINMEM in self.be_new:
11953 args["minmem"] = self.be_new[constants.BE_MINMEM]
11954 if constants.BE_MAXMEM in self.be_new:
11955 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11956 if constants.BE_VCPUS in self.be_new:
11957 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11958 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11959 # information at all.
11962 nic_override = dict(self.op.nics)
11963 for idx, nic in enumerate(self.instance.nics):
11964 if idx in nic_override:
11965 this_nic_override = nic_override[idx]
11967 this_nic_override = {}
11968 if constants.INIC_IP in this_nic_override:
11969 ip = this_nic_override[constants.INIC_IP]
11972 if constants.INIC_MAC in this_nic_override:
11973 mac = this_nic_override[constants.INIC_MAC]
11976 if idx in self.nic_pnew:
11977 nicparams = self.nic_pnew[idx]
11979 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11980 mode = nicparams[constants.NIC_MODE]
11981 link = nicparams[constants.NIC_LINK]
11982 args["nics"].append((ip, mac, mode, link))
11983 if constants.DDM_ADD in nic_override:
11984 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11985 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11986 nicparams = self.nic_pnew[constants.DDM_ADD]
11987 mode = nicparams[constants.NIC_MODE]
11988 link = nicparams[constants.NIC_LINK]
11989 args["nics"].append((ip, mac, mode, link))
11990 elif constants.DDM_REMOVE in nic_override:
11991 del args["nics"][-1]
11993 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11994 if self.op.disk_template:
11995 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11996 if self.op.runtime_mem:
11997 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12001 def BuildHooksNodes(self):
12002 """Build hooks nodes.
12005 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12008 def CheckPrereq(self):
12009 """Check prerequisites.
12011 This only checks the instance list against the existing names.
12014 # checking the new params on the primary/secondary nodes
12016 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12017 cluster = self.cluster = self.cfg.GetClusterInfo()
12018 assert self.instance is not None, \
12019 "Cannot retrieve locked instance %s" % self.op.instance_name
12020 pnode = instance.primary_node
12021 nodelist = list(instance.all_nodes)
12022 pnode_info = self.cfg.GetNodeInfo(pnode)
12023 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12026 if self.op.os_name and not self.op.force:
12027 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12028 self.op.force_variant)
12029 instance_os = self.op.os_name
12031 instance_os = instance.os
12033 if self.op.disk_template:
12034 if instance.disk_template == self.op.disk_template:
12035 raise errors.OpPrereqError("Instance already has disk template %s" %
12036 instance.disk_template, errors.ECODE_INVAL)
12038 if (instance.disk_template,
12039 self.op.disk_template) not in self._DISK_CONVERSIONS:
12040 raise errors.OpPrereqError("Unsupported disk template conversion from"
12041 " %s to %s" % (instance.disk_template,
12042 self.op.disk_template),
12043 errors.ECODE_INVAL)
12044 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12045 msg="cannot change disk template")
12046 if self.op.disk_template in constants.DTS_INT_MIRROR:
12047 if self.op.remote_node == pnode:
12048 raise errors.OpPrereqError("Given new secondary node %s is the same"
12049 " as the primary node of the instance" %
12050 self.op.remote_node, errors.ECODE_STATE)
12051 _CheckNodeOnline(self, self.op.remote_node)
12052 _CheckNodeNotDrained(self, self.op.remote_node)
12053 # FIXME: here we assume that the old instance type is DT_PLAIN
12054 assert instance.disk_template == constants.DT_PLAIN
12055 disks = [{constants.IDISK_SIZE: d.size,
12056 constants.IDISK_VG: d.logical_id[0]}
12057 for d in instance.disks]
12058 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12059 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12061 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12062 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12063 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12064 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12065 ignore=self.op.ignore_ipolicy)
12066 if pnode_info.group != snode_info.group:
12067 self.LogWarning("The primary and secondary nodes are in two"
12068 " different node groups; the disk parameters"
12069 " from the first disk's node group will be"
12072 # hvparams processing
12073 if self.op.hvparams:
12074 hv_type = instance.hypervisor
12075 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12076 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12077 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12080 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12081 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12082 self.hv_proposed = self.hv_new = hv_new # the new actual values
12083 self.hv_inst = i_hvdict # the new dict (without defaults)
12085 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12087 self.hv_new = self.hv_inst = {}
12089 # beparams processing
12090 if self.op.beparams:
12091 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12093 objects.UpgradeBeParams(i_bedict)
12094 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12095 be_new = cluster.SimpleFillBE(i_bedict)
12096 self.be_proposed = self.be_new = be_new # the new actual values
12097 self.be_inst = i_bedict # the new dict (without defaults)
12099 self.be_new = self.be_inst = {}
12100 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12101 be_old = cluster.FillBE(instance)
12103 # CPU param validation -- checking every time a parameter is
12104 # changed to cover all cases where either CPU mask or vcpus have
12106 if (constants.BE_VCPUS in self.be_proposed and
12107 constants.HV_CPU_MASK in self.hv_proposed):
12109 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
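# Illustrative example (assuming the per-vCPU mask syntax handled by
# utils.ParseMultiCpuMask): a mask such as "0-1:3" would yield a cpu_list
# with one entry per vCPU, e.g. [[0, 1], [3]], whose length is compared
# against BE_VCPUS below.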
12110 # Verify mask is consistent with number of vCPUs. Can skip this
12111 # test if only 1 entry in the CPU mask, which means same mask
12112 # is applied to all vCPUs.
12113 if (len(cpu_list) > 1 and
12114 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12115 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12117 (self.be_proposed[constants.BE_VCPUS],
12118 self.hv_proposed[constants.HV_CPU_MASK]),
12119 errors.ECODE_INVAL)
12121 # Only perform this test if a new CPU mask is given
12122 if constants.HV_CPU_MASK in self.hv_new:
12123 # Calculate the largest CPU number requested
12124 max_requested_cpu = max(map(max, cpu_list))
12125 # Check that all of the instance's nodes have enough physical CPUs to
12126 # satisfy the requested CPU mask
12127 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12128 max_requested_cpu + 1, instance.hypervisor)
12130 # osparams processing
12131 if self.op.osparams:
12132 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12133 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12134 self.os_inst = i_osdict # the new dict (without defaults)
12140 #TODO(dynmem): do the appropriate check involving MINMEM
12141 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12142 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12143 mem_check_list = [pnode]
12144 if be_new[constants.BE_AUTO_BALANCE]:
12145 # either we changed auto_balance to yes or it was from before
12146 mem_check_list.extend(instance.secondary_nodes)
12147 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12148 instance.hypervisor)
12149 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12150 [instance.hypervisor])
12151 pninfo = nodeinfo[pnode]
12152 msg = pninfo.fail_msg
12154 # Assume the primary node is unreachable and go ahead
12155 self.warn.append("Can't get info from primary node %s: %s" %
12158 (_, _, (pnhvinfo, )) = pninfo.payload
12159 if not isinstance(pnhvinfo.get("memory_free", None), int):
12160 self.warn.append("Node data from primary node %s doesn't contain"
12161 " free memory information" % pnode)
12162 elif instance_info.fail_msg:
12163 self.warn.append("Can't get instance runtime information: %s" %
12164 instance_info.fail_msg)
12166 if instance_info.payload:
12167 current_mem = int(instance_info.payload["memory"])
12169 # Assume instance not running
12170 # (there is a slight race condition here, but it's not very
12171 # probable, and we have no other way to check)
12172 # TODO: Describe race condition
12174 #TODO(dynmem): do the appropriate check involving MINMEM
12175 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12176 pnhvinfo["memory_free"])
12178 raise errors.OpPrereqError("This change will prevent the instance"
12179 " from starting, due to %d MB of memory"
12180 " missing on its primary node" %
12182 errors.ECODE_NORES)
12184 if be_new[constants.BE_AUTO_BALANCE]:
12185 for node, nres in nodeinfo.items():
12186 if node not in instance.secondary_nodes:
12188 nres.Raise("Can't get info from secondary node %s" % node,
12189 prereq=True, ecode=errors.ECODE_STATE)
12190 (_, _, (nhvinfo, )) = nres.payload
12191 if not isinstance(nhvinfo.get("memory_free", None), int):
12192 raise errors.OpPrereqError("Secondary node %s didn't return free"
12193 " memory information" % node,
12194 errors.ECODE_STATE)
12195 #TODO(dynmem): do the appropriate check involving MINMEM
12196 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12197 raise errors.OpPrereqError("This change will prevent the instance"
12198 " from failover to its secondary node"
12199 " %s, due to not enough memory" % node,
12200 errors.ECODE_STATE)
12202 if self.op.runtime_mem:
12203 remote_info = self.rpc.call_instance_info(instance.primary_node,
12205 instance.hypervisor)
12206 remote_info.Raise("Error checking node %s" % instance.primary_node)
12207 if not remote_info.payload: # not running already
12208 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12209 errors.ECODE_STATE)
12211 current_memory = remote_info.payload["memory"]
12212 if (not self.op.force and
12213 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12214 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12215 raise errors.OpPrereqError("Instance %s must have memory between %d"
12216 " and %d MB of memory unless --force is"
12217 " given" % (instance.name,
12218 self.be_proposed[constants.BE_MINMEM],
12219 self.be_proposed[constants.BE_MAXMEM]),
12220 errors.ECODE_INVAL)
12222 if self.op.runtime_mem > current_memory:
12223 _CheckNodeFreeMemory(self, instance.primary_node,
12224 "ballooning memory for instance %s" %
12226 self.op.runtime_mem - current_memory,
12227 instance.hypervisor)
12231 self.nic_pinst = {}
12232 for nic_op, nic_dict in self.op.nics:
12233 if nic_op == constants.DDM_REMOVE:
12234 if not instance.nics:
12235 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
12236 errors.ECODE_INVAL)
12238 if nic_op != constants.DDM_ADD:
12240 if not instance.nics:
12241 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
12242 " no NICs" % nic_op,
12243 errors.ECODE_INVAL)
12244 if nic_op < 0 or nic_op >= len(instance.nics):
12245 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
12247 (nic_op, len(instance.nics) - 1),
12248 errors.ECODE_INVAL)
12249 old_nic_params = instance.nics[nic_op].nicparams
12250 old_nic_ip = instance.nics[nic_op].ip
12252 old_nic_params = {}
12255 update_params_dict = dict([(key, nic_dict[key])
12256 for key in constants.NICS_PARAMETERS
12257 if key in nic_dict])
12259 if "bridge" in nic_dict:
12260 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12262 new_nic_params = _GetUpdatedParams(old_nic_params,
12263 update_params_dict)
12264 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12265 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12266 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12267 self.nic_pinst[nic_op] = new_nic_params
12268 self.nic_pnew[nic_op] = new_filled_nic_params
12269 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12271 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12272 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12273 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12275 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12277 self.warn.append(msg)
12279 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12280 if new_nic_mode == constants.NIC_MODE_ROUTED:
12281 if constants.INIC_IP in nic_dict:
12282 nic_ip = nic_dict[constants.INIC_IP]
12284 nic_ip = old_nic_ip
12286 raise errors.OpPrereqError("Cannot set the nic ip to None"
12287 " on a routed nic", errors.ECODE_INVAL)
12288 if constants.INIC_MAC in nic_dict:
12289 nic_mac = nic_dict[constants.INIC_MAC]
12290 if nic_mac is None:
12291 raise errors.OpPrereqError("Cannot set the nic mac to None",
12292 errors.ECODE_INVAL)
12293 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12294 # otherwise generate the mac
12295 nic_dict[constants.INIC_MAC] = \
12296 self.cfg.GenerateMAC(self.proc.GetECId())
12298 # or validate/reserve the current one
12300 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12301 except errors.ReservationError:
12302 raise errors.OpPrereqError("MAC address %s already in use"
12303 " in cluster" % nic_mac,
12304 errors.ECODE_NOTUNIQUE)
12307 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12308 raise errors.OpPrereqError("Disk operations not supported for"
12309 " diskless instances",
12310 errors.ECODE_INVAL)
12311 for disk_op, _ in self.op.disks:
12312 if disk_op == constants.DDM_REMOVE:
12313 if len(instance.disks) == 1:
12314 raise errors.OpPrereqError("Cannot remove the last disk of"
12315 " an instance", errors.ECODE_INVAL)
12316 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12317 msg="cannot remove disks")
12319 if (disk_op == constants.DDM_ADD and
12320 len(instance.disks) >= constants.MAX_DISKS):
12321 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12322 " add more" % constants.MAX_DISKS,
12323 errors.ECODE_STATE)
12324 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12326 if disk_op < 0 or disk_op >= len(instance.disks):
12327 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12329 (disk_op, len(instance.disks)),
12330 errors.ECODE_INVAL)
12332 if self.op.offline is not None:
12333 if self.op.offline:
12334 msg = "can't change to offline"
12336 msg = "can't change to online"
12337 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12339 def _ConvertPlainToDrbd(self, feedback_fn):
12340 """Converts an instance from plain to drbd.
12343 feedback_fn("Converting template to drbd")
12344 instance = self.instance
12345 pnode = instance.primary_node
12346 snode = self.op.remote_node
12348 assert instance.disk_template == constants.DT_PLAIN
12350 # create a fake disk info for _GenerateDiskTemplate
12351 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12352 constants.IDISK_VG: d.logical_id[0]}
12353 for d in instance.disks]
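# disk_info mirrors each existing plain disk as an IDISK-style spec (size,
# mode and volume group) so that _GenerateDiskTemplate can build matching
# DRBD8 disks for the primary/secondary pair.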
12354 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12355 instance.name, pnode, [snode],
12356 disk_info, None, None, 0, feedback_fn,
12358 info = _GetInstanceInfoText(instance)
12359 feedback_fn("Creating additional volumes...")
12360 # first, create the missing data and meta devices
12361 for disk in new_disks:
12362 # unfortunately this is... not too nice
12363 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12365 for child in disk.children:
12366 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12367 # at this stage, all new LVs have been created, we can rename the old ones
12369 feedback_fn("Renaming original volumes...")
12370 rename_list = [(o, n.children[0].logical_id)
12371 for (o, n) in zip(instance.disks, new_disks)]
12372 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12373 result.Raise("Failed to rename original LVs")
12375 feedback_fn("Initializing DRBD devices...")
12376 # all child devices are in place, we can now create the DRBD devices
12377 for disk in new_disks:
12378 for node in [pnode, snode]:
12379 f_create = node == pnode
12380 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12382 # at this point, the instance has been modified
12383 instance.disk_template = constants.DT_DRBD8
12384 instance.disks = new_disks
12385 self.cfg.Update(instance, feedback_fn)
12387 # Release node locks while waiting for sync
12388 _ReleaseLocks(self, locking.LEVEL_NODE)
12390 # disks are created, waiting for sync
12391 disk_abort = not _WaitForSync(self, instance,
12392 oneshot=not self.op.wait_for_sync)
12394 raise errors.OpExecError("There are some degraded disks for"
12395 " this instance, please cleanup manually")
12397 # Node resource locks will be released by caller
12399 def _ConvertDrbdToPlain(self, feedback_fn):
12400 """Converts an instance from drbd to plain.
12403 instance = self.instance
12405 assert len(instance.secondary_nodes) == 1
12406 assert instance.disk_template == constants.DT_DRBD8
12408 pnode = instance.primary_node
12409 snode = instance.secondary_nodes[0]
12410 feedback_fn("Converting template to plain")
12412 old_disks = instance.disks
12413 new_disks = [d.children[0] for d in old_disks]
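# For a DRBD8 disk the first child is the data LV and the second the metadata
# LV; keeping children[0] as the new plain disk is what makes the conversion
# work, while the meta LVs are removed on the primary further down.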
12415 # copy over size and mode
12416 for parent, child in zip(old_disks, new_disks):
12417 child.size = parent.size
12418 child.mode = parent.mode
12420 # update instance structure
12421 instance.disks = new_disks
12422 instance.disk_template = constants.DT_PLAIN
12423 self.cfg.Update(instance, feedback_fn)
12425 # Release locks in case removing disks takes a while
12426 _ReleaseLocks(self, locking.LEVEL_NODE)
12428 feedback_fn("Removing volumes on the secondary node...")
12429 for disk in old_disks:
12430 self.cfg.SetDiskID(disk, snode)
12431 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12433 self.LogWarning("Could not remove block device %s on node %s,"
12434 " continuing anyway: %s", disk.iv_name, snode, msg)
12436 feedback_fn("Removing unneeded volumes on the primary node...")
12437 for idx, disk in enumerate(old_disks):
12438 meta = disk.children[1]
12439 self.cfg.SetDiskID(meta, pnode)
12440 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12442 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12443 " continuing anyway: %s", idx, pnode, msg)
12445 # this is a DRBD disk, return its port to the pool
12446 for disk in old_disks:
12447 tcp_port = disk.logical_id[2]
12448 self.cfg.AddTcpUdpPort(tcp_port)
12450 # Node resource locks will be released by caller
12452 def Exec(self, feedback_fn):
12453 """Modifies an instance.
12455 All parameters take effect only at the next restart of the instance.
12458 # Process here the warnings from CheckPrereq, as we don't have a
12459 # feedback_fn there.
12460 for warn in self.warn:
12461 feedback_fn("WARNING: %s" % warn)
12463 assert ((self.op.disk_template is None) ^
12464 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12465 "Not owning any node resource locks"
12468 instance = self.instance
12471 if self.op.runtime_mem:
12472 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12474 self.op.runtime_mem)
12475 rpcres.Raise("Cannot modify instance runtime memory")
12476 result.append(("runtime_memory", self.op.runtime_mem))
12479 for disk_op, disk_dict in self.op.disks:
12480 if disk_op == constants.DDM_REMOVE:
12481 # remove the last disk
12482 device = instance.disks.pop()
12483 device_idx = len(instance.disks)
12484 for node, disk in device.ComputeNodeTree(instance.primary_node):
12485 self.cfg.SetDiskID(disk, node)
12486 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12488 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12489 " continuing anyway", device_idx, node, msg)
12490 result.append(("disk/%d" % device_idx, "remove"))
12492 # if this is a DRBD disk, return its port to the pool
12493 if device.dev_type in constants.LDS_DRBD:
12494 tcp_port = device.logical_id[2]
12495 self.cfg.AddTcpUdpPort(tcp_port)
12496 elif disk_op == constants.DDM_ADD:
12498 if instance.disk_template in (constants.DT_FILE,
12499 constants.DT_SHARED_FILE):
12500 file_driver, file_path = instance.disks[0].logical_id
12501 file_path = os.path.dirname(file_path)
12503 file_driver = file_path = None
12504 disk_idx_base = len(instance.disks)
12505 new_disk = _GenerateDiskTemplate(self,
12506 instance.disk_template,
12507 instance.name, instance.primary_node,
12508 instance.secondary_nodes,
12514 self.diskparams)[0]
12515 instance.disks.append(new_disk)
12516 info = _GetInstanceInfoText(instance)
12518 logging.info("Creating volume %s for instance %s",
12519 new_disk.iv_name, instance.name)
12520 # Note: this needs to be kept in sync with _CreateDisks
12522 for node in instance.all_nodes:
12523 f_create = node == instance.primary_node
12525 _CreateBlockDev(self, node, instance, new_disk,
12526 f_create, info, f_create)
12527 except errors.OpExecError, err:
12528 self.LogWarning("Failed to create volume %s (%s) on"
12530 new_disk.iv_name, new_disk, node, err)
12531 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12532 (new_disk.size, new_disk.mode)))
12534 # change a given disk
12535 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12536 result.append(("disk.mode/%d" % disk_op,
12537 disk_dict[constants.IDISK_MODE]))
12539 if self.op.disk_template:
12541 check_nodes = set(instance.all_nodes)
12542 if self.op.remote_node:
12543 check_nodes.add(self.op.remote_node)
12544 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12545 owned = self.owned_locks(level)
12546 assert not (check_nodes - owned), \
12547 ("Not owning the correct locks, owning %r, expected at least %r" %
12548 (owned, check_nodes))
12550 r_shut = _ShutdownInstanceDisks(self, instance)
12552 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12553 " proceed with disk template conversion")
12554 mode = (instance.disk_template, self.op.disk_template)
12556 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12558 self.cfg.ReleaseDRBDMinors(instance.name)
12560 result.append(("disk_template", self.op.disk_template))
12562 assert instance.disk_template == self.op.disk_template, \
12563 ("Expected disk template '%s', found '%s'" %
12564 (self.op.disk_template, instance.disk_template))
12566 # Release node and resource locks if there are any (they might already have
12567 # been released during disk conversion)
12568 _ReleaseLocks(self, locking.LEVEL_NODE)
12569 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12572 for nic_op, nic_dict in self.op.nics:
12573 if nic_op == constants.DDM_REMOVE:
12574 # remove the last nic
12575 del instance.nics[-1]
12576 result.append(("nic.%d" % len(instance.nics), "remove"))
12577 elif nic_op == constants.DDM_ADD:
12578 # mac and bridge should be set by now
12579 mac = nic_dict[constants.INIC_MAC]
12580 ip = nic_dict.get(constants.INIC_IP, None)
12581 nicparams = self.nic_pinst[constants.DDM_ADD]
12582 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12583 instance.nics.append(new_nic)
12584 result.append(("nic.%d" % (len(instance.nics) - 1),
12585 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12586 (new_nic.mac, new_nic.ip,
12587 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12588 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12591 for key in (constants.INIC_MAC, constants.INIC_IP):
12592 if key in nic_dict:
12593 setattr(instance.nics[nic_op], key, nic_dict[key])
12594 if nic_op in self.nic_pinst:
12595 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12596 for key, val in nic_dict.iteritems():
12597 result.append(("nic.%s/%d" % (key, nic_op), val))
12600 if self.op.hvparams:
12601 instance.hvparams = self.hv_inst
12602 for key, val in self.op.hvparams.iteritems():
12603 result.append(("hv/%s" % key, val))
12606 if self.op.beparams:
12607 instance.beparams = self.be_inst
12608 for key, val in self.op.beparams.iteritems():
12609 result.append(("be/%s" % key, val))
12612 if self.op.os_name:
12613 instance.os = self.op.os_name
12616 if self.op.osparams:
12617 instance.osparams = self.os_inst
12618 for key, val in self.op.osparams.iteritems():
12619 result.append(("os/%s" % key, val))
12621 if self.op.offline is None:
12624 elif self.op.offline:
12625 # Mark instance as offline
12626 self.cfg.MarkInstanceOffline(instance.name)
12627 result.append(("admin_state", constants.ADMINST_OFFLINE))
12629 # Mark instance as online, but stopped
12630 self.cfg.MarkInstanceDown(instance.name)
12631 result.append(("admin_state", constants.ADMINST_DOWN))
12633 self.cfg.Update(instance, feedback_fn)
12635 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12636 self.owned_locks(locking.LEVEL_NODE)), \
12637 "All node locks should have been released by now"
12641 _DISK_CONVERSIONS = {
12642 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12643 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12647 class LUInstanceChangeGroup(LogicalUnit):
12648 HPATH = "instance-change-group"
12649 HTYPE = constants.HTYPE_INSTANCE
12652 def ExpandNames(self):
12653 self.share_locks = _ShareAll()
12654 self.needed_locks = {
12655 locking.LEVEL_NODEGROUP: [],
12656 locking.LEVEL_NODE: [],
12659 self._ExpandAndLockInstance()
12661 if self.op.target_groups:
12662 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12663 self.op.target_groups)
12665 self.req_target_uuids = None
12667 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12669 def DeclareLocks(self, level):
12670 if level == locking.LEVEL_NODEGROUP:
12671 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12673 if self.req_target_uuids:
12674 lock_groups = set(self.req_target_uuids)
12676 # Lock all groups used by instance optimistically; this requires going
12677 # via the node before it's locked, requiring verification later on
12678 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12679 lock_groups.update(instance_groups)
12681 # No target groups, need to lock all of them
12682 lock_groups = locking.ALL_SET
12684 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12686 elif level == locking.LEVEL_NODE:
12687 if self.req_target_uuids:
12688 # Lock all nodes used by instances
12689 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12690 self._LockInstancesNodes()
12692 # Lock all nodes in all potential target groups
12693 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12694 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12695 member_nodes = [node_name
12696 for group in lock_groups
12697 for node_name in self.cfg.GetNodeGroup(group).members]
12698 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12700 # Lock all nodes as all groups are potential targets
12701 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12703 def CheckPrereq(self):
12704 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12705 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12706 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12708 assert (self.req_target_uuids is None or
12709 owned_groups.issuperset(self.req_target_uuids))
12710 assert owned_instances == set([self.op.instance_name])
12712 # Get instance information
12713 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12715 # Check if node groups for locked instance are still correct
12716 assert owned_nodes.issuperset(self.instance.all_nodes), \
12717 ("Instance %s's nodes changed while we kept the lock" %
12718 self.op.instance_name)
12720 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12723 if self.req_target_uuids:
12724 # User requested specific target groups
12725 self.target_uuids = self.req_target_uuids
12727 # All groups except those used by the instance are potential targets
12728 self.target_uuids = owned_groups - inst_groups
12730 conflicting_groups = self.target_uuids & inst_groups
12731 if conflicting_groups:
12732 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12733 " used by the instance '%s'" %
12734 (utils.CommaJoin(conflicting_groups),
12735 self.op.instance_name),
12736 errors.ECODE_INVAL)
12738 if not self.target_uuids:
12739 raise errors.OpPrereqError("There are no possible target groups",
12740 errors.ECODE_INVAL)
12742 def BuildHooksEnv(self):
12743 """Build hooks env.
12746 assert self.target_uuids
12749 "TARGET_GROUPS": " ".join(self.target_uuids),
12752 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12756 def BuildHooksNodes(self):
12757 """Build hooks nodes.
12760 mn = self.cfg.GetMasterNode()
12761 return ([mn], [mn])
12763 def Exec(self, feedback_fn):
12764 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12766 assert instances == [self.op.instance_name], "Instance not locked"
12768 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12769 instances=instances, target_groups=list(self.target_uuids))
12771 ial.Run(self.op.iallocator)
12773 if not ial.success:
12774 raise errors.OpPrereqError("Can't compute solution for changing group of"
12775 " instance '%s' using iallocator '%s': %s" %
12776 (self.op.instance_name, self.op.iallocator,
12778 errors.ECODE_NORES)
12780 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12782 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12783 " instance '%s'", len(jobs), self.op.instance_name)
12785 return ResultWithJobs(jobs)
12788 class LUBackupQuery(NoHooksLU):
12789 """Query the exports list
12794 def ExpandNames(self):
12795 self.needed_locks = {}
12796 self.share_locks[locking.LEVEL_NODE] = 1
12797 if not self.op.nodes:
12798 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12800 self.needed_locks[locking.LEVEL_NODE] = \
12801 _GetWantedNodes(self, self.op.nodes)
12803 def Exec(self, feedback_fn):
12804 """Compute the list of all the exported system images.
12807 @return: a dictionary with the structure node->(export-list)
12808 where export-list is a list of the instances exported on
12812 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12813 rpcresult = self.rpc.call_export_list(self.nodes)
12815 for node in rpcresult:
12816 if rpcresult[node].fail_msg:
12817 result[node] = False
12819 result[node] = rpcresult[node].payload
12824 class LUBackupPrepare(NoHooksLU):
12825 """Prepares an instance for an export and returns useful information.
12830 def ExpandNames(self):
12831 self._ExpandAndLockInstance()
12833 def CheckPrereq(self):
12834 """Check prerequisites.
12837 instance_name = self.op.instance_name
12839 self.instance = self.cfg.GetInstanceInfo(instance_name)
12840 assert self.instance is not None, \
12841 "Cannot retrieve locked instance %s" % self.op.instance_name
12842 _CheckNodeOnline(self, self.instance.primary_node)
12844 self._cds = _GetClusterDomainSecret()
12846 def Exec(self, feedback_fn):
12847 """Prepares an instance for an export.
12850 instance = self.instance
12852 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12853 salt = utils.GenerateSecret(8)
12855 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12856 result = self.rpc.call_x509_cert_create(instance.primary_node,
12857 constants.RIE_CERT_VALIDITY)
12858 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12860 (name, cert_pem) = result.payload
12862 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12866 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12867 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12869 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12875 class LUBackupExport(LogicalUnit):
12876 """Export an instance to an image in the cluster.
12879 HPATH = "instance-export"
12880 HTYPE = constants.HTYPE_INSTANCE
12883 def CheckArguments(self):
12884 """Check the arguments.
12887 self.x509_key_name = self.op.x509_key_name
12888 self.dest_x509_ca_pem = self.op.destination_x509_ca
12890 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12891 if not self.x509_key_name:
12892 raise errors.OpPrereqError("Missing X509 key name for encryption",
12893 errors.ECODE_INVAL)
12895 if not self.dest_x509_ca_pem:
12896 raise errors.OpPrereqError("Missing destination X509 CA",
12897 errors.ECODE_INVAL)
12899 def ExpandNames(self):
12900 self._ExpandAndLockInstance()
12902 # Lock all nodes for local exports
12903 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12904 # FIXME: lock only instance primary and destination node
12906 # Sad but true, for now we have to lock all nodes, as we don't know where
12907 # the previous export might be, and in this LU we search for it and
12908 # remove it from its current node. In the future we could fix this by:
12909 # - making a tasklet to search (share-lock all), then create the
12910 # new one, then one to remove, after
12911 # - removing the removal operation altogether
12912 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12914 def DeclareLocks(self, level):
12915 """Last minute lock declaration."""
12916 # All nodes are locked anyway, so nothing to do here.
12918 def BuildHooksEnv(self):
12919 """Build hooks env.
12921 This will run on the master, primary node and target node.
12925 "EXPORT_MODE": self.op.mode,
12926 "EXPORT_NODE": self.op.target_node,
12927 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12928 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12929 # TODO: Generic function for boolean env variables
12930 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12933 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12937 def BuildHooksNodes(self):
12938 """Build hooks nodes.
12941 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12943 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12944 nl.append(self.op.target_node)
12948 def CheckPrereq(self):
12949 """Check prerequisites.
12951 This checks that the instance and node names are valid.
12954 instance_name = self.op.instance_name
12956 self.instance = self.cfg.GetInstanceInfo(instance_name)
12957 assert self.instance is not None, \
12958 "Cannot retrieve locked instance %s" % self.op.instance_name
12959 _CheckNodeOnline(self, self.instance.primary_node)
12961 if (self.op.remove_instance and
12962 self.instance.admin_state == constants.ADMINST_UP and
12963 not self.op.shutdown):
12964 raise errors.OpPrereqError("Can not remove instance without shutting it"
12967 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12968 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12969 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12970 assert self.dst_node is not None
12972 _CheckNodeOnline(self, self.dst_node.name)
12973 _CheckNodeNotDrained(self, self.dst_node.name)
12976 self.dest_disk_info = None
12977 self.dest_x509_ca = None
12979 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12980 self.dst_node = None
12982 if len(self.op.target_node) != len(self.instance.disks):
12983 raise errors.OpPrereqError(("Received destination information for %s"
12984 " disks, but instance %s has %s disks") %
12985 (len(self.op.target_node), instance_name,
12986 len(self.instance.disks)),
12987 errors.ECODE_INVAL)
12989 cds = _GetClusterDomainSecret()
12991 # Check X509 key name
12993 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12994 except (TypeError, ValueError), err:
12995 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12997 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12998 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12999 errors.ECODE_INVAL)
13001 # Load and verify CA
13003 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13004 except OpenSSL.crypto.Error, err:
13005 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13006 (err, ), errors.ECODE_INVAL)
13008 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13009 if errcode is not None:
13010 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13011 (msg, ), errors.ECODE_INVAL)
13013 self.dest_x509_ca = cert
13015 # Verify target information
13017 for idx, disk_data in enumerate(self.op.target_node):
13019 (host, port, magic) = \
13020 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13021 except errors.GenericError, err:
13022 raise errors.OpPrereqError("Target info for disk %s: %s" %
13023 (idx, err), errors.ECODE_INVAL)
13025 disk_info.append((host, port, magic))
13027 assert len(disk_info) == len(self.op.target_node)
13028 self.dest_disk_info = disk_info
13031 raise errors.ProgrammerError("Unhandled export mode %r" %
13034 # instance disk type verification
13035 # TODO: Implement export support for file-based disks
13036 for disk in self.instance.disks:
13037 if disk.dev_type == constants.LD_FILE:
13038 raise errors.OpPrereqError("Export not supported for instances with"
13039 " file-based disks", errors.ECODE_INVAL)
13041 def _CleanupExports(self, feedback_fn):
13042 """Removes exports of current instance from all other nodes.
13044 If an instance in a cluster with nodes A..D was exported to node C, its
13045 exports will be removed from the nodes A, B and D.
13048 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13050 nodelist = self.cfg.GetNodeList()
13051 nodelist.remove(self.dst_node.name)
13053 # on one-node clusters nodelist will be empty after the removal
13054 # if we proceed, the backup would be removed because OpBackupQuery
13055 # substitutes an empty list with the full cluster node list.
13056 iname = self.instance.name
13058 feedback_fn("Removing old exports for instance %s" % iname)
13059 exportlist = self.rpc.call_export_list(nodelist)
13060 for node in exportlist:
13061 if exportlist[node].fail_msg:
13063 if iname in exportlist[node].payload:
13064 msg = self.rpc.call_export_remove(node, iname).fail_msg
13066 self.LogWarning("Could not remove older export for instance %s"
13067 " on node %s: %s", iname, node, msg)
13069 def Exec(self, feedback_fn):
13070 """Export an instance to an image in the cluster.
13073 assert self.op.mode in constants.EXPORT_MODES
13075 instance = self.instance
13076 src_node = instance.primary_node
13078 if self.op.shutdown:
13079 # shutdown the instance, but not the disks
13080 feedback_fn("Shutting down instance %s" % instance.name)
13081 result = self.rpc.call_instance_shutdown(src_node, instance,
13082 self.op.shutdown_timeout)
13083 # TODO: Maybe ignore failures if ignore_remove_failures is set
13084 result.Raise("Could not shutdown instance %s on"
13085 " node %s" % (instance.name, src_node))
13087 # set the disks ID correctly since call_instance_start needs the
13088 # correct drbd minor to create the symlinks
13089 for disk in instance.disks:
13090 self.cfg.SetDiskID(disk, src_node)
13092 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13095 # Activate the instance disks if we're exporting a stopped instance
13096 feedback_fn("Activating disks for %s" % instance.name)
13097 _StartInstanceDisks(self, instance, None)
13100 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13103 helper.CreateSnapshots()
13105 if (self.op.shutdown and
13106 instance.admin_state == constants.ADMINST_UP and
13107 not self.op.remove_instance):
13108 assert not activate_disks
13109 feedback_fn("Starting instance %s" % instance.name)
13110 result = self.rpc.call_instance_start(src_node,
13111 (instance, None, None), False)
13112 msg = result.fail_msg
13114 feedback_fn("Failed to start instance: %s" % msg)
13115 _ShutdownInstanceDisks(self, instance)
13116 raise errors.OpExecError("Could not start instance: %s" % msg)
13118 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13119 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13120 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13121 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13122 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13124 (key_name, _, _) = self.x509_key_name
13127 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13130 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13131 key_name, dest_ca_pem,
13136 # Check for backwards compatibility
13137 assert len(dresults) == len(instance.disks)
13138 assert compat.all(isinstance(i, bool) for i in dresults), \
13139 "Not all results are boolean: %r" % dresults
13143 feedback_fn("Deactivating disks for %s" % instance.name)
13144 _ShutdownInstanceDisks(self, instance)
13146 if not (compat.all(dresults) and fin_resu):
13149 failures.append("export finalization")
13150 if not compat.all(dresults):
13151 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13153 failures.append("disk export: disk(s) %s" % fdsk)
13155 raise errors.OpExecError("Export failed, errors in %s" %
13156 utils.CommaJoin(failures))
13158 # At this point, the export was successful, we can cleanup/finish
13160 # Remove instance if requested
13161 if self.op.remove_instance:
13162 feedback_fn("Removing instance %s" % instance.name)
13163 _RemoveInstance(self, feedback_fn, instance,
13164 self.op.ignore_remove_failures)
13166 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13167 self._CleanupExports(feedback_fn)
13169 return fin_resu, dresults
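# Added note (illustrative, not part of the original code): Exec returns the
# pair (fin_resu, dresults), where fin_resu reports whether export
# finalization succeeded and dresults holds one boolean per instance disk, as
# the assertions above check. A fully successful export of a two-disk
# instance would therefore return (True, [True, True]); any False value makes
# the code raise OpExecError above instead of returning.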
13172 class LUBackupRemove(NoHooksLU):
13173 """Remove exports related to the named instance.
13178 def ExpandNames(self):
13179 self.needed_locks = {}
13180 # We need all nodes to be locked in order for RemoveExport to work, but we
13181 # don't need to lock the instance itself, as nothing will happen to it (and
13182 # we can remove exports also for a removed instance)
13183 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13185 def Exec(self, feedback_fn):
13186 """Remove any export.
13189 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13190 # If the instance was not found we'll try with the name that was passed in.
13191 # This will only work if it was an FQDN, though.
13193 if not instance_name:
13195 instance_name = self.op.instance_name
13197 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13198 exportlist = self.rpc.call_export_list(locked_nodes)
13200 for node in exportlist:
13201 msg = exportlist[node].fail_msg
13203 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13205 if instance_name in exportlist[node].payload:
13207 result = self.rpc.call_export_remove(node, instance_name)
13208 msg = result.fail_msg
13210 logging.error("Could not remove export for instance %s"
13211 " on node %s: %s", instance_name, node, msg)
13213 if fqdn_warn and not found:
13214 feedback_fn("Export not found. If trying to remove an export belonging"
13215 " to a deleted instance please use its Fully Qualified"
13219 class LUGroupAdd(LogicalUnit):
13220 """Logical unit for creating node groups.
13223 HPATH = "group-add"
13224 HTYPE = constants.HTYPE_GROUP
13227 def ExpandNames(self):
13228 # We need the new group's UUID here so that we can create and acquire the
13229 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13230 # that it should not check whether the UUID exists in the configuration.
13231 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13232 self.needed_locks = {}
13233 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13235 def CheckPrereq(self):
13236 """Check prerequisites.
13238 This checks that the given group name is not an existing node group
13243 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13244 except errors.OpPrereqError:
13247 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13248 " node group (UUID: %s)" %
13249 (self.op.group_name, existing_uuid),
13250 errors.ECODE_EXISTS)
13252 if self.op.ndparams:
13253 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13255 if self.op.hv_state:
13256 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13258 self.new_hv_state = None
13260 if self.op.disk_state:
13261 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13263 self.new_disk_state = None
13265 if self.op.diskparams:
13266 for templ in constants.DISK_TEMPLATES:
13267 if templ not in self.op.diskparams:
13268 self.op.diskparams[templ] = {}
13269 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13271 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13273 if self.op.ipolicy:
13274 cluster = self.cfg.GetClusterInfo()
13275 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13277 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13278 except errors.ConfigurationError, err:
13279 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13280 errors.ECODE_INVAL)
13282 def BuildHooksEnv(self):
13283 """Build hooks env.
13287 "GROUP_NAME": self.op.group_name,
13290 def BuildHooksNodes(self):
13291 """Build hooks nodes.
13294 mn = self.cfg.GetMasterNode()
13295 return ([mn], [mn])
13297 def Exec(self, feedback_fn):
13298 """Add the node group to the cluster.
13301 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13302 uuid=self.group_uuid,
13303 alloc_policy=self.op.alloc_policy,
13304 ndparams=self.op.ndparams,
13305 diskparams=self.op.diskparams,
13306 ipolicy=self.op.ipolicy,
13307 hv_state_static=self.new_hv_state,
13308 disk_state_static=self.new_disk_state)
13310 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13311 del self.remove_locks[locking.LEVEL_NODEGROUP]
13314 class LUGroupAssignNodes(NoHooksLU):
13315 """Logical unit for assigning nodes to groups.
13320 def ExpandNames(self):
13321 # These raise errors.OpPrereqError on their own:
13322 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13323 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13325 # We want to lock all the affected nodes and groups. We have readily
13326 # available the list of nodes, and the *destination* group. To gather the
13327 # list of "source" groups, we need to fetch node information later on.
13328 self.needed_locks = {
13329 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13330 locking.LEVEL_NODE: self.op.nodes,
13333 def DeclareLocks(self, level):
13334 if level == locking.LEVEL_NODEGROUP:
13335 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13337 # Try to get all affected nodes' groups without having the group or node
13338 # lock yet. Needs verification later in the code flow.
13339 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13341 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13343 def CheckPrereq(self):
13344 """Check prerequisites.
13347 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13348 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13349 frozenset(self.op.nodes))
13351 expected_locks = (set([self.group_uuid]) |
13352 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13353 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13354 if actual_locks != expected_locks:
13355 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13356 " current groups are '%s', used to be '%s'" %
13357 (utils.CommaJoin(expected_locks),
13358 utils.CommaJoin(actual_locks)))
13360 self.node_data = self.cfg.GetAllNodesInfo()
13361 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13362 instance_data = self.cfg.GetAllInstancesInfo()
13364 if self.group is None:
13365 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13366 (self.op.group_name, self.group_uuid))
13368 (new_splits, previous_splits) = \
13369 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13370 for node in self.op.nodes],
13371 self.node_data, instance_data)
13374 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13376 if not self.op.force:
13377 raise errors.OpExecError("The following instances get split by this"
13378 " change and --force was not given: %s" %
13381 self.LogWarning("This operation will split the following instances: %s",
13384 if previous_splits:
13385 self.LogWarning("In addition, these already-split instances continue"
13386 " to be split across groups: %s",
13387 utils.CommaJoin(utils.NiceSort(previous_splits)))
13389 def Exec(self, feedback_fn):
13390 """Assign nodes to a new group.
13393 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13395 self.cfg.AssignGroupNodes(mods)
13398 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13399 """Check for split instances after a node assignment.
13401 This method considers a series of node assignments as an atomic operation,
13402 and returns information about split instances after applying the set of
13405 In particular, it returns information about newly split instances, and
13406 instances that were already split, and remain so after the change.
13408 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
13411 @type changes: list of (node_name, new_group_uuid) pairs.
13412 @param changes: list of node assignments to consider.
13413 @param node_data: a dict with data for all nodes
13414 @param instance_data: a dict with all instances to consider
13415 @rtype: a two-tuple
13416 @return: a list of instances that were previously okay and end up split as a
13417 consequence of this change, and a list of instances that were previously
13418 split and that this change does not fix.
13421 changed_nodes = dict((node, group) for node, group in changes
13422 if node_data[node].group != group)
13424 all_split_instances = set()
13425 previously_split_instances = set()
13427 def InstanceNodes(instance):
13428 return [instance.primary_node] + list(instance.secondary_nodes)
13430 for inst in instance_data.values():
13431 if inst.disk_template not in constants.DTS_INT_MIRROR:
13434 instance_nodes = InstanceNodes(inst)
13436 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13437 previously_split_instances.add(inst.name)
13439 if len(set(changed_nodes.get(node, node_data[node].group)
13440 for node in instance_nodes)) > 1:
13441 all_split_instances.add(inst.name)
13443 return (list(all_split_instances - previously_split_instances),
13444 list(previously_split_instances & all_split_instances))
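# Added note (illustrative sketch, not part of the original code): how the
# split check above behaves for a DRBD instance. Assume instance "inst1"
# (a made-up name) currently has its primary node in group "g1" and its
# secondary in group "g2":
#   - if the proposed assignments move neither node, "inst1" only shows up in
#     the "previously split" list;
#   - if both nodes currently sit in "g1" and the change moves just the
#     secondary into "g2", "inst1" shows up in the "newly split" list.
# Instances whose disk template is not in constants.DTS_INT_MIRROR are skipped
# by the loop above and never reported.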
13447 class _GroupQuery(_QueryBase):
13448 FIELDS = query.GROUP_FIELDS
13450 def ExpandNames(self, lu):
13451 lu.needed_locks = {}
13453 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13454 self._cluster = lu.cfg.GetClusterInfo()
13455 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13458 self.wanted = [name_to_uuid[name]
13459 for name in utils.NiceSort(name_to_uuid.keys())]
13461 # Accept names to be either names or UUIDs.
13464 all_uuid = frozenset(self._all_groups.keys())
13466 for name in self.names:
13467 if name in all_uuid:
13468 self.wanted.append(name)
13469 elif name in name_to_uuid:
13470 self.wanted.append(name_to_uuid[name])
13472 missing.append(name)
13475 raise errors.OpPrereqError("Some groups do not exist: %s" %
13476 utils.CommaJoin(missing),
13477 errors.ECODE_NOENT)
13479 def DeclareLocks(self, lu, level):
13482 def _GetQueryData(self, lu):
13483 """Computes the list of node groups and their attributes.
13486 do_nodes = query.GQ_NODE in self.requested_data
13487 do_instances = query.GQ_INST in self.requested_data
13489 group_to_nodes = None
13490 group_to_instances = None
13492 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13493 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13494 # latter GetAllInstancesInfo() is not enough, for we have to go through
13495 # instance->node. Hence, we will need to process nodes even if we only need
13496 # instance information.
13497 if do_nodes or do_instances:
13498 all_nodes = lu.cfg.GetAllNodesInfo()
13499 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13502 for node in all_nodes.values():
13503 if node.group in group_to_nodes:
13504 group_to_nodes[node.group].append(node.name)
13505 node_to_group[node.name] = node.group
13508 all_instances = lu.cfg.GetAllInstancesInfo()
13509 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13511 for instance in all_instances.values():
13512 node = instance.primary_node
13513 if node in node_to_group:
13514 group_to_instances[node_to_group[node]].append(instance.name)
13517 # Do not pass on node information if it was not requested.
13518 group_to_nodes = None
13520 return query.GroupQueryData(self._cluster,
13521 [self._all_groups[uuid]
13522 for uuid in self.wanted],
13523 group_to_nodes, group_to_instances)
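# Added note (illustrative, not part of the original code): when node and/or
# instance data is requested, the mappings built above are plain dicts keyed
# by group UUID, roughly:
#   group_to_nodes     = {"<group-uuid>": ["node1.example.com", ...]}
#   group_to_instances = {"<group-uuid>": ["inst1.example.com", ...]}
# (hostnames are made up for illustration). Instances are attributed to the
# group of their primary node only, via the node_to_group mapping computed in
# the first loop.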
13526 class LUGroupQuery(NoHooksLU):
13527 """Logical unit for querying node groups.
13532 def CheckArguments(self):
13533 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13534 self.op.output_fields, False)
13536 def ExpandNames(self):
13537 self.gq.ExpandNames(self)
13539 def DeclareLocks(self, level):
13540 self.gq.DeclareLocks(self, level)
13542 def Exec(self, feedback_fn):
13543 return self.gq.OldStyleQuery(self)
13546 class LUGroupSetParams(LogicalUnit):
13547 """Modifies the parameters of a node group.
13550 HPATH = "group-modify"
13551 HTYPE = constants.HTYPE_GROUP
13554 def CheckArguments(self):
13557 self.op.diskparams,
13558 self.op.alloc_policy,
13560 self.op.disk_state,
13564 if all_changes.count(None) == len(all_changes):
13565 raise errors.OpPrereqError("Please pass at least one modification",
13566 errors.ECODE_INVAL)
13568 def ExpandNames(self):
13569 # This raises errors.OpPrereqError on its own:
13570 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13572 self.needed_locks = {
13573 locking.LEVEL_INSTANCE: [],
13574 locking.LEVEL_NODEGROUP: [self.group_uuid],
13577 self.share_locks[locking.LEVEL_INSTANCE] = 1
13579 def DeclareLocks(self, level):
13580 if level == locking.LEVEL_INSTANCE:
13581 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13583 # Lock instances optimistically, needs verification once group lock has
13585 self.needed_locks[locking.LEVEL_INSTANCE] = \
13586 self.cfg.GetNodeGroupInstances(self.group_uuid)
13588 def CheckPrereq(self):
13589 """Check prerequisites.
13592 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13594 # Check if locked instances are still correct
13595 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13597 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13598 cluster = self.cfg.GetClusterInfo()
13600 if self.group is None:
13601 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13602 (self.op.group_name, self.group_uuid))
13604 if self.op.ndparams:
13605 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13606 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13607 self.new_ndparams = new_ndparams
13609 if self.op.diskparams:
13610 self.new_diskparams = dict()
13611 for templ in constants.DISK_TEMPLATES:
13612 if templ not in self.op.diskparams:
13613 self.op.diskparams[templ] = {}
13614 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13615 self.op.diskparams[templ])
13616 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13617 self.new_diskparams[templ] = new_templ_params
13619 if self.op.hv_state:
13620 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13621 self.group.hv_state_static)
13623 if self.op.disk_state:
13624 self.new_disk_state = \
13625 _MergeAndVerifyDiskState(self.op.disk_state,
13626 self.group.disk_state_static)
13628 if self.op.ipolicy:
13629 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13633 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13634 inst_filter = lambda inst: inst.name in owned_instances
13635 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13637 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13639 new_ipolicy, instances)
13642 self.LogWarning("After the ipolicy change the following instances"
13643 " violate them: %s",
13644 utils.CommaJoin(violations))
13646 def BuildHooksEnv(self):
13647 """Build hooks env.
13651 "GROUP_NAME": self.op.group_name,
13652 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13655 def BuildHooksNodes(self):
13656 """Build hooks nodes.
13659 mn = self.cfg.GetMasterNode()
13660 return ([mn], [mn])
13662 def Exec(self, feedback_fn):
13663 """Modifies the node group.
13668 if self.op.ndparams:
13669 self.group.ndparams = self.new_ndparams
13670 result.append(("ndparams", str(self.group.ndparams)))
13672 if self.op.diskparams:
13673 self.group.diskparams = self.new_diskparams
13674 result.append(("diskparams", str(self.group.diskparams)))
13676 if self.op.alloc_policy:
13677 self.group.alloc_policy = self.op.alloc_policy
13679 if self.op.hv_state:
13680 self.group.hv_state_static = self.new_hv_state
13682 if self.op.disk_state:
13683 self.group.disk_state_static = self.new_disk_state
13685 if self.op.ipolicy:
13686 self.group.ipolicy = self.new_ipolicy
13688 self.cfg.Update(self.group, feedback_fn)
13692 class LUGroupRemove(LogicalUnit):
13693 HPATH = "group-remove"
13694 HTYPE = constants.HTYPE_GROUP
13697 def ExpandNames(self):
13698 # This will raise errors.OpPrereqError on its own:
13699 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13700 self.needed_locks = {
13701 locking.LEVEL_NODEGROUP: [self.group_uuid],
13704 def CheckPrereq(self):
13705 """Check prerequisites.
13707 This checks that the given group name exists as a node group, that it is
13708 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
13712 # Verify that the group is empty.
13713 group_nodes = [node.name
13714 for node in self.cfg.GetAllNodesInfo().values()
13715 if node.group == self.group_uuid]
13718 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13720 (self.op.group_name,
13721 utils.CommaJoin(utils.NiceSort(group_nodes))),
13722 errors.ECODE_STATE)
13724 # Verify the cluster would not be left group-less.
13725 if len(self.cfg.GetNodeGroupList()) == 1:
13726 raise errors.OpPrereqError("Group '%s' is the only group,"
13727 " cannot be removed" %
13728 self.op.group_name,
13729 errors.ECODE_STATE)
13731 def BuildHooksEnv(self):
13732 """Build hooks env.
13736 "GROUP_NAME": self.op.group_name,
13739 def BuildHooksNodes(self):
13740 """Build hooks nodes.
13743 mn = self.cfg.GetMasterNode()
13744 return ([mn], [mn])
13746 def Exec(self, feedback_fn):
13747 """Remove the node group.
13751 self.cfg.RemoveNodeGroup(self.group_uuid)
13752 except errors.ConfigurationError:
13753 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13754 (self.op.group_name, self.group_uuid))
13756 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13759 class LUGroupRename(LogicalUnit):
13760 HPATH = "group-rename"
13761 HTYPE = constants.HTYPE_GROUP
13764 def ExpandNames(self):
13765 # This raises errors.OpPrereqError on its own:
13766 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13768 self.needed_locks = {
13769 locking.LEVEL_NODEGROUP: [self.group_uuid],
13772 def CheckPrereq(self):
13773 """Check prerequisites.
13775 Ensures requested new name is not yet used.
13779 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13780 except errors.OpPrereqError:
13783 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13784 " node group (UUID: %s)" %
13785 (self.op.new_name, new_name_uuid),
13786 errors.ECODE_EXISTS)
13788 def BuildHooksEnv(self):
13789 """Build hooks env.
13793 "OLD_NAME": self.op.group_name,
13794 "NEW_NAME": self.op.new_name,
13797 def BuildHooksNodes(self):
13798 """Build hooks nodes.
13801 mn = self.cfg.GetMasterNode()
13803 all_nodes = self.cfg.GetAllNodesInfo()
13804 all_nodes.pop(mn, None)
13807 run_nodes.extend(node.name for node in all_nodes.values()
13808 if node.group == self.group_uuid)
13810 return (run_nodes, run_nodes)
13812 def Exec(self, feedback_fn):
13813 """Rename the node group.
13816 group = self.cfg.GetNodeGroup(self.group_uuid)
13819 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13820 (self.op.group_name, self.group_uuid))
13822 group.name = self.op.new_name
13823 self.cfg.Update(group, feedback_fn)
13825 return self.op.new_name
13828 class LUGroupEvacuate(LogicalUnit):
13829 HPATH = "group-evacuate"
13830 HTYPE = constants.HTYPE_GROUP
13833 def ExpandNames(self):
13834 # This raises errors.OpPrereqError on its own:
13835 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13837 if self.op.target_groups:
13838 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13839 self.op.target_groups)
13841 self.req_target_uuids = []
13843 if self.group_uuid in self.req_target_uuids:
13844 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13845 " as a target group (targets are %s)" %
13847 utils.CommaJoin(self.req_target_uuids)),
13848 errors.ECODE_INVAL)
13850 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13852 self.share_locks = _ShareAll()
13853 self.needed_locks = {
13854 locking.LEVEL_INSTANCE: [],
13855 locking.LEVEL_NODEGROUP: [],
13856 locking.LEVEL_NODE: [],
13859 def DeclareLocks(self, level):
13860 if level == locking.LEVEL_INSTANCE:
13861 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13863 # Lock instances optimistically, needs verification once node and group
13864 # locks have been acquired
13865 self.needed_locks[locking.LEVEL_INSTANCE] = \
13866 self.cfg.GetNodeGroupInstances(self.group_uuid)
13868 elif level == locking.LEVEL_NODEGROUP:
13869 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13871 if self.req_target_uuids:
13872 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13874 # Lock all groups used by instances optimistically; this requires going
13875 # via the node before it's locked, requiring verification later on
13876 lock_groups.update(group_uuid
13877 for instance_name in
13878 self.owned_locks(locking.LEVEL_INSTANCE)
13880 self.cfg.GetInstanceNodeGroups(instance_name))
13882 # No target groups, need to lock all of them
13883 lock_groups = locking.ALL_SET
13885 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13887 elif level == locking.LEVEL_NODE:
13888 # This will only lock the nodes in the group to be evacuated which
13889 # contain actual instances
13890 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13891 self._LockInstancesNodes()
13893 # Lock all nodes in group to be evacuated and target groups
13894 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13895 assert self.group_uuid in owned_groups
13896 member_nodes = [node_name
13897 for group in owned_groups
13898 for node_name in self.cfg.GetNodeGroup(group).members]
13899 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13901 def CheckPrereq(self):
13902 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13903 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13904 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13906 assert owned_groups.issuperset(self.req_target_uuids)
13907 assert self.group_uuid in owned_groups
13909 # Check if locked instances are still correct
13910 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13912 # Get instance information
13913 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13915 # Check if node groups for locked instances are still correct
13916 for instance_name in owned_instances:
13917 inst = self.instances[instance_name]
13918 assert owned_nodes.issuperset(inst.all_nodes), \
13919 "Instance %s's nodes changed while we kept the lock" % instance_name
13921 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13924 assert self.group_uuid in inst_groups, \
13925 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13927 if self.req_target_uuids:
13928 # User requested specific target groups
13929 self.target_uuids = self.req_target_uuids
13931 # All groups except the one to be evacuated are potential targets
13932 self.target_uuids = [group_uuid for group_uuid in owned_groups
13933 if group_uuid != self.group_uuid]
13935 if not self.target_uuids:
13936 raise errors.OpPrereqError("There are no possible target groups",
13937 errors.ECODE_INVAL)
13939 def BuildHooksEnv(self):
13940 """Build hooks env.
13944 "GROUP_NAME": self.op.group_name,
13945 "TARGET_GROUPS": " ".join(self.target_uuids),
13948 def BuildHooksNodes(self):
13949 """Build hooks nodes.
13952 mn = self.cfg.GetMasterNode()
13954 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13956 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13958 return (run_nodes, run_nodes)
13960 def Exec(self, feedback_fn):
13961 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13963 assert self.group_uuid not in self.target_uuids
13965 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13966 instances=instances, target_groups=self.target_uuids)
13968 ial.Run(self.op.iallocator)
13970 if not ial.success:
13971 raise errors.OpPrereqError("Can't compute group evacuation using"
13972 " iallocator '%s': %s" %
13973 (self.op.iallocator, ial.info),
13974 errors.ECODE_NORES)
13976 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13978 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13979 len(jobs), self.op.group_name)
13981 return ResultWithJobs(jobs)
13984 class TagsLU(NoHooksLU): # pylint: disable=W0223
13985 """Generic tags LU.
13987 This is an abstract class which is the parent of all the other tags LUs.
13990 def ExpandNames(self):
13991 self.group_uuid = None
13992 self.needed_locks = {}
13993 if self.op.kind == constants.TAG_NODE:
13994 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13995 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13996 elif self.op.kind == constants.TAG_INSTANCE:
13997 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13998 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13999 elif self.op.kind == constants.TAG_NODEGROUP:
14000 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14002 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14003 # not possible to acquire the BGL based on opcode parameters)
14005 def CheckPrereq(self):
14006 """Check prerequisites.
14009 if self.op.kind == constants.TAG_CLUSTER:
14010 self.target = self.cfg.GetClusterInfo()
14011 elif self.op.kind == constants.TAG_NODE:
14012 self.target = self.cfg.GetNodeInfo(self.op.name)
14013 elif self.op.kind == constants.TAG_INSTANCE:
14014 self.target = self.cfg.GetInstanceInfo(self.op.name)
14015 elif self.op.kind == constants.TAG_NODEGROUP:
14016 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14018 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14019 str(self.op.kind), errors.ECODE_INVAL)
14022 class LUTagsGet(TagsLU):
14023 """Returns the tags of a given object.
14028 def ExpandNames(self):
14029 TagsLU.ExpandNames(self)
14031 # Share locks as this is only a read operation
14032 self.share_locks = _ShareAll()
14034 def Exec(self, feedback_fn):
14035 """Returns the tag list.
14038 return list(self.target.GetTags())
14041 class LUTagsSearch(NoHooksLU):
14042 """Searches the tags for a given pattern.
14047 def ExpandNames(self):
14048 self.needed_locks = {}
14050 def CheckPrereq(self):
14051 """Check prerequisites.
14053 This checks the pattern passed for validity by compiling it.
14057 self.re = re.compile(self.op.pattern)
14058 except re.error, err:
14059 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14060 (self.op.pattern, err), errors.ECODE_INVAL)
14062 def Exec(self, feedback_fn):
14063 """Returns the tag list.
14067 tgts = [("/cluster", cfg.GetClusterInfo())]
14068 ilist = cfg.GetAllInstancesInfo().values()
14069 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14070 nlist = cfg.GetAllNodesInfo().values()
14071 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14072 tgts.extend(("/nodegroup/%s" % n.name, n)
14073 for n in cfg.GetAllNodeGroupsInfo().values())
14075 for path, target in tgts:
14076 for tag in target.GetTags():
14077 if self.re.search(tag):
14078 results.append((path, tag))
14082 class LUTagsSet(TagsLU):
14083 """Sets a tag on a given object.
14088 def CheckPrereq(self):
14089 """Check prerequisites.
14091 This checks the type and length of the tag name and value.
14094 TagsLU.CheckPrereq(self)
14095 for tag in self.op.tags:
14096 objects.TaggableObject.ValidateTag(tag)
14098 def Exec(self, feedback_fn):
14103 for tag in self.op.tags:
14104 self.target.AddTag(tag)
14105 except errors.TagError, err:
14106 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14107 self.cfg.Update(self.target, feedback_fn)
14110 class LUTagsDel(TagsLU):
14111 """Delete a list of tags from a given object.
14116 def CheckPrereq(self):
14117 """Check prerequisites.
14119 This checks that we have the given tag.
14122 TagsLU.CheckPrereq(self)
14123 for tag in self.op.tags:
14124 objects.TaggableObject.ValidateTag(tag)
14125 del_tags = frozenset(self.op.tags)
14126 cur_tags = self.target.GetTags()
14128 diff_tags = del_tags - cur_tags
14130 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14131 raise errors.OpPrereqError("Tag(s) %s not found" %
14132 (utils.CommaJoin(diff_names), ),
14133 errors.ECODE_NOENT)
14135 def Exec(self, feedback_fn):
14136 """Remove the tag from the object.
14139 for tag in self.op.tags:
14140 self.target.RemoveTag(tag)
14141 self.cfg.Update(self.target, feedback_fn)
14144 class LUTestDelay(NoHooksLU):
14145 """Sleep for a specified amount of time.
14147 This LU sleeps on the master and/or nodes for a specified amount of time.
14153 def ExpandNames(self):
14154 """Expand names and set required locks.
14156 This expands the node list, if any.
14159 self.needed_locks = {}
14160 if self.op.on_nodes:
14161 # _GetWantedNodes can be used here, but is not always appropriate to use
14162 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14163 # more information.
14164 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14165 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14167 def _TestDelay(self):
14168 """Do the actual sleep.
14171 if self.op.on_master:
14172 if not utils.TestDelay(self.op.duration):
14173 raise errors.OpExecError("Error during master delay test")
14174 if self.op.on_nodes:
14175 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14176 for node, node_result in result.items():
14177 node_result.Raise("Failure during rpc call to node %s" % node)
14179 def Exec(self, feedback_fn):
14180 """Execute the test delay opcode, with the wanted repetitions.
14183 if self.op.repeat == 0:
14186 top_value = self.op.repeat - 1
14187 for i in range(self.op.repeat):
14188 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14192 class LUTestJqueue(NoHooksLU):
14193 """Utility LU to test some aspects of the job queue.
14198 # Must be lower than default timeout for WaitForJobChange to see whether it
14199 # notices changed jobs
14200 _CLIENT_CONNECT_TIMEOUT = 20.0
14201 _CLIENT_CONFIRM_TIMEOUT = 60.0
14204 def _NotifyUsingSocket(cls, cb, errcls):
14205 """Opens a Unix socket and waits for another program to connect.
14208 @param cb: Callback to send socket name to client
14209 @type errcls: class
14210 @param errcls: Exception class to use for errors
14213 # Using a temporary directory as there's no easy way to create temporary
14214 # sockets without writing a custom loop around tempfile.mktemp and
14216 tmpdir = tempfile.mkdtemp()
14218 tmpsock = utils.PathJoin(tmpdir, "sock")
14220 logging.debug("Creating temporary socket at %s", tmpsock)
14221 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14226 # Send details to client
14229 # Wait for client to connect before continuing
14230 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14232 (conn, _) = sock.accept()
14233 except socket.error, err:
14234 raise errcls("Client didn't connect in time (%s)" % err)
14238 # Remove as soon as client is connected
14239 shutil.rmtree(tmpdir)
14241 # Wait for client to close
14244 # pylint: disable=E1101
14245 # Instance of '_socketobject' has no ... member
14246 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14248 except socket.error, err:
14249 raise errcls("Client failed to confirm notification (%s)" % err)
14253 def _SendNotification(self, test, arg, sockname):
14254 """Sends a notification to the client.
14257 @param test: Test name
14258 @param arg: Test argument (depends on test)
14259 @type sockname: string
14260 @param sockname: Socket path
14263 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14265 def _Notify(self, prereq, test, arg):
14266 """Notifies the client of a test.
14269 @param prereq: Whether this is a prereq-phase test
14271 @param test: Test name
14272 @param arg: Test argument (depends on test)
14276 errcls = errors.OpPrereqError
14278 errcls = errors.OpExecError
14280 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14284 def CheckArguments(self):
14285 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14286 self.expandnames_calls = 0
14288 def ExpandNames(self):
14289 checkargs_calls = getattr(self, "checkargs_calls", 0)
14290 if checkargs_calls < 1:
14291 raise errors.ProgrammerError("CheckArguments was not called")
14293 self.expandnames_calls += 1
14295 if self.op.notify_waitlock:
14296 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14298 self.LogInfo("Expanding names")
14300 # Get lock on master node (just to get a lock, not for a particular reason)
14301 self.needed_locks = {
14302 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14305 def Exec(self, feedback_fn):
14306 if self.expandnames_calls < 1:
14307 raise errors.ProgrammerError("ExpandNames was not called")
14309 if self.op.notify_exec:
14310 self._Notify(False, constants.JQT_EXEC, None)
14312 self.LogInfo("Executing")
14314 if self.op.log_messages:
14315 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14316 for idx, msg in enumerate(self.op.log_messages):
14317 self.LogInfo("Sending log message %s", idx + 1)
14318 feedback_fn(constants.JQT_MSGPREFIX + msg)
14319 # Report how many test messages have been sent
14320 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14323 raise errors.OpExecError("Opcode failure was requested")
14328 class IAllocator(object):
14329 """IAllocator framework.
14331 An IAllocator instance has the following sets of attributes:
14332 - cfg that is needed to query the cluster
14333 - input data (all members of the _KEYS class attribute are required)
14334 - four buffer attributes (in|out_data|text), that represent the
14335 input (to the external script) in text and data structure format,
14336 and the output from it, again in two formats
14337 - the result variables from the script (success, info, nodes) for
14341 # pylint: disable=R0902
14342 # lots of instance attributes
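  # Typical usage (illustrative sketch based on the logical units above, e.g.
  # LUGroupEvacuate.Exec; not part of the original code):
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
  #                    instances=instances, target_groups=target_uuids)
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     # ial.info carries the allocator's error message
  #     raise errors.OpPrereqError(...)
  #   # on success, ial.result holds the allocator's answer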
14344 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14346 self.rpc = rpc_runner
14347 # init buffer variables
14348 self.in_text = self.out_text = self.in_data = self.out_data = None
14349 # init all input fields so that pylint is happy
14351 self.memory = self.disks = self.disk_template = None
14352 self.os = self.tags = self.nics = self.vcpus = None
14353 self.hypervisor = None
14354 self.relocate_from = None
14356 self.instances = None
14357 self.evac_mode = None
14358 self.target_groups = []
14360 self.required_nodes = None
14361 # init result fields
14362 self.success = self.info = self.result = None
14365 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14367 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14368 " IAllocator" % self.mode)
14370 keyset = [n for (n, _) in keydata]
14373 if key not in keyset:
14374 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14375 " IAllocator" % key)
14376 setattr(self, key, kwargs[key])
14379 if key not in kwargs:
14380 raise errors.ProgrammerError("Missing input parameter '%s' to"
14381 " IAllocator" % key)
14382 self._BuildInputData(compat.partial(fn, self), keydata)
14384 def _ComputeClusterData(self):
14385 """Compute the generic allocator input data.
14387 This is the data that is independent of the actual operation.
14391 cluster_info = cfg.GetClusterInfo()
14394 "version": constants.IALLOCATOR_VERSION,
14395 "cluster_name": cfg.GetClusterName(),
14396 "cluster_tags": list(cluster_info.GetTags()),
14397 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14398 "ipolicy": cluster_info.ipolicy,
14400 ninfo = cfg.GetAllNodesInfo()
14401 iinfo = cfg.GetAllInstancesInfo().values()
14402 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14405 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14407 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14408 hypervisor_name = self.hypervisor
14409 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14410 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14412 hypervisor_name = cluster_info.primary_hypervisor
14414 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14417 self.rpc.call_all_instances_info(node_list,
14418 cluster_info.enabled_hypervisors)
14420 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14422 config_ndata = self._ComputeBasicNodeData(ninfo)
14423 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14424 i_list, config_ndata)
14425 assert len(data["nodes"]) == len(ninfo), \
14426 "Incomplete node data computed"
14428 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14430 self.in_data = data
14433 def _ComputeNodeGroupData(cfg):
14434 """Compute node groups data.
14437 cluster = cfg.GetClusterInfo()
14438 ng = dict((guuid, {
14439 "name": gdata.name,
14440 "alloc_policy": gdata.alloc_policy,
14441 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14443 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14448 def _ComputeBasicNodeData(node_cfg):
14449 """Compute global node data.
14452 @returns: a dict of name: (node dict, node config)
14455 # fill in static (config-based) values
14456 node_results = dict((ninfo.name, {
14457 "tags": list(ninfo.GetTags()),
14458 "primary_ip": ninfo.primary_ip,
14459 "secondary_ip": ninfo.secondary_ip,
14460 "offline": ninfo.offline,
14461 "drained": ninfo.drained,
14462 "master_candidate": ninfo.master_candidate,
14463 "group": ninfo.group,
14464 "master_capable": ninfo.master_capable,
14465 "vm_capable": ninfo.vm_capable,
14467 for ninfo in node_cfg.values())
14469 return node_results
14472 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14474 """Compute global node data.
14476 @param node_results: the basic node structures as filled from the config
14479 #TODO(dynmem): compute the right data on MAX and MIN memory
14480 # make a copy of the current dict
14481 node_results = dict(node_results)
14482 for nname, nresult in node_data.items():
14483 assert nname in node_results, "Missing basic data for node %s" % nname
14484 ninfo = node_cfg[nname]
14486 if not (ninfo.offline or ninfo.drained):
14487 nresult.Raise("Can't get data for node %s" % nname)
14488 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14490 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14492 for attr in ["memory_total", "memory_free", "memory_dom0",
14493 "vg_size", "vg_free", "cpu_total"]:
14494 if attr not in remote_info:
14495 raise errors.OpExecError("Node '%s' didn't return attribute"
14496 " '%s'" % (nname, attr))
14497 if not isinstance(remote_info[attr], int):
14498 raise errors.OpExecError("Node '%s' returned invalid value"
14500 (nname, attr, remote_info[attr]))
14501 # compute memory used by primary instances
14502 i_p_mem = i_p_up_mem = 0
14503 for iinfo, beinfo in i_list:
14504 if iinfo.primary_node == nname:
14505 i_p_mem += beinfo[constants.BE_MAXMEM]
14506 if iinfo.name not in node_iinfo[nname].payload:
14509 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14510 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14511 remote_info["memory_free"] -= max(0, i_mem_diff)
14513 if iinfo.admin_state == constants.ADMINST_UP:
14514 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14516 # compute memory used by instances
14518 "total_memory": remote_info["memory_total"],
14519 "reserved_memory": remote_info["memory_dom0"],
14520 "free_memory": remote_info["memory_free"],
14521 "total_disk": remote_info["vg_size"],
14522 "free_disk": remote_info["vg_free"],
14523 "total_cpus": remote_info["cpu_total"],
14524 "i_pri_memory": i_p_mem,
14525 "i_pri_up_memory": i_p_up_mem,
14527 pnr_dyn.update(node_results[nname])
14528 node_results[nname] = pnr_dyn
14530 return node_results
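# Added note (illustrative, not part of the original code): for a node that is
# neither offline nor drained, the resulting entry merges the static fields
# from _ComputeBasicNodeData with the dynamic ones assembled above
# ("total_memory", "reserved_memory", "free_memory", "total_disk",
# "free_disk", "total_cpus", "i_pri_memory", "i_pri_up_memory"); offline or
# drained nodes keep only their static fields.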

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
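
  # Example of one instance entry as built above (all values are made-up
  # placeholders; the key set mirrors the "pir" dict):
  #   "inst1.example.com": {"tags": [], "admin_state": "up", "vcpus": 2,
  #                         "memory": 1024, "os": "debian-image",
  #                         "nodes": ["node1", "node2"], "nics": [{...}],
  #                         "disks": [{"size": 10240, "mode": "rw"}],
  #                         "disk_template": "drbd", "hypervisor": "xen-pvm",
  #                         "disk_space_total": <from _ComputeDiskSize>}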

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
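
  # Sketch of the serialized input, assuming the usual layout produced by
  # _ComputeClusterData(): cluster-wide data plus the per-mode request, e.g.
  #   {"nodegroups": {...}, "nodes": {...}, "instances": {...}, ...,
  #    "request": {"type": <mode>, ...}}
  # (only "nodes", "nodegroups" and "request" are relied upon below; the
  # other key names are illustrative assumptions).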

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
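
  # Hypothetical call sequence (the allocator name is only an example): once
  # in_text has been built,
  #   ial.Run("hail")
  # asks the master node to execute the script and then validates the reply;
  # passing validate=False skips _ValidateResult(), as LUTestAllocator does.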

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
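
  # Example of a well-formed reply after parsing (placeholder values):
  #   {"success": True, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  # Legacy replies may use "nodes" instead of "result"; that key is rewritten
  # above before validation.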

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]
        result.add(group_name)

    return sorted(result)
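
  # Illustrative call (made-up names and UUIDs):
  #   _NodesToGroups({"node1": "uuid-a"}, {"uuid-a": {"name": "default"}},
  #                  ["node1", "unknown"])
  # returns ["default"]; unknown nodes are skipped and a missing group falls
  # back to its UUID.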


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
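

# Illustrative use (hypothetical caller): query opcodes resolve their
# implementation class via e.g.
#   cls = _GetQueryImplementation(constants.QR_NODE)
# which returns _NodeQuery, while an unknown resource name raises
# OpPrereqError with ECODE_INVAL.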