4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
#: Size of DRBD meta block device
DRBD_META_SIZE = 128

# States of instances: lists of admin states used by checks below
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensure
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separate is better because:
176 - ExpandNames is left as as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods can no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that level
200 - don't put anything for the BGL level
201 - if you want all locks at a level use locking.ALL_SET as a value
203 If you need to share locks (rather than acquire them exclusively) at one
204 level you can modify self.share_locks, setting a true value (usually 1) for
205 that level. By default locks are not shared.
207 This function can also define a list of tasklets, which then will be
208 executed in order instead of the usual LU-level CheckPrereq and Exec
209 functions, if those are not defined by the LU.
213 # Acquire all nodes and one instance
214 self.needed_locks = {
215 locking.LEVEL_NODE: locking.ALL_SET,
216 locking.LEVEL_INSTANCE: ['instance1.example.com'],
218 # Acquire just two nodes
219 self.needed_locks = {
220 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
223 self.needed_locks = {} # No, you can't leave it to the default value None
226 # The implementation of this method is mandatory only if the new LU is
227 # concurrent, so that old LUs don't need to be changed all at the same
230 self.needed_locks = {} # Exclusive LUs don't need locks.
232 raise NotImplementedError
234 def DeclareLocks(self, level):
235 """Declare LU locking needs for a level
237 While most LUs can just declare their locking needs at ExpandNames time,
238 sometimes there's the need to calculate some locks after having acquired
239 the ones before. This function is called just before acquiring locks at a
240 particular level, but after acquiring the ones at lower levels, and permits
241 such calculations. It can be used to modify self.needed_locks, and by
242 default it does nothing.
244 This function is only called if you have something already set in
245 self.needed_locks for the level.
247 @param level: Locking level which is going to be locked
248 @type level: member of ganeti.locking.LEVELS
252 def CheckPrereq(self):
253 """Check prerequisites for this LU.
255 This method should check that the prerequisites for the execution
256 of this LU are fulfilled. It can do internode communication, but
257 it should be idempotent - no cluster or system changes are
260 The method should raise errors.OpPrereqError in case something is
261 not fulfilled. Its return value is ignored.
263 This method should also update all the parameters of the opcode to
264 their canonical form if it hasn't been done by ExpandNames before.
267 if self.tasklets is not None:
268 for (idx, tl) in enumerate(self.tasklets):
269 logging.debug("Checking prerequisites for tasklet %s/%s",
270 idx + 1, len(self.tasklets))
275 def Exec(self, feedback_fn):
278 This method should implement the actual work. It should raise
279 errors.OpExecError for failures that are somewhat dealt with in
283 if self.tasklets is not None:
284 for (idx, tl) in enumerate(self.tasklets):
285 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
288 raise NotImplementedError
290 def BuildHooksEnv(self):
291 """Build hooks environment for this LU.
294 @return: Dictionary containing the environment that will be used for
295 running the hooks for this LU. The keys of the dict must not be prefixed
296 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
297 will extend the environment with additional variables. If no environment
298 should be defined, an empty dictionary should be returned (not C{None}).
299 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
303 raise NotImplementedError
305 def BuildHooksNodes(self):
306 """Build list of nodes to run LU's hooks.
308 @rtype: tuple; (list, list)
309 @return: Tuple containing a list of node names on which the hook
310 should run before the execution and a list of node names on which the
311 hook should run after the execution. No nodes should be returned as an
312 empty list (and not None).
313 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
317 raise NotImplementedError
319 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
320 """Notify the LU about the results of its hooks.
322 This method is called every time a hooks phase is executed, and notifies
323 the Logical Unit about the hooks' result. The LU can then use it to alter
324 its result based on the hooks. By default the method does nothing and the
325 previous result is passed back unchanged but any LU can define it if it
326 wants to use the local cluster hook-scripts somehow.
328 @param phase: one of L{constants.HOOKS_PHASE_POST} or
329 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
330 @param hook_results: the results of the multi-node hooks rpc call
331 @param feedback_fn: function used send feedback back to the caller
332 @param lu_result: the previous Exec result this LU had, or None
334 @return: the new Exec result, based on the previous result
338 # API must be kept, thus we ignore the unused argument and could
339 # be a function warnings
340 # pylint: disable=W0613,R0201
343 def _ExpandAndLockInstance(self):
344 """Helper function to expand and lock an instance.
346 Many LUs that work on an instance take its name in self.op.instance_name
347 and need to expand it and then declare the expanded name for locking. This
348 function does it, and then updates self.op.instance_name to the expanded
349 name. It also initializes needed_locks as a dict, if this hasn't been done
353 if self.needed_locks is None:
354 self.needed_locks = {}
356 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
357 "_ExpandAndLockInstance called with instance-level locks set"
358 self.op.instance_name = _ExpandInstanceName(self.cfg,
359 self.op.instance_name)
360 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
362 def _LockInstancesNodes(self, primary_only=False,
363 level=locking.LEVEL_NODE):
364 """Helper function to declare instances' nodes for locking.
366 This function should be called after locking one or more instances to lock
367 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
368 with all primary or secondary nodes for instances already locked and
369 present in self.needed_locks[locking.LEVEL_INSTANCE].
371 It should be called from DeclareLocks, and for safety only works if
372 self.recalculate_locks[locking.LEVEL_NODE] is set.
374 In the future it may grow parameters to just lock some instance's nodes, or
375 to just lock primaries or secondary nodes, if needed.
377 If should be called in DeclareLocks in a way similar to::
379 if level == locking.LEVEL_NODE:
380 self._LockInstancesNodes()
382 @type primary_only: boolean
383 @param primary_only: only lock primary nodes of locked instances
384 @param level: Which lock level to use for locking nodes
387 assert level in self.recalculate_locks, \
388 "_LockInstancesNodes helper function called with no nodes to recalculate"
390 # TODO: check if we're really been called with the instance locks held
392 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
393 # future we might want to have different behaviors depending on the value
394 # of self.recalculate_locks[locking.LEVEL_NODE]
396 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
397 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
398 wanted_nodes.append(instance.primary_node)
400 wanted_nodes.extend(instance.secondary_nodes)
402 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
403 self.needed_locks[level] = wanted_nodes
404 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
405 self.needed_locks[level].extend(wanted_nodes)
407 raise errors.ProgrammerError("Unknown recalculation mode")
409 del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # No hooks path/type: the hooks runner is never invoked for these LUs
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
438 """Tasklet base class.
440 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
441 they can mix legacy code with tasklets. Locking needs to be done in the LU,
442 tasklets know nothing about locks.
444 Subclasses must follow these rules:
445 - Implement CheckPrereq
449 def __init__(self, lu):
456 def CheckPrereq(self):
457 """Check prerequisites for this tasklets.
459 This method should check whether the prerequisites for the execution of
460 this tasklet are fulfilled. It can do internode communication, but it
461 should be idempotent - no cluster or system changes are allowed.
463 The method should raise errors.OpPrereqError in case something is not
464 fulfilled. Its return value is ignored.
466 This method should also update all parameters to their canonical form if it
467 hasn't been done before.
472 def Exec(self, feedback_fn):
473 """Execute the tasklet.
475 This method should implement the actual work. It should raise
476 errors.OpExecError for failures that are somewhat dealt with in code, or
480 raise NotImplementedError
484 """Base for query utility classes.
487 #: Attribute holding field definitions
490 def __init__(self, qfilter, fields, use_locking):
491 """Initializes this class.
494 self.use_locking = use_locking
496 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
498 self.requested_data = self.query.RequestedData()
499 self.names = self.query.RequestedNames()
501 # Sort only if no names were requested
502 self.sort_by_name = not self.names
504 self.do_locking = None
507 def _GetNames(self, lu, all_names, lock_level):
508 """Helper function to determine names asked for in the query.
512 names = lu.owned_locks(lock_level)
516 if self.wanted == locking.ALL_SET:
517 assert not self.names
518 # caller didn't specify names, so ordering is not important
519 return utils.NiceSort(names)
521 # caller specified names and we must keep the same order
523 assert not self.do_locking or lu.glm.is_owned(lock_level)
525 missing = set(self.wanted).difference(names)
527 raise errors.OpExecError("Some items were removed before retrieving"
528 " their data: %s" % missing)
530 # Return expanded names
533 def ExpandNames(self, lu):
534 """Expand names for this query.
536 See L{LogicalUnit.ExpandNames}.
539 raise NotImplementedError()
541 def DeclareLocks(self, lu, level):
542 """Declare locks for this query.
544 See L{LogicalUnit.DeclareLocks}.
547 raise NotImplementedError()
549 def _GetQueryData(self, lu):
550 """Collects all data for this query.
552 @return: Query data object
555 raise NotImplementedError()
557 def NewStyleQuery(self, lu):
558 """Collect data and execute query.
561 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
562 sort_by_name=self.sort_by_name)
564 def OldStyleQuery(self, lu):
565 """Collect data and execute query.
568 return self.query.OldStyleQuery(self._GetQueryData(lu),
569 sort_by_name=self.sort_by_name)
573 """Returns a dict declaring all lock levels shared.
576 return dict.fromkeys(locking.LEVELS, 1)
def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  # data is (bootid, per-vg info list, per-hypervisor info list); the legacy
  # format only carries the first volume group and hypervisor
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @rtype: frozenset
  @return: The instance's current node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  # no names given: return every known node, nicely sorted
  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    # no names given: return every known instance, nicely sorted
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      # reset request: drop the key; it may not be present in the old dict
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
727 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
728 """Return the new version of a instance policy.
730 @param group_policy: whether this policy applies to a group and thus
731 we should support removal of policy entries
734 use_none = use_default = group_policy
735 ipolicy = copy.deepcopy(old_ipolicy)
736 for key, value in new_ipolicy.items():
737 if key not in constants.IPOLICY_ALL_KEYS:
738 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
740 if key in constants.IPOLICY_ISPECS:
741 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
742 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
744 use_default=use_default)
746 if not value or value == [constants.VALUE_DEFAULT]:
750 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
751 " on the cluster'" % key,
754 if key in constants.IPOLICY_PARAMETERS:
755 # FIXME: we assume all such values are float
757 ipolicy[key] = float(value)
758 except (TypeError, ValueError), err:
759 raise errors.OpPrereqError("Invalid value for attribute"
760 " '%s': '%s', error: %s" %
761 (key, value, err), errors.ECODE_INVAL)
763 # FIXME: we assume all others are lists; this should be redone
765 ipolicy[key] = list(value)
767 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
768 except errors.ConfigurationError, err:
769 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    # merge one sub-dict and enforce its value types
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None
839 def _ReleaseLocks(lu, level, names=None, keep=None):
840 """Releases locks owned by an LU.
842 @type lu: L{LogicalUnit}
843 @param level: Lock level
844 @type names: list or None
845 @param names: Names of locks to release
846 @type keep: list or None
847 @param keep: Names of locks to retain
850 assert not (keep is not None and names is not None), \
851 "Only one of the 'names' and the 'keep' parameters can be given"
853 if names is not None:
854 should_release = names.__contains__
856 should_release = lambda name: name not in keep
858 should_release = None
860 owned = lu.owned_locks(level)
862 # Not owning any lock at this level, do nothing
869 # Determine which locks to release
871 if should_release(name):
876 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
878 # Release just some locks
879 lu.glm.release(level, names=release)
881 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
884 lu.glm.release(level)
886 assert not lu.glm.is_owned(level), "No locks should be owned"
889 def _MapInstanceDisksToNodes(instances):
890 """Creates a map from (node, volume) to instance name.
892 @type instances: list of L{objects.Instance}
893 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
896 return dict(((node, vol), inst.name)
897 for inst in instances
898 for (node, vols) in inst.MapLVsByNode().items()
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  Hook failures are logged as warnings and never propagated: post-hooks
  are best-effort by design.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the fields requested by the caller
  @raise errors.OpPrereqError: if any selected field is unknown

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance must be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    # the instance must not be running either; verify with its primary node
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)
def _ComputeMinMaxSpec(name, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  # unset/auto values are never range-checked
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  # keep only the non-None results, i.e. the actual violation messages
  return filter(None,
                (_compute_fn(name, ipolicy, value)
                 for (name, value) in test_settings))
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # Staying in the same group can never introduce a policy violation
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the logical unit on whose behalf we execute
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    # Either downgrade the violation to a warning or abort the operation
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but
      did not before

  """
  # Violators of the new policy minus violators of the old one: only the
  # instances which the policy *change* breaks (the previous operand order
  # computed the opposite set, contradicting the docstring)
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name
def _ExpandNodeName(cfg, name):
  """Resolve a (possibly shortened) node name via L{_ExpandItemName}."""
  resolver = cfg.ExpandNodeName
  return _ExpandItemName(resolver, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Resolve a (possibly shortened) instance name via L{_ExpandItemName}."""
  resolver = cfg.ExpandInstanceName
  return _ExpandItemName(resolver, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      # hooks get an empty string rather than None for an unset IP
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    # mode/link come from the cluster-filled nic parameters
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: node names excluded from candidate promotion

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    # promoted nodes must be re-added to the cluster context
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  # only bridged NICs actually need a bridge on the target node
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    # the OS declares no variants, so none may be passed
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  """Return all configured instances that satisfy the predicate C{fn}."""
  all_instances = cfg.GetAllInstancesInfo().values()
  return [inst for inst in all_instances if fn(inst)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  # all other storage types take no extra arguments
  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Return the indices of an instance's disks that are faulty on a node.

  @param cfg: cluster configuration, used to set the disk IDs
  @param rpc_runner: RPC runner to query the mirror status with
  @param instance: the instance whose disks are checked
  @param node_name: the node on which to check the disks
  @param prereq: passed to C{Raise}; whether RPC failures are prereq errors
  @return: list of indices of disks whose local disk status is faulty

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # post-init hooks run only on the master node
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # no nodes: the cluster is going away
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      # best-effort: log but do not abort the destruction
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
1705 def _VerifyCertificate(filename):
1706 """Verifies a certificate for L{LUClusterVerifyConfig}.
1708 @type filename: string
1709 @param filename: Path to PEM file
1713 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1714 utils.ReadFile(filename))
1715 except Exception, err: # pylint: disable=W0703
1716 return (LUClusterVerifyConfig.ETYPE_ERROR,
1717 "Failed to load X509 certificate %s: %s" % (filename, err))
1720 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1721 constants.SSL_CERT_EXPIRATION_ERROR)
1724 fnamemsg = "While verifying %s: %s" % (filename, msg)
1729 return (None, fnamemsg)
1730 elif errcode == utils.CERT_WARNING:
1731 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1732 elif errcode == utils.CERT_ERROR:
1733 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1735 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      # verify only the requested group, no global config check
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # only the config-verify opcode lacks skip_checks
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1873 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1874 """Verifies the cluster config.
1879 def _VerifyHVP(self, hvp_data):
1880 """Verifies locally the syntax of the hypervisor parameters.
1883 for item, hv_name, hv_params in hvp_data:
1884 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1887 hv_class = hypervisor.GetHypervisor(hv_name)
1888 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1889 hv_class.CheckParameterSyntax(hv_params)
1890 except errors.GenericError, err:
1891 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1893 def ExpandNames(self):
1894 # Information can be safely retrieved as the BGL is acquired in exclusive
1896 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1897 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1898 self.all_node_info = self.cfg.GetAllNodesInfo()
1899 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1900 self.needed_locks = {}
1902 def Exec(self, feedback_fn):
1903 """Verify integrity of cluster, performing various test on nodes.
1907 self._feedback_fn = feedback_fn
1909 feedback_fn("* Verifying cluster config")
1911 for msg in self.cfg.VerifyConfig():
1912 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1914 feedback_fn("* Verifying cluster certificate files")
1916 for cert_filename in constants.ALL_CERT_FILES:
1917 (errcode, msg) = _VerifyCertificate(cert_filename)
1918 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1920 feedback_fn("* Verifying hypervisor parameters")
1922 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1923 self.all_inst_info.values()))
1925 feedback_fn("* Verifying all nodes belong to an existing group")
1927 # We do this verification here because, should this bogus circumstance
1928 # occur, it would never be caught by VerifyGroup, which only acts on
1929 # nodes/instances reachable from existing node groups.
1931 dangling_nodes = set(node.name for node in self.all_node_info.values()
1932 if node.group not in self.all_group_info)
1934 dangling_instances = {}
1935 no_node_instances = []
1937 for inst in self.all_inst_info.values():
1938 if inst.primary_node in dangling_nodes:
1939 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1940 elif inst.primary_node not in self.all_node_info:
1941 no_node_instances.append(inst.name)
1946 utils.CommaJoin(dangling_instances.get(node.name,
1948 for node in dangling_nodes]
1950 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1952 "the following nodes (and their instances) belong to a non"
1953 " existing group: %s", utils.CommaJoin(pretty_dangling))
1955 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1957 "the following instances have a non-existing primary-node:"
1958 " %s", utils.CommaJoin(no_node_instances))
1963 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1964 """Verifies the status of a node group.
1967 HPATH = "cluster-verify"
1968 HTYPE = constants.HTYPE_CLUSTER
1971 _HOOKS_INDENT_RE = re.compile("^", re.M)
1973 class NodeImage(object):
1974 """A class representing the logical and physical status of a node.
1977 @ivar name: the node name to which this object refers
1978 @ivar volumes: a structure as returned from
1979 L{ganeti.backend.GetVolumeList} (runtime)
1980 @ivar instances: a list of running instances (runtime)
1981 @ivar pinst: list of configured primary instances (config)
1982 @ivar sinst: list of configured secondary instances (config)
1983 @ivar sbp: dictionary of {primary-node: list of instances} for all
1984 instances for which this node is secondary (config)
1985 @ivar mfree: free memory, as reported by hypervisor (runtime)
1986 @ivar dfree: free disk, as reported by the node (runtime)
1987 @ivar offline: the offline status (config)
1988 @type rpc_fail: boolean
1989 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1990 not whether the individual keys were correct) (runtime)
1991 @type lvm_fail: boolean
1992 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1993 @type hyp_fail: boolean
1994 @ivar hyp_fail: whether the RPC call didn't return the instance list
1995 @type ghost: boolean
1996 @ivar ghost: whether this is a known node or not (config)
1997 @type os_fail: boolean
1998 @ivar os_fail: whether the RPC call didn't return valid OS data
2000 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2001 @type vm_capable: boolean
2002 @ivar vm_capable: whether the node can host instances
2005 def __init__(self, offline=False, name=None, vm_capable=True):
2014 self.offline = offline
2015 self.vm_capable = vm_capable
2016 self.rpc_fail = False
2017 self.lvm_fail = False
2018 self.hyp_fail = False
2020 self.os_fail = False
2023 def ExpandNames(self):
2024 # This raises errors.OpPrereqError on its own:
2025 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2027 # Get instances in node group; this is unsafe and needs verification later
2028 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2030 self.needed_locks = {
2031 locking.LEVEL_INSTANCE: inst_names,
2032 locking.LEVEL_NODEGROUP: [self.group_uuid],
2033 locking.LEVEL_NODE: [],
2036 self.share_locks = _ShareAll()
2038 def DeclareLocks(self, level):
2039 if level == locking.LEVEL_NODE:
2040 # Get members of node group; this is unsafe and needs verification later
2041 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2043 all_inst_info = self.cfg.GetAllInstancesInfo()
2045 # In Exec(), we warn about mirrored instances that have primary and
2046 # secondary living in separate node groups. To fully verify that
2047 # volumes for these instances are healthy, we will need to do an
2048 # extra call to their secondaries. We ensure here those nodes will
2050 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2051 # Important: access only the instances whose lock is owned
2052 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2053 nodes.update(all_inst_info[inst].secondary_nodes)
2055 self.needed_locks[locking.LEVEL_NODE] = nodes
2057 def CheckPrereq(self):
2058 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2059 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2061 group_nodes = set(self.group_info.members)
2062 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2065 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2067 unlocked_instances = \
2068 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2071 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2072 utils.CommaJoin(unlocked_nodes))
2074 if unlocked_instances:
2075 raise errors.OpPrereqError("Missing lock for instances: %s" %
2076 utils.CommaJoin(unlocked_instances))
2078 self.all_node_info = self.cfg.GetAllNodesInfo()
2079 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2081 self.my_node_names = utils.NiceSort(group_nodes)
2082 self.my_inst_names = utils.NiceSort(group_instances)
2084 self.my_node_info = dict((name, self.all_node_info[name])
2085 for name in self.my_node_names)
2087 self.my_inst_info = dict((name, self.all_inst_info[name])
2088 for name in self.my_inst_names)
2090 # We detect here the nodes that will need the extra RPC calls for verifying
2091 # split LV volumes; they should be locked.
2092 extra_lv_nodes = set()
2094 for inst in self.my_inst_info.values():
2095 if inst.disk_template in constants.DTS_INT_MIRROR:
2096 group = self.my_node_info[inst.primary_node].group
2097 for nname in inst.secondary_nodes:
2098 if self.all_node_info[nname].group != group:
2099 extra_lv_nodes.add(nname)
2101 unlocked_lv_nodes = \
2102 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2104 if unlocked_lv_nodes:
2105 raise errors.OpPrereqError("these nodes could be locked: %s" %
2106 utils.CommaJoin(unlocked_lv_nodes))
2107 self.extra_lv_nodes = list(extra_lv_nodes)
2109 def _VerifyNode(self, ninfo, nresult):
2110 """Perform some basic validation on data returned from a node.
2112 - check the result data structure is well formed and has all the
2114 - check ganeti version
2116 @type ninfo: L{objects.Node}
2117 @param ninfo: the node to check
2118 @param nresult: the results from the node
2120 @return: whether overall this call was successful (and we can expect
2121 reasonable values in the respose)
2125 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2127 # main result, nresult should be a non-empty dict
2128 test = not nresult or not isinstance(nresult, dict)
2129 _ErrorIf(test, constants.CV_ENODERPC, node,
2130 "unable to verify node: no data returned")
2134 # compares ganeti version
2135 local_version = constants.PROTOCOL_VERSION
2136 remote_version = nresult.get("version", None)
2137 test = not (remote_version and
2138 isinstance(remote_version, (list, tuple)) and
2139 len(remote_version) == 2)
2140 _ErrorIf(test, constants.CV_ENODERPC, node,
2141 "connection to node returned invalid data")
2145 test = local_version != remote_version[0]
2146 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2147 "incompatible protocol versions: master %s,"
2148 " node %s", local_version, remote_version[0])
2152 # node seems compatible, we can actually try to look into its results
2154 # full package version
2155 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2156 constants.CV_ENODEVERSION, node,
2157 "software version mismatch: master %s, node %s",
2158 constants.RELEASE_VERSION, remote_version[1],
2159 code=self.ETYPE_WARNING)
2161 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2162 if ninfo.vm_capable and isinstance(hyp_result, dict):
2163 for hv_name, hv_result in hyp_result.iteritems():
2164 test = hv_result is not None
2165 _ErrorIf(test, constants.CV_ENODEHV, node,
2166 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2168 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2169 if ninfo.vm_capable and isinstance(hvp_result, list):
2170 for item, hv_name, hv_result in hvp_result:
2171 _ErrorIf(True, constants.CV_ENODEHV, node,
2172 "hypervisor %s parameter verify failure (source %s): %s",
2173 hv_name, item, hv_result)
2175 test = nresult.get(constants.NV_NODESETUP,
2176 ["Missing NODESETUP results"])
2177 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
# Checks that the node's reported clock is within NODE_MAX_CLOCK_SKEW of the
# master's [RPC-start, RPC-end] window; reports CV_ENODETIME otherwise.
# NOTE(review): embedded numbering jumps (2196->2198, 2206->2210), so lines
# such as the `try:`, `node = ninfo.name`, the no-skew branch setting
# ntime_diff = None, and the final format argument are missing from this
# extract — confirm against the full source before editing.
2182 def _VerifyNodeTime(self, ninfo, nresult,
2183 nvinfo_starttime, nvinfo_endtime):
2184 """Check the node time.
2186 @type ninfo: L{objects.Node}
2187 @param ninfo: the node to check
2188 @param nresult: the remote results for the node
2189 @param nvinfo_starttime: the start time of the RPC call
2190 @param nvinfo_endtime: the end time of the RPC call
2194 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2196 ntime = nresult.get(constants.NV_TIME, None)
2198 ntime_merged = utils.MergeTime(ntime)
2199 except (ValueError, TypeError):
2200 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
# Skew is measured against the RPC window, not a single instant, because the
# remote call itself takes time.
2203 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2204 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2205 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2206 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2210 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2211 "Node time diverges by at least %s from master node time",
# Validates the node's LVM state: VG presence/size and PV name sanity.
# NOTE(review): gaps in embedded numbering (2220->2227, 2230->2232) — the
# docstring closer, `node = ninfo.name`, `test = vglist is None` and early
# returns appear to be missing from this extract.
2214 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2215 """Check the node LVM results.
2217 @type ninfo: L{objects.Node}
2218 @param ninfo: the node to check
2219 @param nresult: the remote results for the node
2220 @param vg_name: the configured VG name
2227 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 # checks vg existence and size > 20G
2230 vglist = nresult.get(constants.NV_VGLIST, None)
2232 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
# CheckVolumeGroupSize returns an error string (truthy) on failure, None on
# success, so vgstatus doubles as both the test and the message.
2234 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2235 constants.MIN_VG_SIZE)
2236 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2239 pvlist = nresult.get(constants.NV_PVLIST, None)
2240 test = pvlist is None
2241 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2243 # check that ':' is not present in PV names, since it's a
2244 # special character for lvcreate (denotes the range of PEs to
2246 for _, pvname, owner_vg in pvlist:
2247 test = ":" in pvname
2248 _ErrorIf(test, constants.CV_ENODELVM, node,
2249 "Invalid character ':' in PV '%s' of VG '%s'",
# Reports bridges that were expected on the node but missing, per the
# NV_BRIDGES RPC result (a list of missing bridge names).
# NOTE(review): numbering gaps (2258->2265, 2270->2272) indicate omitted
# lines (docstring closer, `node = ninfo.name`, early return on bad data).
2252 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2253 """Check the node bridges.
2255 @type ninfo: L{objects.Node}
2256 @param ninfo: the node to check
2257 @param nresult: the remote results for the node
2258 @param bridges: the expected list of bridges
2265 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2267 missing = nresult.get(constants.NV_BRIDGES, None)
2268 test = not isinstance(missing, list)
2269 _ErrorIf(test, constants.CV_ENODENET, node,
2270 "did not return valid bridge information")
2272 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2273 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
# Reports user scripts that the node found missing or non-executable
# (NV_USERSCRIPTS carries the list of broken script paths).
# NOTE(review): numbering gaps (2280->2285, 2287->2289) — `node = ninfo.name`
# and intermediate lines are missing from this extract.
2275 def _VerifyNodeUserScripts(self, ninfo, nresult):
2276 """Check the results of user scripts presence and executability on the node
2278 @type ninfo: L{objects.Node}
2279 @param ninfo: the node to check
2280 @param nresult: the remote results for the node
2285 test = not constants.NV_USERSCRIPTS in nresult
2286 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2287 "did not return user scripts information")
2289 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2291 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2292 "user scripts not present or not executable: %s" %
2293 utils.CommaJoin(sorted(broken_scripts)))
# Checks the three connectivity results a node returns: SSH reachability of
# peer nodes (NV_NODELIST), TCP reachability (NV_NODENETTEST), and master-IP
# reachability (NV_MASTERIP). Each failing peer produces its own error.
# NOTE(review): numbering gaps (2300->2304, 2320->2322) — `node = ninfo.name`
# and the `for anode in nlist:` loop header are missing from this extract.
2295 def _VerifyNodeNetwork(self, ninfo, nresult):
2296 """Check the node network connectivity results.
2298 @type ninfo: L{objects.Node}
2299 @param ninfo: the node to check
2300 @param nresult: the remote results for the node
2304 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2306 test = constants.NV_NODELIST not in nresult
2307 _ErrorIf(test, constants.CV_ENODESSH, node,
2308 "node hasn't returned node ssh connectivity data")
# A non-empty NV_NODELIST dict maps peer name -> failure message.
2310 if nresult[constants.NV_NODELIST]:
2311 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2312 _ErrorIf(True, constants.CV_ENODESSH, node,
2313 "ssh communication with node '%s': %s", a_node, a_msg)
2315 test = constants.NV_NODENETTEST not in nresult
2316 _ErrorIf(test, constants.CV_ENODENET, node,
2317 "node hasn't returned node tcp connectivity data")
2319 if nresult[constants.NV_NODENETTEST]:
2320 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2322 _ErrorIf(True, constants.CV_ENODENET, node,
2323 "tcp communication with node '%s': %s",
2324 anode, nresult[constants.NV_NODENETTEST][anode])
2326 test = constants.NV_MASTERIP not in nresult
2327 _ErrorIf(test, constants.CV_ENODENET, node,
2328 "node hasn't returned node master IP reachability data")
# The master node itself failing to reach the master IP usually means the IP
# is not configured; other nodes just can't reach it.
2330 if not nresult[constants.NV_MASTERIP]:
2331 if node == self.master_node:
2332 msg = "the master node cannot reach the master IP (not configured?)"
2334 msg = "cannot reach the master IP"
2335 _ErrorIf(True, constants.CV_ENODENET, node, msg)
# Verifies a single instance: policy compliance, presence of its LVs on each
# involved node, running state on the primary, and per-disk status.
# NOTE(review): the signature's trailing parameter line (diskstatus) and
# several statements are missing from this extract (numbering gaps such as
# 2337->2339, 2369->2372) — confirm against the full source.
2337 def _VerifyInstance(self, instance, instanceconfig, node_image,
2339 """Verify an instance.
2341 This function checks to see if the required block devices are
2342 available on the instance's node.
2345 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2346 node_current = instanceconfig.primary_node
2348 node_vol_should = {}
2349 instanceconfig.MapLVsByNode(node_vol_should)
# Instance-policy violations are reported per instance against the group's
# effective ipolicy.
2351 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2352 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2353 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2355 for node in node_vol_should:
2356 n_img = node_image[node]
2357 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2358 # ignore missing volumes on offline or broken nodes
2360 for volume in node_vol_should[node]:
2361 test = volume not in n_img.volumes
2362 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2363 "volume %s missing on node %s", volume, node)
2365 if instanceconfig.admin_state == constants.ADMINST_UP:
2366 pri_img = node_image[node_current]
2367 test = instance not in pri_img.instances and not pri_img.offline
2368 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2369 "instance not running on its primary node %s",
# Flatten the per-node disk status dict into (node, success, status, idx)
# tuples for uniform per-disk reporting below.
2372 diskdata = [(nname, success, status, idx)
2373 for (nname, disks) in diskstatus.items()
2374 for idx, (success, status) in enumerate(disks)]
2376 for nname, success, bdev_status, idx in diskdata:
2377 # the 'ghost node' construction in Exec() ensures that we have a
2379 snode = node_image[nname]
2380 bad_snode = snode.ghost or snode.offline
2381 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2382 not success and not bad_snode,
2383 constants.CV_EINSTANCEFAULTYDISK, instance,
2384 "couldn't retrieve status for disk/%s on %s: %s",
2385 idx, nname, bdev_status)
2386 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2387 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2388 constants.CV_EINSTANCEFAULTYDISK, instance,
2389 "disk/%s on %s is faulty", idx, nname)
# Reports volumes present on healthy nodes that are neither expected by any
# instance (node_vol_should) nor matched by the reserved-names FieldSet.
2391 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2392 """Verify if there are any unknown volumes in the cluster.
2394 The .os, .swap and backup volumes are ignored. All other volumes are
2395 reported as unknown.
2397 @type reserved: L{ganeti.utils.FieldSet}
2398 @param reserved: a FieldSet of reserved volume names
2401 for node, n_img in node_image.items():
2402 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2403 # skip non-healthy nodes
2405 for volume in n_img.volumes:
2406 test = ((node not in node_vol_should or
2407 volume not in node_vol_should[node]) and
2408 not reserved.Matches(volume))
2409 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2410 "volume %s is unknown", volume)
# N+1 check: for every node, verify it has enough free memory to start all
# auto-balanced instances for which it is the secondary, per failing primary.
# NOTE(review): numbering gaps (2436->2438) — the `needed_mem = 0` reset per
# prinode appears to be among the lines omitted from this extract.
2412 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2413 """Verify N+1 Memory Resilience.
2415 Check that if one single node dies we can still start all the
2416 instances it was primary for.
2419 cluster_info = self.cfg.GetClusterInfo()
2420 for node, n_img in node_image.items():
2421 # This code checks that every node which is now listed as
2422 # secondary has enough memory to host all instances it is
2423 # supposed to should a single other node in the cluster fail.
2424 # FIXME: not ready for failover to an arbitrary node
2425 # FIXME: does not support file-backed instances
2426 # WARNING: we currently take into account down instances as well
2427 # as up ones, considering that even if they're down someone
2428 # might want to start them even in the event of a node failure.
2430 # we're skipping offline nodes from the N+1 warning, since
2431 # most likely we don't have good memory infromation from them;
2432 # we already list instances living on such nodes, and that's
2435 #TODO(dynmem): also consider ballooning out other instances
2436 for prinode, instances in n_img.sbp.items():
2438 for instance in instances:
2439 bep = cluster_info.FillBE(instance_cfg[instance])
2440 if bep[constants.BE_AUTO_BALANCE]:
2441 needed_mem += bep[constants.BE_MINMEM]
2442 test = n_img.mfree < needed_mem
2443 self._ErrorIf(test, constants.CV_ENODEN1, node,
2444 "not enough memory to accomodate instance failovers"
2445 " should node %s fail (%dMiB needed, %dMiB available)",
2446 prinode, needed_mem, n_img.mfree)
# Cross-node file-consistency check: builds the expected node set per file
# category (all / optional / master-candidate / vm-capable), then compares
# the checksums each node returned for NV_FILELIST.
# NOTE(review): this appears to be a classmethod (first param `cls`); the
# decorator line and several statements (e.g. the files_all entry in
# files2nodefn, `continue`s, the `unexpected` errorif condition line) are
# outside this extract — confirm before editing.
2449 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2450 (files_all, files_opt, files_mc, files_vm)):
2451 """Verifies file checksums collected from all nodes.
2453 @param errorif: Callback for reporting errors
2454 @param nodeinfo: List of L{objects.Node} objects
2455 @param master_node: Name of master node
2456 @param all_nvinfo: RPC results
2459 # Define functions determining which nodes to consider for a file
2462 (files_mc, lambda node: (node.master_candidate or
2463 node.name == master_node)),
2464 (files_vm, lambda node: node.vm_capable),
2467 # Build mapping from filename to list of nodes which should have the file
2469 for (files, fn) in files2nodefn:
2471 filenodes = nodeinfo
2473 filenodes = filter(fn, nodeinfo)
2474 nodefiles.update((filename,
2475 frozenset(map(operator.attrgetter("name"), filenodes)))
2476 for filename in files)
2478 assert set(nodefiles) == (files_all | files_mc | files_vm)
2480 fileinfo = dict((filename, {}) for filename in nodefiles)
2481 ignore_nodes = set()
2483 for node in nodeinfo:
2485 ignore_nodes.add(node.name)
2488 nresult = all_nvinfo[node.name]
2490 if nresult.fail_msg or not nresult.payload:
2493 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2495 test = not (node_files and isinstance(node_files, dict))
2496 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2497 "Node did not return file checksum data")
2499 ignore_nodes.add(node.name)
2502 # Build per-checksum mapping from filename to nodes having it
2503 for (filename, checksum) in node_files.items():
2504 assert filename in nodefiles
2505 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2507 for (filename, checksums) in fileinfo.items():
2508 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2510 # Nodes having the file
2511 with_file = frozenset(node_name
2512 for nodes in fileinfo[filename].values()
2513 for node_name in nodes) - ignore_nodes
2515 expected_nodes = nodefiles[filename] - ignore_nodes
2517 # Nodes missing file
2518 missing_file = expected_nodes - with_file
# Optional files must exist on all of their expected nodes or none of them.
2520 if filename in files_opt:
2522 errorif(missing_file and missing_file != expected_nodes,
2523 constants.CV_ECLUSTERFILECHECK, None,
2524 "File %s is optional, but it must exist on all or no"
2525 " nodes (not found on %s)",
2526 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2528 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2529 "File %s is missing from node(s) %s", filename,
2530 utils.CommaJoin(utils.NiceSort(missing_file)))
2532 # Warn if a node has a file it shouldn't
2533 unexpected = with_file - expected_nodes
2535 constants.CV_ECLUSTERFILECHECK, None,
2536 "File %s should not exist on node(s) %s",
2537 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2539 # See if there are multiple versions of the file
2540 test = len(checksums) > 1
2542 variants = ["variant %s on %s" %
2543 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2544 for (idx, (checksum, nodes)) in
2545 enumerate(sorted(checksums.items()))]
2549 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2550 "File %s found with %s different checksums (%s)",
2551 filename, len(checksums), "; ".join(variants))
# Verifies the node's DRBD state: the usermode helper matches the configured
# one, and the minors in use on the node agree with the cluster's DRBD map.
# NOTE(review): signature is truncated (drbd_map parameter line missing) and
# numbering gaps (2573->2575, 2583->2585) hide lines such as the early
# return, `node_drbd = {}` and the `else:` of the ghost-instance branch.
2553 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2555 """Verifies and the node DRBD status.
2557 @type ninfo: L{objects.Node}
2558 @param ninfo: the node to check
2559 @param nresult: the remote results for the node
2560 @param instanceinfo: the dict of instances
2561 @param drbd_helper: the configured DRBD usermode helper
2562 @param drbd_map: the DRBD map as returned by
2563 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2567 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2570 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2571 test = (helper_result == None)
2572 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2573 "no drbd usermode helper returned")
2575 status, payload = helper_result
2577 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2578 "drbd usermode helper check unsuccessful: %s", payload)
2579 test = status and (payload != drbd_helper)
2580 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2581 "wrong drbd usermode helper: %s", payload)
2583 # compute the DRBD minors
2585 for minor, instance in drbd_map[node].items():
2586 test = instance not in instanceinfo
2587 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2588 "ghost instance '%s' in temporary DRBD map", instance)
2589 # ghost instance should not be running, but otherwise we
2590 # don't give double warnings (both ghost instance and
2591 # unallocated minor in use)
2593 node_drbd[minor] = (instance, False)
2595 instance = instanceinfo[instance]
2596 node_drbd[minor] = (instance.name,
2597 instance.admin_state == constants.ADMINST_UP)
2599 # and now check them
2600 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2601 test = not isinstance(used_minors, (tuple, list))
2602 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2603 "cannot parse drbd status file: %s", str(used_minors))
2605 # we cannot check drbd status
# Cross-check in both directions: expected minors must be active, and active
# minors must be allocated in the map.
2608 for minor, (iname, must_exist) in node_drbd.items():
2609 test = minor not in used_minors and must_exist
2610 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2611 "drbd minor %d of instance %s is not active", minor, iname)
2612 for minor in used_minors:
2613 test = minor not in node_drbd
2614 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2615 "unallocated drbd minor %d is in use", minor)
# Parses the NV_OSLIST RPC payload (7-field entries) into nimg.oslist, a
# dict mapping OS name -> list of (path, status, diagnose, variants,
# parameters, api_versions) tuples.
# NOTE(review): numbering gaps (2635->2644, 2647->2650) — the os_dict
# initialization, nimg.os_fail handling and the `os_dict[name] = []` branch
# are outside this extract.
2617 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2618 """Builds the node OS structures.
2620 @type ninfo: L{objects.Node}
2621 @param ninfo: the node to check
2622 @param nresult: the remote results for the node
2623 @param nimg: the node image object
2627 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2629 remote_os = nresult.get(constants.NV_OSLIST, None)
2630 test = (not isinstance(remote_os, list) or
2631 not compat.all(isinstance(v, list) and len(v) == 7
2632 for v in remote_os))
2634 _ErrorIf(test, constants.CV_ENODEOS, node,
2635 "node hasn't returned valid OS data")
2644 for (name, os_path, status, diagnose,
2645 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2647 if name not in os_dict:
2650 # parameters is a list of lists instead of list of tuples due to
2651 # JSON lacking a real tuple type, fix it:
2652 parameters = [tuple(v) for v in parameters]
2653 os_dict[name].append((os_path, status, diagnose,
2654 set(variants), set(parameters), set(api_ver)))
2656 nimg.oslist = os_dict
# Compares this node's OS list against a reference node's: per-OS validity,
# duplicate entries, extra OSes, and API/variant/parameter differences.
# NOTE(review): numbering gaps (2684->2688, 2689->2691) — the continue after
# "Extra OS" and the invalid-base-OS skip are outside this extract.
2658 def _VerifyNodeOS(self, ninfo, nimg, base):
2659 """Verifies the node OS list.
2661 @type ninfo: L{objects.Node}
2662 @param ninfo: the node to check
2663 @param nimg: the node image object
2664 @param base: the 'template' node we match against (e.g. from the master)
2668 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2670 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2672 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2673 for os_name, os_data in nimg.oslist.items():
2674 assert os_data, "Empty OS status for OS %s?!" % os_name
2675 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2676 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2677 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2678 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2679 "OS '%s' has multiple entries (first one shadows the rest): %s",
2680 os_name, utils.CommaJoin([v[0] for v in os_data]))
2681 # comparisons with the 'base' image
2682 test = os_name not in base.oslist
2683 _ErrorIf(test, constants.CV_ENODEOS, node,
2684 "Extra OS %s not present on reference node (%s)",
2688 assert base.oslist[os_name], "Base node has empty OS status?"
2689 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2691 # base OS is invalid, skipping
2693 for kind, a, b in [("API version", f_api, b_api),
2694 ("variants list", f_var, b_var),
2695 ("parameters", beautify_params(f_param),
2696 beautify_params(b_param))]:
2697 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2698 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2699 kind, os_name, base.name,
2700 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2702 # check any missing OSes
2703 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2704 _ErrorIf(missing, constants.CV_ENODEOS, node,
2705 "OSes present on reference node %s but missing on this node: %s",
2706 base.name, utils.CommaJoin(missing))
# Checks out-of-band helper path problems on master/master-candidate nodes;
# each NV_OOB_PATHS entry is a failure message (truthy -> error).
2708 def _VerifyOob(self, ninfo, nresult):
2709 """Verifies out of band functionality of a node.
2711 @type ninfo: L{objects.Node}
2712 @param ninfo: the node to check
2713 @param nresult: the remote results for the node
2717 # We just have to verify the paths on master and/or master candidates
2718 # as the oob helper is invoked on the master
2719 if ((ninfo.master_candidate or ninfo.master_capable) and
2720 constants.NV_OOB_PATHS in nresult):
2721 for path_result in nresult[constants.NV_OOB_PATHS]:
2722 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
# Populates nimg.volumes from NV_LVLIST; a string payload signals an LVM
# error, a non-dict payload a failed RPC. lvm_fail is set pessimistically
# first and only cleared on success.
# NOTE(review): numbering gaps (2741->2744, 2749->2751) — the vg_name guard
# and the `else:` before the success branch are outside this extract.
2724 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2725 """Verifies and updates the node volume data.
2727 This function will update a L{NodeImage}'s internal structures
2728 with data from the remote call.
2730 @type ninfo: L{objects.Node}
2731 @param ninfo: the node to check
2732 @param nresult: the remote results for the node
2733 @param nimg: the node image object
2734 @param vg_name: the configured VG name
2738 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2740 nimg.lvm_fail = True
2741 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2744 elif isinstance(lvdata, basestring):
2745 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2746 utils.SafeEncode(lvdata))
2747 elif not isinstance(lvdata, dict):
2748 _ErrorIf(True, constants.CV_ENODELVM, node,
2749 "rpc call to node failed (lvlist)")
2751 nimg.volumes = lvdata
2752 nimg.lvm_fail = False
# Stores the node's instance list (NV_INSTANCELIST) in nimg.instances, or
# marks the hypervisor RPC as failed (nimg.hyp_fail) if the payload is not a
# list.
# NOTE(review): numbering gap (2771->2773) — the `if test:`/`else:` framing
# around hyp_fail/instances is outside this extract.
2754 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2755 """Verifies and updates the node instance list.
2757 If the listing was successful, then updates this node's instance
2758 list. Otherwise, it marks the RPC call as failed for the instance
2761 @type ninfo: L{objects.Node}
2762 @param ninfo: the node to check
2763 @param nresult: the remote results for the node
2764 @param nimg: the node image object
2767 idata = nresult.get(constants.NV_INSTANCELIST, None)
2768 test = not isinstance(idata, list)
2769 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2770 "rpc call to node failed (instancelist): %s",
2771 utils.SafeEncode(str(idata)))
2773 nimg.hyp_fail = True
2775 nimg.instances = idata
# Fills nimg.mfree (free memory from NV_HVINFO) and nimg.dfree (free space
# of the configured VG from NV_VGLIST), reporting RPC/LVM errors on the way.
# NOTE(review): numbering gaps (2794->2797, 2808->2811) hide the `try:`
# lines wrapping the int() conversions in this extract.
2777 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2778 """Verifies and computes a node information map
2780 @type ninfo: L{objects.Node}
2781 @param ninfo: the node to check
2782 @param nresult: the remote results for the node
2783 @param nimg: the node image object
2784 @param vg_name: the configured VG name
2788 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2790 # try to read free memory (from the hypervisor)
2791 hv_info = nresult.get(constants.NV_HVINFO, None)
2792 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2793 _ErrorIf(test, constants.CV_ENODEHV, node,
2794 "rpc call to node failed (hvinfo)")
2797 nimg.mfree = int(hv_info["memory_free"])
2798 except (ValueError, TypeError):
2799 _ErrorIf(True, constants.CV_ENODERPC, node,
2800 "node returned invalid nodeinfo, check hypervisor")
2802 # FIXME: devise a free space model for file based instances as well
2803 if vg_name is not None:
2804 test = (constants.NV_VGLIST not in nresult or
2805 vg_name not in nresult[constants.NV_VGLIST])
2806 _ErrorIf(test, constants.CV_ENODELVM, node,
2807 "node didn't return data for the volume group '%s'"
2808 " - it is either missing or broken", vg_name)
2811 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2812 except (ValueError, TypeError):
2813 _ErrorIf(True, constants.CV_ENODERPC, node,
2814 "node returned invalid LVM info, check LVM status")
# Collects per-disk mirror status for all instances on the given nodes via
# one multi-node blockdev_getmirrorstatus RPC, producing
# instdisk[instance][node] = [(success, payload), ...]. Diskless instances
# get empty entries; offline/failed nodes get (False, reason) placeholders.
# NOTE(review): numbering gaps (2845->2848, 2873->2876, 2888->2891) —
# several control-flow lines (offline check, msg = nres.fail_msg, data
# append on valid entries) are outside this extract.
2816 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2817 """Gets per-disk status information for all instances.
2819 @type nodelist: list of strings
2820 @param nodelist: Node names
2821 @type node_image: dict of (name, L{objects.Node})
2822 @param node_image: Node objects
2823 @type instanceinfo: dict of (name, L{objects.Instance})
2824 @param instanceinfo: Instance objects
2825 @rtype: {instance: {node: [(succes, payload)]}}
2826 @return: a dictionary of per-instance dictionaries with nodes as
2827 keys and disk information as values; the disk information is a
2828 list of tuples (success, payload)
2831 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2834 node_disks_devonly = {}
2835 diskless_instances = set()
2836 diskless = constants.DT_DISKLESS
2838 for nname in nodelist:
2839 node_instances = list(itertools.chain(node_image[nname].pinst,
2840 node_image[nname].sinst))
2841 diskless_instances.update(inst for inst in node_instances
2842 if instanceinfo[inst].disk_template == diskless)
2843 disks = [(inst, disk)
2844 for inst in node_instances
2845 for disk in instanceinfo[inst].disks]
2848 # No need to collect data
2851 node_disks[nname] = disks
2853 # Creating copies as SetDiskID below will modify the objects and that can
2854 # lead to incorrect data returned from nodes
2855 devonly = [dev.Copy() for (_, dev) in disks]
2858 self.cfg.SetDiskID(dev, nname)
2860 node_disks_devonly[nname] = devonly
2862 assert len(node_disks) == len(node_disks_devonly)
2864 # Collect data from all nodes with disks
2865 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2868 assert len(result) == len(node_disks)
2872 for (nname, nres) in result.items():
2873 disks = node_disks[nname]
2876 # No data from this node
2877 data = len(disks) * [(False, "node offline")]
2880 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2881 "while getting disk information: %s", msg)
2883 # No data from this node
2884 data = len(disks) * [(False, msg)]
2887 for idx, i in enumerate(nres.payload):
2888 if isinstance(i, (tuple, list)) and len(i) == 2:
2891 logging.warning("Invalid result from node %s, entry %d: %s",
2893 data.append((False, "Invalid result from the remote node"))
2895 for ((inst, _), status) in zip(disks, data):
2896 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2898 # Add empty entries for diskless instances.
2899 for inst in diskless_instances:
2900 assert inst not in instdisk
# Sanity check the shape of the result structure before returning it.
2903 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2904 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2905 compat.all(isinstance(s, (tuple, list)) and
2906 len(s) == 2 for s in statuses)
2907 for inst, nnames in instdisk.items()
2908 for nname, statuses in nnames.items())
2909 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
# Builds one endless (itertools.cycle) iterator of candidate SSH-check host
# names per foreign node group (nodes outside group_uuid), grouped and
# sorted by group for deterministic selection.
# NOTE(review): presumably a @staticmethod (no self/cls) — the decorator and
# part of the filter condition (numbering gap 2919->2921) are outside this
# extract.
2914 def _SshNodeSelector(group_uuid, all_nodes):
2915 """Create endless iterators for all potential SSH check hosts.
2918 nodes = [node for node in all_nodes
2919 if (node.group != group_uuid and
2921 keyfunc = operator.attrgetter("group")
2923 return map(itertools.cycle,
2924 [sorted(map(operator.attrgetter("name"), names))
2925 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
# Maps each online node of the group to a sorted list of remote targets: one
# node drawn (round-robin via iterator.next()) from every other group.
2929 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2930 """Choose which nodes should talk to which other nodes.
2932 We will make nodes contact all nodes in their group, and one node from
2935 @warning: This algorithm has a known issue if one node group is much
2936 smaller than others (e.g. just one node). In such a case all other
2937 nodes will talk to the single node.
2940 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2941 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2943 return (online_nodes,
2944 dict((name, sorted([i.next() for i in sel]))
2945 for name in online_nodes))
# Hook environment for cluster-verify: cluster tags plus per-node tag
# variables (NODE_TAGS_<name>) for every node in this group.
# NOTE(review): numbering gaps (2951->2955, 2959->2963) — the env dict
# opening and the return statement are outside this extract.
2947 def BuildHooksEnv(self):
2950 Cluster-Verify hooks just ran in the post phase and their failure makes
2951 the output be logged in the verify output and the verification to fail.
2955 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2958 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2959 for node in self.my_node_info.values())
# Hooks run on no pre-nodes and on all nodes of this group post-phase.
2963 def BuildHooksNodes(self):
2964 """Build hooks nodes.
2967 return ([], self.my_node_names)
2969 def Exec(self, feedback_fn):
2970 """Verify integrity of the node group, performing various test on nodes.
2973 # This method has too many local variables. pylint: disable=R0914
2974 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2976 if not self.my_node_names:
2978 feedback_fn("* Empty node group, skipping verification")
2982 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2983 verbose = self.op.verbose
2984 self._feedback_fn = feedback_fn
2986 vg_name = self.cfg.GetVGName()
2987 drbd_helper = self.cfg.GetDRBDHelper()
2988 cluster = self.cfg.GetClusterInfo()
2989 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2990 hypervisors = cluster.enabled_hypervisors
2991 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2993 i_non_redundant = [] # Non redundant instances
2994 i_non_a_balanced = [] # Non auto-balanced instances
2995 i_offline = 0 # Count of offline instances
2996 n_offline = 0 # Count of offline nodes
2997 n_drained = 0 # Count of nodes being drained
2998 node_vol_should = {}
3000 # FIXME: verify OS list
3003 filemap = _ComputeAncillaryFiles(cluster, False)
3005 # do local checksums
3006 master_node = self.master_node = self.cfg.GetMasterNode()
3007 master_ip = self.cfg.GetMasterIP()
3009 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3012 if self.cfg.GetUseExternalMipScript():
3013 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3015 node_verify_param = {
3016 constants.NV_FILELIST:
3017 utils.UniqueSequence(filename
3018 for files in filemap
3019 for filename in files),
3020 constants.NV_NODELIST:
3021 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3022 self.all_node_info.values()),
3023 constants.NV_HYPERVISOR: hypervisors,
3024 constants.NV_HVPARAMS:
3025 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3026 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3027 for node in node_data_list
3028 if not node.offline],
3029 constants.NV_INSTANCELIST: hypervisors,
3030 constants.NV_VERSION: None,
3031 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3032 constants.NV_NODESETUP: None,
3033 constants.NV_TIME: None,
3034 constants.NV_MASTERIP: (master_node, master_ip),
3035 constants.NV_OSLIST: None,
3036 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3037 constants.NV_USERSCRIPTS: user_scripts,
3040 if vg_name is not None:
3041 node_verify_param[constants.NV_VGLIST] = None
3042 node_verify_param[constants.NV_LVLIST] = vg_name
3043 node_verify_param[constants.NV_PVLIST] = [vg_name]
3044 node_verify_param[constants.NV_DRBDLIST] = None
3047 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3050 # FIXME: this needs to be changed per node-group, not cluster-wide
3052 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3053 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3054 bridges.add(default_nicpp[constants.NIC_LINK])
3055 for instance in self.my_inst_info.values():
3056 for nic in instance.nics:
3057 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3058 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3059 bridges.add(full_nic[constants.NIC_LINK])
3062 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3064 # Build our expected cluster state
3065 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3067 vm_capable=node.vm_capable))
3068 for node in node_data_list)
3072 for node in self.all_node_info.values():
3073 path = _SupportsOob(self.cfg, node)
3074 if path and path not in oob_paths:
3075 oob_paths.append(path)
3078 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3080 for instance in self.my_inst_names:
3081 inst_config = self.my_inst_info[instance]
3083 for nname in inst_config.all_nodes:
3084 if nname not in node_image:
3085 gnode = self.NodeImage(name=nname)
3086 gnode.ghost = (nname not in self.all_node_info)
3087 node_image[nname] = gnode
3089 inst_config.MapLVsByNode(node_vol_should)
3091 pnode = inst_config.primary_node
3092 node_image[pnode].pinst.append(instance)
3094 for snode in inst_config.secondary_nodes:
3095 nimg = node_image[snode]
3096 nimg.sinst.append(instance)
3097 if pnode not in nimg.sbp:
3098 nimg.sbp[pnode] = []
3099 nimg.sbp[pnode].append(instance)
3101 # At this point, we have the in-memory data structures complete,
3102 # except for the runtime information, which we'll gather next
3104 # Due to the way our RPC system works, exact response times cannot be
3105 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3106 # time before and after executing the request, we can at least have a time
3108 nvinfo_starttime = time.time()
3109 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3111 self.cfg.GetClusterName())
3112 nvinfo_endtime = time.time()
3114 if self.extra_lv_nodes and vg_name is not None:
3116 self.rpc.call_node_verify(self.extra_lv_nodes,
3117 {constants.NV_LVLIST: vg_name},
3118 self.cfg.GetClusterName())
3120 extra_lv_nvinfo = {}
3122 all_drbd_map = self.cfg.ComputeDRBDMap()
3124 feedback_fn("* Gathering disk information (%s nodes)" %
3125 len(self.my_node_names))
3126 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3129 feedback_fn("* Verifying configuration file consistency")
3131 # If not all nodes are being checked, we need to make sure the master node
3132 # and a non-checked vm_capable node are in the list.
3133 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3135 vf_nvinfo = all_nvinfo.copy()
3136 vf_node_info = list(self.my_node_info.values())
3137 additional_nodes = []
3138 if master_node not in self.my_node_info:
3139 additional_nodes.append(master_node)
3140 vf_node_info.append(self.all_node_info[master_node])
3141 # Add the first vm_capable node we find which is not included
3142 for node in absent_nodes:
3143 nodeinfo = self.all_node_info[node]
3144 if nodeinfo.vm_capable and not nodeinfo.offline:
3145 additional_nodes.append(node)
3146 vf_node_info.append(self.all_node_info[node])
3148 key = constants.NV_FILELIST
3149 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3150 {key: node_verify_param[key]},
3151 self.cfg.GetClusterName()))
3153 vf_nvinfo = all_nvinfo
3154 vf_node_info = self.my_node_info.values()
3156 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3158 feedback_fn("* Verifying node status")
3162 for node_i in node_data_list:
3164 nimg = node_image[node]
3168 feedback_fn("* Skipping offline node %s" % (node,))
3172 if node == master_node:
3174 elif node_i.master_candidate:
3175 ntype = "master candidate"
3176 elif node_i.drained:
3182 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3184 msg = all_nvinfo[node].fail_msg
3185 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3188 nimg.rpc_fail = True
3191 nresult = all_nvinfo[node].payload
3193 nimg.call_ok = self._VerifyNode(node_i, nresult)
3194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3195 self._VerifyNodeNetwork(node_i, nresult)
3196 self._VerifyNodeUserScripts(node_i, nresult)
3197 self._VerifyOob(node_i, nresult)
3200 self._VerifyNodeLVM(node_i, nresult, vg_name)
3201 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3204 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3205 self._UpdateNodeInstances(node_i, nresult, nimg)
3206 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3207 self._UpdateNodeOS(node_i, nresult, nimg)
3209 if not nimg.os_fail:
3210 if refos_img is None:
3212 self._VerifyNodeOS(node_i, nimg, refos_img)
3213 self._VerifyNodeBridges(node_i, nresult, bridges)
3215 # Check whether all running instancies are primary for the node. (This
3216 # can no longer be done from _VerifyInstance below, since some of the
3217 # wrong instances could be from other node groups.)
3218 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3220 for inst in non_primary_inst:
3221 # FIXME: investigate best way to handle offline insts
3222 if inst.admin_state == constants.ADMINST_OFFLINE:
3224 feedback_fn("* Skipping offline instance %s" % inst.name)
3227 test = inst in self.all_inst_info
3228 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3229 "instance should not run on node %s", node_i.name)
3230 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3231 "node is running unknown instance %s", inst)
3233 for node, result in extra_lv_nvinfo.items():
3234 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3235 node_image[node], vg_name)
3237 feedback_fn("* Verifying instance status")
3238 for instance in self.my_inst_names:
3240 feedback_fn("* Verifying instance %s" % instance)
3241 inst_config = self.my_inst_info[instance]
3242 self._VerifyInstance(instance, inst_config, node_image,
3244 inst_nodes_offline = []
3246 pnode = inst_config.primary_node
3247 pnode_img = node_image[pnode]
3248 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3249 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3250 " primary node failed", instance)
3252 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3254 constants.CV_EINSTANCEBADNODE, instance,
3255 "instance is marked as running and lives on offline node %s",
3256 inst_config.primary_node)
3258 # If the instance is non-redundant we cannot survive losing its primary
3259 # node, so we are not N+1 compliant. On the other hand we have no disk
3260 # templates with more than one secondary so that situation is not well
3262 # FIXME: does not support file-backed instances
3263 if not inst_config.secondary_nodes:
3264 i_non_redundant.append(instance)
3266 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3267 constants.CV_EINSTANCELAYOUT,
3268 instance, "instance has multiple secondary nodes: %s",
3269 utils.CommaJoin(inst_config.secondary_nodes),
3270 code=self.ETYPE_WARNING)
3272 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3273 pnode = inst_config.primary_node
3274 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3275 instance_groups = {}
3277 for node in instance_nodes:
3278 instance_groups.setdefault(self.all_node_info[node].group,
3282 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3283 # Sort so that we always list the primary node first.
3284 for group, nodes in sorted(instance_groups.items(),
3285 key=lambda (_, nodes): pnode in nodes,
3288 self._ErrorIf(len(instance_groups) > 1,
3289 constants.CV_EINSTANCESPLITGROUPS,
3290 instance, "instance has primary and secondary nodes in"
3291 " different groups: %s", utils.CommaJoin(pretty_list),
3292 code=self.ETYPE_WARNING)
3294 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3295 i_non_a_balanced.append(instance)
3297 for snode in inst_config.secondary_nodes:
3298 s_img = node_image[snode]
3299 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3300 snode, "instance %s, connection to secondary node failed",
3304 inst_nodes_offline.append(snode)
3306 # warn that the instance lives on offline nodes
3307 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3308 "instance has offline secondary node(s) %s",
3309 utils.CommaJoin(inst_nodes_offline))
3310 # ... or ghost/non-vm_capable nodes
3311 for node in inst_config.all_nodes:
3312 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3313 instance, "instance lives on ghost node %s", node)
3314 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3315 instance, "instance lives on non-vm_capable node %s", node)
3317 feedback_fn("* Verifying orphan volumes")
3318 reserved = utils.FieldSet(*cluster.reserved_lvs)
3320 # We will get spurious "unknown volume" warnings if any node of this group
3321 # is secondary for an instance whose primary is in another group. To avoid
3322 # them, we find these instances and add their volumes to node_vol_should.
3323 for inst in self.all_inst_info.values():
3324 for secondary in inst.secondary_nodes:
3325 if (secondary in self.my_node_info
3326 and inst.name not in self.my_inst_info):
3327 inst.MapLVsByNode(node_vol_should)
3330 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3332 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3333 feedback_fn("* Verifying N+1 Memory redundancy")
3334 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3336 feedback_fn("* Other Notes")
3338 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3339 % len(i_non_redundant))
3341 if i_non_a_balanced:
3342 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3343 % len(i_non_a_balanced))
3346 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3349 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3352 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
# NOTE(review): extraction is lossy — gaps in the embedded line numbers
# (e.g. 3358, 3368-3370, 3374-3375, 3380, 3383, 3389-3390, 3395) mean the
# visible statements are NOT contiguous; do not assume adjacency.
3356 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3357 """Analyze the post-hooks' result
3359 This method analyses the hook result, handles it, and sends some
3360 nicely-formatted feedback back to the user.
3362 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3363 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3364 @param hooks_results: the results of the multi-node hooks rpc call
3365 @param feedback_fn: function used send feedback back to the caller
3366 @param lu_result: previous Exec result
3367 @return: the new Exec result, based on the previous result
3371 # We only really run POST phase hooks, only for non-empty groups,
3372 # and are only interested in their results
# Empty-group guard: the body of this branch (lines 3374-3375) is missing
# from the extraction; presumably it returns early — TODO confirm.
3373 if not self.my_node_names:
3376 elif phase == constants.HOOKS_PHASE_POST:
3377 # Used to change hooks' output to proper indentation
3378 feedback_fn("* Hooks Results")
3379 assert hooks_results, "invalid result from hooks"
# Per-node loop: flag RPC failures (unless the node is offline), then
# inspect each script's result in the node's payload.
3381 for node_name in hooks_results:
3382 res = hooks_results[node_name]
# `msg` is bound on a missing line (3383); presumably res.fail_msg — verify.
3384 test = msg and not res.offline
3385 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3386 "Communication failure in hooks execution: %s", msg)
3387 if res.offline or msg:
3388 # No need to investigate payload if node is offline or gave
# (continuation and the skip statement, lines 3389-3390, are elided)
# Each payload entry is (script, hook-result-code, output); a HKR_FAIL
# code is reported as a CV_ENODEHOOKS error with re-indented output.
3391 for script, hkr, output in res.payload:
3392 test = hkr == constants.HKR_FAIL
3393 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3394 "Script %s failed, output:", script)
# _HOOKS_INDENT_RE re-indents the script output before echoing it.
3396 output = self._HOOKS_INDENT_RE.sub(" ", output)
3397 feedback_fn("%s" % output)
# Cluster-wide disk verification: fans out one OpGroupVerifyDisks job per
# node group rather than doing the work inline.
3403 class LUClusterVerifyDisks(NoHooksLU):
3404 """Verifies the cluster disks status.
# All locks are shared (_ShareAll) because this LU only reads the node-group
# list; the real verification happens in the per-group jobs it submits.
# NOTE(review): the closing brace of needed_locks (line ~3413) is missing
# from this extraction.
3409 def ExpandNames(self):
3410 self.share_locks = _ShareAll()
3411 self.needed_locks = {
3412 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3415 def Exec(self, feedback_fn):
3416 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3418 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3419 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3420 for group in group_names])
# Per-group disk verification: checks that the LVs backing running
# instances in one node group are actually online on their nodes.
# NOTE(review): extraction is lossy — several lines (e.g. 3432, 3443, 3448,
# 3451, 3456-3457, 3521-3522, 3527-3528) are missing; statements shown are
# not necessarily adjacent.
3423 class LUGroupVerifyDisks(NoHooksLU):
3424 """Verifies the status of all disks in a node group.
3429 def ExpandNames(self):
3430 # Raises errors.OpPrereqError on its own if group can't be found
3431 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3433 self.share_locks = _ShareAll()
3434 self.needed_locks = {
3435 locking.LEVEL_INSTANCE: [],
3436 locking.LEVEL_NODEGROUP: [],
3437 locking.LEVEL_NODE: [],
# Locks are acquired top-down (instance -> nodegroup -> node); optimistic
# acquisitions are re-validated in CheckPrereq below.
3440 def DeclareLocks(self, level):
3441 if level == locking.LEVEL_INSTANCE:
3442 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3444 # Lock instances optimistically, needs verification once node and group
3445 # locks have been acquired
3446 self.needed_locks[locking.LEVEL_INSTANCE] = \
3447 self.cfg.GetNodeGroupInstances(self.group_uuid)
3449 elif level == locking.LEVEL_NODEGROUP:
3450 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3452 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3453 set([self.group_uuid] +
3454 # Lock all groups used by instances optimistically; this requires
3455 # going via the node before it's locked, requiring verification
3458 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3459 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3461 elif level == locking.LEVEL_NODE:
3462 # This will only lock the nodes in the group to be verified which contain
3464 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3465 self._LockInstancesNodes()
3467 # Lock all nodes in group to be verified
3468 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3469 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3470 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
# Re-validates the optimistic locks: the locked instances must still belong
# to this group and their nodes must all be covered by our node locks.
3472 def CheckPrereq(self):
3473 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3474 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3475 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3477 assert self.group_uuid in owned_groups
3479 # Check if locked instances are still correct
3480 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3482 # Get instance information
3483 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3485 # Check if node groups for locked instances are still correct
3486 for (instance_name, inst) in self.instances.items():
3487 assert owned_nodes.issuperset(inst.all_nodes), \
3488 "Instance %s's nodes changed while we kept the lock" % instance_name
3490 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3493 assert self.group_uuid in inst_groups, \
3494 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3496 def Exec(self, feedback_fn):
3497 """Verify integrity of cluster disks.
3499 @rtype: tuple of three items
3500 @return: a tuple of (dict of node-to-node_error, list of instances
3501 which need activate-disks, dict of instance: (node, volume) for
# Build the expected (node, lv_name) -> instance map for running instances
# only, then query actual LVs from the vm-capable locked nodes.
3506 res_instances = set()
3509 nv_dict = _MapInstanceDisksToNodes([inst
3510 for inst in self.instances.values()
3511 if inst.admin_state == constants.ADMINST_UP])
3514 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3515 set(self.cfg.GetVmCapableNodeList()))
3517 node_lvs = self.rpc.call_lv_list(nodes, [])
3519 for (node, node_res) in node_lvs.items():
# Offline nodes are skipped; RPC failures are recorded per node, not fatal.
3520 if node_res.offline:
3523 msg = node_res.fail_msg
3525 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3526 res_nodes[node] = msg
# LV payload entries are (?, ?, lv_online); an offline LV belonging to a
# tracked instance means that instance needs activate-disks.
3529 for lv_name, (_, _, lv_online) in node_res.payload.items():
3530 inst = nv_dict.pop((node, lv_name), None)
3531 if not (lv_online or inst is None):
3532 res_instances.add(inst)
3534 # any leftover items in nv_dict are missing LVs, let's arrange the data
# iteritems(): this file is Python 2 code.
3536 for key, inst in nv_dict.iteritems():
3537 res_missing.setdefault(inst, []).append(list(key))
3539 return (res_nodes, list(res_instances), res_missing)
# Repairs recorded disk sizes in the cluster config by querying the actual
# block-device sizes from the primary nodes and updating mismatches.
# NOTE(review): extraction is lossy — `else:`/`continue` lines and dict
# closers (e.g. 3554, 3556, 3561, 3565, 3596, 3628, 3631, 3634, 3639-3640,
# 3642, 3645, 3649-3650, 3655) are missing; statements shown are not
# necessarily adjacent.
3542 class LUClusterRepairDiskSizes(NoHooksLU):
3543 """Verifies the cluster disks sizes.
# With an explicit instance list we lock just those instances (exclusive)
# and their primary nodes; otherwise everything, with node-res shared.
3548 def ExpandNames(self):
3549 if self.op.instances:
3550 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3551 self.needed_locks = {
3552 locking.LEVEL_NODE_RES: [],
3553 locking.LEVEL_INSTANCE: self.wanted_names,
3555 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3557 self.wanted_names = None
3558 self.needed_locks = {
3559 locking.LEVEL_NODE_RES: locking.ALL_SET,
3560 locking.LEVEL_INSTANCE: locking.ALL_SET,
3562 self.share_locks = {
3563 locking.LEVEL_NODE_RES: 1,
3564 locking.LEVEL_INSTANCE: 0,
3567 def DeclareLocks(self, level):
3568 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
# Only the primary nodes matter: sizes are queried there.
3569 self._LockInstancesNodes(primary_only=True, level=level)
3571 def CheckPrereq(self):
3572 """Check prerequisites.
3574 This only checks the optional instance list against the existing names.
3577 if self.wanted_names is None:
3578 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
# map(): Python 2 — returns a list here, not an iterator.
3580 self.wanted_instances = \
3581 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3583 def _EnsureChildSizes(self, disk):
3584 """Ensure children of the disk have the needed disk size.
3586 This is valid mainly for DRBD8 and fixes an issue where the
3587 children have smaller disk size.
3589 @param disk: an L{ganeti.objects.Disk} object
# Returns True when a child size was adjusted (so the caller knows to
# persist the config); recurses into the data child only.
3592 if disk.dev_type == constants.LD_DRBD8:
3593 assert disk.children, "Empty children for DRBD8?"
3594 fchild = disk.children[0]
3595 mismatch = fchild.size < disk.size
3597 self.LogInfo("Child disk has size %d, parent %d, fixing",
3598 fchild.size, disk.size)
3599 fchild.size = disk.size
3601 # and we recurse on this child only, not on the metadev
3602 return self._EnsureChildSizes(fchild) or mismatch
3606 def Exec(self, feedback_fn):
3607 """Verify the size of cluster disks.
3610 # TODO: check child disks too
3611 # TODO: check differences in size between primary/secondary nodes
# Group (instance, disk-index, disk) triples by primary node so each node
# is queried once via a single blockdev_getsize RPC.
3613 for instance in self.wanted_instances:
3614 pnode = instance.primary_node
3615 if pnode not in per_node_disks:
3616 per_node_disks[pnode] = []
3617 for idx, disk in enumerate(instance.disks):
3618 per_node_disks[pnode].append((instance, idx, disk))
3620 assert not (frozenset(per_node_disks.keys()) -
3621 self.owned_locks(locking.LEVEL_NODE_RES)), \
3622 "Not owning correct locks"
3624 assert not self.owned_locks(locking.LEVEL_NODE)
3626 for node, dskl in per_node_disks.items():
# Copies are sent over RPC so SetDiskID does not mutate the config objects.
3627 newl = [v[2].Copy() for v in dskl]
3629 self.cfg.SetDiskID(dsk, node)
3630 result = self.rpc.call_blockdev_getsize(node, newl)
# Node-level failures are warnings, not errors: the repair is best-effort.
3632 self.LogWarning("Failure in blockdev_getsize call to node"
3633 " %s, ignoring", node)
3635 if len(result.payload) != len(dskl):
3636 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3637 " result.payload=%s", node, len(dskl), result.payload)
3638 self.LogWarning("Invalid result from node %s, ignoring node results",
3641 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3643 self.LogWarning("Disk %d of instance %s did not return size"
3644 " information, ignoring", idx, instance.name)
# `long` is Python-2-only; sizes may exceed the native int range.
3646 if not isinstance(size, (int, long)):
3647 self.LogWarning("Disk %d of instance %s did not return valid"
3648 " size information, ignoring", idx, instance.name)
3651 if size != disk.size:
3652 self.LogInfo("Disk %d of instance %s has mismatched size,"
3653 " correcting: recorded %d, actual %d", idx,
3654 instance.name, disk.size, size)
3656 self.cfg.Update(instance, feedback_fn)
3657 changed.append((instance.name, idx, size))
3658 if self._EnsureChildSizes(disk):
3659 self.cfg.Update(instance, feedback_fn)
3660 changed.append((instance.name, idx, disk.size))
# Renames the cluster (name and/or master IP): validates the new name,
# tears down the master IP, rewrites config and known-hosts, restarts the
# master IP on the new address.
# NOTE(review): extraction is lossy — e.g. the docstring closers and the
# line binding `new_ip` inside Exec (~3714, presumably `new_ip = self.ip`)
# are missing; verify against the full file.
3664 class LUClusterRename(LogicalUnit):
3665 """Rename the cluster.
3668 HPATH = "cluster-rename"
3669 HTYPE = constants.HTYPE_CLUSTER
3671 def BuildHooksEnv(self):
3676 "OP_TARGET": self.cfg.GetClusterName(),
3677 "NEW_NAME": self.op.name,
3680 def BuildHooksNodes(self):
3681 """Build hooks nodes.
# Hooks run on the master (pre) and on all nodes (post).
3684 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3686 def CheckPrereq(self):
3687 """Verify that the passed name is a valid one.
3690 hostname = netutils.GetHostname(name=self.op.name,
3691 family=self.cfg.GetPrimaryIPFamily())
3693 new_name = hostname.name
3694 self.ip = new_ip = hostname.ip
3695 old_name = self.cfg.GetClusterName()
3696 old_ip = self.cfg.GetMasterIP()
# Reject a no-op rename, and refuse an IP that already answers on the
# noded port (would collide with a live host).
3697 if new_name == old_name and new_ip == old_ip:
3698 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3699 " cluster has changed",
3701 if new_ip != old_ip:
3702 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3703 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3704 " reachable on the network" %
3705 new_ip, errors.ECODE_NOTUNIQUE)
# Normalize the opcode name to the resolved FQDN for Exec/hooks.
3707 self.op.name = new_name
3709 def Exec(self, feedback_fn):
3710 """Rename the cluster.
3713 clustername = self.op.name
3716 # shutdown the master IP
3717 master_params = self.cfg.GetMasterNetworkParameters()
3718 ems = self.cfg.GetUseExternalMipScript()
3719 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
# Deactivation failure is fatal: we must not change config while the old
# master IP is still up.
3721 result.Raise("Could not disable the master role")
3724 cluster = self.cfg.GetClusterInfo()
3725 cluster.cluster_name = clustername
3726 cluster.master_ip = new_ip
3727 self.cfg.Update(cluster, feedback_fn)
3729 # update the known hosts file
3730 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3731 node_list = self.cfg.GetOnlineNodeList()
# The master already has the fresh file; distribute to the others only.
3733 node_list.remove(master_params.name)
3736 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3738 master_params.ip = new_ip
3739 result = self.rpc.call_node_activate_master_ip(master_params.name,
# Reactivation failure is only a warning: the rename itself succeeded.
3741 msg = result.fail_msg
3743 self.LogWarning("Could not re-enable the master role on"
3744 " the master, please restart manually: %s", msg)
# NOTE(review): extraction is lossy — the `try:` line (~3760) and the
# error-argument continuation lines (~3764, 3767) are missing here.
3749 def _ValidateNetmask(cfg, netmask):
3750 """Checks if a netmask is valid.
3752 @type cfg: L{config.ConfigWriter}
3753 @param cfg: The cluster configuration
3755 @param netmask: the netmask to be verified
3756 @raise errors.OpPrereqError: if the validation fails
# Resolve the IP class (IPv4/IPv6) for the cluster's primary family; an
# unknown family surfaces as ProgrammerError and is converted to a
# user-facing OpPrereqError.
3759 ip_family = cfg.GetPrimaryIPFamily()
3761 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3762 except errors.ProgrammerError:
3763 raise errors.OpPrereqError("Invalid primary ip family: %s." %
# The netmask is validated as a CIDR prefix length for that family.
3765 if not ipcls.ValidateNetmask(netmask):
3766 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
# Implements `gnt-cluster modify`: validates the requested cluster-level
# parameter changes (CheckArguments/CheckPrereq) and applies them (Exec).
# NOTE(review): extraction is lossy — dozens of interior lines are missing
# (gaps in the embedded numbering: `else:`/`try:`/`continue` lines, dict
# closers, argument continuations); statements shown are not necessarily
# adjacent. Review against the full file before acting on any comment.
3770 class LUClusterSetParams(LogicalUnit):
3771 """Change the parameters of the cluster.
3774 HPATH = "cluster-modify"
3775 HTYPE = constants.HTYPE_CLUSTER
# Cheap, local-only syntax checks; anything needing cluster state or RPC
# is deferred to CheckPrereq.
3778 def CheckArguments(self):
3782 if self.op.uid_pool:
3783 uidpool.CheckUidPool(self.op.uid_pool)
3785 if self.op.add_uids:
3786 uidpool.CheckUidPool(self.op.add_uids)
3788 if self.op.remove_uids:
3789 uidpool.CheckUidPool(self.op.remove_uids)
3791 if self.op.master_netmask is not None:
3792 _ValidateNetmask(self.cfg, self.op.master_netmask)
3794 if self.op.diskparams:
3795 for dt_params in self.op.diskparams.values():
3796 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3798 def ExpandNames(self):
3799 # FIXME: in the future maybe other cluster params won't require checking on
3800 # all nodes to be modified.
3801 self.needed_locks = {
3802 locking.LEVEL_NODE: locking.ALL_SET,
3803 locking.LEVEL_INSTANCE: locking.ALL_SET,
3804 locking.LEVEL_NODEGROUP: locking.ALL_SET,
# All locks shared: validation only reads node/instance/group state.
3806 self.share_locks = {
3807 locking.LEVEL_NODE: 1,
3808 locking.LEVEL_INSTANCE: 1,
3809 locking.LEVEL_NODEGROUP: 1,
3812 def BuildHooksEnv(self):
3817 "OP_TARGET": self.cfg.GetClusterName(),
3818 "NEW_VG_NAME": self.op.vg_name,
3821 def BuildHooksNodes(self):
3822 """Build hooks nodes.
3825 mn = self.cfg.GetMasterNode()
3828 def CheckPrereq(self):
3829 """Check prerequisites.
3831 This checks whether the given params don't conflict and
3832 if the given volume group is valid.
# Disabling LVM (empty vg_name) or the DRBD helper is refused while any
# disk of the corresponding type exists.
3835 if self.op.vg_name is not None and not self.op.vg_name:
3836 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3837 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3838 " instances exist", errors.ECODE_INVAL)
3840 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3841 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3842 raise errors.OpPrereqError("Cannot disable drbd helper while"
3843 " drbd-based instances exist",
3846 node_list = self.owned_locks(locking.LEVEL_NODE)
3848 # if vg_name not None, checks given volume group on all nodes
# Unreachable nodes are skipped with a warning; a reachable node with a
# bad/undersized VG is a hard error.
3850 vglist = self.rpc.call_vg_list(node_list)
3851 for node in node_list:
3852 msg = vglist[node].fail_msg
3854 # ignoring down node
3855 self.LogWarning("Error while gathering data on node %s"
3856 " (ignoring node): %s", node, msg)
3858 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3860 constants.MIN_VG_SIZE)
3862 raise errors.OpPrereqError("Error on node '%s': %s" %
3863 (node, vgstatus), errors.ECODE_ENVIRON)
3865 if self.op.drbd_helper:
3866 # checks given drbd helper on all nodes
# Every online node must report exactly the requested helper path.
3867 helpers = self.rpc.call_drbd_helper(node_list)
3868 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3870 self.LogInfo("Not checking drbd helper on offline node %s", node)
3872 msg = helpers[node].fail_msg
3874 raise errors.OpPrereqError("Error checking drbd helper on node"
3875 " '%s': %s" % (node, msg),
3876 errors.ECODE_ENVIRON)
3877 node_helper = helpers[node].payload
3878 if node_helper != self.op.drbd_helper:
3879 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3880 (node, node_helper), errors.ECODE_ENVIRON)
3882 self.cluster = cluster = self.cfg.GetClusterInfo()
3883 # validate params changes
# For each parameter family, build the prospective "new_*" merged dict on
# self; Exec later commits these wholesale.
3884 if self.op.beparams:
3885 objects.UpgradeBeParams(self.op.beparams)
3886 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3887 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3889 if self.op.ndparams:
3890 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3891 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3893 # TODO: we need a more general way to handle resetting
3894 # cluster-level parameters to default values
3895 if self.new_ndparams["oob_program"] == "":
3896 self.new_ndparams["oob_program"] = \
3897 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3899 if self.op.hv_state:
3900 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3901 self.cluster.hv_state_static)
3902 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3903 for hv, values in new_hv_state.items())
3905 if self.op.disk_state:
3906 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3907 self.cluster.disk_state_static)
3908 self.new_disk_state = \
3909 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3910 for name, values in svalues.items()))
3911 for storage, svalues in new_disk_state.items())
# Instance-policy changes are checked per node group: instances that would
# violate the new policy are reported as a warning, not an error.
3914 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3917 all_instances = self.cfg.GetAllInstancesInfo().values()
3919 for group in self.cfg.GetAllNodeGroupsInfo().values():
3920 instances = frozenset([inst for inst in all_instances
3921 if compat.any(node in group.members
3922 for node in inst.all_nodes)])
3923 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3924 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3926 new_ipolicy, instances)
3928 violations.update(new)
3931 self.LogWarning("After the ipolicy change the following instances"
3932 " violate them: %s",
3933 utils.CommaJoin(violations))
3935 if self.op.nicparams:
3936 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3937 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3938 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3941 # check all instances for consistency
# New NIC defaults are dry-run-filled into every instance NIC; all
# problems are accumulated and reported together below.
3942 for instance in self.cfg.GetAllInstancesInfo().values():
3943 for nic_idx, nic in enumerate(instance.nics):
3944 params_copy = copy.deepcopy(nic.nicparams)
3945 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3947 # check parameter syntax
3949 objects.NIC.CheckParameterSyntax(params_filled)
# `except X, err` is Python-2-only syntax (would be `as err` in py3).
3950 except errors.ConfigurationError, err:
3951 nic_errors.append("Instance %s, nic/%d: %s" %
3952 (instance.name, nic_idx, err))
3954 # if we're moving instances to routed, check that they have an ip
3955 target_mode = params_filled[constants.NIC_MODE]
3956 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3957 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3958 " address" % (instance.name, nic_idx))
3960 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3961 "\n".join(nic_errors))
3963 # hypervisor list/parameters
3964 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3965 if self.op.hvparams:
3966 for hv_name, hv_dict in self.op.hvparams.items():
3967 if hv_name not in self.new_hvparams:
3968 self.new_hvparams[hv_name] = hv_dict
3970 self.new_hvparams[hv_name].update(hv_dict)
3972 # disk template parameters
3973 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3974 if self.op.diskparams:
3975 for dt_name, dt_params in self.op.diskparams.items():
# BUG(review): this condition tests membership in self.op.diskparams, the
# very dict being iterated, so it is always False and the update branch
# below always runs (KeyError for a template absent from the cluster
# defaults). It should read "dt_name not in self.new_diskparams", matching
# the hvparams merge above. Confirm against upstream before fixing.
3976 if dt_name not in self.op.diskparams:
3977 self.new_diskparams[dt_name] = dt_params
3979 self.new_diskparams[dt_name].update(dt_params)
3981 # os hypervisor parameters
3982 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3984 for os_name, hvs in self.op.os_hvp.items():
3985 if os_name not in self.new_os_hvp:
3986 self.new_os_hvp[os_name] = hvs
3988 for hv_name, hv_dict in hvs.items():
3989 if hv_name not in self.new_os_hvp[os_name]:
3990 self.new_os_hvp[os_name][hv_name] = hv_dict
3992 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3995 self.new_osp = objects.FillDict(cluster.osparams, {})
3996 if self.op.osparams:
3997 for os_name, osp in self.op.osparams.items():
3998 if os_name not in self.new_osp:
3999 self.new_osp[os_name] = {}
4001 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4004 if not self.new_osp[os_name]:
4005 # we removed all parameters
4006 del self.new_osp[os_name]
4008 # check the parameter validity (remote check)
4009 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4010 os_name, self.new_osp[os_name])
4012 # changes to the hypervisor list
4013 if self.op.enabled_hypervisors is not None:
4014 self.hv_list = self.op.enabled_hypervisors
4015 for hv in self.hv_list:
4016 # if the hypervisor doesn't already exist in the cluster
4017 # hvparams, we initialize it to empty, and then (in both
4018 # cases) we make sure to fill the defaults, as we might not
4019 # have a complete defaults list if the hypervisor wasn't
4021 if hv not in new_hvp:
4023 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4024 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4026 self.hv_list = cluster.enabled_hypervisors
4028 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4029 # either the enabled list has changed, or the parameters have, validate
4030 for hv_name, hv_params in self.new_hvparams.items():
4031 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4032 (self.op.enabled_hypervisors and
4033 hv_name in self.op.enabled_hypervisors)):
4034 # either this is a new hypervisor, or its parameters have changed
4035 hv_class = hypervisor.GetHypervisor(hv_name)
4036 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4037 hv_class.CheckParameterSyntax(hv_params)
4038 _CheckHVParams(self, node_list, hv_name, hv_params)
4041 # no need to check any newly-enabled hypervisors, since the
4042 # defaults have already been checked in the above code-block
4043 for os_name, os_hvp in self.new_os_hvp.items():
4044 for hv_name, hv_params in os_hvp.items():
4045 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4046 # we need to fill in the new os_hvp on top of the actual hv_p
4047 cluster_defaults = self.new_hvparams.get(hv_name, {})
4048 new_osp = objects.FillDict(cluster_defaults, hv_params)
4049 hv_class = hypervisor.GetHypervisor(hv_name)
4050 hv_class.CheckParameterSyntax(new_osp)
4051 _CheckHVParams(self, node_list, hv_name, new_osp)
4053 if self.op.default_iallocator:
4054 alloc_script = utils.FindFile(self.op.default_iallocator,
4055 constants.IALLOCATOR_SEARCH_PATH,
4057 if alloc_script is None:
4058 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4059 " specified" % self.op.default_iallocator,
4062 def Exec(self, feedback_fn):
4063 """Change the parameters of the cluster.
# Commit phase: copies the pre-validated "new_*" values from CheckPrereq
# into the cluster object, then persists with cfg.Update near the end.
4066 if self.op.vg_name is not None:
4067 new_volume = self.op.vg_name
4070 if new_volume != self.cfg.GetVGName():
4071 self.cfg.SetVGName(new_volume)
4073 feedback_fn("Cluster LVM configuration already in desired"
4074 " state, not changing")
4075 if self.op.drbd_helper is not None:
4076 new_helper = self.op.drbd_helper
4079 if new_helper != self.cfg.GetDRBDHelper():
4080 self.cfg.SetDRBDHelper(new_helper)
4082 feedback_fn("Cluster DRBD helper already in desired state,"
4084 if self.op.hvparams:
4085 self.cluster.hvparams = self.new_hvparams
4087 self.cluster.os_hvp = self.new_os_hvp
4088 if self.op.enabled_hypervisors is not None:
4089 self.cluster.hvparams = self.new_hvparams
4090 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4091 if self.op.beparams:
4092 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4093 if self.op.nicparams:
4094 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4096 self.cluster.ipolicy = self.new_ipolicy
4097 if self.op.osparams:
4098 self.cluster.osparams = self.new_osp
4099 if self.op.ndparams:
4100 self.cluster.ndparams = self.new_ndparams
4101 if self.op.diskparams:
4102 self.cluster.diskparams = self.new_diskparams
4103 if self.op.hv_state:
4104 self.cluster.hv_state_static = self.new_hv_state
4105 if self.op.disk_state:
4106 self.cluster.disk_state_static = self.new_disk_state
4108 if self.op.candidate_pool_size is not None:
4109 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4110 # we need to update the pool size here, otherwise the save will fail
4111 _AdjustCandidatePool(self, [])
4113 if self.op.maintain_node_health is not None:
4114 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4115 feedback_fn("Note: CONFD was disabled at build time, node health"
4116 " maintenance is not useful (still enabling it)")
4117 self.cluster.maintain_node_health = self.op.maintain_node_health
4119 if self.op.prealloc_wipe_disks is not None:
4120 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4122 if self.op.add_uids is not None:
4123 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4125 if self.op.remove_uids is not None:
4126 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4128 if self.op.uid_pool is not None:
4129 self.cluster.uid_pool = self.op.uid_pool
4131 if self.op.default_iallocator is not None:
4132 self.cluster.default_iallocator = self.op.default_iallocator
4134 if self.op.reserved_lvs is not None:
4135 self.cluster.reserved_lvs = self.op.reserved_lvs
4137 if self.op.use_external_mip_script is not None:
4138 self.cluster.use_external_mip_script = self.op.use_external_mip_script
# Applies DDM_ADD/DDM_REMOVE modifications to an OS-name list attribute of
# the cluster (hidden_os / blacklisted_os); duplicates and misses are
# reported via feedback_fn rather than raised.
4140 def helper_os(aname, mods, desc):
4142 lst = getattr(self.cluster, aname)
4143 for key, val in mods:
4144 if key == constants.DDM_ADD:
4146 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4149 elif key == constants.DDM_REMOVE:
4153 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4155 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4157 if self.op.hidden_os:
4158 helper_os("hidden_os", self.op.hidden_os, "hidden")
4160 if self.op.blacklisted_os:
4161 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
# Changing the master netdev: take the master IP down on the old device
# first; it is brought back up (on the new device) after cfg.Update below.
4163 if self.op.master_netdev:
4164 master_params = self.cfg.GetMasterNetworkParameters()
4165 ems = self.cfg.GetUseExternalMipScript()
4166 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4167 self.cluster.master_netdev)
4168 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4170 result.Raise("Could not disable the master ip")
4171 feedback_fn("Changing master_netdev from %s to %s" %
4172 (master_params.netdev, self.op.master_netdev))
4173 self.cluster.master_netdev = self.op.master_netdev
4175 if self.op.master_netmask:
4176 master_params = self.cfg.GetMasterNetworkParameters()
4177 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4178 result = self.rpc.call_node_change_master_netmask(master_params.name,
4179 master_params.netmask,
4180 self.op.master_netmask,
4182 master_params.netdev)
4184 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4187 self.cluster.master_netmask = self.op.master_netmask
4189 self.cfg.Update(self.cluster, feedback_fn)
4191 if self.op.master_netdev:
4192 master_params = self.cfg.GetMasterNetworkParameters()
4193 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4194 self.op.master_netdev)
4195 ems = self.cfg.GetUseExternalMipScript()
4196 result = self.rpc.call_node_activate_master_ip(master_params.name,
# Failure to re-activate is a warning only; the config change is already
# committed at this point.
4199 self.LogWarning("Could not re-enable the master ip on"
4200 " the master, please restart manually: %s",
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  Uploads the local file C{fname} to all given nodes via RPC; any
  per-node failure is reported as a warning instead of aborting, since
  file distribution is best-effort.

  @param lu: the calling logical unit (provides C{rpc} and C{proc})
  @param nodes: list of node names to upload to
  @type fname: string
  @param fname: path of the file to upload; silently skipped if it does
      not exist locally

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object, used for enabled hypervisors and
      the C{modify_etc_hosts} flag
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  @return: tuple of (all-node files, optional files, master-candidate
      files, vm-capable-node files)

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # All nodes are touched, but only in shared mode, since we only
    # read/overwrite replicated files
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # Updating the config triggers redistribution of config/ssconf;
    # ancillary files are handled explicitly afterwards
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  Polls the primary node for mirror status of the given disks until they
  are fully synchronised (or, with C{oneshot}, returns after one poll).

  @param lu: the calling logical unit
  @param instance: the instance whose disks to wait for
  @param disks: optional subset of the instance's disks to check
  @type oneshot: boolean
  @param oneshot: whether to poll only once instead of until sync
  @rtype: boolean
  @return: True if the disks are not degraded

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)

  return not cumul_degraded
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @rtype: boolean
  @return: True if the device (and all its children) is consistent

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    # recurse into children with the same primary flag
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      # Power-off/cycle must never hit the master implicitly; point the
      # user at the OOB helper if the master does support it
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError as err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.items())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Query for the list of fields available for a resource kind.

  Unlike L{LUQuery} this does not touch the configuration at all; it
  only reports the field definitions of the query implementation, so
  no locks are needed.

  """
  # pylint: disable=W0142
  REQ_BGL = False  # NOTE(review): restored; elided in this extract

  def CheckArguments(self):
    # Only the implementation class is needed; it is not instantiated.
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    # No locks: field metadata is static.
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5352 class LUNodeModifyStorage(NoHooksLU):
# LU that applies field changes to a single storage unit (e.g. an LVM
# volume) on one node through the storage_modify RPC.
5353 """Logical unit for modifying a storage volume on a node.
# CheckArguments: expand/normalize the node name and make sure every
# requested change is allowed for this storage type.
5358 def CheckArguments(self):
5359 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5361 storage_type = self.op.storage_type
# NOTE(review): the try/except (or similar guard) around this lookup is
# elided in this extract; MODIFIABLE_STORAGE_FIELDS maps storage type to
# the set of modifiable field names -- confirm against full source.
5364 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5366 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5367 " modified" % storage_type,
# Reject any requested field outside the modifiable set for this type.
5370 diff = set(self.op.changes.keys()) - modifiable
5372 raise errors.OpPrereqError("The following fields can not be modified for"
5373 " storage units of type '%s': %r" %
5374 (storage_type, list(diff)),
# Only the target node needs to be locked.
5377 def ExpandNames(self):
5378 self.needed_locks = {
5379 locking.LEVEL_NODE: self.op.node_name,
# Exec: run the modification on the node and raise OpExecError (via
# result.Raise) if the RPC reports failure.
5382 def Exec(self, feedback_fn):
5383 """Applies the requested changes to the storage unit on the node.
5386 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5387 result = self.rpc.call_storage_modify(self.op.node_name,
5388 self.op.storage_type, st_args,
5389 self.op.name, self.op.changes)
5390 result.Raise("Failed to modify storage unit '%s' on %s" %
5391 (self.op.name, self.op.node_name))
5394 class LUNodeAdd(LogicalUnit):
# LU handling both "add node" and "re-add node" (self.op.readd); re-add
# keeps the node's group and validates its IP configuration is unchanged.
5395 """Logical unit for adding node to the cluster.
5399 HTYPE = constants.HTYPE_NODE
# Flags copied verbatim between opcode and node object (see Exec).
5400 _NFLAGS = ["master_capable", "vm_capable"]
# CheckArguments: normalize the node name via DNS and reject nonsensical
# readd combinations (readding the master, readd with a group).
5402 def CheckArguments(self):
5403 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5404 # validate/normalize the node name
5405 self.hostname = netutils.GetHostname(name=self.op.node_name,
5406 family=self.primary_ip_family)
5407 self.op.node_name = self.hostname.name
5409 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5410 raise errors.OpPrereqError("Cannot readd the master node",
5413 if self.op.readd and self.op.group:
5414 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5415 " being readded", errors.ECODE_INVAL)
# Hook environment describing the node being added.
# NOTE(review): the surrounding "return { ... }" (or env assignment) is
# elided in this extract.
5417 def BuildHooksEnv(self):
5420 This will run on all nodes before, and on all nodes + the new node after.
5424 "OP_TARGET": self.op.node_name,
5425 "NODE_NAME": self.op.node_name,
5426 "NODE_PIP": self.op.primary_ip,
5427 "NODE_SIP": self.op.secondary_ip,
5428 "MASTER_CAPABLE": str(self.op.master_capable),
5429 "VM_CAPABLE": str(self.op.vm_capable),
# Pre-hooks run on all existing nodes; post-hooks additionally include
# the node that was just added.
5432 def BuildHooksNodes(self):
5433 """Build hooks nodes.
5436 # Exclude added node
5437 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5438 post_nodes = pre_nodes + [self.op.node_name, ]
5440 return (pre_nodes, post_nodes)
# CheckPrereq: validate IPs, uniqueness, homing (single vs dual) and
# reachability, then build self.new_node for Exec.
5442 def CheckPrereq(self):
5443 """Check prerequisites.
5446 - the new node is not already in the config
5448 - its parameters (single/dual homed) matches the cluster
5450 Any errors are signaled by raising errors.OpPrereqError.
5454 hostname = self.hostname
5455 node = hostname.name
5456 primary_ip = self.op.primary_ip = hostname.ip
5457 if self.op.secondary_ip is None:
5458 if self.primary_ip_family == netutils.IP6Address.family:
5459 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5460 " IPv4 address must be given as secondary",
# Default: single-homed -- secondary IP equals the primary IP.
5462 self.op.secondary_ip = primary_ip
5464 secondary_ip = self.op.secondary_ip
5465 if not netutils.IP4Address.IsValid(secondary_ip):
5466 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5467 " address" % secondary_ip, errors.ECODE_INVAL)
# NOTE(review): "cfg" is presumably a local alias for self.cfg assigned
# on an elided line above -- confirm against full source.
5469 node_list = cfg.GetNodeList()
5470 if not self.op.readd and node in node_list:
5471 raise errors.OpPrereqError("Node %s is already in the configuration" %
5472 node, errors.ECODE_EXISTS)
5473 elif self.op.readd and node not in node_list:
5474 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5477 self.changed_primary_ip = False
# For readd the IPs must match the stored configuration (except a
# changed primary IP, which is remembered and applied in Exec); for a
# fresh add the IPs must not collide with any existing node.
5479 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5480 if self.op.readd and node == existing_node_name:
5481 if existing_node.secondary_ip != secondary_ip:
5482 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5483 " address configuration as before",
5485 if existing_node.primary_ip != primary_ip:
5486 self.changed_primary_ip = True
5490 if (existing_node.primary_ip == primary_ip or
5491 existing_node.secondary_ip == primary_ip or
5492 existing_node.primary_ip == secondary_ip or
5493 existing_node.secondary_ip == secondary_ip):
5494 raise errors.OpPrereqError("New node ip address(es) conflict with"
5495 " existing node %s" % existing_node.name,
5496 errors.ECODE_NOTUNIQUE)
5498 # After this 'if' block, None is no longer a valid value for the
5499 # _capable op attributes
# On readd, unspecified _NFLAGS default to the stored node's values...
5501 old_node = self.cfg.GetNodeInfo(node)
5502 assert old_node is not None, "Can't retrieve locked node %s" % node
5503 for attr in self._NFLAGS:
5504 if getattr(self.op, attr) is None:
5505 setattr(self.op, attr, getattr(old_node, attr))
# ...while on a fresh add they default to True.  NOTE(review): the
# if readd/else split around these two loops is elided in this extract.
5507 for attr in self._NFLAGS:
5508 if getattr(self.op, attr) is None:
5509 setattr(self.op, attr, True)
5511 if self.op.readd and not self.op.vm_capable:
5512 pri, sec = cfg.GetNodeInstances(node)
5514 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5515 " flag set to false, but it already holds"
5516 " instances" % node,
5519 # check that the type of the node (single versus dual homed) is the
5520 # same as for the master
5521 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5522 master_singlehomed = myself.secondary_ip == myself.primary_ip
5523 newbie_singlehomed = secondary_ip == primary_ip
5524 if master_singlehomed != newbie_singlehomed:
5525 if master_singlehomed:
5526 raise errors.OpPrereqError("The master has no secondary ip but the"
5527 " new node has one",
5530 raise errors.OpPrereqError("The master has a secondary ip but the"
5531 " new node doesn't have one",
5534 # checks reachability
5535 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5536 raise errors.OpPrereqError("Node not reachable by ping",
5537 errors.ECODE_ENVIRON)
5539 if not newbie_singlehomed:
5540 # check reachability from my secondary ip to newbie's secondary ip
5541 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5542 source=myself.secondary_ip):
5543 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5544 " based ping to node daemon port",
5545 errors.ECODE_ENVIRON)
# Decide whether the node becomes a master candidate (self-promotion
# depends on current candidate pool state).
5552 if self.op.master_capable:
5553 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5555 self.master_candidate = False
# Readd reuses the stored node object; a fresh add builds a new one in
# the requested (or default) node group.
5558 self.new_node = old_node
5560 node_group = cfg.LookupNodeGroup(self.op.group)
5561 self.new_node = objects.Node(name=node,
5562 primary_ip=primary_ip,
5563 secondary_ip=secondary_ip,
5564 master_candidate=self.master_candidate,
5565 offline=False, drained=False,
5568 if self.op.ndparams:
5569 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5571 if self.op.hv_state:
5572 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5574 if self.op.disk_state:
5575 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
# Exec: configure the node object, verify connectivity and versions,
# update /etc/hosts, run node verification, then register the node
# (ReaddNode or AddNode) and redistribute ancillary files.
5577 def Exec(self, feedback_fn):
5578 """Adds the new node to the cluster.
5581 new_node = self.new_node
5582 node = new_node.name
5584 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5587 # We adding a new node so we assume it's powered
5588 new_node.powered = True
5590 # for re-adds, reset the offline/drained/master-candidate flags;
5591 # we need to reset here, otherwise offline would prevent RPC calls
5592 # later in the procedure; this also means that if the re-add
5593 # fails, we are left with a non-offlined, broken node
# NOTE(review): the "if self.op.readd:" guard for this reset is elided
# in this extract.
5595 new_node.drained = new_node.offline = False # pylint: disable=W0201
5596 self.LogInfo("Readding a node, the offline/drained flags were reset")
5597 # if we demote the node, we do cleanup later in the procedure
5598 new_node.master_candidate = self.master_candidate
5599 if self.changed_primary_ip:
5600 new_node.primary_ip = self.op.primary_ip
5602 # copy the master/vm_capable flags
5603 for attr in self._NFLAGS:
5604 setattr(new_node, attr, getattr(self.op, attr))
5606 # notify the user about any possible mc promotion
5607 if new_node.master_candidate:
5608 self.LogInfo("Node will be a master candidate")
5610 if self.op.ndparams:
5611 new_node.ndparams = self.op.ndparams
5613 new_node.ndparams = {}
5615 if self.op.hv_state:
5616 new_node.hv_state_static = self.new_hv_state
5618 if self.op.disk_state:
5619 new_node.disk_state_static = self.new_disk_state
5621 # check connectivity
5622 result = self.rpc.call_version([node])[node]
5623 result.Raise("Can't get version information from node %s" % node)
5624 if constants.PROTOCOL_VERSION == result.payload:
5625 logging.info("Communication to node %s fine, sw version %s match",
5626 node, result.payload)
5628 raise errors.OpExecError("Version mismatch master version %s,"
5629 " node version %s" %
5630 (constants.PROTOCOL_VERSION, result.payload))
5632 # Add node to our /etc/hosts, and add key to known_hosts
5633 if self.cfg.GetClusterInfo().modify_etc_hosts:
5634 master_node = self.cfg.GetMasterNode()
5635 result = self.rpc.call_etc_hosts_modify(master_node,
5636 constants.ETC_HOSTS_ADD,
5639 result.Raise("Can't update hosts file with new host data")
5641 if new_node.secondary_ip != new_node.primary_ip:
5642 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
# Verify SSH/hostname setup of the new node as seen from the master.
5645 node_verify_list = [self.cfg.GetMasterNode()]
5646 node_verify_param = {
5647 constants.NV_NODELIST: ([node], {}),
5648 # TODO: do a node-net-test as well?
5651 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5652 self.cfg.GetClusterName())
5653 for verifier in node_verify_list:
5654 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5655 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5657 for failed in nl_payload:
5658 feedback_fn("ssh/hostname verification failed"
5659 " (checking from %s): %s" %
5660 (verifier, nl_payload[failed]))
5661 raise errors.OpExecError("ssh/hostname verification failed")
# Readd path: re-register and persist; demote-from-MC cleanup removes
# stale master files on the node if it is no longer a candidate.
5664 _RedistributeAncillaryFiles(self)
5665 self.context.ReaddNode(new_node)
5666 # make sure we redistribute the config
5667 self.cfg.Update(new_node, feedback_fn)
5668 # and make sure the new node will not have old files around
5669 if not new_node.master_candidate:
5670 result = self.rpc.call_node_demote_from_mc(new_node.name)
5671 msg = result.fail_msg
5673 self.LogWarning("Node failed to demote itself from master"
5674 " candidate status: %s" % msg)
# Fresh-add path (the if readd/else split is elided in this extract).
5676 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5677 additional_vm=self.op.vm_capable)
5678 self.context.AddNode(new_node, self.proc.GetECId())
5681 class LUNodeSetParams(LogicalUnit):
# LU changing node flags (master_candidate/drained/offline), capability
# flags, secondary IP, ndparams and hv/disk state.  Node "roles" are an
# internal encoding of the three mutually-exclusive flag states.
5682 """Modifies the parameters of a node.
5684 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5685 to the node role (as _ROLE_*)
5686 @cvar _R2F: a dictionary from node role to tuples of flags
5687 @cvar _FLAGS: a list of attribute names corresponding to the flags
5690 HPATH = "node-modify"
5691 HTYPE = constants.HTYPE_NODE
5693 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
# NOTE(review): the "_F2R = {" opening line is elided in this extract.
5695 (True, False, False): _ROLE_CANDIDATE,
5696 (False, True, False): _ROLE_DRAINED,
5697 (False, False, True): _ROLE_OFFLINE,
5698 (False, False, False): _ROLE_REGULAR,
5700 _R2F = dict((v, k) for k, v in _F2R.items())
5701 _FLAGS = ["master_candidate", "drained", "offline"]
# CheckArguments: require at least one modification, forbid setting more
# than one exclusive state, and compute locking strategy flags.
5703 def CheckArguments(self):
5704 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5705 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5706 self.op.master_capable, self.op.vm_capable,
5707 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5709 if all_mods.count(None) == len(all_mods):
5710 raise errors.OpPrereqError("Please pass at least one modification",
5712 if all_mods.count(True) > 1:
5713 raise errors.OpPrereqError("Can't set the node into more than one"
5714 " state at the same time",
5717 # Boolean value that tells us whether we might be demoting from MC
5718 self.might_demote = (self.op.master_candidate == False or
5719 self.op.offline == True or
5720 self.op.drained == True or
5721 self.op.master_capable == False)
5723 if self.op.secondary_ip:
5724 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5725 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5726 " address" % self.op.secondary_ip,
# lock_all: demotion with auto-promote may need to promote other nodes.
5729 self.lock_all = self.op.auto_promote and self.might_demote
5730 self.lock_instances = self.op.secondary_ip is not None
# Instances affected by a secondary-IP change: internally mirrored
# (e.g. DRBD) instances that use this node.
5732 def _InstanceFilter(self, instance):
5733 """Filter for getting affected instances.
5736 return (instance.disk_template in constants.DTS_INT_MIRROR and
5737 self.op.node_name in instance.all_nodes)
# ExpandNames: all nodes when lock_all, otherwise just the target node.
# NOTE(review): the if/else around the two needed_locks assignments is
# elided in this extract.
5739 def ExpandNames(self):
5741 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5743 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5745 # Since modifying a node can have severe effects on currently running
5746 # operations the resource lock is at least acquired in shared mode
5747 self.needed_locks[locking.LEVEL_NODE_RES] = \
5748 self.needed_locks[locking.LEVEL_NODE]
5750 # Get node resource and instance locks in shared mode; they are not used
5751 # for anything but read-only access
5752 self.share_locks[locking.LEVEL_NODE_RES] = 1
5753 self.share_locks[locking.LEVEL_INSTANCE] = 1
5755 if self.lock_instances:
5756 self.needed_locks[locking.LEVEL_INSTANCE] = \
5757 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
# Hook environment with the requested flag changes.
5759 def BuildHooksEnv(self):
5762 This runs on the master node.
5766 "OP_TARGET": self.op.node_name,
5767 "MASTER_CANDIDATE": str(self.op.master_candidate),
5768 "OFFLINE": str(self.op.offline),
5769 "DRAINED": str(self.op.drained),
5770 "MASTER_CAPABLE": str(self.op.master_capable),
5771 "VM_CAPABLE": str(self.op.vm_capable),
5774 def BuildHooksNodes(self):
5775 """Build hooks nodes.
5778 nl = [self.cfg.GetMasterNode(), self.op.node_name]
# CheckPrereq: validate the change set against the current node state
# and compute old_role/new_role for Exec.
5781 def CheckPrereq(self):
5782 """Check prerequisites.
5784 This only checks the instance list against the existing names.
5787 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5789 if self.lock_instances:
5790 affected_instances = \
5791 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5793 # Verify instance locks
5794 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5795 wanted_instances = frozenset(affected_instances.keys())
5796 if wanted_instances - owned_instances:
5797 raise errors.OpPrereqError("Instances affected by changing node %s's"
5798 " secondary IP address have changed since"
5799 " locks were acquired, wanted '%s', have"
5800 " '%s'; retry the operation" %
5802 utils.CommaJoin(wanted_instances),
5803 utils.CommaJoin(owned_instances)),
5806 affected_instances = None
5808 if (self.op.master_candidate is not None or
5809 self.op.drained is not None or
5810 self.op.offline is not None):
5811 # we can't change the master's node flags
5812 if self.op.node_name == self.cfg.GetMasterNode():
5813 raise errors.OpPrereqError("The master role can be changed"
5814 " only via master-failover",
5817 if self.op.master_candidate and not node.master_capable:
5818 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5819 " it a master candidate" % node.name,
5822 if self.op.vm_capable == False:
5823 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5825 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5826 " the vm_capable flag" % node.name,
5829 if node.master_candidate and self.might_demote and not self.lock_all:
5830 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5831 # check if after removing the current node, we're missing master
5833 (mc_remaining, mc_should, _) = \
5834 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5835 if mc_remaining < mc_should:
5836 raise errors.OpPrereqError("Not enough master candidates, please"
5837 " pass auto promote option to allow"
5838 " promotion", errors.ECODE_STATE)
5840 self.old_flags = old_flags = (node.master_candidate,
5841 node.drained, node.offline)
5842 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5843 self.old_role = old_role = self._F2R[old_flags]
5845 # Check for ineffective changes
5846 for attr in self._FLAGS:
5847 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5848 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5849 setattr(self.op, attr, None)
5851 # Past this point, any flag change to False means a transition
5852 # away from the respective state, as only real changes are kept
5854 # TODO: We might query the real power state if it supports OOB
5855 if _SupportsOob(self.cfg, node):
5856 if self.op.offline is False and not (node.powered or
5857 self.op.powered == True):
5858 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5859 " offline status can be reset") %
5861 elif self.op.powered is not None:
5862 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5863 " as it does not support out-of-band"
5864 " handling") % self.op.node_name)
5866 # If we're being deofflined/drained, we'll MC ourself if needed
5867 if (self.op.drained == False or self.op.offline == False or
5868 (self.op.master_capable and not node.master_capable)):
5869 if _DecideSelfPromotion(self):
5870 self.op.master_candidate = True
5871 self.LogInfo("Auto-promoting node to master candidate")
5873 # If we're no longer master capable, we'll demote ourselves from MC
5874 if self.op.master_capable == False and node.master_candidate:
5875 self.LogInfo("Demoting from master candidate")
5876 self.op.master_candidate = False
# Compute the new role from the (possibly adjusted) requested flags.
5879 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5880 if self.op.master_candidate:
5881 new_role = self._ROLE_CANDIDATE
5882 elif self.op.drained:
5883 new_role = self._ROLE_DRAINED
5884 elif self.op.offline:
5885 new_role = self._ROLE_OFFLINE
5886 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5887 # False is still in new flags, which means we're un-setting (the
5889 new_role = self._ROLE_REGULAR
5890 else: # no new flags, nothing, keep old role
5893 self.new_role = new_role
5895 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5896 # Trying to transition out of offline status
5897 # TODO: Use standard RPC runner, but make sure it works when the node is
5898 # still marked offline
5899 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5901 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5902 " to report its version: %s" %
5903 (node.name, result.fail_msg),
5906 self.LogWarning("Transitioning node from offline to online state"
5907 " without using re-add. Please make sure the node"
5910 if self.op.secondary_ip:
5911 # Ok even without locking, because this can't be changed by any LU
5912 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5913 master_singlehomed = master.secondary_ip == master.primary_ip
5914 if master_singlehomed and self.op.secondary_ip:
5915 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5916 " homed cluster", errors.ECODE_INVAL)
5918 assert not (frozenset(affected_instances) -
5919 self.owned_locks(locking.LEVEL_INSTANCE))
# NOTE(review): the "if node.offline:" branch selector is elided in
# this extract -- offline nodes with affected instances are rejected.
5922 if affected_instances:
5923 raise errors.OpPrereqError("Cannot change secondary IP address:"
5924 " offline node has instances (%s)"
5925 " configured to use it" %
5926 utils.CommaJoin(affected_instances.keys()))
5928 # On online nodes, check that no instances are running, and that
5929 # the node has the new ip and we can reach it.
5930 for instance in affected_instances.values():
5931 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5932 msg="cannot change secondary ip")
5934 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5935 if master.name != node.name:
5936 # check reachability from master secondary ip to new secondary ip
5937 if not netutils.TcpPing(self.op.secondary_ip,
5938 constants.DEFAULT_NODED_PORT,
5939 source=master.secondary_ip):
5940 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5941 " based ping to node daemon port",
5942 errors.ECODE_ENVIRON)
5944 if self.op.ndparams:
5945 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5946 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5947 self.new_ndparams = new_ndparams
5949 if self.op.hv_state:
5950 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5951 self.node.hv_state_static)
5953 if self.op.disk_state:
5954 self.new_disk_state = \
5955 _MergeAndVerifyDiskState(self.op.disk_state,
5956 self.node.disk_state_static)
# Exec: apply the validated changes to the node object, adjust the
# candidate pool if needed, persist the config and return the list of
# (name, value) changes.  NOTE(review): the "node = self.node" and
# "result = []" assignments are elided in this extract.
5958 def Exec(self, feedback_fn):
5963 old_role = self.old_role
5964 new_role = self.new_role
5968 if self.op.ndparams:
5969 node.ndparams = self.new_ndparams
5971 if self.op.powered is not None:
5972 node.powered = self.op.powered
5974 if self.op.hv_state:
5975 node.hv_state_static = self.new_hv_state
5977 if self.op.disk_state:
5978 node.disk_state_static = self.new_disk_state
5980 for attr in ["master_capable", "vm_capable"]:
5981 val = getattr(self.op, attr)
5983 setattr(node, attr, val)
5984 result.append((attr, str(val)))
5986 if new_role != old_role:
5987 # Tell the node to demote itself, if no longer MC and not offline
5988 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5989 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5991 self.LogWarning("Node failed to demote itself: %s", msg)
5993 new_flags = self._R2F[new_role]
5994 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5996 result.append((desc, str(nf)))
5997 (node.master_candidate, node.drained, node.offline) = new_flags
5999 # we locked all nodes, we adjust the CP before updating this node
6001 _AdjustCandidatePool(self, [node.name])
6003 if self.op.secondary_ip:
6004 node.secondary_ip = self.op.secondary_ip
6005 result.append(("secondary_ip", self.op.secondary_ip))
6007 # this will trigger configuration file update, if needed
6008 self.cfg.Update(node, feedback_fn)
6010 # this will trigger job queue propagation or cleanup if the mc
6012 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6013 self.context.ReaddNode(node)
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  Schedules a hard reboot of the node through the node daemon.  As a
  safety net the master node can only be powercycled with ``force``.

  """
  REQ_BGL = False  # NOTE(review): restored; elided in this extract

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # Refuse to powercycle the master unless explicitly forced.
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots the node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
6050 class LUClusterQuery(NoHooksLU):
# LU returning a dictionary of cluster-wide configuration values
# (versions, parameter dicts, feature flags).  Read-only, no locks.
6051 """Query cluster configuration.
6056 def ExpandNames(self):
6057 self.needed_locks = {}
# Exec: build and return the result dictionary from the cluster config.
6059 def Exec(self, feedback_fn):
6060 """Return cluster config.
6063 cluster = self.cfg.GetClusterInfo()
# NOTE(review): the "os_hvp = {}" initialization is elided in this
# extract.
6066 # Filter just for enabled hypervisors
6067 for os_name, hv_dict in cluster.os_hvp.items():
6068 os_hvp[os_name] = {}
6069 for hv_name, hv_params in hv_dict.items():
6070 if hv_name in cluster.enabled_hypervisors:
6071 os_hvp[os_name][hv_name] = hv_params
6073 # Convert ip_family to ip_version
6074 primary_ip_version = constants.IP4_VERSION
6075 if cluster.primary_ip_family == netutils.IP6Address.family:
6076 primary_ip_version = constants.IP6_VERSION
# NOTE(review): the "result = {" opener, its closing brace and the
# final "return result" are elided in this extract.
6079 "software_version": constants.RELEASE_VERSION,
6080 "protocol_version": constants.PROTOCOL_VERSION,
6081 "config_version": constants.CONFIG_VERSION,
6082 "os_api_version": max(constants.OS_API_VERSIONS),
6083 "export_version": constants.EXPORT_VERSION,
6084 "architecture": (platform.architecture()[0], platform.machine()),
6085 "name": cluster.cluster_name,
6086 "master": cluster.master_node,
6087 "default_hypervisor": cluster.primary_hypervisor,
6088 "enabled_hypervisors": cluster.enabled_hypervisors,
# hvparams restricted to enabled hypervisors only.
6089 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6090 for hypervisor_name in cluster.enabled_hypervisors]),
6092 "beparams": cluster.beparams,
6093 "osparams": cluster.osparams,
6094 "ipolicy": cluster.ipolicy,
6095 "nicparams": cluster.nicparams,
6096 "ndparams": cluster.ndparams,
6097 "candidate_pool_size": cluster.candidate_pool_size,
6098 "master_netdev": cluster.master_netdev,
6099 "master_netmask": cluster.master_netmask,
6100 "use_external_mip_script": cluster.use_external_mip_script,
6101 "volume_group_name": cluster.volume_group_name,
6102 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6103 "file_storage_dir": cluster.file_storage_dir,
6104 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6105 "maintain_node_health": cluster.maintain_node_health,
6106 "ctime": cluster.ctime,
6107 "mtime": cluster.mtime,
6108 "uuid": cluster.uuid,
6109 "tags": list(cluster.GetTags()),
6110 "uid_pool": cluster.uid_pool,
6111 "default_iallocator": cluster.default_iallocator,
6112 "reserved_lvs": cluster.reserved_lvs,
6113 "primary_ip_version": primary_ip_version,
6114 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6115 "hidden_os": cluster.hidden_os,
6116 "blacklisted_os": cluster.blacklisted_os,
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  Old-style query for a small fixed set of cluster values; all fields
  are static, so no locks are needed.

  """
  REQ_BGL = False  # NOTE(review): restored; elided in this extract
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    @return: list of values, one per requested output field, in request
        order

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        # Job queue drain state is signalled via a flag file on disk.
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  Assembles the instance's block devices on all of its nodes and
  returns the resulting device mapping.

  """
  REQ_BGL = False  # NOTE(review): restored; elided in this extract

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks are computed later from the instance's node list.
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: list of (node, instance-visible name, node-visible path)
        tuples as returned by L{_AssembleInstanceDisks}

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
6200 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6202 """Prepare the block devices for an instance.
6204 This sets up the block devices on all nodes.
6206 @type lu: L{LogicalUnit}
6207 @param lu: the logical unit on whose behalf we execute
6208 @type instance: L{objects.Instance}
6209 @param instance: the instance for whose disks we assemble
6210 @type disks: list of L{objects.Disk} or None
6211 @param disks: which disks to assemble (or all, if None)
6212 @type ignore_secondaries: boolean
6213 @param ignore_secondaries: if true, errors on secondary nodes
6214 won't result in an error return from the function
6215 @type ignore_size: boolean
6216 @param ignore_size: if true, the current known size of the disk
6217 will not be used during the disk activation, useful for cases
6218 when the size is wrong
6219 @return: False if the operation failed, otherwise a list of
6220 (host, instance_visible_name, node_visible_name)
6221 with the mapping from node devices to instance devices
# NOTE(review): the initializations of the accumulators (device_info
# list, disks_ok flag) are elided in this extract.
6226 iname = instance.name
6227 disks = _ExpandCheckDisks(instance, disks)
6229 # With the two passes mechanism we try to reduce the window of
6230 # opportunity for the race condition of switching DRBD to primary
6231 # before handshaking occured, but we do not eliminate it
6233 # The proper fix would be to wait (with some limits) until the
6234 # connection has been made and drbd transitions from WFConnection
6235 # into any other network-connected state (Connected, SyncTarget,
6238 # 1st pass, assemble on all nodes in secondary mode
6239 for idx, inst_disk in enumerate(disks):
6240 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# Drop the recorded size so activation works even when it is wrong.
# NOTE(review): the "if ignore_size:" guard is elided in this extract.
6242 node_disk = node_disk.Copy()
6243 node_disk.UnsetSize()
6244 lu.cfg.SetDiskID(node_disk, node)
6245 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6246 msg = result.fail_msg
# NOTE(review): the "if msg:" guard around the warning/flag update is
# elided in this extract.
6248 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6249 " (is_primary=False, pass=1): %s",
6250 inst_disk.iv_name, node, msg)
# Secondary-node failures only count when not explicitly ignored.
6251 if not ignore_secondaries:
6254 # FIXME: race condition on drbd migration to primary
6256 # 2nd pass, do only the primary node
6257 for idx, inst_disk in enumerate(disks):
6260 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# Skip non-primary nodes in this pass (the "continue" is elided here).
6261 if node != instance.primary_node:
6264 node_disk = node_disk.Copy()
6265 node_disk.UnsetSize()
6266 lu.cfg.SetDiskID(node_disk, node)
6267 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6268 msg = result.fail_msg
6270 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6271 " (is_primary=True, pass=2): %s",
6272 inst_disk.iv_name, node, msg)
# On success, remember the device path reported by the primary node.
6275 dev_path = result.payload
6277 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6279 # leave the disks configured for the primary node
6280 # this is a workaround that would be fixed better by
6281 # improving the logical/physical id handling
# NOTE(review): the "for disk in disks:" loop header is elided here.
6283 lu.cfg.SetDiskID(disk, instance.primary_node)
6285 return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Assembles all of the instance's disks; on failure the already
  assembled devices are shut down again before raising, so no
  half-activated state is left behind.

  @raise errors.OpExecError: if the disks cannot be assembled

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # Roll back whatever was assembled before failing.
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  With ``force`` the disks are shut down unconditionally; otherwise the
  instance must be down first (checked by the safe variant).

  """
  REQ_BGL = False  # NOTE(review): restored; elided in this extract

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks are computed later from the instance's node list.
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """State-checked variant of L{_ShutdownInstanceDisks}.

  Refuses to touch the block devices unless the instance is
  administratively down, then delegates the actual shutdown.

  @param lu: the calling logical unit
  @param instance: the instance whose disks should be shut down
  @param disks: optional subset of disks to act on (C{None} means all)

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  # NOTE(review): elided extract -- the "disks is None" guard that
  # selects this default-return is not visible here.
  return instance.disks
  if not set(disks).issubset(instance.disks):
    # NOTE(review): the continuation/closing lines of this raise
    # (message tail and error code) are elided.
    raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  treated as failures -- TODO confirm, the docstring continuation is
  elided in this extract.

  @param disks: optional subset of disks to shut down (None means all)
  @param ignore_primary: whether errors on the primary node affect the
      overall result

  """
  disks = _ExpandCheckDisks(instance, disks)

  # NOTE(review): the outer "for disk in disks:" loop header and the
  # success-accumulator initialisation are elided from this view.
  for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
    lu.cfg.SetDiskID(top_disk, node)
    result = lu.rpc.call_blockdev_shutdown(node, top_disk)
    msg = result.fail_msg
    # NOTE(review): the warning below is presumably guarded by "if msg:"
    # in the full source -- the guard line is elided.
    lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                  disk.iv_name, node, msg)
    # NOTE(review): the body of this condition (marking the overall
    # result failed) and the function's return are elided.
    if ((node == instance.primary_node and not ignore_primary) or
        (node != instance.primary_node and not result.offline)):
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: C{int}
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  # Payload layout: (bootid, vg info, hypervisor info) -- we only need
  # the per-hypervisor stats here.
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    # NOTE(review): the error-code argument of this raise and the
    # trailing "return free_mem" are elided in this extract.
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks that the given nodes have enough free disk in every VG.

  Thin convenience wrapper: applies L{_CheckNodesFreeDiskOnVG} once per
  requested volume group.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, needed in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, needed)
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    # Payload layout: (bootid, vg info, hypervisor info); only the VG
    # statistics matter here.
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      # NOTE(review): the error-code argument of this raise is elided in
      # this extract.
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to query for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    # Payload layout: (bootid, vg info, hypervisor info); the CPU count
    # comes from the hypervisor statistics.
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      # NOTE(review): the error-code argument of this raise is elided in
      # this extract.
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- class attributes (e.g. REQ_BGL) and
  # several guard/else/return lines below are missing from this view.

  def CheckArguments(self):
    # Normalize and type-check any explicitly provided backend parameters.
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the surrounding "env = {...}" dict literal and the
    # trailing "return env" are elided here.
      "FORCE": self.op.force,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

    if self.op.hvparams or self.op.beparams:
      self.proc.LogWarning("Overridden parameters are ignored")

    # NOTE(review): an "else:" branch marker presumably precedes the
    # online-node checks below -- it is elided in this extract.
    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    bep.update(self.op.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    # NOTE(review): an argument line of this RPC call (presumably the
    # instance name) is elided.
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")

    # NOTE(review): an "else:" branch marker is elided here.
    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    # NOTE(review): the "result =" assignment prefix and one argument
    # line (presumably self.op.beparams) appear elided in this call.
    self.rpc.call_instance_start(node_current,
                                 (instance, self.op.hvparams,
                                 self.op.startup_paused)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard presumably protects the cleanup
    # and raise below -- it is elided.
    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some class attributes and guard
  # lines are missing from this view.

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the surrounding "env = {...}" dict literal and the
    # trailing "return env" are elided here.
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    # NOTE(review): an argument line of this RPC call (presumably the
    # instance name) is elided.
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    # Soft/hard reboots are delegated to the hypervisor on the node.
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      # NOTE(review): an argument line (presumably reboot_type) is
      # elided from this call.
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    # NOTE(review): an "else:" marker (full reboot path) is elided here.
    if instance_running:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
    # NOTE(review): an "else:" marker and the LogInfo argument line
    # (the instance name) are elided here.
    self.LogInfo("Instance %s was already stopped, starting now",
    _StartInstanceDisks(self, instance, ignore_secondaries)
    result = self.rpc.call_instance_start(node_current,
                                          (instance, None, None), False)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard is elided before this cleanup.
    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance for"
                             " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some class attributes and guard
  # lines are missing from this view.

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    # NOTE(review): the trailing "return env" is elided here.

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    # NOTE(review): an "else:" marker is elided before this check.
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    # NOTE(review): an "else:" marker is elided before the RPC below.
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard is elided before this warning.
    self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some class attributes and guard
  # lines are missing from this view.

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      # NOTE(review): the error-code argument of this raise is elided.
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification for the explicitly requested OS
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    # NOTE(review): an "else:" marker is elided before this fallback.
    instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    # NOTE(review): the "else: self.os_inst = {}" branch is elided.

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    # NOTE(review): a try/finally wrapper around the OS-create RPC
    # (ensuring the disk shutdown below always runs) appears elided.
    feedback_fn("Running the instance OS create scripts...")
    # FIXME: pass debug option from opcode to backend
    result = self.rpc.call_instance_os_add(inst.primary_node,
                                           (inst, self.os_inst), True,
                                           self.op.debug_level)
    result.Raise("Could not install OS for instance %s on node %s" %
                 (inst.name, inst.primary_node))
    _ShutdownInstanceDisks(self, inst)
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- collection closers, guard lines and
  # else-branches are missing throughout this class; confirm against the
  # full source before relying on the control flow shown here.

  # Disk parameters that may be changed while recreating
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_METAVG,

  def CheckArguments(self):
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    # NOTE(review): an "if duplicates:" guard is elided before this raise.
    raise errors.OpPrereqError("Some disks have been specified more than"
                               " once: %s" % utils.CommaJoin(duplicates),

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      # NOTE(review): an "if unsupported:" guard is elided here.
      raise errors.OpPrereqError("Parameters for disk %s try to change"
                                 " unmodifyable parameter(s): %s" %
                                 (idx, utils.CommaJoin(unsupported)),

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    # NOTE(review): an "if self.op.nodes: / else:" split around the next
    # four assignments is elided.
    self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
    self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    # NOTE(review): an "if self.op.nodes:" guard is elided before the
    # node-count validation below.
    if len(self.op.nodes) != len(instance.all_nodes):
      # NOTE(review): the error-code argument of this raise is elided.
      raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                 " %d replacement nodes were specified" %
                                 (instance.name, len(instance.all_nodes),
                                  len(self.op.nodes)),
    assert instance.disk_template != constants.DT_DRBD8 or \
      len(self.op.nodes) == 2
    assert instance.disk_template != constants.DT_PLAIN or \
      len(self.op.nodes) == 1
    primary_node = self.op.nodes[0]
    # NOTE(review): an "else:" marker is elided before this fallback.
    primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state -- TODO confirm, continuation elided
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    # NOTE(review): an "if self.op.disks: / else:" split around the next
    # two assignments is elided.
    self.disks = dict(self.op.disks)
    self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      # NOTE(review): the error-code argument of this raise is elided.
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,

    if (self.op.nodes and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      # NOTE(review): the error-code argument of this raise is elided.
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    # NOTE(review): the "to_skip = []" initialisation appears elided.
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      # NOTE(review): a try/except KeyError around this lookup (skipping
      # disks not selected for recreation) appears elided.
      changes = self.disks[idx]
      # Disk should not be recreated

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals

        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      # NOTE(review): the "else: new_id = None" branch is elided.

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      # NOTE(review): an "if changes:" guard appears elided here.
      disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                  mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    # NOTE(review): an "if self.op.nodes:" guard appears elided here.
    instance.primary_node = self.op.nodes[0]
    self.LogWarning("Changing the instance's nodes, you will have to"
                    " remove any disks left on the older nodes manually")

    self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some guard lines and error-code
  # arguments are missing from this view.

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      # NOTE(review): the error-code argument of this raise is elided.
      raise errors.OpPrereqError("IP address check requires a name check",

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    # NOTE(review): the trailing "return env" is elided here.

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        # NOTE(review): the LogInfo argument line is elided.
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        # NOTE(review): the error-code argument of this raise is elided.
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    # NOTE(review): a try/finally around the rename RPC (ensuring disk
    # shutdown) and a trailing "return inst.name" appear elided.
    result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                               old_name, self.op.debug_level)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard is elided before this warning.
    msg = ("Could not run OS rename script for instance %s on node %s"
           " (but the instance has been renamed in Ganeti): %s" %
           (inst.name, inst.primary_node, msg))
    self.proc.LogWarning(msg)
    _ShutdownInstanceDisks(self, inst)
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some class attributes and guard
  # lines are missing from this view.

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    # NOTE(review): the trailing "return env" is elided here.

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard is elided before this branch.
    if self.op.ignore_failures:
      feedback_fn("Warning: can't shutdown instance: %s" % msg)
    # NOTE(review): an "else:" marker and a message-continuation line
    # are elided around this raise.
    raise errors.OpExecError("Could not shutdown instance %s on"
                             (instance.name, instance.primary_node, msg))

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  Removes the instance's block devices, drops it from the cluster
  configuration, and schedules removal of its lock.

  @param lu: the logical unit on whose behalf we act
  @param feedback_fn: callback used to report warnings to the user
  @param instance: the instance object to remove
  @param ignore_failures: whether disk-removal failures are downgraded
      to a warning instead of aborting

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Schedule removal of the (now stale) instance lock
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  # NOTE(review): elided extract -- class attributes (e.g. REQ_BGL) may
  # be missing from this view.

  def CheckArguments(self):
    # Build the query helper from the opcode's name filter, requested
    # output fields and locking flag.
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    # Delegate to the query helper's old-style (list-of-rows) interface.
    return self.iq.OldStyleQuery(self)
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some class attributes, keyword
  # arguments and else-branches are missing from this view.

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    # NOTE(review): additional TLMigrateInstance keyword arguments
    # (e.g. cleanup/failover flags) appear elided from this call.
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        # NOTE(review): an "else:" marker is elided here.
        self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                 self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      # NOTE(review): an "else:" marker is elided here.
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    # NOTE(review): the "env = {" opener and closing "}" around the
    # following entries are elided.
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    # NOTE(review): an "else:" marker is elided here.
    env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))
    # NOTE(review): the trailing "return env" is elided here.

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.  The actual work is delegated to the
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    # Node locks are computed later (DeclareLocks); start empty and let
    # the lock manager recalculate from the instance's nodes
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # FIX: the second pair used LEVEL_NODE again, so LEVEL_NODE_RES was
    # never initialized even though DeclareLocks copies node locks into
    # it; use LEVEL_NODE_RES as in LUInstanceFailover.ExpandNames
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        # Externally mirrored instances can move to any node (or the
        # one explicitly requested)
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # internal mirror (DRBD): the roles of the two nodes swap
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      # FIX: was None; use an empty string for consistency with
      # LUInstanceFailover.BuildHooksEnv (hooks env values are strings)
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  Shuts the instance down on its current node, copies the disk data to
  the target node, then (if it was up) starts it there.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    # append the primary node later; the target node lock is already held
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only plain LVM and file-based disks can be copied this way
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                     self.cfg.GetNodeGroup(node.group))
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        # delete the disks we have created so far on the target node
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        # roll back: drop the partially-copied target disks
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  Submits one OpInstanceMigrate job per primary instance of the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
7779 class TLMigrateInstance(Tasklet):
7780 """Tasklet class for instance migration.
7783 @ivar live: whether the migration will be done live or non-live;
7784 this variable is initalized only after CheckPrereq has run
7785 @type cleanup: boolean
7786 @ivar cleanup: Wheater we cleanup from a failed migration
7787 @type iallocator: string
7788 @ivar iallocator: The iallocator used to determine target_node
7789 @type target_node: string
7790 @ivar target_node: If given, the target_node to reallocate the instance to
7791 @type failover: boolean
7792 @ivar failover: Whether operation results in failover or migration
7793 @type fallback: boolean
7794 @ivar fallback: Whether fallback to failover is allowed if migration not
7796 @type ignore_consistency: boolean
7797 @ivar ignore_consistency: Wheter we should ignore consistency between source
7799 @type shutdown_timeout: int
7800 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7801 @type ignore_ipolicy: bool
7802 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7807 _MIGRATION_POLL_INTERVAL = 1 # seconds
7808 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               allow_runtime_changes=True,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
               ignore_ipolicy=False):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters (see the class docstring for their meaning)
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later, in CheckPrereq
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes
7832 def CheckPrereq(self):
7833 """Check prerequisites.
7835 This checks that the instance is in the cluster.
7838 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7839 instance = self.cfg.GetInstanceInfo(instance_name)
7840 assert instance is not None
7841 self.instance = instance
7842 cluster = self.cfg.GetClusterInfo()
7844 if (not self.cleanup and
7845 not instance.admin_state == constants.ADMINST_UP and
7846 not self.failover and self.fallback):
7847 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7848 " switching to failover")
7849 self.failover = True
7851 if instance.disk_template not in constants.DTS_MIRRORED:
7856 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7857 " %s" % (instance.disk_template, text),
7860 if instance.disk_template in constants.DTS_EXT_MIRROR:
7861 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7863 if self.lu.op.iallocator:
7864 self._RunAllocator()
7866 # We set set self.target_node as it is required by
7868 self.target_node = self.lu.op.target_node
7870 # Check that the target node is correct in terms of instance policy
7871 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7872 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7873 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7874 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7875 ignore=self.ignore_ipolicy)
7877 # self.target_node is already populated, either directly or by the
7879 target_node = self.target_node
7880 if self.target_node == instance.primary_node:
7881 raise errors.OpPrereqError("Cannot migrate instance %s"
7882 " to its primary (%s)" %
7883 (instance.name, instance.primary_node))
7885 if len(self.lu.tasklets) == 1:
7886 # It is safe to release locks only when we're the only tasklet
7888 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7889 keep=[instance.primary_node, self.target_node])
7892 secondary_nodes = instance.secondary_nodes
7893 if not secondary_nodes:
7894 raise errors.ConfigurationError("No secondary node but using"
7895 " %s disk template" %
7896 instance.disk_template)
7897 target_node = secondary_nodes[0]
7898 if self.lu.op.iallocator or (self.lu.op.target_node and
7899 self.lu.op.target_node != target_node):
7901 text = "failed over"
7904 raise errors.OpPrereqError("Instances with disk template %s cannot"
7905 " be %s to arbitrary nodes"
7906 " (neither an iallocator nor a target"
7907 " node can be passed)" %
7908 (instance.disk_template, text),
7910 nodeinfo = self.cfg.GetNodeInfo(target_node)
7911 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7912 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7913 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7914 ignore=self.ignore_ipolicy)
7916 i_be = cluster.FillBE(instance)
7918 # check memory requirements on the secondary node
7919 if (not self.cleanup and
7920 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7921 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7922 "migrating instance %s" %
7924 i_be[constants.BE_MINMEM],
7925 instance.hypervisor)
7927 self.lu.LogInfo("Not checking memory on the secondary node as"
7928 " instance will not be started")
7930 # check if failover must be forced instead of migration
7931 if (not self.cleanup and not self.failover and
7932 i_be[constants.BE_ALWAYS_FAILOVER]):
7934 self.lu.LogInfo("Instance configured to always failover; fallback"
7936 self.failover = True
7938 raise errors.OpPrereqError("This instance has been configured to"
7939 " always failover, please allow failover",
7942 # check bridge existance
7943 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7945 if not self.cleanup:
7946 _CheckNodeNotDrained(self.lu, target_node)
7947 if not self.failover:
7948 result = self.rpc.call_instance_migratable(instance.primary_node,
7950 if result.fail_msg and self.fallback:
7951 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7953 self.failover = True
7955 result.Raise("Can't migrate, please use failover",
7956 prereq=True, ecode=errors.ECODE_STATE)
7958 assert not (self.failover and self.cleanup)
7960 if not self.failover:
7961 if self.lu.op.live is not None and self.lu.op.mode is not None:
7962 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7963 " parameters are accepted",
7965 if self.lu.op.live is not None:
7967 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7969 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7970 # reset the 'live' parameter to None so that repeated
7971 # invocations of CheckPrereq do not raise an exception
7972 self.lu.op.live = None
7973 elif self.lu.op.mode is None:
7974 # read the default value from the hypervisor
7975 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7976 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7978 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7980 # Failover is never live
7983 if not (self.failover or self.cleanup):
7984 remote_info = self.rpc.call_instance_info(instance.primary_node,
7986 instance.hypervisor)
7987 remote_info.Raise("Error checking instance on node %s" %
7988 instance.primary_node)
7989 instance_running = bool(remote_info.payload)
7990 if instance_running:
7991 self.current_mem = int(remote_info.payload["memory"])
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    Sets C{self.target_node} from the iallocator result.

    """
    # FIXME: add a self.ignore_ipolicy option
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))
  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        # payload is (done, sync-percent) per node
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)
  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    @param node: the node whose DRBD devices should be closed
        (demoted to the secondary role)

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)
  def _GoStandalone(self):
    """Disconnect from the network.

    Puts the instance's DRBD devices on all nodes into standalone mode.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)
  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    @type multimaster: boolean
    @param multimaster: whether to reconnect the disks in dual-master
        (needed during live migration) or single-master mode

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)
  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore here errors, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    No-op for externally mirrored disk templates, since there is
    nothing to revert there.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      # best-effort: warn the admin instead of masking the original failure
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))
  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    Finalizes the (failed) migration on both the target and the source
    node, logging — but not raising — errors so that disk status can
    still be reverted afterwards.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we stil have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
        instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor])
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    if self.current_mem > self.tgt_free_mem:
      # not enough memory on the target: balloon the instance down (if
      # runtime changes are allowed) before migrating
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      # give periodic progress feedback while the transfer is running
      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' and there was a successful
    # migration, unmap the device from the source node.
    if self.instance.disk_template == constants.DT_RBD:
      disks = _ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, disk)
        msg = result.fail_msg
        if msg:
          # best-effort: migration already succeeded, only warn the admin
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(instance.disks):
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, idx, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.
    self.all_nodes = [self.source_node, self.target_node]
    # Map of node name -> secondary IP for all nodes involved in the move
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
      feedback_fn("Migrating instance %s" % self.instance.name)
        return self._ExecCleanup()
        return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
  for child in device.children:
    _CreateBlockDev(lu, node, instance, child, force_create,
  if not force_create:
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8528 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8529 """Create a single block device on a given node.
8531 This will not recurse over children of the device, so they must be
8534 @param lu: the lu on whose behalf we execute
8535 @param node: the node on which to create the device
8536 @type instance: L{objects.Instance}
8537 @param instance: the instance which owns the device
8538 @type device: L{objects.Disk}
8539 @param device: the device to create
8540 @param info: the extra 'metadata' we should attach to the device
8541 (this will be represented as a LVM tag)
8542 @type force_open: boolean
8543 @param force_open: this parameter will be passes to the
8544 L{backend.BlockdevCreate} function where it specifies
8545 whether we run on primary or not, and it affects both
8546 the child assembly and the device own Open() execution
8549 lu.cfg.SetDiskID(device, node)
8550 result = lu.rpc.call_blockdev_create(node, device, device.size,
8551 instance.name, force_open, info)
8552 result.Raise("Can't create block device %s on"
8553 " node %s for instance %s" % (device, node, instance.name))
8554 if device.physical_id is None:
8555 device.physical_id = result.payload
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
def _ComputeLDParams(disk_template, disk_params):
  """Computes Logical Disk parameters from Disk Template parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters; dict(template_name -> parameters
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order.

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
  dt_params = disk_params[disk_template]
  if disk_template == constants.DT_DRBD8:
    # Parameters of the DRBD device itself (filled over the LD defaults)
      constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
      constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
      constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
      constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
      constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
      constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
      constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
      constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
      constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
      constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
      constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
      constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
    result.append(drbd_params)
    # Parameters of the LV holding the DRBD data
      constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
    result.append(data_params)
    # Parameters of the LV holding the DRBD metadata
      constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
    result.append(meta_params)
  elif (disk_template == constants.DT_FILE or
        disk_template == constants.DT_SHARED_FILE):
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
  elif disk_template == constants.DT_PLAIN:
      constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
    result.append(params)
  elif disk_template == constants.DT_BLOCK:
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
  elif disk_template == constants.DT_RBD:
      constants.LDP_POOL: dt_params[constants.RBD_POOL]
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
    result.append(params)
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  # The two backing LVs: data (instance-sized) and metadata (fixed size)
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params=drbd_params)
#: Map of disk template -> prefix inserted into generated disk names
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
#: Map of disk template -> logical disk (LD) device type it is built on
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
    secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
    feedback_fn, disk_params,
    _req_file_storage=opcodes.RequireFileStorage,
    _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements
  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  ld_params = _ComputeLDParams(template_name, disk_params)
  if template_name == constants.DT_DISKLESS:
  elif template_name == constants.DT_DRBD8:
    drbd_params, data_params, meta_params = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)
    # Each disk gets a "<prefix>_data" and a "<prefix>_meta" LV name
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1],
                                      drbd_params, data_params, meta_params)
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
      raise errors.ProgrammerError("Wrong template configuration")
    if template_name == constants.DT_FILE:
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()
    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])
    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
    # Each branch below defines how a disk's logical_id is built
    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
    elif template_name == constants.DT_BLOCK:
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params=ld_params[0]))
8791 def _GetInstanceInfoText(instance):
8792 """Compute that text that should be added to the disk's metadata.
8795 return "originstname+%s" % instance.name
8798 def _CalcEta(time_taken, written, total_size):
8799 """Calculates the ETA based on size written and total size.
8801 @param time_taken: The time taken so far
8802 @param written: amount written so far
8803 @param total_size: The total size of data to be written
8804 @return: The remaining time in seconds
8807 avg_time = time_taken / float(written)
8808 return (total_size - written) * avg_time
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  node = instance.primary_node
  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)
  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  for idx, success in enumerate(result.payload):
      logging.warn("pause-sync of instance %s for disks %d failed",
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)
      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
      start_time = time.time()
      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        # Report progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
    logging.info("Resume sync of instance %s disks", instance.name)
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
    for idx, success in enumerate(result.payload):
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  # File-based templates need their storage directory created up-front
  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))
  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    for node in all_nodes:
      # Creation is only forced (and devices opened) on the primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)
  for (idx, device) in enumerate(instance.disks):
      edata = [(target_node, device)]
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, msg)
    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)
  # File-based instances also have their storage directory removed
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
      # NOTE(review): the second lookup uses the literal key
      # constants.IDISK_VG instead of the disk's own VG name
      # (disk[constants.IDISK_VG]), so per-VG sizes never accumulate
      # across disks -- confirm and fix upstream.
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
  return req_size_dict[disk_template]
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    constants.DT_RBD: 0,
  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
  return req_size_dict[disk_template]
9034 def _FilterVmNodes(lu, nodenames):
9035 """Filters out non-vm_capable nodes from a list.
9037 @type lu: L{LogicalUnit}
9038 @param lu: the logical unit for which we check
9039 @type nodenames: list
9040 @param nodenames: the list of nodes on which we should check
9042 @return: the list of vm-capable nodes
9045 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9046 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstract the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  cluster = lu.cfg.GetClusterInfo()
  # Fill the opcode-level parameters on top of the cluster defaults
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)
    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    if self.op.disk_template in constants.DTS_MUST_ADOPT:
      raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                 " but no 'adopt' parameter given" %
                                 self.op.disk_template,
    self.adopt_disks = has_adopt
    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      self.check_ip = None
    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)
    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()
    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
        self.op.snode = None
    self._cds = _GetClusterDomainSecret()
    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True
      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")
    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
      self.source_x509_ca = cert
      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}
    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path
      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name
      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     hypervisor=self.op.hypervisor,
    ial.Run(self.op.iallocator)
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    # The first returned node becomes the primary, the optional second
    # one the secondary (mirror) node
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
      "ADD_MODE": self.op.mode,
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images
    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      hypervisor_name=self.op.hypervisor,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Hooks run on the master plus all nodes of the new instance
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT
    src_node = self.op.src_node
    src_path = self.op.src_path
    # Without an explicit source node, search all locked nodes for the export
    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      for node in exp_list:
        if exp_list[node].fail_msg:
        if src_path in exp_list[node].payload:
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)
    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)
    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
9455 def _ReadExportParams(self, einfo):
9456 """Use export parameters as defaults.
9458 In case the opcode doesn't specify (as in override) some instance
9459 parameters, then try to use them from the export information, if
9463 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9465 if self.op.disk_template is None:
9466 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9467 self.op.disk_template = einfo.get(constants.INISECT_INS,
9469 if self.op.disk_template not in constants.DISK_TEMPLATES:
9470 raise errors.OpPrereqError("Disk template specified in configuration"
9471 " file is not one of the allowed values:"
9472 " %s" % " ".join(constants.DISK_TEMPLATES))
9474 raise errors.OpPrereqError("No disk template specified and the export"
9475 " is missing the disk_template information",
9478 if not self.op.disks:
9480 # TODO: import the disk iv_name too
9481 for idx in range(constants.MAX_DISKS):
9482 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9483 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9484 disks.append({constants.IDISK_SIZE: disk_sz})
9485 self.op.disks = disks
9486 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9487 raise errors.OpPrereqError("No disk info specified and the export"
9488 " is missing the disk information",
9491 if not self.op.nics:
9493 for idx in range(constants.MAX_NICS):
9494 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9496 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9497 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9504 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9505 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9507 if (self.op.hypervisor is None and
9508 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9509 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9511 if einfo.has_section(constants.INISECT_HYP):
9512 # use the export parameters but do not override the ones
9513 # specified by the user
9514 for name, value in einfo.items(constants.INISECT_HYP):
9515 if name not in self.op.hvparams:
9516 self.op.hvparams[name] = value
9518 if einfo.has_section(constants.INISECT_BEP):
9519 # use the parameters, without overriding
9520 for name, value in einfo.items(constants.INISECT_BEP):
9521 if name not in self.op.beparams:
9522 self.op.beparams[name] = value
9523 # Compatibility for the old "memory" be param
9524 if name == constants.BE_MEMORY:
9525 if constants.BE_MAXMEM not in self.op.beparams:
9526 self.op.beparams[constants.BE_MAXMEM] = value
9527 if constants.BE_MINMEM not in self.op.beparams:
9528 self.op.beparams[constants.BE_MINMEM] = value
9530 # try to read the parameters old style, from the main section
9531 for name in constants.BES_PARAMETERS:
9532 if (name not in self.op.beparams and
9533 einfo.has_option(constants.INISECT_INS, name)):
9534 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9536 if einfo.has_section(constants.INISECT_OSP):
9537 # use the parameters, without overriding
9538 for name, value in einfo.items(constants.INISECT_OSP):
9539 if name not in self.op.osparams:
9540 self.op.osparams[name] = value
9542 def _RevertToDefaults(self, cluster):
9543 """Revert the instance parameters to the default values.
9547 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9548 for name in self.op.hvparams.keys():
9549 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9550 del self.op.hvparams[name]
9552 be_defs = cluster.SimpleFillBE({})
9553 for name in self.op.beparams.keys():
9554 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9555 del self.op.beparams[name]
9557 nic_defs = cluster.SimpleFillNIC({})
9558 for nic in self.op.nics:
9559 for name in constants.NICS_PARAMETERS:
9560 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9563 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9564 for name in self.op.osparams.keys():
9565 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9566 del self.op.osparams[name]
9568 def _CalculateFileStorageDir(self):
9569 """Calculate final instance file storage dir.
9572 # file storage dir calculation/check
9573 self.instance_file_storage_dir = None
9574 if self.op.disk_template in constants.DTS_FILEBASED:
9575 # build the full file storage dir path
9578 if self.op.disk_template == constants.DT_SHARED_FILE:
9579 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9581 get_fsd_fn = self.cfg.GetFileStorageDir
9583 cfg_storagedir = get_fsd_fn()
9584 if not cfg_storagedir:
9585 raise errors.OpPrereqError("Cluster file storage dir not defined")
9586 joinargs.append(cfg_storagedir)
9588 if self.op.file_storage_dir is not None:
9589 joinargs.append(self.op.file_storage_dir)
9591 joinargs.append(self.op.instance_name)
9593 # pylint: disable=W0142
9594 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9596 def CheckPrereq(self): # pylint: disable=R0914
9597 """Check prerequisites.
9600 self._CalculateFileStorageDir()
9602 if self.op.mode == constants.INSTANCE_IMPORT:
9603 export_info = self._ReadExportInfo()
9604 self._ReadExportParams(export_info)
9606 if (not self.cfg.GetVGName() and
9607 self.op.disk_template not in constants.DTS_NOT_LVM):
9608 raise errors.OpPrereqError("Cluster does not support lvm-based"
9609 " instances", errors.ECODE_STATE)
9611 if (self.op.hypervisor is None or
9612 self.op.hypervisor == constants.VALUE_AUTO):
9613 self.op.hypervisor = self.cfg.GetHypervisorType()
9615 cluster = self.cfg.GetClusterInfo()
9616 enabled_hvs = cluster.enabled_hypervisors
9617 if self.op.hypervisor not in enabled_hvs:
9618 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9619 " cluster (%s)" % (self.op.hypervisor,
9620 ",".join(enabled_hvs)),
9623 # Check tag validity
9624 for tag in self.op.tags:
9625 objects.TaggableObject.ValidateTag(tag)
9627 # check hypervisor parameter syntax (locally)
9628 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9629 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9631 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9632 hv_type.CheckParameterSyntax(filled_hvp)
9633 self.hv_full = filled_hvp
9634 # check that we don't specify global parameters on an instance
9635 _CheckGlobalHvParams(self.op.hvparams)
9637 # fill and remember the beparams dict
9638 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9639 for param, value in self.op.beparams.iteritems():
9640 if value == constants.VALUE_AUTO:
9641 self.op.beparams[param] = default_beparams[param]
9642 objects.UpgradeBeParams(self.op.beparams)
9643 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9644 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9646 # build os parameters
9647 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9649 # now that hvp/bep are in final format, let's reset to defaults,
9651 if self.op.identify_defaults:
9652 self._RevertToDefaults(cluster)
9656 for idx, nic in enumerate(self.op.nics):
9657 nic_mode_req = nic.get(constants.INIC_MODE, None)
9658 nic_mode = nic_mode_req
9659 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9660 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9662 # in routed mode, for the first nic, the default ip is 'auto'
9663 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9664 default_ip_mode = constants.VALUE_AUTO
9666 default_ip_mode = constants.VALUE_NONE
9668 # ip validity checks
9669 ip = nic.get(constants.INIC_IP, default_ip_mode)
9670 if ip is None or ip.lower() == constants.VALUE_NONE:
9672 elif ip.lower() == constants.VALUE_AUTO:
9673 if not self.op.name_check:
9674 raise errors.OpPrereqError("IP address set to auto but name checks"
9675 " have been skipped",
9677 nic_ip = self.hostname1.ip
9679 if not netutils.IPAddress.IsValid(ip):
9680 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9684 # TODO: check the ip address for uniqueness
9685 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9686 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9689 # MAC address verification
9690 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9691 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9692 mac = utils.NormalizeAndValidateMac(mac)
9695 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9696 except errors.ReservationError:
9697 raise errors.OpPrereqError("MAC address %s already in use"
9698 " in cluster" % mac,
9699 errors.ECODE_NOTUNIQUE)
9701 # Build nic parameters
9702 link = nic.get(constants.INIC_LINK, None)
9703 if link == constants.VALUE_AUTO:
9704 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9707 nicparams[constants.NIC_MODE] = nic_mode
9709 nicparams[constants.NIC_LINK] = link
9711 check_params = cluster.SimpleFillNIC(nicparams)
9712 objects.NIC.CheckParameterSyntax(check_params)
9713 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9715 # disk checks/pre-build
9716 default_vg = self.cfg.GetVGName()
9718 for disk in self.op.disks:
9719 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9720 if mode not in constants.DISK_ACCESS_SET:
9721 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9722 mode, errors.ECODE_INVAL)
9723 size = disk.get(constants.IDISK_SIZE, None)
9725 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9728 except (TypeError, ValueError):
9729 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9732 data_vg = disk.get(constants.IDISK_VG, default_vg)
9734 constants.IDISK_SIZE: size,
9735 constants.IDISK_MODE: mode,
9736 constants.IDISK_VG: data_vg,
9738 if constants.IDISK_METAVG in disk:
9739 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9740 if constants.IDISK_ADOPT in disk:
9741 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9742 self.disks.append(new_disk)
9744 if self.op.mode == constants.INSTANCE_IMPORT:
9746 for idx in range(len(self.disks)):
9747 option = "disk%d_dump" % idx
9748 if export_info.has_option(constants.INISECT_INS, option):
9749 # FIXME: are the old os-es, disk sizes, etc. useful?
9750 export_name = export_info.get(constants.INISECT_INS, option)
9751 image = utils.PathJoin(self.op.src_path, export_name)
9752 disk_images.append(image)
9754 disk_images.append(False)
9756 self.src_images = disk_images
9758 old_name = export_info.get(constants.INISECT_INS, "name")
9759 if self.op.instance_name == old_name:
9760 for idx, nic in enumerate(self.nics):
9761 if nic.mac == constants.VALUE_AUTO:
9762 nic_mac_ini = "nic%d_mac" % idx
9763 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9765 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9767 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9768 if self.op.ip_check:
9769 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9770 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9771 (self.check_ip, self.op.instance_name),
9772 errors.ECODE_NOTUNIQUE)
9774 #### mac address generation
9775 # By generating here the mac address both the allocator and the hooks get
9776 # the real final mac address rather than the 'auto' or 'generate' value.
9777 # There is a race condition between the generation and the instance object
9778 # creation, which means that we know the mac is valid now, but we're not
9779 # sure it will be when we actually add the instance. If things go bad
9780 # adding the instance will abort because of a duplicate mac, and the
9781 # creation job will fail.
9782 for nic in self.nics:
9783 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9784 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9788 if self.op.iallocator is not None:
9789 self._RunAllocator()
9791 # Release all unneeded node locks
9792 _ReleaseLocks(self, locking.LEVEL_NODE,
9793 keep=filter(None, [self.op.pnode, self.op.snode,
9795 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9796 keep=filter(None, [self.op.pnode, self.op.snode,
9799 #### node related checks
9801 # check primary node
9802 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9803 assert self.pnode is not None, \
9804 "Cannot retrieve locked node %s" % self.op.pnode
9806 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9807 pnode.name, errors.ECODE_STATE)
9809 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9810 pnode.name, errors.ECODE_STATE)
9811 if not pnode.vm_capable:
9812 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9813 " '%s'" % pnode.name, errors.ECODE_STATE)
9815 self.secondaries = []
9817 # mirror node verification
9818 if self.op.disk_template in constants.DTS_INT_MIRROR:
9819 if self.op.snode == pnode.name:
9820 raise errors.OpPrereqError("The secondary node cannot be the"
9821 " primary node", errors.ECODE_INVAL)
9822 _CheckNodeOnline(self, self.op.snode)
9823 _CheckNodeNotDrained(self, self.op.snode)
9824 _CheckNodeVmCapable(self, self.op.snode)
9825 self.secondaries.append(self.op.snode)
9827 snode = self.cfg.GetNodeInfo(self.op.snode)
9828 if pnode.group != snode.group:
9829 self.LogWarning("The primary and secondary nodes are in two"
9830 " different node groups; the disk parameters"
9831 " from the first disk's node group will be"
9834 nodenames = [pnode.name] + self.secondaries
9836 # Verify instance specs
9838 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9839 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9840 constants.ISPEC_DISK_COUNT: len(self.disks),
9841 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9842 constants.ISPEC_NIC_COUNT: len(self.nics),
9845 group_info = self.cfg.GetNodeGroup(pnode.group)
9846 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9847 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9848 if not self.op.ignore_ipolicy and res:
9849 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9850 " policy: %s") % (pnode.group,
9851 utils.CommaJoin(res)),
9854 # disk parameters (not customizable at instance or node level)
9855 # just use the primary node parameters, ignoring the secondary.
9856 self.diskparams = group_info.diskparams
9858 if not self.adopt_disks:
9859 if self.op.disk_template == constants.DT_RBD:
9860 # _CheckRADOSFreeSpace() is just a placeholder.
9861 # Any function that checks prerequisites can be placed here.
9862 # Check if there is enough space on the RADOS cluster.
9863 _CheckRADOSFreeSpace()
9865 # Check lv size requirements, if not adopting
9866 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9867 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9869 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9870 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9871 disk[constants.IDISK_ADOPT])
9872 for disk in self.disks])
9873 if len(all_lvs) != len(self.disks):
9874 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9876 for lv_name in all_lvs:
9878 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9879 # to ReserveLV uses the same syntax
9880 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9881 except errors.ReservationError:
9882 raise errors.OpPrereqError("LV named %s used by another instance" %
9883 lv_name, errors.ECODE_NOTUNIQUE)
9885 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9886 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9888 node_lvs = self.rpc.call_lv_list([pnode.name],
9889 vg_names.payload.keys())[pnode.name]
9890 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9891 node_lvs = node_lvs.payload
9893 delta = all_lvs.difference(node_lvs.keys())
9895 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9896 utils.CommaJoin(delta),
9898 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9900 raise errors.OpPrereqError("Online logical volumes found, cannot"
9901 " adopt: %s" % utils.CommaJoin(online_lvs),
9903 # update the size of disk based on what is found
9904 for dsk in self.disks:
9905 dsk[constants.IDISK_SIZE] = \
9906 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9907 dsk[constants.IDISK_ADOPT])][0]))
9909 elif self.op.disk_template == constants.DT_BLOCK:
9910 # Normalize and de-duplicate device paths
9911 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9912 for disk in self.disks])
9913 if len(all_disks) != len(self.disks):
9914 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9916 baddisks = [d for d in all_disks
9917 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9919 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9920 " cannot be adopted" %
9921 (", ".join(baddisks),
9922 constants.ADOPTABLE_BLOCKDEV_ROOT),
9925 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9926 list(all_disks))[pnode.name]
9927 node_disks.Raise("Cannot get block device information from node %s" %
9929 node_disks = node_disks.payload
9930 delta = all_disks.difference(node_disks.keys())
9932 raise errors.OpPrereqError("Missing block device(s): %s" %
9933 utils.CommaJoin(delta),
9935 for dsk in self.disks:
9936 dsk[constants.IDISK_SIZE] = \
9937 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9939 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9941 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9942 # check OS parameters (remotely)
9943 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9945 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9947 # memory check on primary node
9948 #TODO(dynmem): use MINMEM for checking
9950 _CheckNodeFreeMemory(self, self.pnode.name,
9951 "creating instance %s" % self.op.instance_name,
9952 self.be_full[constants.BE_MAXMEM],
9955 self.dry_run_result = list(nodenames)
9957 def Exec(self, feedback_fn):
9958 """Create and add the instance to the cluster.
9961 instance = self.op.instance_name
9962 pnode_name = self.pnode.name
9964 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9965 self.owned_locks(locking.LEVEL_NODE)), \
9966 "Node locks differ from node resource locks"
9968 ht_kind = self.op.hypervisor
9969 if ht_kind in constants.HTS_REQ_PORT:
9970 network_port = self.cfg.AllocatePort()
9974 disks = _GenerateDiskTemplate(self,
9975 self.op.disk_template,
9976 instance, pnode_name,
9979 self.instance_file_storage_dir,
9980 self.op.file_driver,
9985 iobj = objects.Instance(name=instance, os=self.op.os_type,
9986 primary_node=pnode_name,
9987 nics=self.nics, disks=disks,
9988 disk_template=self.op.disk_template,
9989 admin_state=constants.ADMINST_DOWN,
9990 network_port=network_port,
9991 beparams=self.op.beparams,
9992 hvparams=self.op.hvparams,
9993 hypervisor=self.op.hypervisor,
9994 osparams=self.op.osparams,
9998 for tag in self.op.tags:
10001 if self.adopt_disks:
10002 if self.op.disk_template == constants.DT_PLAIN:
10003 # rename LVs to the newly-generated names; we need to construct
10004 # 'fake' LV disks with the old data, plus the new unique_id
10005 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10007 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10008 rename_to.append(t_dsk.logical_id)
10009 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10010 self.cfg.SetDiskID(t_dsk, pnode_name)
10011 result = self.rpc.call_blockdev_rename(pnode_name,
10012 zip(tmp_disks, rename_to))
10013 result.Raise("Failed to rename adoped LVs")
10015 feedback_fn("* creating instance disks...")
10017 _CreateDisks(self, iobj)
10018 except errors.OpExecError:
10019 self.LogWarning("Device creation failed, reverting...")
10021 _RemoveDisks(self, iobj)
10023 self.cfg.ReleaseDRBDMinors(instance)
10026 feedback_fn("adding instance %s to cluster config" % instance)
10028 self.cfg.AddInstance(iobj, self.proc.GetECId())
10030 # Declare that we don't want to remove the instance lock anymore, as we've
10031 # added the instance to the config
10032 del self.remove_locks[locking.LEVEL_INSTANCE]
10034 if self.op.mode == constants.INSTANCE_IMPORT:
10035 # Release unused nodes
10036 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10038 # Release all nodes
10039 _ReleaseLocks(self, locking.LEVEL_NODE)
10042 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10043 feedback_fn("* wiping instance disks...")
10045 _WipeDisks(self, iobj)
10046 except errors.OpExecError, err:
10047 logging.exception("Wiping disks failed")
10048 self.LogWarning("Wiping instance disks failed (%s)", err)
10052 # Something is already wrong with the disks, don't do anything else
10054 elif self.op.wait_for_sync:
10055 disk_abort = not _WaitForSync(self, iobj)
10056 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10057 # make sure the disks are not degraded (still sync-ing is ok)
10058 feedback_fn("* checking mirrors status")
10059 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10064 _RemoveDisks(self, iobj)
10065 self.cfg.RemoveInstance(iobj.name)
10066 # Make sure the instance lock gets removed
10067 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10068 raise errors.OpExecError("There are some degraded disks for"
10071 # Release all node resource locks
10072 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10074 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10075 if self.op.mode == constants.INSTANCE_CREATE:
10076 if not self.op.no_install:
10077 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10078 not self.op.wait_for_sync)
10080 feedback_fn("* pausing disk sync to install instance OS")
10081 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10083 for idx, success in enumerate(result.payload):
10085 logging.warn("pause-sync of instance %s for disk %d failed",
10088 feedback_fn("* running the instance OS create scripts...")
10089 # FIXME: pass debug option from opcode to backend
10091 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10092 self.op.debug_level)
10094 feedback_fn("* resuming disk sync")
10095 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10097 for idx, success in enumerate(result.payload):
10099 logging.warn("resume-sync of instance %s for disk %d failed",
10102 os_add_result.Raise("Could not add os for instance %s"
10103 " on node %s" % (instance, pnode_name))
10105 elif self.op.mode == constants.INSTANCE_IMPORT:
10106 feedback_fn("* running the instance OS import scripts...")
10110 for idx, image in enumerate(self.src_images):
10114 # FIXME: pass debug option from opcode to backend
10115 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10116 constants.IEIO_FILE, (image, ),
10117 constants.IEIO_SCRIPT,
10118 (iobj.disks[idx], idx),
10120 transfers.append(dt)
10123 masterd.instance.TransferInstanceData(self, feedback_fn,
10124 self.op.src_node, pnode_name,
10125 self.pnode.secondary_ip,
10127 if not compat.all(import_result):
10128 self.LogWarning("Some disks for instance %s on node %s were not"
10129 " imported successfully" % (instance, pnode_name))
10131 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10132 feedback_fn("* preparing remote import...")
10133 # The source cluster will stop the instance before attempting to make a
10134 # connection. In some cases stopping an instance can take a long time,
10135 # hence the shutdown timeout is added to the connection timeout.
10136 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10137 self.op.source_shutdown_timeout)
10138 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10140 assert iobj.primary_node == self.pnode.name
10142 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10143 self.source_x509_ca,
10144 self._cds, timeouts)
10145 if not compat.all(disk_results):
10146 # TODO: Should the instance still be started, even if some disks
10147 # failed to import (valid for local imports, too)?
10148 self.LogWarning("Some disks for instance %s on node %s were not"
10149 " imported successfully" % (instance, pnode_name))
10151 # Run rename script on newly imported instance
10152 assert iobj.name == instance
10153 feedback_fn("Running rename script for %s" % instance)
10154 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10155 self.source_instance_name,
10156 self.op.debug_level)
10157 if result.fail_msg:
10158 self.LogWarning("Failed to run rename script for %s on node"
10159 " %s: %s" % (instance, pnode_name, result.fail_msg))
10162 # also checked in the prereq part
10163 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10166 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10169 iobj.admin_state = constants.ADMINST_UP
10170 self.cfg.Update(iobj, feedback_fn)
10171 logging.info("Starting instance %s on node %s", instance, pnode_name)
10172 feedback_fn("* starting instance...")
10173 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10175 result.Raise("Could not start instance")
10177 return list(iobj.all_nodes)
10180 def _CheckRADOSFreeSpace():
10181 """Compute disk size requirements inside the RADOS cluster.
10184 # For the RADOS cluster we assume there is always enough space.
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # all locks are shared: we only read cluster/instance state
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      # derive a user-visible state from the configured admin state
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict
  @return: the console information, as a dictionary

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # argument validation is shared with the tasklet
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)
10379 class TLReplaceDisks(Tasklet):
10380 """Replaces disks for an instance.
10382 Note: Locking is not within the scope of this class.
10385 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10386 disks, delay_iallocator, early_release, ignore_ipolicy):
10387 """Initializes this class.
10390 Tasklet.__init__(self, lu)
10393 self.instance_name = instance_name
10395 self.iallocator_name = iallocator_name
10396 self.remote_node = remote_node
10398 self.delay_iallocator = delay_iallocator
10399 self.early_release = early_release
10400 self.ignore_ipolicy = ignore_ipolicy
10403 self.instance = None
10404 self.new_node = None
10405 self.target_node = None
10406 self.other_node = None
10407 self.remote_node_info = None
10408 self.node_secondary_ip = None
10411 def CheckArguments(mode, remote_node, iallocator):
10412 """Helper function for users of this class.
10415 # check for valid parameter combination
10416 if mode == constants.REPLACE_DISK_CHG:
10417 if remote_node is None and iallocator is None:
10418 raise errors.OpPrereqError("When changing the secondary either an"
10419 " iallocator script must be used or the"
10420 " new node given", errors.ECODE_INVAL)
10422 if remote_node is not None and iallocator is not None:
10423 raise errors.OpPrereqError("Give either the iallocator or the new"
10424 " secondary, not both", errors.ECODE_INVAL)
10426 elif remote_node is not None or iallocator is not None:
10427 # Not replacing the secondary
10428 raise errors.OpPrereqError("The iallocator and new node options can"
10429 " only be used when changing the"
10430 " secondary node", errors.ECODE_INVAL)
10433 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10434 """Compute a new secondary node using an IAllocator.
10437 ial = IAllocator(lu.cfg, lu.rpc,
10438 mode=constants.IALLOCATOR_MODE_RELOC,
10439 name=instance_name,
10440 relocate_from=list(relocate_from))
10442 ial.Run(iallocator_name)
10444 if not ial.success:
10445 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10446 " %s" % (iallocator_name, ial.info),
10447 errors.ECODE_NORES)
10449 if len(ial.result) != ial.required_nodes:
10450 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10451 " of nodes (%s), required %s" %
10453 len(ial.result), ial.required_nodes),
10454 errors.ECODE_FAULT)
10456 remote_node_name = ial.result[0]
10458 lu.LogInfo("Selected new secondary for instance '%s': %s",
10459 instance_name, remote_node_name)
10461 return remote_node_name
10463 def _FindFaultyDisks(self, node_name):
10464 """Wrapper for L{_FindFaultyInstanceDisks}.
10467 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10470 def _CheckDisksActivated(self, instance):
10471 """Checks if the instance disks are activated.
10473 @param instance: The instance to check disks
10474 @return: True if they are activated, False otherwise
10477 nodes = instance.all_nodes
10479 for idx, dev in enumerate(instance.disks):
10481 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10482 self.cfg.SetDiskID(dev, node)
10484 result = self.rpc.call_blockdev_find(node, dev)
10488 elif result.fail_msg or not result.payload:
10493 def CheckPrereq(self):
10494 """Check prerequisites.
10496 This checks that the instance is in the cluster.
10499 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10500 assert instance is not None, \
10501 "Cannot retrieve locked instance %s" % self.instance_name
10503 if instance.disk_template != constants.DT_DRBD8:
10504 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10505 " instances", errors.ECODE_INVAL)
10507 if len(instance.secondary_nodes) != 1:
10508 raise errors.OpPrereqError("The instance has a strange layout,"
10509 " expected one secondary but found %d" %
10510 len(instance.secondary_nodes),
10511 errors.ECODE_FAULT)
10513 if not self.delay_iallocator:
10514 self._CheckPrereq2()
10516 def _CheckPrereq2(self):
10517 """Check prerequisites, second part.
10519 This function should always be part of CheckPrereq. It was separated and is
10520 now called from Exec because during node evacuation iallocator was only
10521 called with an unmodified cluster model, not taking planned changes into
10525 instance = self.instance
10526 secondary_node = instance.secondary_nodes[0]
10528 if self.iallocator_name is None:
10529 remote_node = self.remote_node
10531 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10532 instance.name, instance.secondary_nodes)
10534 if remote_node is None:
10535 self.remote_node_info = None
10537 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10538 "Remote node '%s' is not locked" % remote_node
10540 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10541 assert self.remote_node_info is not None, \
10542 "Cannot retrieve locked node %s" % remote_node
10544 if remote_node == self.instance.primary_node:
10545 raise errors.OpPrereqError("The specified node is the primary node of"
10546 " the instance", errors.ECODE_INVAL)
10548 if remote_node == secondary_node:
10549 raise errors.OpPrereqError("The specified node is already the"
10550 " secondary node of the instance",
10551 errors.ECODE_INVAL)
10553 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10554 constants.REPLACE_DISK_CHG):
10555 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10556 errors.ECODE_INVAL)
10558 if self.mode == constants.REPLACE_DISK_AUTO:
10559 if not self._CheckDisksActivated(instance):
10560 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10561 " first" % self.instance_name,
10562 errors.ECODE_STATE)
10563 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10564 faulty_secondary = self._FindFaultyDisks(secondary_node)
10566 if faulty_primary and faulty_secondary:
10567 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10568 " one node and can not be repaired"
10569 " automatically" % self.instance_name,
10570 errors.ECODE_STATE)
10573 self.disks = faulty_primary
10574 self.target_node = instance.primary_node
10575 self.other_node = secondary_node
10576 check_nodes = [self.target_node, self.other_node]
10577 elif faulty_secondary:
10578 self.disks = faulty_secondary
10579 self.target_node = secondary_node
10580 self.other_node = instance.primary_node
10581 check_nodes = [self.target_node, self.other_node]
10587 # Non-automatic modes
10588 if self.mode == constants.REPLACE_DISK_PRI:
10589 self.target_node = instance.primary_node
10590 self.other_node = secondary_node
10591 check_nodes = [self.target_node, self.other_node]
10593 elif self.mode == constants.REPLACE_DISK_SEC:
10594 self.target_node = secondary_node
10595 self.other_node = instance.primary_node
10596 check_nodes = [self.target_node, self.other_node]
10598 elif self.mode == constants.REPLACE_DISK_CHG:
10599 self.new_node = remote_node
10600 self.other_node = instance.primary_node
10601 self.target_node = secondary_node
10602 check_nodes = [self.new_node, self.other_node]
10604 _CheckNodeNotDrained(self.lu, remote_node)
10605 _CheckNodeVmCapable(self.lu, remote_node)
10607 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10608 assert old_node_info is not None
10609 if old_node_info.offline and not self.early_release:
10610 # doesn't make sense to delay the release
10611 self.early_release = True
10612 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10613 " early-release mode", secondary_node)
10616 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10619 # If not specified all disks should be replaced
10621 self.disks = range(len(self.instance.disks))
10623 # TODO: This is ugly, but right now we can't distinguish between internal
10624 # submitted opcode and external one. We should fix that.
10625 if self.remote_node_info:
10626 # We change the node, lets verify it still meets instance policy
10627 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10628 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10630 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10631 ignore=self.ignore_ipolicy)
10633 # TODO: compute disk parameters
10634 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10635 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10636 if primary_node_info.group != secondary_node_info.group:
10637 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10638 " different node groups; the disk parameters of the"
10639 " primary node's group will be applied.")
10641 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10643 for node in check_nodes:
10644 _CheckNodeOnline(self.lu, node)
10646 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10649 if node_name is not None)
10651 # Release unneeded node and node resource locks
10652 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10653 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10655 # Release any owned node group
10656 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10657 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10659 # Check whether disks are valid
10660 for disk_idx in self.disks:
10661 instance.FindDisk(disk_idx)
10663 # Get secondary node IP addresses
10664 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10665 in self.cfg.GetMultiNodeInfo(touched_nodes))
10667 def Exec(self, feedback_fn):
10668 """Execute disk replacement.
10670 This dispatches the disk replacement to the appropriate handler.
10673 if self.delay_iallocator:
10674 self._CheckPrereq2()
10677 # Verify owned locks before starting operation
10678 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10679 assert set(owned_nodes) == set(self.node_secondary_ip), \
10680 ("Incorrect node locks, owning %s, expected %s" %
10681 (owned_nodes, self.node_secondary_ip.keys()))
10682 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10683 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10685 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10686 assert list(owned_instances) == [self.instance_name], \
10687 "Instance '%s' not locked" % self.instance_name
10689 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10690 "Should not own any node group lock at this point"
10693 feedback_fn("No disks need replacement")
10696 feedback_fn("Replacing disk(s) %s for %s" %
10697 (utils.CommaJoin(self.disks), self.instance.name))
10699 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10701 # Activate the instance disks if we're replacing them on a down instance
10703 _StartInstanceDisks(self.lu, self.instance, True)
10706 # Should we replace the secondary node?
10707 if self.new_node is not None:
10708 fn = self._ExecDrbd8Secondary
10710 fn = self._ExecDrbd8DiskOnly
10712 result = fn(feedback_fn)
10714 # Deactivate the instance disks if we're replacing them on a
10717 _SafeShutdownInstanceDisks(self.lu, self.instance)
10719 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10722 # Verify owned locks
10723 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10724 nodes = frozenset(self.node_secondary_ip)
10725 assert ((self.early_release and not owned_nodes) or
10726 (not self.early_release and not (set(owned_nodes) - nodes))), \
10727 ("Not owning the correct locks, early_release=%s, owned=%r,"
10728 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10732 def _CheckVolumeGroup(self, nodes):
10733 self.lu.LogInfo("Checking volume groups")
10735 vgname = self.cfg.GetVGName()
10737 # Make sure volume group exists on all involved nodes
10738 results = self.rpc.call_vg_list(nodes)
10740 raise errors.OpExecError("Can't list volume groups on the nodes")
10743 res = results[node]
10744 res.Raise("Error checking node %s" % node)
10745 if vgname not in res.payload:
10746 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10749 def _CheckDisksExistence(self, nodes):
10750 # Check disk existence
10751 for idx, dev in enumerate(self.instance.disks):
10752 if idx not in self.disks:
10756 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10757 self.cfg.SetDiskID(dev, node)
10759 result = self.rpc.call_blockdev_find(node, dev)
10761 msg = result.fail_msg
10762 if msg or not result.payload:
10764 msg = "disk not found"
10765 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10768 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10769 for idx, dev in enumerate(self.instance.disks):
10770 if idx not in self.disks:
10773 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10776 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10778 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10779 " replace disks for instance %s" %
10780 (node_name, self.instance.name))
10782 def _CreateNewStorage(self, node_name):
10783 """Create new storage on the primary or secondary node.
10785 This is only used for same-node replaces, not for changing the
10786 secondary node, hence we don't want to modify the existing disk.
10791 for idx, dev in enumerate(self.instance.disks):
10792 if idx not in self.disks:
10795 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10797 self.cfg.SetDiskID(dev, node_name)
10799 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10800 names = _GenerateUniqueNames(self.lu, lv_names)
10802 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10804 vg_data = dev.children[0].logical_id[0]
10805 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10806 logical_id=(vg_data, names[0]), params=data_p)
10807 vg_meta = dev.children[1].logical_id[0]
10808 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10809 logical_id=(vg_meta, names[1]), params=meta_p)
10811 new_lvs = [lv_data, lv_meta]
10812 old_lvs = [child.Copy() for child in dev.children]
10813 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10815 # we pass force_create=True to force the LVM creation
10816 for new_lv in new_lvs:
10817 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10818 _GetInstanceInfoText(self.instance), False)
10822 def _CheckDevices(self, node_name, iv_names):
10823 for name, (dev, _, _) in iv_names.iteritems():
10824 self.cfg.SetDiskID(dev, node_name)
10826 result = self.rpc.call_blockdev_find(node_name, dev)
10828 msg = result.fail_msg
10829 if msg or not result.payload:
10831 msg = "disk not found"
10832 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10835 if result.payload.is_degraded:
10836 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10838 def _RemoveOldStorage(self, node_name, iv_names):
10839 for name, (_, old_lvs, _) in iv_names.iteritems():
10840 self.lu.LogInfo("Remove logical volumes for %s" % name)
10843 self.cfg.SetDiskID(lv, node_name)
10845 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10847 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10848 hint="remove unused LVs manually")
10850 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10851 """Replace a disk on the primary or secondary for DRBD 8.
10853 The algorithm for replace is quite complicated:
10855 1. for each disk to be replaced:
10857 1. create new LVs on the target node with unique names
10858 1. detach old LVs from the drbd device
10859 1. rename old LVs to name_replaced.<time_t>
10860 1. rename new LVs to old LVs
10861 1. attach the new LVs (with the old names now) to the drbd device
10863 1. wait for sync across all devices
10865 1. for each modified disk:
10867 1. remove old LVs (which have the name name_replaces.<time_t>)
10869 Failures are not very well handled.
10874 # Step: check device activation
10875 self.lu.LogStep(1, steps_total, "Check device existence")
10876 self._CheckDisksExistence([self.other_node, self.target_node])
10877 self._CheckVolumeGroup([self.target_node, self.other_node])
10879 # Step: check other node consistency
10880 self.lu.LogStep(2, steps_total, "Check peer consistency")
10881 self._CheckDisksConsistency(self.other_node,
10882 self.other_node == self.instance.primary_node,
10885 # Step: create new storage
10886 self.lu.LogStep(3, steps_total, "Allocate new storage")
10887 iv_names = self._CreateNewStorage(self.target_node)
10889 # Step: for each lv, detach+rename*2+attach
10890 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10891 for dev, old_lvs, new_lvs in iv_names.itervalues():
10892 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10894 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10896 result.Raise("Can't detach drbd from local storage on node"
10897 " %s for device %s" % (self.target_node, dev.iv_name))
10899 #cfg.Update(instance)
10901 # ok, we created the new LVs, so now we know we have the needed
10902 # storage; as such, we proceed on the target node to rename
10903 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10904 # using the assumption that logical_id == physical_id (which in
10905 # turn is the unique_id on that node)
10907 # FIXME(iustin): use a better name for the replaced LVs
10908 temp_suffix = int(time.time())
10909 ren_fn = lambda d, suff: (d.physical_id[0],
10910 d.physical_id[1] + "_replaced-%s" % suff)
10912 # Build the rename list based on what LVs exist on the node
10913 rename_old_to_new = []
10914 for to_ren in old_lvs:
10915 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10916 if not result.fail_msg and result.payload:
10918 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10920 self.lu.LogInfo("Renaming the old LVs on the target node")
10921 result = self.rpc.call_blockdev_rename(self.target_node,
10923 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10925 # Now we rename the new LVs to the old LVs
10926 self.lu.LogInfo("Renaming the new LVs on the target node")
10927 rename_new_to_old = [(new, old.physical_id)
10928 for old, new in zip(old_lvs, new_lvs)]
10929 result = self.rpc.call_blockdev_rename(self.target_node,
10931 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10933 # Intermediate steps of in memory modifications
10934 for old, new in zip(old_lvs, new_lvs):
10935 new.logical_id = old.logical_id
10936 self.cfg.SetDiskID(new, self.target_node)
10938 # We need to modify old_lvs so that removal later removes the
10939 # right LVs, not the newly added ones; note that old_lvs is a
10941 for disk in old_lvs:
10942 disk.logical_id = ren_fn(disk, temp_suffix)
10943 self.cfg.SetDiskID(disk, self.target_node)
10945 # Now that the new lvs have the old name, we can add them to the device
10946 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10947 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10949 msg = result.fail_msg
10951 for new_lv in new_lvs:
10952 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10955 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10956 hint=("cleanup manually the unused logical"
10958 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10960 cstep = itertools.count(5)
10962 if self.early_release:
10963 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10964 self._RemoveOldStorage(self.target_node, iv_names)
10965 # TODO: Check if releasing locks early still makes sense
10966 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10968 # Release all resource locks except those used by the instance
10969 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10970 keep=self.node_secondary_ip.keys())
10972 # Release all node locks while waiting for sync
10973 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10975 # TODO: Can the instance lock be downgraded here? Take the optional disk
10976 # shutdown in the caller into consideration.
10979 # This can fail as the old devices are degraded and _WaitForSync
10980 # does a combined result over all disks, so we don't check its return value
10981 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10982 _WaitForSync(self.lu, self.instance)
10984 # Check all devices manually
10985 self._CheckDevices(self.instance.primary_node, iv_names)
10987 # Step: remove old storage
10988 if not self.early_release:
10989 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10990 self._RemoveOldStorage(self.target_node, iv_names)
10992 def _ExecDrbd8Secondary(self, feedback_fn):
10993 """Replace the secondary node for DRBD 8.
10995 The algorithm for replace is quite complicated:
10996 - for all disks of the instance:
10997 - create new LVs on the new node with same names
10998 - shutdown the drbd device on the old secondary
10999 - disconnect the drbd network on the primary
11000 - create the drbd device on the new secondary
11001 - network attach the drbd on the primary, using an artifice:
11002 the drbd code for Attach() will connect to the network if it
11003 finds a device which is connected to the good local disks but
11004 not network enabled
11005 - wait for sync across all devices
11006 - remove all disks from the old secondary
11008 Failures are not very well handled.
11013 pnode = self.instance.primary_node
11015 # Step: check device activation
11016 self.lu.LogStep(1, steps_total, "Check device existence")
11017 self._CheckDisksExistence([self.instance.primary_node])
11018 self._CheckVolumeGroup([self.instance.primary_node])
11020 # Step: check other node consistency
11021 self.lu.LogStep(2, steps_total, "Check peer consistency")
11022 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11024 # Step: create new storage
11025 self.lu.LogStep(3, steps_total, "Allocate new storage")
11026 for idx, dev in enumerate(self.instance.disks):
11027 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11028 (self.new_node, idx))
11029 # we pass force_create=True to force LVM creation
11030 for new_lv in dev.children:
11031 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11032 _GetInstanceInfoText(self.instance), False)
11034 # Step 4: dbrd minors and drbd setups changes
11035 # after this, we must manually remove the drbd minors on both the
11036 # error and the success paths
11037 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11038 minors = self.cfg.AllocateDRBDMinor([self.new_node
11039 for dev in self.instance.disks],
11040 self.instance.name)
11041 logging.debug("Allocated minors %r", minors)
11044 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11045 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11046 (self.new_node, idx))
11047 # create new devices on new_node; note that we create two IDs:
11048 # one without port, so the drbd will be activated without
11049 # networking information on the new node at this stage, and one
11050 # with network, for the latter activation in step 4
11051 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11052 if self.instance.primary_node == o_node1:
11055 assert self.instance.primary_node == o_node2, "Three-node instance?"
11058 new_alone_id = (self.instance.primary_node, self.new_node, None,
11059 p_minor, new_minor, o_secret)
11060 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11061 p_minor, new_minor, o_secret)
11063 iv_names[idx] = (dev, dev.children, new_net_id)
11064 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11066 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11067 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11068 logical_id=new_alone_id,
11069 children=dev.children,
11071 params=drbd_params)
11073 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11074 _GetInstanceInfoText(self.instance), False)
11075 except errors.GenericError:
11076 self.cfg.ReleaseDRBDMinors(self.instance.name)
11079 # We have new devices, shutdown the drbd on the old secondary
11080 for idx, dev in enumerate(self.instance.disks):
11081 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11082 self.cfg.SetDiskID(dev, self.target_node)
11083 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11085 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11086 "node: %s" % (idx, msg),
11087 hint=("Please cleanup this device manually as"
11088 " soon as possible"))
11090 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11091 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11092 self.instance.disks)[pnode]
11094 msg = result.fail_msg
11096 # detaches didn't succeed (unlikely)
11097 self.cfg.ReleaseDRBDMinors(self.instance.name)
11098 raise errors.OpExecError("Can't detach the disks from the network on"
11099 " old node: %s" % (msg,))
11101 # if we managed to detach at least one, we update all the disks of
11102 # the instance to point to the new secondary
11103 self.lu.LogInfo("Updating instance configuration")
11104 for dev, _, new_logical_id in iv_names.itervalues():
11105 dev.logical_id = new_logical_id
11106 self.cfg.SetDiskID(dev, self.instance.primary_node)
11108 self.cfg.Update(self.instance, feedback_fn)
11110 # Release all node locks (the configuration has been updated)
11111 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11113 # and now perform the drbd attach
11114 self.lu.LogInfo("Attaching primary drbds to new secondary"
11115 " (standalone => connected)")
11116 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11118 self.node_secondary_ip,
11119 self.instance.disks,
11120 self.instance.name,
11122 for to_node, to_result in result.items():
11123 msg = to_result.fail_msg
11125 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11127 hint=("please do a gnt-instance info to see the"
11128 " status of disks"))
11130 cstep = itertools.count(5)
11132 if self.early_release:
11133 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11134 self._RemoveOldStorage(self.target_node, iv_names)
11135 # TODO: Check if releasing locks early still makes sense
11136 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11138 # Release all resource locks except those used by the instance
11139 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11140 keep=self.node_secondary_ip.keys())
11142 # TODO: Can the instance lock be downgraded here? Take the optional disk
11143 # shutdown in the caller into consideration.
11146 # This can fail as the old devices are degraded and _WaitForSync
11147 # does a combined result over all disks, so we don't check its return value
11148 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11149 _WaitForSync(self.lu, self.instance)
11151 # Check all devices manually
11152 self._CheckDevices(self.instance.primary_node, iv_names)
11154 # Step: remove old storage
11155 if not self.early_release:
11156 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11157 self._RemoveOldStorage(self.target_node, iv_names)
11160 class LURepairNodeStorage(NoHooksLU):
11161 """Repairs the volume group on a node.
11166 def CheckArguments(self):
11167 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11169 storage_type = self.op.storage_type
11171 if (constants.SO_FIX_CONSISTENCY not in
11172 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11173 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11174 " repaired" % storage_type,
11175 errors.ECODE_INVAL)
11177 def ExpandNames(self):
11178 self.needed_locks = {
11179 locking.LEVEL_NODE: [self.op.node_name],
11182 def _CheckFaultyDisks(self, instance, node_name):
11183 """Ensure faulty disks abort the opcode or at least warn."""
11185 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11187 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11188 " node '%s'" % (instance.name, node_name),
11189 errors.ECODE_STATE)
11190 except errors.OpPrereqError, err:
11191 if self.op.ignore_consistency:
11192 self.proc.LogWarning(str(err.args[0]))
11196 def CheckPrereq(self):
11197 """Check prerequisites.
11200 # Check whether any instance on this node has faulty disks
11201 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11202 if inst.admin_state != constants.ADMINST_UP:
11204 check_nodes = set(inst.all_nodes)
11205 check_nodes.discard(self.op.node_name)
11206 for inst_node_name in check_nodes:
11207 self._CheckFaultyDisks(inst, inst_node_name)
11209 def Exec(self, feedback_fn):
11210 feedback_fn("Repairing storage unit '%s' on %s ..." %
11211 (self.op.name, self.op.node_name))
11213 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11214 result = self.rpc.call_storage_execute(self.op.node_name,
11215 self.op.storage_type, st_args,
11217 constants.SO_FIX_CONSISTENCY)
11218 result.Raise("Failed to repair storage unit '%s' on %s" %
11219 (self.op.name, self.op.node_name))
11222 class LUNodeEvacuate(NoHooksLU):
11223 """Evacuates instances off a list of nodes.
11228 _MODE2IALLOCATOR = {
11229 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11230 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11231 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11233 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11234 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11235 constants.IALLOCATOR_NEVAC_MODES)
11237 def CheckArguments(self):
11238 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11240 def ExpandNames(self):
11241 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11243 if self.op.remote_node is not None:
11244 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11245 assert self.op.remote_node
11247 if self.op.remote_node == self.op.node_name:
11248 raise errors.OpPrereqError("Can not use evacuated node as a new"
11249 " secondary node", errors.ECODE_INVAL)
11251 if self.op.mode != constants.NODE_EVAC_SEC:
11252 raise errors.OpPrereqError("Without the use of an iallocator only"
11253 " secondary instances can be evacuated",
11254 errors.ECODE_INVAL)
11257 self.share_locks = _ShareAll()
11258 self.needed_locks = {
11259 locking.LEVEL_INSTANCE: [],
11260 locking.LEVEL_NODEGROUP: [],
11261 locking.LEVEL_NODE: [],
11264 # Determine nodes (via group) optimistically, needs verification once locks
11265 # have been acquired
11266 self.lock_nodes = self._DetermineNodes()
11268 def _DetermineNodes(self):
11269 """Gets the list of nodes to operate on.
11272 if self.op.remote_node is None:
11273 # Iallocator will choose any node(s) in the same group
11274 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11276 group_nodes = frozenset([self.op.remote_node])
11278 # Determine nodes to be locked
11279 return set([self.op.node_name]) | group_nodes
  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @return: instances on the evacuated node, filtered by evacuation mode
      (primary-only or secondary-only; "all" is rejected, see below)

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # instead of a per-instance request; until then "all" cannot be supported
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)
  def DeclareLocks(self, level):
    """Fills in the lock sets computed optimistically in ExpandNames.

    Instance and node-group locks are derived from the current configuration
    and re-verified in L{CheckPrereq} after acquisition.

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11326 def CheckPrereq(self):
11328 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11329 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11330 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11332 need_nodes = self._DetermineNodes()
11334 if not owned_nodes.issuperset(need_nodes):
11335 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11336 " locks were acquired, current nodes are"
11337 " are '%s', used to be '%s'; retry the"
11339 (self.op.node_name,
11340 utils.CommaJoin(need_nodes),
11341 utils.CommaJoin(owned_nodes)),
11342 errors.ECODE_STATE)
11344 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11345 if owned_groups != wanted_groups:
11346 raise errors.OpExecError("Node groups changed since locks were acquired,"
11347 " current groups are '%s', used to be '%s';"
11348 " retry the operation" %
11349 (utils.CommaJoin(wanted_groups),
11350 utils.CommaJoin(owned_groups)))
11352 # Determine affected instances
11353 self.instances = self._DetermineInstances()
11354 self.instance_names = [i.name for i in self.instances]
11356 if set(self.instance_names) != owned_instances:
11357 raise errors.OpExecError("Instances on node '%s' changed since locks"
11358 " were acquired, current instances are '%s',"
11359 " used to be '%s'; retry the operation" %
11360 (self.op.node_name,
11361 utils.CommaJoin(self.instance_names),
11362 utils.CommaJoin(owned_instances)))
11364 if self.instance_names:
11365 self.LogInfo("Evacuating instances from node '%s': %s",
11367 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11369 self.LogInfo("No instances to evacuate from node '%s'",
11372 if self.op.remote_node is not None:
11373 for i in self.instances:
11374 if i.primary_node == self.op.remote_node:
11375 raise errors.OpPrereqError("Node %s is the primary node of"
11376 " instance %s, cannot use it as"
11378 (self.op.remote_node, i.name),
11379 errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Builds and returns the evacuation jobs.

    @rtype: L{ResultWithJobs}
    @return: one job (list of opcodes) per evacuated instance

    """
    # Exactly one of iallocator/remote_node was selected in CheckArguments
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      # Fixed target node: replace the secondary of every affected instance
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  @type early_release: bool
  @param early_release: Value to set on the opcode
  @type op: L{opcodes.OpCode}
  @param op: Opcode to modify; opcodes without the attribute are left alone
  @return: the (possibly modified) opcode

  """
  try:
    op.early_release = early_release
  except AttributeError:
    # Only opcodes lacking the slot may end up here; replace-disks always
    # supports early_release
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  @type use_nodes: bool
  @param use_nodes: if true, format the node list; otherwise return the group

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  # Deserialize each job's opcodes and propagate the early_release flag
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    # NOTE(review): file-based templates have no VG to check — confirm the
    # exact template tuple against upstream (extraction elided a line here)
    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

    # TODO: Rewrite code to work properly
    # DRBD goes into sync mode for a short amount of time after executing the
    # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
    # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      # Shut the freshly-grown disk down again if the instance isn't running
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    Returns C{None} for static queries, missing/offline nodes or when the
    remote node reported no device; otherwise a tuple of device fields.

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Recurses into the device's children.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; C{None} stores C{None} as the private field
  @rtype: list of tuples; (operation, index, parameters, private data)

  """
  if private_fn is None:
    make_private = lambda: None
  else:
    make_private = private_fn

  # Extend each 3-tuple with a freshly built private data object
  return [(op, idx, params, make_private())
          for (op, idx, params) in mods]
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks: an optional list of (name, value) pairs
# NOTE(review): the tail of this expression was lost in extraction; the
# second item type below is reconstructed — confirm against upstream
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    # Translate the user-supplied index into an absolute position; -1 means
    # "last item" (for add: append)
    if idx == -1:
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
11897 def _UpdateIvNames(base_index, disks):
11898 """Updates the C{iv_name} attribute of disks.
11900 @type disks: list of L{objects.Disk}
11903 for (idx, disk) in enumerate(disks):
11904 disk.iv_name = "disk/%s" % (base_index + idx, )
class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    # Filled in by L{LUInstanceSetParams._PrepareNicModification}
    self.params = None
    self.filled = None
11918 class LUInstanceSetParams(LogicalUnit):
11919 """Modifies an instances's parameters.
11922 HPATH = "instance-modify"
11923 HTYPE = constants.HTYPE_INSTANCE
  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    """Converts legacy 2-tuple disk/NIC modifications to the 3-tuple format.

    Legacy entries are C{(op, params)}; the upgraded format is
    C{(op, index, params)}.  At most one add/remove is allowed per request
    in the legacy format.

    """
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          # Legacy "modify": op is actually the index
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    @param kind: One-word item description ("disk" or "NIC")
    @param mods: Modifications in (op, index, params) format
    @param key_types: Allowed parameter types for L{utils.ForceDictType}
    @param item_fn: Per-item verification callback

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    Normalizes the access mode and size for additions; rejects size changes
    on modifications (those must go through grow-disk).

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
      raise errors.OpPrereqError("Disk size change not possible, use"
                                 " grow-disk", errors.ECODE_INVAL)
  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    Normalizes IP/bridge/link "none" sentinels and validates/normalizes
    the MAC address.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      if ip is None:
        pass
      elif ip.lower() == constants.VALUE_NONE:
        params[constants.INIC_IP] = None
      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      # "bridge" is the legacy alias for "link"; accept only one of them
      bridge = params.get("bridge", None)
      link = params.get(constants.INIC_LINK, None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and bridge.lower() == constants.VALUE_NONE:
        params["bridge"] = None
      elif link and link.lower() == constants.VALUE_NONE:
        params[constants.INIC_LINK] = None

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
  def CheckArguments(self):
    """Syntactic validation of the requested modifications.

    Rejects empty requests, upgrades legacy disk/NIC formats and verifies
    each modification.

    """
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    self.op.disks = \
      self._UpgradeDiskNicMods("disk", self.op.disks,
        opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = \
      self._UpgradeDiskNicMods("NIC", self.op.nics,
        opcodes.OpInstanceSetParams.TestNicModifications)

    # Check disk modifications
    self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                    self._VerifyDiskModification)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  def DeclareLocks(self, level):
    # TODO: Acquire group lock in shared mode (disk parameters)
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        # Disk conversion to a mirrored template also locks the new secondary
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        nics.append((nic.ip, nic.mac, mode, link))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
  def _PrepareNicModification(self, params, private, old_ip, old_params,
                              cluster, pnode):
    """Validates a NIC add/modify request and prepares its new parameters.

    Stores the computed parameter dicts on C{private} for later application;
    may generate or reserve a MAC address as a side effect.

    @return: (None, None) — no user-visible changes are reported here

    """
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    # "bridge" is a legacy alias for the link parameter
    if "bridge" in params:
      update_params_dict[constants.NIC_LINK] = params["bridge"]

    new_params = _GetUpdatedParams(old_params, update_params_dict)
    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

    private.params = new_params
    private.filled = new_filled_params

    return (None, None)
12194 def CheckPrereq(self):
12195 """Check prerequisites.
12197 This only checks the instance list against the existing names.
12200 # checking the new params on the primary/secondary nodes
12202 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12203 cluster = self.cluster = self.cfg.GetClusterInfo()
12204 assert self.instance is not None, \
12205 "Cannot retrieve locked instance %s" % self.op.instance_name
12206 pnode = instance.primary_node
12207 nodelist = list(instance.all_nodes)
12208 pnode_info = self.cfg.GetNodeInfo(pnode)
12209 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12211 # Prepare disk/NIC modifications
12212 self.diskmod = PrepareContainerMods(self.op.disks, None)
12213 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12216 if self.op.os_name and not self.op.force:
12217 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12218 self.op.force_variant)
12219 instance_os = self.op.os_name
12221 instance_os = instance.os
12223 assert not (self.op.disk_template and self.op.disks), \
12224 "Can't modify disk template and apply disk changes at the same time"
12226 if self.op.disk_template:
12227 if instance.disk_template == self.op.disk_template:
12228 raise errors.OpPrereqError("Instance already has disk template %s" %
12229 instance.disk_template, errors.ECODE_INVAL)
12231 if (instance.disk_template,
12232 self.op.disk_template) not in self._DISK_CONVERSIONS:
12233 raise errors.OpPrereqError("Unsupported disk template conversion from"
12234 " %s to %s" % (instance.disk_template,
12235 self.op.disk_template),
12236 errors.ECODE_INVAL)
12237 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12238 msg="cannot change disk template")
12239 if self.op.disk_template in constants.DTS_INT_MIRROR:
12240 if self.op.remote_node == pnode:
12241 raise errors.OpPrereqError("Given new secondary node %s is the same"
12242 " as the primary node of the instance" %
12243 self.op.remote_node, errors.ECODE_STATE)
12244 _CheckNodeOnline(self, self.op.remote_node)
12245 _CheckNodeNotDrained(self, self.op.remote_node)
12246 # FIXME: here we assume that the old instance type is DT_PLAIN
12247 assert instance.disk_template == constants.DT_PLAIN
12248 disks = [{constants.IDISK_SIZE: d.size,
12249 constants.IDISK_VG: d.logical_id[0]}
12250 for d in instance.disks]
12251 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12252 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12254 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12255 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12256 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12257 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12258 ignore=self.op.ignore_ipolicy)
12259 if pnode_info.group != snode_info.group:
12260 self.LogWarning("The primary and secondary nodes are in two"
12261 " different node groups; the disk parameters"
12262 " from the first disk's node group will be"
12265 # hvparams processing
12266 if self.op.hvparams:
12267 hv_type = instance.hypervisor
12268 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12269 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12270 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12273 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12274 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12275 self.hv_proposed = self.hv_new = hv_new # the new actual values
12276 self.hv_inst = i_hvdict # the new dict (without defaults)
12278 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12280 self.hv_new = self.hv_inst = {}
12282 # beparams processing
12283 if self.op.beparams:
12284 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12286 objects.UpgradeBeParams(i_bedict)
12287 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12288 be_new = cluster.SimpleFillBE(i_bedict)
12289 self.be_proposed = self.be_new = be_new # the new actual values
12290 self.be_inst = i_bedict # the new dict (without defaults)
12292 self.be_new = self.be_inst = {}
12293 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12294 be_old = cluster.FillBE(instance)
12296 # CPU param validation -- checking every time a paramtere is
12297 # changed to cover all cases where either CPU mask or vcpus have
12299 if (constants.BE_VCPUS in self.be_proposed and
12300 constants.HV_CPU_MASK in self.hv_proposed):
12302 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12303 # Verify mask is consistent with number of vCPUs. Can skip this
12304 # test if only 1 entry in the CPU mask, which means same mask
12305 # is applied to all vCPUs.
12306 if (len(cpu_list) > 1 and
12307 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12308 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12310 (self.be_proposed[constants.BE_VCPUS],
12311 self.hv_proposed[constants.HV_CPU_MASK]),
12312 errors.ECODE_INVAL)
12314 # Only perform this test if a new CPU mask is given
12315 if constants.HV_CPU_MASK in self.hv_new:
12316 # Calculate the largest CPU number requested
12317 max_requested_cpu = max(map(max, cpu_list))
12318 # Check that all of the instance's nodes have enough physical CPUs to
12319 # satisfy the requested CPU mask
12320 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12321 max_requested_cpu + 1, instance.hypervisor)
12323 # osparams processing
12324 if self.op.osparams:
12325 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12326 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12327 self.os_inst = i_osdict # the new dict (without defaults)
12333 #TODO(dynmem): do the appropriate check involving MINMEM
12334 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12335 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12336 mem_check_list = [pnode]
12337 if be_new[constants.BE_AUTO_BALANCE]:
12338 # either we changed auto_balance to yes or it was from before
12339 mem_check_list.extend(instance.secondary_nodes)
12340 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12341 instance.hypervisor)
12342 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12343 [instance.hypervisor])
12344 pninfo = nodeinfo[pnode]
12345 msg = pninfo.fail_msg
12347 # Assume the primary node is unreachable and go ahead
12348 self.warn.append("Can't get info from primary node %s: %s" %
12351 (_, _, (pnhvinfo, )) = pninfo.payload
12352 if not isinstance(pnhvinfo.get("memory_free", None), int):
12353 self.warn.append("Node data from primary node %s doesn't contain"
12354 " free memory information" % pnode)
12355 elif instance_info.fail_msg:
12356 self.warn.append("Can't get instance runtime information: %s" %
12357 instance_info.fail_msg)
12359 if instance_info.payload:
12360 current_mem = int(instance_info.payload["memory"])
12362 # Assume instance not running
12363 # (there is a slight race condition here, but it's not very
12364 # probable, and we have no other way to check)
12365 # TODO: Describe race condition
12367 #TODO(dynmem): do the appropriate check involving MINMEM
12368 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12369 pnhvinfo["memory_free"])
12371 raise errors.OpPrereqError("This change will prevent the instance"
12372 " from starting, due to %d MB of memory"
12373 " missing on its primary node" %
12375 errors.ECODE_NORES)
12377 if be_new[constants.BE_AUTO_BALANCE]:
12378 for node, nres in nodeinfo.items():
12379 if node not in instance.secondary_nodes:
12381 nres.Raise("Can't get info from secondary node %s" % node,
12382 prereq=True, ecode=errors.ECODE_STATE)
12383 (_, _, (nhvinfo, )) = nres.payload
12384 if not isinstance(nhvinfo.get("memory_free", None), int):
12385 raise errors.OpPrereqError("Secondary node %s didn't return free"
12386 " memory information" % node,
12387 errors.ECODE_STATE)
12388 #TODO(dynmem): do the appropriate check involving MINMEM
12389 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12390 raise errors.OpPrereqError("This change will prevent the instance"
12391 " from failover to its secondary node"
12392 " %s, due to not enough memory" % node,
12393 errors.ECODE_STATE)
12395 if self.op.runtime_mem:
12396 remote_info = self.rpc.call_instance_info(instance.primary_node,
12398 instance.hypervisor)
12399 remote_info.Raise("Error checking node %s" % instance.primary_node)
12400 if not remote_info.payload: # not running already
12401 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12402 errors.ECODE_STATE)
12404 current_memory = remote_info.payload["memory"]
12405 if (not self.op.force and
12406 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12407 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12408 raise errors.OpPrereqError("Instance %s must have memory between %d"
12409 " and %d MB of memory unless --force is"
12410 " given" % (instance.name,
12411 self.be_proposed[constants.BE_MINMEM],
12412 self.be_proposed[constants.BE_MAXMEM]),
12413 errors.ECODE_INVAL)
12415 if self.op.runtime_mem > current_memory:
12416 _CheckNodeFreeMemory(self, instance.primary_node,
12417 "ballooning memory for instance %s" %
12419 self.op.memory - current_memory,
12420 instance.hypervisor)
12422 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12423 raise errors.OpPrereqError("Disk operations not supported for"
12424 " diskless instances",
12425 errors.ECODE_INVAL)
12427 def _PrepareNicCreate(_, params, private):
12428 return self._PrepareNicModification(params, private, None, {},
12431 def _PrepareNicMod(_, nic, params, private):
12432 return self._PrepareNicModification(params, private, nic.ip,
12433 nic.nicparams, cluster, pnode)
12435 # Verify NIC changes (operating on copy)
12436 nics = instance.nics[:]
12437 ApplyContainerMods("NIC", nics, None, self.nicmod,
12438 _PrepareNicCreate, _PrepareNicMod, None)
12439 if len(nics) > constants.MAX_NICS:
12440 raise errors.OpPrereqError("Instance has too many network interfaces"
12441 " (%d), cannot add more" % constants.MAX_NICS,
12442 errors.ECODE_STATE)
12444 # Verify disk changes (operating on a copy)
12445 disks = instance.disks[:]
12446 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12447 if len(disks) > constants.MAX_DISKS:
12448 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12449 " more" % constants.MAX_DISKS,
12450 errors.ECODE_STATE)
12452 if self.op.offline is not None:
12453 if self.op.offline:
12454 msg = "can't change to offline"
12456 msg = "can't change to online"
12457 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12459 # Pre-compute NIC changes (necessary to use result in hooks)
12460 self._nic_chgdesc = []
12462 # Operate on copies as this is still in prereq
12463 nics = [nic.Copy() for nic in instance.nics]
12464 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12465 self._CreateNewNic, self._ApplyNicMods, None)
12466 self._new_nics = nics
12468 self._new_nics = None
12470 def _ConvertPlainToDrbd(self, feedback_fn):
12471 """Converts an instance from plain to drbd.
# Overall flow (visible here): build a DRBD disk set mirroring the existing
# plain LVs, create the missing data/meta volumes on both nodes, rename the
# original LVs to become the DRBD data children, assemble the DRBD devices,
# update the cluster config, release node locks, then wait for sync.
# NOTE(review): this listing has gaps (missing original lines, e.g. the
# trailing arguments of _GenerateDiskTemplate) — verify against the full file.
12474 feedback_fn("Converting template to drbd")
12475 instance = self.instance
12476 pnode = instance.primary_node
# The target secondary node comes from the opcode (set during prereq checks).
12477 snode = self.op.remote_node
12479 assert instance.disk_template == constants.DT_PLAIN
12481 # create a fake disk info for _GenerateDiskTemplate
# For plain disks, logical_id[0] is the volume group name.
12482 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12483 constants.IDISK_VG: d.logical_id[0]}
12484 for d in instance.disks]
12485 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12486 instance.name, pnode, [snode],
12487 disk_info, None, None, 0, feedback_fn,
12489 info = _GetInstanceInfoText(instance)
12490 feedback_fn("Creating aditional volumes...")
12491 # first, create the missing data and meta devices
12492 for disk in new_disks:
12493 # unfortunately this is... not too nice
# On the primary only the meta device (children[1]) is new — the data LV
# already exists and is renamed below; the secondary needs all children.
12494 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12496 for child in disk.children:
12497 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12498 # at this stage, all new LVs have been created, we can rename the
12500 feedback_fn("Renaming original volumes...")
# Map each original LV to the logical_id of the new DRBD data child.
12501 rename_list = [(o, n.children[0].logical_id)
12502 for (o, n) in zip(instance.disks, new_disks)]
12503 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12504 result.Raise("Failed to rename original LVs")
12506 feedback_fn("Initializing DRBD devices...")
12507 # all child devices are in place, we can now create the DRBD devices
12508 for disk in new_disks:
12509 for node in [pnode, snode]:
# f_create is True only on the primary node here.
12510 f_create = node == pnode
12511 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12513 # at this point, the instance has been modified
12514 instance.disk_template = constants.DT_DRBD8
12515 instance.disks = new_disks
12516 self.cfg.Update(instance, feedback_fn)
12518 # Release node locks while waiting for sync
12519 _ReleaseLocks(self, locking.LEVEL_NODE)
12521 # disks are created, waiting for sync
12522 disk_abort = not _WaitForSync(self, instance,
12523 oneshot=not self.op.wait_for_sync)
# Raised when sync reported degraded disks (guard condition not visible in
# this listing — presumably `if disk_abort:`).
12525 raise errors.OpExecError("There are some degraded disks for"
12526 " this instance, please cleanup manually")
12528 # Node resource locks will be released by caller
12530 def _ConvertDrbdToPlain(self, feedback_fn):
12531 """Converts an instance from drbd to plain.
# Flow: keep only each DRBD disk's data child as the new plain disk, update
# the config first, then best-effort remove the now-unneeded volumes
# (everything on the secondary, the meta LVs on the primary) and return the
# DRBD TCP ports to the cluster pool.
12534 instance = self.instance
# Only single-secondary DRBD8 instances can be converted.
12536 assert len(instance.secondary_nodes) == 1
12537 assert instance.disk_template == constants.DT_DRBD8
12539 pnode = instance.primary_node
12540 snode = instance.secondary_nodes[0]
12541 feedback_fn("Converting template to plain")
12543 old_disks = instance.disks
# children[0] is the DRBD data LV, which becomes the plain disk.
12544 new_disks = [d.children[0] for d in old_disks]
12546 # copy over size and mode
12547 for parent, child in zip(old_disks, new_disks):
12548 child.size = parent.size
12549 child.mode = parent.mode
12551 # update instance structure
12552 instance.disks = new_disks
12553 instance.disk_template = constants.DT_PLAIN
12554 self.cfg.Update(instance, feedback_fn)
12556 # Release locks in case removing disks takes a while
12557 _ReleaseLocks(self, locking.LEVEL_NODE)
12559 feedback_fn("Removing volumes on the secondary node...")
12560 for disk in old_disks:
12561 self.cfg.SetDiskID(disk, snode)
12562 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
# Removal failures are only warned about; conversion proceeds regardless.
12564 self.LogWarning("Could not remove block device %s on node %s,"
12565 " continuing anyway: %s", disk.iv_name, snode, msg)
12567 feedback_fn("Removing unneeded volumes on the primary node...")
12568 for idx, disk in enumerate(old_disks):
# children[1] is the DRBD metadata LV, no longer needed for plain disks.
12569 meta = disk.children[1]
12570 self.cfg.SetDiskID(meta, pnode)
12571 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12573 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12574 " continuing anyway: %s", idx, pnode, msg)
12576 # this is a DRBD disk, return its port to the pool
12577 for disk in old_disks:
# logical_id[2] holds the DRBD TCP/UDP port for this disk.
12578 tcp_port = disk.logical_id[2]
12579 self.cfg.AddTcpUdpPort(tcp_port)
12581 # Node resource locks will be released by caller
12583 def _CreateNewDisk(self, idx, params, _):
12584 """Creates a new disk.
# Callback for ApplyContainerMods ("add" operation): generates a disk object
# matching the instance's current template and creates the backing block
# devices on all instance nodes. Returns change-description tuples (the
# return statement itself falls in a listing gap here).
12587 instance = self.instance
12590 if instance.disk_template in constants.DTS_FILEBASED:
# For file-based templates, reuse driver and directory of the first disk.
12591 (file_driver, file_path) = instance.disks[0].logical_id
12592 file_path = os.path.dirname(file_path)
12594 file_driver = file_path = None
12597 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12598 instance.primary_node, instance.secondary_nodes,
12599 [params], file_path, file_driver, idx,
12600 self.Log, self.diskparams)[0]
12602 info = _GetInstanceInfoText(instance)
12604 logging.info("Creating volume %s for instance %s",
12605 disk.iv_name, instance.name)
12606 # Note: this needs to be kept in sync with _CreateDisks
12608 for node in instance.all_nodes:
# Force-create only on the primary node.
12609 f_create = (node == instance.primary_node)
12611 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
# Creation failure on one node is logged as a warning, not fatal.
12612 except errors.OpExecError, err:
12613 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12614 disk.iv_name, disk, node, err)
12617 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12621 def _ModifyDisk(idx, disk, params, _):
12622 """Modifies a disk.
# ApplyContainerMods "modify" callback: only the access mode can be changed;
# returns a change-description tuple (return line falls in a listing gap).
12625 disk.mode = params[constants.IDISK_MODE]
12628 ("disk.mode/%d" % idx, disk.mode),
12631 def _RemoveDisk(self, idx, root, _):
# ApplyContainerMods "remove" callback: best-effort removal of the disk's
# block devices on every node in its device tree; for DRBD disks the TCP
# port is returned to the cluster pool afterwards.
12635 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12636 self.cfg.SetDiskID(disk, node)
12637 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
# Failures only warn — disk removal is not rolled back.
12639 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12640 " continuing anyway", idx, node, msg)
12642 # if this is a DRBD disk, return its port to the pool
12643 if root.dev_type in constants.LDS_DRBD:
12644 self.cfg.AddTcpUdpPort(root.logical_id[2])
12647 def _CreateNewNic(idx, params, private):
12648 """Creates data structure for a new network interface.
# ApplyContainerMods "add" callback for NICs: builds the NIC object from the
# validated params (MAC/IP from params, nicparams precomputed in `private`
# during prereq) plus a change-description entry for hooks/feedback.
12651 mac = params[constants.INIC_MAC]
12652 ip = params.get(constants.INIC_IP, None)
12653 nicparams = private.params
12655 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
# private.filled carries the fully-defaulted nicparams for display.
12657 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12658 (mac, ip, private.filled[constants.NIC_MODE],
12659 private.filled[constants.NIC_LINK])),
12663 def _ApplyNicMods(idx, nic, params, private):
12664 """Modifies a network interface.
# ApplyContainerMods "modify" callback for NICs: applies MAC/IP changes
# directly to the NIC object, installs the precomputed nicparams, and
# records a change-description entry per modified key.
12669 for key in [constants.INIC_MAC, constants.INIC_IP]:
# Guard (`if key in params:`) presumably sits in the listing gap above.
12671 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12672 setattr(nic, key, params[key])
12675 nic.nicparams = private.params
12677 for (key, val) in params.items():
12678 changes.append(("nic.%s/%d" % (key, idx), val))
12682 def Exec(self, feedback_fn):
12683 """Modifies an instance.
12685 All parameters take effect only at the next restart of the instance.
# Applies all changes validated in CheckPrereq: runtime memory ballooning,
# disk add/modify/remove, disk template conversion, NIC changes, hv/be/os
# parameters, OS name and admin (online/offline) state; returns a list of
# (name, new-value) change descriptions (built into `result`, whose
# initialization falls in a listing gap).
12688 # Process here the warnings from CheckPrereq, as we don't have a
12689 # feedback_fn there.
12690 # TODO: Replace with self.LogWarning
12691 for warn in self.warn:
12692 feedback_fn("WARNING: %s" % warn)
# Node resource locks are held iff a disk template conversion was requested.
12694 assert ((self.op.disk_template is None) ^
12695 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12696 "Not owning any node resource locks"
12699 instance = self.instance
12702 if self.op.runtime_mem:
12703 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12705 self.op.runtime_mem)
12706 rpcres.Raise("Cannot modify instance runtime memory")
12707 result.append(("runtime_memory", self.op.runtime_mem))
12709 # Apply disk changes
12710 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12711 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
# Renumber disk iv_names after container modifications.
12712 _UpdateIvNames(0, instance.disks)
12714 if self.op.disk_template:
12716 check_nodes = set(instance.all_nodes)
12717 if self.op.remote_node:
12718 check_nodes.add(self.op.remote_node)
12719 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12720 owned = self.owned_locks(level)
12721 assert not (check_nodes - owned), \
12722 ("Not owning the correct locks, owning %r, expected at least %r" %
12723 (owned, check_nodes))
# Disks must be shut down before the template conversion can run.
12725 r_shut = _ShutdownInstanceDisks(self, instance)
12727 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12728 " proceed with disk template conversion")
12729 mode = (instance.disk_template, self.op.disk_template)
# Dispatch to _ConvertPlainToDrbd / _ConvertDrbdToPlain via the class-level
# _DISK_CONVERSIONS table; a try/except around this presumably sits in the
# listing gap (ReleaseDRBDMinors looks like its cleanup path).
12731 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12733 self.cfg.ReleaseDRBDMinors(instance.name)
12735 result.append(("disk_template", self.op.disk_template))
12737 assert instance.disk_template == self.op.disk_template, \
12738 ("Expected disk template '%s', found '%s'" %
12739 (self.op.disk_template, instance.disk_template))
12741 # Release node and resource locks if there are any (they might already have
12742 # been released during disk conversion)
12743 _ReleaseLocks(self, locking.LEVEL_NODE)
12744 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12746 # Apply NIC changes
12747 if self._new_nics is not None:
# NICs and their change descriptions were precomputed in CheckPrereq.
12748 instance.nics = self._new_nics
12749 result.extend(self._nic_chgdesc)
12752 if self.op.hvparams:
12753 instance.hvparams = self.hv_inst
12754 for key, val in self.op.hvparams.iteritems():
12755 result.append(("hv/%s" % key, val))
12758 if self.op.beparams:
12759 instance.beparams = self.be_inst
12760 for key, val in self.op.beparams.iteritems():
12761 result.append(("be/%s" % key, val))
12764 if self.op.os_name:
12765 instance.os = self.op.os_name
12768 if self.op.osparams:
12769 instance.osparams = self.os_inst
12770 for key, val in self.op.osparams.iteritems():
12771 result.append(("os/%s" % key, val))
# offline is tri-state: None = no change, True = offline, False = down.
12773 if self.op.offline is None:
12776 elif self.op.offline:
12777 # Mark instance as offline
12778 self.cfg.MarkInstanceOffline(instance.name)
12779 result.append(("admin_state", constants.ADMINST_OFFLINE))
12781 # Mark instance as online, but stopped
12782 self.cfg.MarkInstanceDown(instance.name)
12783 result.append(("admin_state", constants.ADMINST_DOWN))
12785 self.cfg.Update(instance, feedback_fn)
12787 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12788 self.owned_locks(locking.LEVEL_NODE)), \
12789 "All node locks should have been released by now"
# Maps (current_template, requested_template) to the conversion method;
# looked up by Exec when self.op.disk_template is set. Only plain<->drbd8
# conversions are supported.
12793 _DISK_CONVERSIONS = {
12794 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12795 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12799 class LUInstanceChangeGroup(LogicalUnit):
# Logical unit that moves an instance to (one of) the given target node
# group(s), using an iallocator to compute the necessary relocation jobs.
# NOTE(review): this listing has gaps (missing original lines, e.g. some
# `else:` lines and dict/docstring closers) — verify against the full file.
12800 HPATH = "instance-change-group"
12801 HTYPE = constants.HTYPE_INSTANCE
12804 def ExpandNames(self):
# All locks are shared; group/node locks are declared lazily in DeclareLocks.
12805 self.share_locks = _ShareAll()
12806 self.needed_locks = {
12807 locking.LEVEL_NODEGROUP: [],
12808 locking.LEVEL_NODE: [],
12811 self._ExpandAndLockInstance()
12813 if self.op.target_groups:
# Resolve user-supplied group names to UUIDs early.
12814 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12815 self.op.target_groups)
12817 self.req_target_uuids = None
12819 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12821 def DeclareLocks(self, level):
12822 if level == locking.LEVEL_NODEGROUP:
12823 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12825 if self.req_target_uuids:
12826 lock_groups = set(self.req_target_uuids)
12828 # Lock all groups used by instance optimistically; this requires going
12829 # via the node before it's locked, requiring verification later on
12830 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12831 lock_groups.update(instance_groups)
12833 # No target groups, need to lock all of them
12834 lock_groups = locking.ALL_SET
12836 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12838 elif level == locking.LEVEL_NODE:
12839 if self.req_target_uuids:
12840 # Lock all nodes used by instances
12841 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12842 self._LockInstancesNodes()
12844 # Lock all nodes in all potential target groups
12845 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12846 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12847 member_nodes = [node_name
12848 for group in lock_groups
12849 for node_name in self.cfg.GetNodeGroup(group).members]
12850 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12852 # Lock all nodes as all groups are potential targets
12853 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12855 def CheckPrereq(self):
# Verifies locks still cover the instance's nodes/groups (they were taken
# optimistically) and computes self.target_uuids for the allocator run.
12856 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12857 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12858 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12860 assert (self.req_target_uuids is None or
12861 owned_groups.issuperset(self.req_target_uuids))
12862 assert owned_instances == set([self.op.instance_name])
12864 # Get instance information
12865 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12867 # Check if node groups for locked instance are still correct
12868 assert owned_nodes.issuperset(self.instance.all_nodes), \
12869 ("Instance %s's nodes changed while we kept the lock" %
12870 self.op.instance_name)
12872 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12875 if self.req_target_uuids:
12876 # User requested specific target groups
12877 self.target_uuids = self.req_target_uuids
12879 # All groups except those used by the instance are potential targets
12880 self.target_uuids = owned_groups - inst_groups
# A target group the instance already uses is a user error.
12882 conflicting_groups = self.target_uuids & inst_groups
12883 if conflicting_groups:
12884 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12885 " used by the instance '%s'" %
12886 (utils.CommaJoin(conflicting_groups),
12887 self.op.instance_name),
12888 errors.ECODE_INVAL)
12890 if not self.target_uuids:
12891 raise errors.OpPrereqError("There are no possible target groups",
12892 errors.ECODE_INVAL)
12894 def BuildHooksEnv(self):
12895 """Build hooks env.
12898 assert self.target_uuids
12901 "TARGET_GROUPS": " ".join(self.target_uuids),
12904 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12908 def BuildHooksNodes(self):
12909 """Build hooks nodes.
# Hooks run on the master node only (both pre and post lists).
12912 mn = self.cfg.GetMasterNode()
12913 return ([mn], [mn])
12915 def Exec(self, feedback_fn):
# Runs the iallocator in CHG_GROUP mode and returns the resulting jobs.
12916 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12918 assert instances == [self.op.instance_name], "Instance not locked"
12920 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12921 instances=instances, target_groups=list(self.target_uuids))
12923 ial.Run(self.op.iallocator)
12925 if not ial.success:
12926 raise errors.OpPrereqError("Can't compute solution for changing group of"
12927 " instance '%s' using iallocator '%s': %s" %
12928 (self.op.instance_name, self.op.iallocator,
12930 errors.ECODE_NORES)
12932 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12934 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12935 " instance '%s'", len(jobs), self.op.instance_name)
# The jobs are submitted by the job-queue machinery, not executed inline.
12937 return ResultWithJobs(jobs)
12940 class LUBackupQuery(NoHooksLU):
12941 """Query the exports list
12946 def ExpandNames(self):
12947 self.needed_locks = {}
# Node locks are shared — this LU only reads export lists.
12948 self.share_locks[locking.LEVEL_NODE] = 1
# An empty node list means "query every node in the cluster".
12949 if not self.op.nodes:
12950 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12952 self.needed_locks[locking.LEVEL_NODE] = \
12953 _GetWantedNodes(self, self.op.nodes)
12955 def Exec(self, feedback_fn):
12956 """Compute the list of all the exported system images.
12959 @return: a dictionary with the structure node->(export-list)
12960 where export-list is a list of the instances exported on
12964 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12965 rpcresult = self.rpc.call_export_list(self.nodes)
12967 for node in rpcresult:
# A failed RPC is reported as False rather than aborting the query.
12968 if rpcresult[node].fail_msg:
12969 result[node] = False
12971 result[node] = rpcresult[node].payload
12976 class LUBackupPrepare(NoHooksLU):
12977 """Prepares an instance for an export and returns useful information.
12982 def ExpandNames(self):
12983 self._ExpandAndLockInstance()
12985 def CheckPrereq(self):
12986 """Check prerequisites.
12989 instance_name = self.op.instance_name
12991 self.instance = self.cfg.GetInstanceInfo(instance_name)
12992 assert self.instance is not None, \
12993 "Cannot retrieve locked instance %s" % self.op.instance_name
12994 _CheckNodeOnline(self, self.instance.primary_node)
# Cluster domain secret, used to sign the handshake/key-name/CA below.
12996 self._cds = _GetClusterDomainSecret()
12998 def Exec(self, feedback_fn):
12999 """Prepares an instance for an export.
# For remote exports: creates an X509 key/cert on the primary node and
# returns the handshake, HMAC-signed key name and signed CA that the
# destination cluster needs (return dict partly in a listing gap).
13002 instance = self.instance
13004 if self.op.mode == constants.EXPORT_MODE_REMOTE:
# Salt for the HMAC over the X509 key name.
13005 salt = utils.GenerateSecret(8)
13007 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13008 result = self.rpc.call_x509_cert_create(instance.primary_node,
13009 constants.RIE_CERT_VALIDITY)
13010 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13012 (name, cert_pem) = result.payload
13014 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13018 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13019 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13021 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13027 class LUBackupExport(LogicalUnit):
13028 """Export an instance to an image in the cluster.
# Supports two modes: EXPORT_MODE_LOCAL (snapshot to another node of this
# cluster) and EXPORT_MODE_REMOTE (encrypted export to a foreign cluster
# using X509 certificates prepared by LUBackupPrepare).
# NOTE(review): this listing has gaps (missing original lines such as
# `try:`/`else:` lines and closing brackets) — verify against the full file.
13031 HPATH = "instance-export"
13032 HTYPE = constants.HTYPE_INSTANCE
13035 def CheckArguments(self):
13036 """Check the arguments.
# Remote exports additionally require the X509 key name and destination CA.
13039 self.x509_key_name = self.op.x509_key_name
13040 self.dest_x509_ca_pem = self.op.destination_x509_ca
13042 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13043 if not self.x509_key_name:
13044 raise errors.OpPrereqError("Missing X509 key name for encryption",
13045 errors.ECODE_INVAL)
13047 if not self.dest_x509_ca_pem:
13048 raise errors.OpPrereqError("Missing destination X509 CA",
13049 errors.ECODE_INVAL)
13051 def ExpandNames(self):
13052 self._ExpandAndLockInstance()
13054 # Lock all nodes for local exports
13055 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13056 # FIXME: lock only instance primary and destination node
13058 # Sad but true, for now we have do lock all nodes, as we don't know where
13059 # the previous export might be, and in this LU we search for it and
13060 # remove it from its current node. In the future we could fix this by:
13061 # - making a tasklet to search (share-lock all), then create the
13062 # new one, then one to remove, after
13063 # - removing the removal operation altogether
13064 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13066 def DeclareLocks(self, level):
13067 """Last minute lock declaration."""
13068 # All nodes are locked anyway, so nothing to do here.
13070 def BuildHooksEnv(self):
13071 """Build hooks env.
13073 This will run on the master, primary node and target node.
13077 "EXPORT_MODE": self.op.mode,
13078 "EXPORT_NODE": self.op.target_node,
13079 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13080 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13081 # TODO: Generic function for boolean env variables
13082 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13085 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13089 def BuildHooksNodes(self):
13090 """Build hooks nodes.
13093 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
# For local exports, also run hooks on the destination node.
13095 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13096 nl.append(self.op.target_node)
13100 def CheckPrereq(self):
13101 """Check prerequisites.
13103 This checks that the instance and node names are valid.
13106 instance_name = self.op.instance_name
13108 self.instance = self.cfg.GetInstanceInfo(instance_name)
13109 assert self.instance is not None, \
13110 "Cannot retrieve locked instance %s" % self.op.instance_name
13111 _CheckNodeOnline(self, self.instance.primary_node)
# Removing a running instance without shutting it down first is refused.
13113 if (self.op.remove_instance and
13114 self.instance.admin_state == constants.ADMINST_UP and
13115 not self.op.shutdown):
13116 raise errors.OpPrereqError("Can not remove instance without shutting it"
13119 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13120 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13121 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13122 assert self.dst_node is not None
13124 _CheckNodeOnline(self, self.dst_node.name)
13125 _CheckNodeNotDrained(self, self.dst_node.name)
13128 self.dest_disk_info = None
13129 self.dest_x509_ca = None
13131 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13132 self.dst_node = None
# In remote mode, op.target_node carries per-disk destination info.
13134 if len(self.op.target_node) != len(self.instance.disks):
13135 raise errors.OpPrereqError(("Received destination information for %s"
13136 " disks, but instance %s has %s disks") %
13137 (len(self.op.target_node), instance_name,
13138 len(self.instance.disks)),
13139 errors.ECODE_INVAL)
13141 cds = _GetClusterDomainSecret()
13143 # Check X509 key name
13145 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13146 except (TypeError, ValueError), err:
13147 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13149 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13150 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13151 errors.ECODE_INVAL)
13153 # Load and verify CA
13155 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13156 except OpenSSL.crypto.Error, err:
13157 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13158 (err, ), errors.ECODE_INVAL)
13160 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13161 if errcode is not None:
13162 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13163 (msg, ), errors.ECODE_INVAL)
13165 self.dest_x509_ca = cert
13167 # Verify target information
13169 for idx, disk_data in enumerate(self.op.target_node):
13171 (host, port, magic) = \
13172 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13173 except errors.GenericError, err:
13174 raise errors.OpPrereqError("Target info for disk %s: %s" %
13175 (idx, err), errors.ECODE_INVAL)
13177 disk_info.append((host, port, magic))
13179 assert len(disk_info) == len(self.op.target_node)
13180 self.dest_disk_info = disk_info
13183 raise errors.ProgrammerError("Unhandled export mode %r" %
13186 # instance disk type verification
13187 # TODO: Implement export support for file-based disks
13188 for disk in self.instance.disks:
13189 if disk.dev_type == constants.LD_FILE:
13190 raise errors.OpPrereqError("Export not supported for instances with"
13191 " file-based disks", errors.ECODE_INVAL)
13193 def _CleanupExports(self, feedback_fn):
13194 """Removes exports of current instance from all other nodes.
13196 If an instance in a cluster with nodes A..D was exported to node C, its
13197 exports will be removed from the nodes A, B and D.
13200 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13202 nodelist = self.cfg.GetNodeList()
13203 nodelist.remove(self.dst_node.name)
13205 # on one-node clusters nodelist will be empty after the removal
13206 # if we proceed the backup would be removed because OpBackupQuery
13207 # substitutes an empty list with the full cluster node list.
13208 iname = self.instance.name
13210 feedback_fn("Removing old exports for instance %s" % iname)
13211 exportlist = self.rpc.call_export_list(nodelist)
13212 for node in exportlist:
# Nodes whose export list cannot be queried are skipped.
13213 if exportlist[node].fail_msg:
13215 if iname in exportlist[node].payload:
13216 msg = self.rpc.call_export_remove(node, iname).fail_msg
13218 self.LogWarning("Could not remove older export for instance %s"
13219 " on node %s: %s", iname, node, msg)
13221 def Exec(self, feedback_fn):
13222 """Export an instance to an image in the cluster.
# Flow: optionally shut down the instance, activate disks if needed,
# snapshot, restart the instance if it was running, perform the local or
# remote export, deactivate disks, then optionally remove the instance and
# (local mode) clean up stale exports on other nodes.
13225 assert self.op.mode in constants.EXPORT_MODES
13227 instance = self.instance
13228 src_node = instance.primary_node
13230 if self.op.shutdown:
13231 # shutdown the instance, but not the disks
13232 feedback_fn("Shutting down instance %s" % instance.name)
13233 result = self.rpc.call_instance_shutdown(src_node, instance,
13234 self.op.shutdown_timeout)
13235 # TODO: Maybe ignore failures if ignore_remove_failures is set
13236 result.Raise("Could not shutdown instance %s on"
13237 " node %s" % (instance.name, src_node))
13239 # set the disks ID correctly since call_instance_start needs the
13240 # correct drbd minor to create the symlinks
13241 for disk in instance.disks:
13242 self.cfg.SetDiskID(disk, src_node)
# Disks must be activated first if the instance was administratively down.
13244 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13247 # Activate the instance disks if we'exporting a stopped instance
13248 feedback_fn("Activating disks for %s" % instance.name)
13249 _StartInstanceDisks(self, instance, None)
13252 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13255 helper.CreateSnapshots()
# Restart the instance only if it was running before the shutdown and is
# not about to be removed.
13257 if (self.op.shutdown and
13258 instance.admin_state == constants.ADMINST_UP and
13259 not self.op.remove_instance):
13260 assert not activate_disks
13261 feedback_fn("Starting instance %s" % instance.name)
13262 result = self.rpc.call_instance_start(src_node,
13263 (instance, None, None), False)
13264 msg = result.fail_msg
13266 feedback_fn("Failed to start instance: %s" % msg)
13267 _ShutdownInstanceDisks(self, instance)
13268 raise errors.OpExecError("Could not start instance: %s" % msg)
13270 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13271 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13272 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13273 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13274 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13276 (key_name, _, _) = self.x509_key_name
13279 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13282 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13283 key_name, dest_ca_pem,
13288 # Check for backwards compatibility
13289 assert len(dresults) == len(instance.disks)
13290 assert compat.all(isinstance(i, bool) for i in dresults), \
13291 "Not all results are boolean: %r" % dresults
13295 feedback_fn("Deactivating disks for %s" % instance.name)
13296 _ShutdownInstanceDisks(self, instance)
# Collect per-disk and finalization failures into one error message.
13298 if not (compat.all(dresults) and fin_resu):
13301 failures.append("export finalization")
13302 if not compat.all(dresults):
13303 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13305 failures.append("disk export: disk(s) %s" % fdsk)
13307 raise errors.OpExecError("Export failed, errors in %s" %
13308 utils.CommaJoin(failures))
13310 # At this point, the export was successful, we can cleanup/finish
13312 # Remove instance if requested
13313 if self.op.remove_instance:
13314 feedback_fn("Removing instance %s" % instance.name)
13315 _RemoveInstance(self, feedback_fn, instance,
13316 self.op.ignore_remove_failures)
13318 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13319 self._CleanupExports(feedback_fn)
13321 return fin_resu, dresults
13324 class LUBackupRemove(NoHooksLU):
13325 """Remove exports related to the named instance.
13330 def ExpandNames(self):
13331 self.needed_locks = {}
13332 # We need all nodes to be locked in order for RemoveExport to work, but we
13333 # don't need to lock the instance itself, as nothing will happen to it (and
13334 # we can remove exports also for a removed instance)
13335 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13337 def Exec(self, feedback_fn):
13338 """Remove any export.
13341 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13342 # If the instance was not found we'll try with the name that was passed in.
13343 # This will only work if it was an FQDN, though.
13345 if not instance_name:
13347 instance_name = self.op.instance_name
13349 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13350 exportlist = self.rpc.call_export_list(locked_nodes)
13352 for node in exportlist:
13353 msg = exportlist[node].fail_msg
13355 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13357 if instance_name in exportlist[node].payload:
13359 result = self.rpc.call_export_remove(node, instance_name)
13360 msg = result.fail_msg
13362 logging.error("Could not remove export for instance %s"
13363 " on node %s: %s", instance_name, node, msg)
13365 if fqdn_warn and not found:
13366 feedback_fn("Export not found. If trying to remove an export belonging"
13367 " to a deleted instance please use its Fully Qualified"
13371 class LUGroupAdd(LogicalUnit):
13372 """Logical unit for creating node groups.
13375 HPATH = "group-add"
13376 HTYPE = constants.HTYPE_GROUP
13379 def ExpandNames(self):
13380 # We need the new group's UUID here so that we can create and acquire the
13381 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13382 # that it should not check whether the UUID exists in the configuration.
13383 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13384 self.needed_locks = {}
13385 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13387 def CheckPrereq(self):
13388 """Check prerequisites.
13390 This checks that the given group name is not an existing node group
13395 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13396 except errors.OpPrereqError:
13399 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13400 " node group (UUID: %s)" %
13401 (self.op.group_name, existing_uuid),
13402 errors.ECODE_EXISTS)
13404 if self.op.ndparams:
13405 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13407 if self.op.hv_state:
13408 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13410 self.new_hv_state = None
13412 if self.op.disk_state:
13413 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13415 self.new_disk_state = None
13417 if self.op.diskparams:
13418 for templ in constants.DISK_TEMPLATES:
13419 if templ not in self.op.diskparams:
13420 self.op.diskparams[templ] = {}
13421 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13423 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13425 if self.op.ipolicy:
13426 cluster = self.cfg.GetClusterInfo()
13427 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13429 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13430 except errors.ConfigurationError, err:
13431 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13432 errors.ECODE_INVAL)
13434 def BuildHooksEnv(self):
13435 """Build hooks env.
13439 "GROUP_NAME": self.op.group_name,
13442 def BuildHooksNodes(self):
13443 """Build hooks nodes.
13446 mn = self.cfg.GetMasterNode()
13447 return ([mn], [mn])
13449 def Exec(self, feedback_fn):
13450 """Add the node group to the cluster.
13453 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13454 uuid=self.group_uuid,
13455 alloc_policy=self.op.alloc_policy,
13456 ndparams=self.op.ndparams,
13457 diskparams=self.op.diskparams,
13458 ipolicy=self.op.ipolicy,
13459 hv_state_static=self.new_hv_state,
13460 disk_state_static=self.new_disk_state)
13462 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13463 del self.remove_locks[locking.LEVEL_NODEGROUP]
13466 class LUGroupAssignNodes(NoHooksLU):
13467 """Logical unit for assigning nodes to groups.
13472 def ExpandNames(self):
13473 # These raise errors.OpPrereqError on their own:
13474 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13475 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13477 # We want to lock all the affected nodes and groups. We have readily
13478 # available the list of nodes, and the *destination* group. To gather the
13479 # list of "source" groups, we need to fetch node information later on.
13480 self.needed_locks = {
13481 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13482 locking.LEVEL_NODE: self.op.nodes,
13485 def DeclareLocks(self, level):
13486 if level == locking.LEVEL_NODEGROUP:
13487 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13489 # Try to get all affected nodes' groups without having the group or node
13490 # lock yet. Needs verification later in the code flow.
13491 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13493 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13495 def CheckPrereq(self):
13496 """Check prerequisites.
13499 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13500 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13501 frozenset(self.op.nodes))
13503 expected_locks = (set([self.group_uuid]) |
13504 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13505 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13506 if actual_locks != expected_locks:
13507 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13508 " current groups are '%s', used to be '%s'" %
13509 (utils.CommaJoin(expected_locks),
13510 utils.CommaJoin(actual_locks)))
13512 self.node_data = self.cfg.GetAllNodesInfo()
13513 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13514 instance_data = self.cfg.GetAllInstancesInfo()
13516 if self.group is None:
13517 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13518 (self.op.group_name, self.group_uuid))
13520 (new_splits, previous_splits) = \
13521 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13522 for node in self.op.nodes],
13523 self.node_data, instance_data)
13526 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13528 if not self.op.force:
13529 raise errors.OpExecError("The following instances get split by this"
13530 " change and --force was not given: %s" %
13533 self.LogWarning("This operation will split the following instances: %s",
13536 if previous_splits:
13537 self.LogWarning("In addition, these already-split instances continue"
13538 " to be split across groups: %s",
13539 utils.CommaJoin(utils.NiceSort(previous_splits)))
13541 def Exec(self, feedback_fn):
13542 """Assign nodes to a new group.
13545 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13547 self.cfg.AssignGroupNodes(mods)
13550 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13551 """Check for split instances after a node assignment.
13553 This method considers a series of node assignments as an atomic operation,
13554 and returns information about split instances after applying the set of
13557 In particular, it returns information about newly split instances, and
13558 instances that were already split, and remain so after the change.
13560 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13563 @type changes: list of (node_name, new_group_uuid) pairs.
13564 @param changes: list of node assignments to consider.
13565 @param node_data: a dict with data for all nodes
13566 @param instance_data: a dict with all instances to consider
13567 @rtype: a two-tuple
13568 @return: a list of instances that were previously okay and result split as a
13569 consequence of this change, and a list of instances that were previously
13570 split and this change does not fix.
13573 changed_nodes = dict((node, group) for node, group in changes
13574 if node_data[node].group != group)
13576 all_split_instances = set()
13577 previously_split_instances = set()
13579 def InstanceNodes(instance):
13580 return [instance.primary_node] + list(instance.secondary_nodes)
13582 for inst in instance_data.values():
13583 if inst.disk_template not in constants.DTS_INT_MIRROR:
13586 instance_nodes = InstanceNodes(inst)
13588 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13589 previously_split_instances.add(inst.name)
13591 if len(set(changed_nodes.get(node, node_data[node].group)
13592 for node in instance_nodes)) > 1:
13593 all_split_instances.add(inst.name)
13595 return (list(all_split_instances - previously_split_instances),
13596 list(previously_split_instances & all_split_instances))
13599 class _GroupQuery(_QueryBase):
13600 FIELDS = query.GROUP_FIELDS
13602 def ExpandNames(self, lu):
13603 lu.needed_locks = {}
13605 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13606 self._cluster = lu.cfg.GetClusterInfo()
13607 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13610 self.wanted = [name_to_uuid[name]
13611 for name in utils.NiceSort(name_to_uuid.keys())]
13613 # Accept names to be either names or UUIDs.
13616 all_uuid = frozenset(self._all_groups.keys())
13618 for name in self.names:
13619 if name in all_uuid:
13620 self.wanted.append(name)
13621 elif name in name_to_uuid:
13622 self.wanted.append(name_to_uuid[name])
13624 missing.append(name)
13627 raise errors.OpPrereqError("Some groups do not exist: %s" %
13628 utils.CommaJoin(missing),
13629 errors.ECODE_NOENT)
13631 def DeclareLocks(self, lu, level):
13634 def _GetQueryData(self, lu):
13635 """Computes the list of node groups and their attributes.
13638 do_nodes = query.GQ_NODE in self.requested_data
13639 do_instances = query.GQ_INST in self.requested_data
13641 group_to_nodes = None
13642 group_to_instances = None
13644 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13645 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13646 # latter GetAllInstancesInfo() is not enough, for we have to go through
13647 # instance->node. Hence, we will need to process nodes even if we only need
13648 # instance information.
13649 if do_nodes or do_instances:
13650 all_nodes = lu.cfg.GetAllNodesInfo()
13651 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13654 for node in all_nodes.values():
13655 if node.group in group_to_nodes:
13656 group_to_nodes[node.group].append(node.name)
13657 node_to_group[node.name] = node.group
13660 all_instances = lu.cfg.GetAllInstancesInfo()
13661 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13663 for instance in all_instances.values():
13664 node = instance.primary_node
13665 if node in node_to_group:
13666 group_to_instances[node_to_group[node]].append(instance.name)
13669 # Do not pass on node information if it was not requested.
13670 group_to_nodes = None
13672 return query.GroupQueryData(self._cluster,
13673 [self._all_groups[uuid]
13674 for uuid in self.wanted],
13675 group_to_nodes, group_to_instances)
13678 class LUGroupQuery(NoHooksLU):
13679 """Logical unit for querying node groups.
  def CheckArguments(self):
    """Build the query helper from the opcode's name filter and field list."""
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)
  def ExpandNames(self):
    """Delegate name expansion and lock setup to the query helper."""
    self.gq.ExpandNames(self)
  def DeclareLocks(self, level):
    """Delegate per-level lock declaration to the query helper."""
    self.gq.DeclareLocks(self, level)
  def Exec(self, feedback_fn):
    """Run the group query and return the results in old-style format."""
    return self.gq.OldStyleQuery(self)
13698 class LUGroupSetParams(LogicalUnit):
13699 """Modifies the parameters of a node group.
13702 HPATH = "group-modify"
13703 HTYPE = constants.HTYPE_GROUP
13706 def CheckArguments(self):
13709 self.op.diskparams,
13710 self.op.alloc_policy,
13712 self.op.disk_state,
13716 if all_changes.count(None) == len(all_changes):
13717 raise errors.OpPrereqError("Please pass at least one modification",
13718 errors.ECODE_INVAL)
13720 def ExpandNames(self):
13721 # This raises errors.OpPrereqError on its own:
13722 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13724 self.needed_locks = {
13725 locking.LEVEL_INSTANCE: [],
13726 locking.LEVEL_NODEGROUP: [self.group_uuid],
13729 self.share_locks[locking.LEVEL_INSTANCE] = 1
13731 def DeclareLocks(self, level):
13732 if level == locking.LEVEL_INSTANCE:
13733 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13735 # Lock instances optimistically, needs verification once group lock has
13737 self.needed_locks[locking.LEVEL_INSTANCE] = \
13738 self.cfg.GetNodeGroupInstances(self.group_uuid)
13740 def CheckPrereq(self):
13741 """Check prerequisites.
13744 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13746 # Check if locked instances are still correct
13747 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13749 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13750 cluster = self.cfg.GetClusterInfo()
13752 if self.group is None:
13753 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13754 (self.op.group_name, self.group_uuid))
13756 if self.op.ndparams:
13757 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13758 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13759 self.new_ndparams = new_ndparams
13761 if self.op.diskparams:
13762 self.new_diskparams = dict()
13763 for templ in constants.DISK_TEMPLATES:
13764 if templ not in self.op.diskparams:
13765 self.op.diskparams[templ] = {}
13766 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13767 self.op.diskparams[templ])
13768 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13769 self.new_diskparams[templ] = new_templ_params
13771 if self.op.hv_state:
13772 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13773 self.group.hv_state_static)
13775 if self.op.disk_state:
13776 self.new_disk_state = \
13777 _MergeAndVerifyDiskState(self.op.disk_state,
13778 self.group.disk_state_static)
13780 if self.op.ipolicy:
13781 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13785 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13786 inst_filter = lambda inst: inst.name in owned_instances
13787 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13789 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13791 new_ipolicy, instances)
13794 self.LogWarning("After the ipolicy change the following instances"
13795 " violate them: %s",
13796 utils.CommaJoin(violations))
13798 def BuildHooksEnv(self):
13799 """Build hooks env.
13803 "GROUP_NAME": self.op.group_name,
13804 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13807 def BuildHooksNodes(self):
13808 """Build hooks nodes.
13811 mn = self.cfg.GetMasterNode()
13812 return ([mn], [mn])
13814 def Exec(self, feedback_fn):
13815 """Modifies the node group.
13820 if self.op.ndparams:
13821 self.group.ndparams = self.new_ndparams
13822 result.append(("ndparams", str(self.group.ndparams)))
13824 if self.op.diskparams:
13825 self.group.diskparams = self.new_diskparams
13826 result.append(("diskparams", str(self.group.diskparams)))
13828 if self.op.alloc_policy:
13829 self.group.alloc_policy = self.op.alloc_policy
13831 if self.op.hv_state:
13832 self.group.hv_state_static = self.new_hv_state
13834 if self.op.disk_state:
13835 self.group.disk_state_static = self.new_disk_state
13837 if self.op.ipolicy:
13838 self.group.ipolicy = self.new_ipolicy
13840 self.cfg.Update(self.group, feedback_fn)
13844 class LUGroupRemove(LogicalUnit):
13845 HPATH = "group-remove"
13846 HTYPE = constants.HTYPE_GROUP
13849 def ExpandNames(self):
13850 # This will raises errors.OpPrereqError on its own:
13851 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13852 self.needed_locks = {
13853 locking.LEVEL_NODEGROUP: [self.group_uuid],
13856 def CheckPrereq(self):
13857 """Check prerequisites.
13859 This checks that the given group name exists as a node group, that is
13860 empty (i.e., contains no nodes), and that is not the last group of the
13864 # Verify that the group is empty.
13865 group_nodes = [node.name
13866 for node in self.cfg.GetAllNodesInfo().values()
13867 if node.group == self.group_uuid]
13870 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13872 (self.op.group_name,
13873 utils.CommaJoin(utils.NiceSort(group_nodes))),
13874 errors.ECODE_STATE)
13876 # Verify the cluster would not be left group-less.
13877 if len(self.cfg.GetNodeGroupList()) == 1:
13878 raise errors.OpPrereqError("Group '%s' is the only group,"
13879 " cannot be removed" %
13880 self.op.group_name,
13881 errors.ECODE_STATE)
13883 def BuildHooksEnv(self):
13884 """Build hooks env.
13888 "GROUP_NAME": self.op.group_name,
13891 def BuildHooksNodes(self):
13892 """Build hooks nodes.
13895 mn = self.cfg.GetMasterNode()
13896 return ([mn], [mn])
13898 def Exec(self, feedback_fn):
13899 """Remove the node group.
13903 self.cfg.RemoveNodeGroup(self.group_uuid)
13904 except errors.ConfigurationError:
13905 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13906 (self.op.group_name, self.group_uuid))
13908 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13911 class LUGroupRename(LogicalUnit):
13912 HPATH = "group-rename"
13913 HTYPE = constants.HTYPE_GROUP
13916 def ExpandNames(self):
13917 # This raises errors.OpPrereqError on its own:
13918 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13920 self.needed_locks = {
13921 locking.LEVEL_NODEGROUP: [self.group_uuid],
13924 def CheckPrereq(self):
13925 """Check prerequisites.
13927 Ensures requested new name is not yet used.
13931 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13932 except errors.OpPrereqError:
13935 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13936 " node group (UUID: %s)" %
13937 (self.op.new_name, new_name_uuid),
13938 errors.ECODE_EXISTS)
13940 def BuildHooksEnv(self):
13941 """Build hooks env.
13945 "OLD_NAME": self.op.group_name,
13946 "NEW_NAME": self.op.new_name,
13949 def BuildHooksNodes(self):
13950 """Build hooks nodes.
13953 mn = self.cfg.GetMasterNode()
13955 all_nodes = self.cfg.GetAllNodesInfo()
13956 all_nodes.pop(mn, None)
13959 run_nodes.extend(node.name for node in all_nodes.values()
13960 if node.group == self.group_uuid)
13962 return (run_nodes, run_nodes)
13964 def Exec(self, feedback_fn):
13965 """Rename the node group.
13968 group = self.cfg.GetNodeGroup(self.group_uuid)
13971 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13972 (self.op.group_name, self.group_uuid))
13974 group.name = self.op.new_name
13975 self.cfg.Update(group, feedback_fn)
13977 return self.op.new_name
13980 class LUGroupEvacuate(LogicalUnit):
13981 HPATH = "group-evacuate"
13982 HTYPE = constants.HTYPE_GROUP
13985 def ExpandNames(self):
13986 # This raises errors.OpPrereqError on its own:
13987 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13989 if self.op.target_groups:
13990 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13991 self.op.target_groups)
13993 self.req_target_uuids = []
13995 if self.group_uuid in self.req_target_uuids:
13996 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13997 " as a target group (targets are %s)" %
13999 utils.CommaJoin(self.req_target_uuids)),
14000 errors.ECODE_INVAL)
14002 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14004 self.share_locks = _ShareAll()
14005 self.needed_locks = {
14006 locking.LEVEL_INSTANCE: [],
14007 locking.LEVEL_NODEGROUP: [],
14008 locking.LEVEL_NODE: [],
14011 def DeclareLocks(self, level):
14012 if level == locking.LEVEL_INSTANCE:
14013 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14015 # Lock instances optimistically, needs verification once node and group
14016 # locks have been acquired
14017 self.needed_locks[locking.LEVEL_INSTANCE] = \
14018 self.cfg.GetNodeGroupInstances(self.group_uuid)
14020 elif level == locking.LEVEL_NODEGROUP:
14021 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14023 if self.req_target_uuids:
14024 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14026 # Lock all groups used by instances optimistically; this requires going
14027 # via the node before it's locked, requiring verification later on
14028 lock_groups.update(group_uuid
14029 for instance_name in
14030 self.owned_locks(locking.LEVEL_INSTANCE)
14032 self.cfg.GetInstanceNodeGroups(instance_name))
14034 # No target groups, need to lock all of them
14035 lock_groups = locking.ALL_SET
14037 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14039 elif level == locking.LEVEL_NODE:
14040 # This will only lock the nodes in the group to be evacuated which
14041 # contain actual instances
14042 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14043 self._LockInstancesNodes()
14045 # Lock all nodes in group to be evacuated and target groups
14046 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14047 assert self.group_uuid in owned_groups
14048 member_nodes = [node_name
14049 for group in owned_groups
14050 for node_name in self.cfg.GetNodeGroup(group).members]
14051 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14053 def CheckPrereq(self):
14054 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14055 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14056 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14058 assert owned_groups.issuperset(self.req_target_uuids)
14059 assert self.group_uuid in owned_groups
14061 # Check if locked instances are still correct
14062 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14064 # Get instance information
14065 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14067 # Check if node groups for locked instances are still correct
14068 for instance_name in owned_instances:
14069 inst = self.instances[instance_name]
14070 assert owned_nodes.issuperset(inst.all_nodes), \
14071 "Instance %s's nodes changed while we kept the lock" % instance_name
14073 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
14076 assert self.group_uuid in inst_groups, \
14077 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
14079 if self.req_target_uuids:
14080 # User requested specific target groups
14081 self.target_uuids = self.req_target_uuids
14083 # All groups except the one to be evacuated are potential targets
14084 self.target_uuids = [group_uuid for group_uuid in owned_groups
14085 if group_uuid != self.group_uuid]
14087 if not self.target_uuids:
14088 raise errors.OpPrereqError("There are no possible target groups",
14089 errors.ECODE_INVAL)
14091 def BuildHooksEnv(self):
14092 """Build hooks env.
14096 "GROUP_NAME": self.op.group_name,
14097 "TARGET_GROUPS": " ".join(self.target_uuids),
14100 def BuildHooksNodes(self):
14101 """Build hooks nodes.
14104 mn = self.cfg.GetMasterNode()
14106 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14108 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14110 return (run_nodes, run_nodes)
14112 def Exec(self, feedback_fn):
14113 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14115 assert self.group_uuid not in self.target_uuids
14117 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14118 instances=instances, target_groups=self.target_uuids)
14120 ial.Run(self.op.iallocator)
14122 if not ial.success:
14123 raise errors.OpPrereqError("Can't compute group evacuation using"
14124 " iallocator '%s': %s" %
14125 (self.op.iallocator, ial.info),
14126 errors.ECODE_NORES)
14128 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14130 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14131 len(jobs), self.op.group_name)
14133 return ResultWithJobs(jobs)
14136 class TagsLU(NoHooksLU): # pylint: disable=W0223
14137 """Generic tags LU.
14139 This is an abstract class which is the parent of all the other tags LUs.
14142 def ExpandNames(self):
14143 self.group_uuid = None
14144 self.needed_locks = {}
14145 if self.op.kind == constants.TAG_NODE:
14146 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14147 self.needed_locks[locking.LEVEL_NODE] = self.op.name
14148 elif self.op.kind == constants.TAG_INSTANCE:
14149 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14150 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14151 elif self.op.kind == constants.TAG_NODEGROUP:
14152 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14154 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14155 # not possible to acquire the BGL based on opcode parameters)
14157 def CheckPrereq(self):
14158 """Check prerequisites.
14161 if self.op.kind == constants.TAG_CLUSTER:
14162 self.target = self.cfg.GetClusterInfo()
14163 elif self.op.kind == constants.TAG_NODE:
14164 self.target = self.cfg.GetNodeInfo(self.op.name)
14165 elif self.op.kind == constants.TAG_INSTANCE:
14166 self.target = self.cfg.GetInstanceInfo(self.op.name)
14167 elif self.op.kind == constants.TAG_NODEGROUP:
14168 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14170 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14171 str(self.op.kind), errors.ECODE_INVAL)
14174 class LUTagsGet(TagsLU):
14175 """Returns the tags of a given object.
  def ExpandNames(self):
    """Expand names via the base class, then mark all locks as shared."""
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation: nothing is modified, so
    # every lock declared by the base class can be taken in shared mode
    self.share_locks = _ShareAll()
  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    # self.target was resolved by TagsLU.CheckPrereq; return a plain list
    # copy of its tags
    return list(self.target.GetTags())
14193 class LUTagsSearch(NoHooksLU):
14194 """Searches the tags for a given pattern.
  def ExpandNames(self):
    # Searching tags only reads from the configuration; no locks are needed
    self.needed_locks = {}
14202 def CheckPrereq(self):
14203 """Check prerequisites.
14205 This checks the pattern passed for validity by compiling it.
14209 self.re = re.compile(self.op.pattern)
14210 except re.error, err:
14211 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14212 (self.op.pattern, err), errors.ECODE_INVAL)
14214 def Exec(self, feedback_fn):
14215 """Returns the tag list.
14219 tgts = [("/cluster", cfg.GetClusterInfo())]
14220 ilist = cfg.GetAllInstancesInfo().values()
14221 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14222 nlist = cfg.GetAllNodesInfo().values()
14223 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14224 tgts.extend(("/nodegroup/%s" % n.name, n)
14225 for n in cfg.GetAllNodeGroupsInfo().values())
14227 for path, target in tgts:
14228 for tag in target.GetTags():
14229 if self.re.search(tag):
14230 results.append((path, tag))
14234 class LUTagsSet(TagsLU):
14235 """Sets a tag on a given object.
14240 def CheckPrereq(self):
14241 """Check prerequisites.
14243 This checks the type and length of the tag name and value.
14246 TagsLU.CheckPrereq(self)
14247 for tag in self.op.tags:
14248 objects.TaggableObject.ValidateTag(tag)
14250 def Exec(self, feedback_fn):
14255 for tag in self.op.tags:
14256 self.target.AddTag(tag)
14257 except errors.TagError, err:
14258 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14259 self.cfg.Update(self.target, feedback_fn)
14262 class LUTagsDel(TagsLU):
14263 """Delete a list of tags from a given object.
14268 def CheckPrereq(self):
14269 """Check prerequisites.
14271 This checks that we have the given tag.
14274 TagsLU.CheckPrereq(self)
14275 for tag in self.op.tags:
14276 objects.TaggableObject.ValidateTag(tag)
14277 del_tags = frozenset(self.op.tags)
14278 cur_tags = self.target.GetTags()
14280 diff_tags = del_tags - cur_tags
14282 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14283 raise errors.OpPrereqError("Tag(s) %s not found" %
14284 (utils.CommaJoin(diff_names), ),
14285 errors.ECODE_NOENT)
14287 def Exec(self, feedback_fn):
14288 """Remove the tag from the object.
14291 for tag in self.op.tags:
14292 self.target.RemoveTag(tag)
14293 self.cfg.Update(self.target, feedback_fn)
14296 class LUTestDelay(NoHooksLU):
14297 """Sleep for a specified amount of time.
14299 This LU sleeps on the master and/or nodes for a specified amount of
14305 def ExpandNames(self):
14306 """Expand names and set required locks.
14308 This expands the node list, if any.
14311 self.needed_locks = {}
14312 if self.op.on_nodes:
14313 # _GetWantedNodes can be used here, but is not always appropriate to use
14314 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14315 # more information.
14316 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14317 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14319 def _TestDelay(self):
14320 """Do the actual sleep.
14323 if self.op.on_master:
14324 if not utils.TestDelay(self.op.duration):
14325 raise errors.OpExecError("Error during master delay test")
14326 if self.op.on_nodes:
14327 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14328 for node, node_result in result.items():
14329 node_result.Raise("Failure during rpc call to node %s" % node)
14331 def Exec(self, feedback_fn):
14332 """Execute the test delay opcode, with the wanted repetitions.
14335 if self.op.repeat == 0:
14338 top_value = self.op.repeat - 1
14339 for i in range(self.op.repeat):
14340 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
# Test LU exercising the job queue: it can notify an external test client at
# well-defined phases (ExpandNames / Exec / per-log-message) over a temporary
# Unix socket, so the client can observe job-queue behaviour.
# NOTE(review): gaps in the embedded numbering mark source lines elided from
# this excerpt (e.g. the @classmethod decorator before _NotifyUsingSocket and
# several try/finally lines); comments below are hedged accordingly.
14344 class LUTestJqueue(NoHooksLU):
14345 """Utility LU to test some aspects of the job queue.
# Timeouts (seconds) for the test client; deliberately shorter than the
# WaitForJobChange default so changed jobs are noticed (see comment below).
14350 # Must be lower than default timeout for WaitForJobChange to see whether it
14351 # notices changed jobs
14352 _CLIENT_CONNECT_TIMEOUT = 20.0
14353 _CLIENT_CONFIRM_TIMEOUT = 60.0
# Presumably a @classmethod (decorator elided): creates a Unix socket in a
# fresh temp dir, hands its path to `cb`, then blocks until the client
# connects and later closes the connection.
14356 def _NotifyUsingSocket(cls, cb, errcls):
14357 """Opens a Unix socket and waits for another program to connect.
14360 @param cb: Callback to send socket name to client
14361 @type errcls: class
14362 @param errcls: Exception class to use for errors
14365 # Using a temporary directory as there's no easy way to create temporary
14366 # sockets without writing a custom loop around tempfile.mktemp and
14368 tmpdir = tempfile.mkdtemp()
14370 tmpsock = utils.PathJoin(tmpdir, "sock")
14372 logging.debug("Creating temporary socket at %s", tmpsock)
14373 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
# (bind/listen on tmpsock happen in elided lines 14374-14377)
14378 # Send details to client
14381 # Wait for client to connect before continuing
14382 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14384 (conn, _) = sock.accept()
# Python 2 except syntax; a connect timeout surfaces as the caller-chosen
# exception class (OpPrereqError or OpExecError, see _Notify).
14385 except socket.error, err:
14386 raise errcls("Client didn't connect in time (%s)" % err)
14390 # Remove as soon as client is connected
# The socket file is unlinked with its directory here; the established
# connection stays usable.
14391 shutil.rmtree(tmpdir)
14393 # Wait for client to close
14396 # pylint: disable=E1101
14397 # Instance of '_socketobject' has no ... member
14398 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14400 except socket.error, err:
14401 raise errcls("Client failed to confirm notification (%s)" % err)
# Emits an ELOG_JQUEUE_TEST log entry carrying (sockname, test, arg);
# this is the callback passed to _NotifyUsingSocket via compat.partial.
14405 def _SendNotification(self, test, arg, sockname):
14406 """Sends a notification to the client.
14409 @param test: Test name
14410 @param arg: Test argument (depends on test)
14411 @type sockname: string
14412 @param sockname: Socket path
14415 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
# Chooses the failure exception by phase: prereq-phase problems raise
# OpPrereqError, exec-phase problems raise OpExecError.
14417 def _Notify(self, prereq, test, arg):
14418 """Notifies the client of a test.
14421 @param prereq: Whether this is a prereq-phase test
14423 @param test: Test name
14424 @param arg: Test argument (depends on test)
14428 errcls = errors.OpPrereqError
14430 errcls = errors.OpExecError
14432 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
# Counts invocations so ExpandNames/Exec can assert the LCM calling
# contract (CheckArguments before ExpandNames before Exec).
14436 def CheckArguments(self):
14437 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14438 self.expandnames_calls = 0
14440 def ExpandNames(self):
14441 checkargs_calls = getattr(self, "checkargs_calls", 0)
14442 if checkargs_calls < 1:
14443 raise errors.ProgrammerError("CheckArguments was not called")
14445 self.expandnames_calls += 1
14447 if self.op.notify_waitlock:
14448 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14450 self.LogInfo("Expanding names")
14452 # Get lock on master node (just to get a lock, not for a particular reason)
14453 self.needed_locks = {
14454 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14457 def Exec(self, feedback_fn):
14458 if self.expandnames_calls < 1:
14459 raise errors.ProgrammerError("ExpandNames was not called")
14461 if self.op.notify_exec:
14462 self._Notify(False, constants.JQT_EXEC, None)
14464 self.LogInfo("Executing")
# Optionally stream test log messages to the client, announcing the count
# first (JQT_STARTMSG) and confirming how many were sent (JQT_LOGMSG).
14466 if self.op.log_messages:
14467 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14468 for idx, msg in enumerate(self.op.log_messages):
14469 self.LogInfo("Sending log message %s", idx + 1)
14470 feedback_fn(constants.JQT_MSGPREFIX + msg)
14471 # Report how many test messages have been sent
14472 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
# Deliberate failure path (guarded by an elided condition, presumably
# self.op.fail — TODO confirm) for testing job failure handling.
14475 raise errors.OpExecError("Opcode failure was requested")
# Framework driving an external "iallocator" script: gathers cluster state,
# serializes it plus a mode-specific request to JSON, runs the script on the
# master node via RPC, and validates/stores the parsed response.
# NOTE(review): gaps in the embedded numbering mark source lines elided from
# this excerpt (decorators, try/except framing, some dict literals); comments
# below only describe what the visible lines establish.
14480 class IAllocator(object):
14481 """IAllocator framework.
14483 An IAllocator instance has three sets of attributes:
14484 - cfg that is needed to query the cluster
14485 - input data (all members of the _KEYS class attribute are required)
14486 - four buffer attributes (in|out_data|text), that represent the
14487 input (to the external script) in text and data structure format,
14488 and the output from it, again in two formats
14489 - the result variables from the script (success, info, nodes) for
14493 # pylint: disable=R0902
14494 # lots of instance attributes
# Mode-specific kwargs are validated against _MODE_DATA's key list: unknown
# keys and missing keys both raise ProgrammerError.
14496 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14498 self.rpc = rpc_runner
14499 # init buffer variables
14500 self.in_text = self.out_text = self.in_data = self.out_data = None
14501 # init all input fields so that pylint is happy
14503 self.memory = self.disks = self.disk_template = None
14504 self.os = self.tags = self.nics = self.vcpus = None
14505 self.hypervisor = None
14506 self.relocate_from = None
14508 self.instances = None
14509 self.evac_mode = None
14510 self.target_groups = []
14512 self.required_nodes = None
14513 # init result fields
14514 self.success = self.info = self.result = None
# Look up (request-builder fn, key schema, result validator) for the mode;
# the KeyError branch (elided try/except) reports an unknown mode.
14517 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14519 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14520 " IAllocator" % self.mode)
14522 keyset = [n for (n, _) in keydata]
14525 if key not in keyset:
14526 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14527 " IAllocator" % key)
14528 setattr(self, key, kwargs[key])
14531 if key not in kwargs:
14532 raise errors.ProgrammerError("Missing input parameter '%s' to"
14533 " IAllocator" % key)
14534 self._BuildInputData(compat.partial(fn, self), keydata)
# Builds the mode-independent part of the input: cluster metadata, node
# groups, per-node static+dynamic data and per-instance data; stores the
# resulting dict in self.in_data.
14536 def _ComputeClusterData(self):
14537 """Compute the generic allocator input data.
14539 This is the data that is independent of the actual operation.
14543 cluster_info = cfg.GetClusterInfo()
14546 "version": constants.IALLOCATOR_VERSION,
14547 "cluster_name": cfg.GetClusterName(),
14548 "cluster_tags": list(cluster_info.GetTags()),
14549 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14550 "ipolicy": cluster_info.ipolicy,
14552 ninfo = cfg.GetAllNodesInfo()
14553 iinfo = cfg.GetAllInstancesInfo().values()
14554 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
# Only vm_capable nodes are queried via RPC.
14557 node_list = [n.name for n in ninfo.values() if n.vm_capable]
# Hypervisor selection depends on the request mode; for relocation it is
# taken from the instance being relocated.
14559 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14560 hypervisor_name = self.hypervisor
14561 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14562 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14564 hypervisor_name = cluster_info.primary_hypervisor
14566 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14569 self.rpc.call_all_instances_info(node_list,
14570 cluster_info.enabled_hypervisors)
14572 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14574 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14575 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14576 i_list, config_ndata)
14577 assert len(data["nodes"]) == len(ninfo), \
14578 "Incomplete node data computed"
14580 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14582 self.in_data = data
# Presumably a @staticmethod (decorator elided): maps group UUID to the
# per-group dict the allocator protocol expects.
14585 def _ComputeNodeGroupData(cfg):
14586 """Compute node groups data.
14589 cluster = cfg.GetClusterInfo()
14590 ng = dict((guuid, {
14591 "name": gdata.name,
14592 "alloc_policy": gdata.alloc_policy,
14593 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14595 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
# Static (config-only) per-node data; no RPC involved.
14600 def _ComputeBasicNodeData(cfg, node_cfg):
14601 """Compute global node data.
14604 @returns: a dict of name: (node dict, node config)
14607 # fill in static (config-based) values
14608 node_results = dict((ninfo.name, {
14609 "tags": list(ninfo.GetTags()),
14610 "primary_ip": ninfo.primary_ip,
14611 "secondary_ip": ninfo.secondary_ip,
14612 "offline": ninfo.offline,
14613 "drained": ninfo.drained,
14614 "master_candidate": ninfo.master_candidate,
14615 "group": ninfo.group,
14616 "master_capable": ninfo.master_capable,
14617 "vm_capable": ninfo.vm_capable,
14618 "ndparams": cfg.GetNdParams(ninfo),
14620 for ninfo in node_cfg.values())
14622 return node_results
# Merges live RPC data (memory/disk/cpu, running instances) into the static
# per-node dicts; offline/drained nodes keep static data only.
14625 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14627 """Compute global node data.
14629 @param node_results: the basic node structures as filled from the config
14632 #TODO(dynmem): compute the right data on MAX and MIN memory
14633 # make a copy of the current dict
14634 node_results = dict(node_results)
14635 for nname, nresult in node_data.items():
14636 assert nname in node_results, "Missing basic data for node %s" % nname
14637 ninfo = node_cfg[nname]
14639 if not (ninfo.offline or ninfo.drained):
14640 nresult.Raise("Can't get data for node %s" % nname)
14641 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14643 remote_info = _MakeLegacyNodeInfo(nresult.payload)
# Validate all required numeric attributes before using them.
14645 for attr in ["memory_total", "memory_free", "memory_dom0",
14646 "vg_size", "vg_free", "cpu_total"]:
14647 if attr not in remote_info:
14648 raise errors.OpExecError("Node '%s' didn't return attribute"
14649 " '%s'" % (nname, attr))
14650 if not isinstance(remote_info[attr], int):
14651 raise errors.OpExecError("Node '%s' returned invalid value"
14653 (nname, attr, remote_info[attr]))
14654 # compute memory used by primary instances
14655 i_p_mem = i_p_up_mem = 0
14656 for iinfo, beinfo in i_list:
14657 if iinfo.primary_node == nname:
14658 i_p_mem += beinfo[constants.BE_MAXMEM]
14659 if iinfo.name not in node_iinfo[nname].payload:
# For running instances, reserve the gap between configured maximum
# memory and currently-used memory out of the node's free memory.
14662 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14663 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14664 remote_info["memory_free"] -= max(0, i_mem_diff)
14666 if iinfo.admin_state == constants.ADMINST_UP:
14667 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14669 # compute memory used by instances
14671 "total_memory": remote_info["memory_total"],
14672 "reserved_memory": remote_info["memory_dom0"],
14673 "free_memory": remote_info["memory_free"],
14674 "total_disk": remote_info["vg_size"],
14675 "free_disk": remote_info["vg_free"],
14676 "total_cpus": remote_info["cpu_total"],
14677 "i_pri_memory": i_p_mem,
14678 "i_pri_up_memory": i_p_up_mem,
# Static values win over dynamic ones on key collision (update with the
# static dict last).
14680 pnr_dyn.update(node_results[nname])
14681 node_results[nname] = pnr_dyn
14683 return node_results
# Per-instance data keyed by instance name, including NIC and disk details.
14686 def _ComputeInstanceData(cluster_info, i_list):
14687 """Compute global instance data.
14691 for iinfo, beinfo in i_list:
14693 for nic in iinfo.nics:
14694 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14698 "mode": filled_params[constants.NIC_MODE],
14699 "link": filled_params[constants.NIC_LINK],
# Legacy "bridge" key kept for bridged NICs (older allocator scripts).
14701 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14702 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14703 nic_data.append(nic_dict)
14705 "tags": list(iinfo.GetTags()),
14706 "admin_state": iinfo.admin_state,
14707 "vcpus": beinfo[constants.BE_VCPUS],
14708 "memory": beinfo[constants.BE_MAXMEM],
14710 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14712 "disks": [{constants.IDISK_SIZE: dsk.size,
14713 constants.IDISK_MODE: dsk.mode}
14714 for dsk in iinfo.disks],
14715 "disk_template": iinfo.disk_template,
14716 "hypervisor": iinfo.hypervisor,
14718 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14720 instance_data[iinfo.name] = pir
14722 return instance_data
# Request builder for IALLOCATOR_MODE_ALLOC.
14724 def _AddNewInstance(self):
14725 """Add new instance data to allocator structure.
14727 This in combination with _AllocatorGetClusterData will create the
14728 correct structure needed as input for the allocator.
14730 The checks for the completeness of the opcode must have already been
14734 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
# Internally-mirrored templates (e.g. DRBD) need two nodes, all others one.
14736 if self.disk_template in constants.DTS_INT_MIRROR:
14737 self.required_nodes = 2
14739 self.required_nodes = 1
14743 "disk_template": self.disk_template,
14746 "vcpus": self.vcpus,
14747 "memory": self.memory,
14748 "disks": self.disks,
14749 "disk_space_total": disk_space,
14751 "required_nodes": self.required_nodes,
14752 "hypervisor": self.hypervisor,
# Request builder for IALLOCATOR_MODE_RELOC.
14757 def _AddRelocateInstance(self):
14758 """Add relocate instance data to allocator structure.
14760 This in combination with _IAllocatorGetClusterData will create the
14761 correct structure needed as input for the allocator.
14763 The checks for the completeness of the opcode must have already been
14767 instance = self.cfg.GetInstanceInfo(self.name)
14768 if instance is None:
14769 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14770 " IAllocator" % self.name)
14772 if instance.disk_template not in constants.DTS_MIRRORED:
14773 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14774 errors.ECODE_INVAL)
14776 if instance.disk_template in constants.DTS_INT_MIRROR and \
14777 len(instance.secondary_nodes) != 1:
14778 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14779 errors.ECODE_STATE)
14781 self.required_nodes = 1
14782 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14783 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14787 "disk_space_total": disk_space,
14788 "required_nodes": self.required_nodes,
14789 "relocate_from": self.relocate_from,
# Request builder for IALLOCATOR_MODE_NODE_EVAC.
14793 def _AddNodeEvacuate(self):
14794 """Get data for node-evacuate requests.
14798 "instances": self.instances,
14799 "evac_mode": self.evac_mode,
# Request builder for IALLOCATOR_MODE_CHG_GROUP.
14802 def _AddChangeGroup(self):
14803 """Get data for group-change requests.
14807 "instances": self.instances,
14808 "target_groups": self.target_groups,
# Assembles the full input: generic cluster data plus the mode-specific
# request built by `fn`, validated key-by-key against the `keydata` schema,
# then serialized to self.in_text.
14811 def _BuildInputData(self, fn, keydata):
14812 """Build input data structures.
14815 self._ComputeClusterData()
14818 request["type"] = self.mode
14819 for keyname, keytype in keydata:
14820 if keyname not in request:
14821 raise errors.ProgrammerError("Request parameter %s is missing" %
14823 val = request[keyname]
14824 if not keytype(val):
14825 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14826 " validation, value %s, expected"
14827 " type %s" % (keyname, val, keytype))
14828 self.in_data["request"] = request
14830 self.in_text = serializer.Dump(self.in_data)
# Class-level ht.* validators for allocator results (the _MODE_DATA table
# below maps each mode to its builder, key schema and result validator).
14832 _STRING_LIST = ht.TListOf(ht.TString)
14833 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14834 # pylint: disable=E1101
14835 # Class '...' has no 'OP_ID' member
14836 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14837 opcodes.OpInstanceMigrate.OP_ID,
14838 opcodes.OpInstanceReplaceDisks.OP_ID])
14842 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14843 ht.TItems([ht.TNonEmptyString,
14844 ht.TNonEmptyString,
14845 ht.TListOf(ht.TNonEmptyString),
14848 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14849 ht.TItems([ht.TNonEmptyString,
14852 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14853 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14856 constants.IALLOCATOR_MODE_ALLOC:
14859 ("name", ht.TString),
14860 ("memory", ht.TInt),
14861 ("disks", ht.TListOf(ht.TDict)),
14862 ("disk_template", ht.TString),
14863 ("os", ht.TString),
14864 ("tags", _STRING_LIST),
14865 ("nics", ht.TListOf(ht.TDict)),
14866 ("vcpus", ht.TInt),
14867 ("hypervisor", ht.TString),
14869 constants.IALLOCATOR_MODE_RELOC:
14870 (_AddRelocateInstance,
14871 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14873 constants.IALLOCATOR_MODE_NODE_EVAC:
14874 (_AddNodeEvacuate, [
14875 ("instances", _STRING_LIST),
14876 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14878 constants.IALLOCATOR_MODE_CHG_GROUP:
14879 (_AddChangeGroup, [
14880 ("instances", _STRING_LIST),
14881 ("target_groups", _STRING_LIST),
# Runs the named allocator script on the master node; `call_fn` is
# injectable for testing and defaults to the iallocator RPC.
14885 def Run(self, name, validate=True, call_fn=None):
14886 """Run an instance allocator and return the results.
14889 if call_fn is None:
14890 call_fn = self.rpc.call_iallocator_runner
14892 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14893 result.Raise("Failure while running the iallocator script")
14895 self.out_text = result.payload
# Validation is presumably gated on `validate` (elided condition) —
# LUTestAllocator calls Run(..., validate=False).
14897 self._ValidateResult()
# Parses self.out_text, checks the mandatory keys and the mode-specific
# result shape, and stores the parsed dict in self.out_data.
14899 def _ValidateResult(self):
14900 """Process the allocator results.
14902 This will process and if successful save the result in
14903 self.out_data and the other parameters.
14907 rdict = serializer.Load(self.out_text)
14908 except Exception, err:
14909 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14911 if not isinstance(rdict, dict):
14912 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14914 # TODO: remove backwards compatibility in later versions
14915 if "nodes" in rdict and "result" not in rdict:
14916 rdict["result"] = rdict["nodes"]
14919 for key in "success", "info", "result":
14920 if key not in rdict:
14921 raise errors.OpExecError("Can't parse iallocator results:"
14922 " missing key '%s'" % key)
14923 setattr(self, key, rdict[key])
14925 if not self._result_check(self.result):
# NOTE(review): errors.ECODE_INVAL is passed as an extra positional
# argument to OpExecError; unlike OpPrereqError, OpExecError does not
# define an errcode parameter — confirm and drop/convert upstream.
14926 raise errors.OpExecError("Iallocator returned invalid result,"
14927 " expected %s, got %s" %
14928 (self._result_check, self.result),
14929 errors.ECODE_INVAL)
# Relocation results must stay within the instance's original node groups.
14931 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14932 assert self.relocate_from is not None
14933 assert self.required_nodes == 1
14935 node2group = dict((name, ndata["group"])
14936 for (name, ndata) in self.in_data["nodes"].items())
14938 fn = compat.partial(self._NodesToGroups, node2group,
14939 self.in_data["nodegroups"])
14941 instance = self.cfg.GetInstanceInfo(self.name)
14942 request_groups = fn(self.relocate_from + [instance.primary_node])
14943 result_groups = fn(rdict["result"] + [instance.primary_node])
14945 if self.success and not set(result_groups).issubset(request_groups):
14946 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14947 " differ from original groups (%s)" %
14948 (utils.CommaJoin(result_groups),
14949 utils.CommaJoin(request_groups)))
14951 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14952 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14954 self.out_data = rdict
# Presumably a @staticmethod (decorator elided): resolves node names to a
# sorted list of unique group names, tolerating unknown nodes and unknown
# groups (falling back to the raw UUID).
14957 def _NodesToGroups(node2group, groups, nodes):
14958 """Returns a list of unique group names for a list of nodes.
14960 @type node2group: dict
14961 @param node2group: Map from node name to group UUID
14963 @param groups: Group information
14965 @param nodes: Node names
14972 group_uuid = node2group[node]
14974 # Ignore unknown node
14978 group = groups[group_uuid]
14980 # Can't find group, let's use UUID
14981 group_name = group_uuid
14983 group_name = group["name"]
14985 result.add(group_name)
14987 return sorted(result)
# Test LU wrapping the IAllocator framework: validates mode-specific opcode
# parameters, builds an IAllocator request and either returns the generated
# input text (direction "in") or runs the named allocator and returns its
# raw output (direction "out").
# NOTE(review): gaps in the embedded numbering mark source lines elided from
# this excerpt (e.g. some keyword arguments of the IAllocator calls and the
# final `return`); comments below are hedged accordingly.
14990 class LUTestAllocator(NoHooksLU):
14991 """Run allocator tests.
14993 This LU runs the allocator tests
14996 def CheckPrereq(self):
14997 """Check prerequisites.
14999 This checks the opcode parameters depending on the director and mode test.
# ALLOC mode: all instance-description attributes must be present and the
# name must not collide with an existing instance.
15002 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15003 for attr in ["memory", "disks", "disk_template",
15004 "os", "tags", "nics", "vcpus"]:
15005 if not hasattr(self.op, attr):
15006 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15007 attr, errors.ECODE_INVAL)
15008 iname = self.cfg.ExpandInstanceName(self.op.name)
15009 if iname is not None:
15010 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15011 iname, errors.ECODE_EXISTS)
15012 if not isinstance(self.op.nics, list):
15013 raise errors.OpPrereqError("Invalid parameter 'nics'",
15014 errors.ECODE_INVAL)
15015 if not isinstance(self.op.disks, list):
15016 raise errors.OpPrereqError("Invalid parameter 'disks'",
15017 errors.ECODE_INVAL)
# Each disk must be a dict with an int size and a valid access mode.
15018 for row in self.op.disks:
15019 if (not isinstance(row, dict) or
15020 constants.IDISK_SIZE not in row or
15021 not isinstance(row[constants.IDISK_SIZE], int) or
15022 constants.IDISK_MODE not in row or
15023 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15024 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15025 " parameter", errors.ECODE_INVAL)
15026 if self.op.hypervisor is None:
15027 self.op.hypervisor = self.cfg.GetHypervisorType()
# RELOC mode: expand the instance name and default relocate_from to its
# current secondary nodes.
15028 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15029 fname = _ExpandInstanceName(self.cfg, self.op.name)
15030 self.op.name = fname
15031 self.relocate_from = \
15032 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15033 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15034 constants.IALLOCATOR_MODE_NODE_EVAC):
15035 if not self.op.instances:
15036 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15037 self.op.instances = _GetWantedInstances(self, self.op.instances)
15039 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15040 self.op.mode, errors.ECODE_INVAL)
# Direction "out" additionally needs the allocator script's name.
15042 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15043 if self.op.allocator is None:
15044 raise errors.OpPrereqError("Missing allocator name",
15045 errors.ECODE_INVAL)
15046 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15047 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15048 self.op.direction, errors.ECODE_INVAL)
# Builds the mode-appropriate IAllocator and returns either its input text
# or (after running the script) its output text.
15050 def Exec(self, feedback_fn):
15051 """Run the allocator test.
15054 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15055 ial = IAllocator(self.cfg, self.rpc,
15058 memory=self.op.memory,
15059 disks=self.op.disks,
15060 disk_template=self.op.disk_template,
15064 vcpus=self.op.vcpus,
15065 hypervisor=self.op.hypervisor,
15067 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15068 ial = IAllocator(self.cfg, self.rpc,
15071 relocate_from=list(self.relocate_from),
15073 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15074 ial = IAllocator(self.cfg, self.rpc,
15076 instances=self.op.instances,
15077 target_groups=self.op.target_groups)
15078 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15079 ial = IAllocator(self.cfg, self.rpc,
15081 instances=self.op.instances,
15082 evac_mode=self.op.evac_mode)
# Unreachable if CheckPrereq ran, but kept as a programmer guard.
15084 raise errors.ProgrammerError("Uncatched mode %s in"
15085 " LUTestAllocator.Exec", self.op.mode)
15087 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15088 result = ial.in_text
# validate=False: the test LU returns the raw script output unvalidated.
15090 ial.Run(self.op.allocator, validate=False)
15091 result = ial.out_text
# Module-level dispatch table from query resource constant to its query
# implementation class; the assert pins it to exactly the resources that are
# queryable via opcode (constants.QR_VIA_OP).
15095 #: Query type implementations
15097 constants.QR_INSTANCE: _InstanceQuery,
15098 constants.QR_NODE: _NodeQuery,
15099 constants.QR_GROUP: _GroupQuery,
15100 constants.QR_OS: _OsQuery,
15103 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
# Dispatch helper: look up the query implementation class for a resource
# name, converting an unknown name (elided KeyError handler) into an
# OpPrereqError for the caller.
15106 def _GetQueryImplementation(name):
15107 """Returns the implementation for a query type.
15109 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15113 return _QUERY_IMPL[name]
15115 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15116 errors.ECODE_INVAL)