# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module

# Standard-library and third-party modules used by the code below
import re
import logging
import copy
import itertools

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
_DRBD_META_SIZE = 128

# States of instance
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
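
  Example (illustrative; C{OpTestDelay} stands in for any submittable
  opcode)::

    return ResultWithJobs([[opcodes.OpTestDelay(duration=10)]])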

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and to ensure the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a pure lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waste of time expanding parameters)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.
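
    For example, an LU that only reads node data could mark its node
    locks as shared (illustrative)::

      self.share_locks[locking.LEVEL_NODE] = 1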

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time. Exclusive LUs would simply use:
    #
    #   self.needed_locks = {} # Exclusive LUs don't need locks.
    #
    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
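
    Typical use from an LU's ExpandNames (illustrative)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()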

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
438 """Tasklet base class.
440 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
441 they can mix legacy code with tasklets. Locking needs to be done in the LU,
442 tasklets know nothing about locks.
444 Subclasses must follow these rules:
445 - Implement CheckPrereq
449 def __init__(self, lu):
456 def CheckPrereq(self):
457 """Check prerequisites for this tasklets.
459 This method should check whether the prerequisites for the execution of
460 this tasklet are fulfilled. It can do internode communication, but it
461 should be idempotent - no cluster or system changes are allowed.
463 The method should raise errors.OpPrereqError in case something is not
464 fulfilled. Its return value is ignored.
466 This method should also update all parameters to their canonical form if it
467 hasn't been done before.
472 def Exec(self, feedback_fn):
473 """Execute the tasklet.
475 This method should implement the actual work. It should raise
476 errors.OpExecError for failures that are somewhat dealt with in code, or
480 raise NotImplementedError
484 """Base for query utility classes.
487 #: Attribute holding field definitions
490 def __init__(self, qfilter, fields, use_locking):
491 """Initializes this class.
494 self.use_locking = use_locking
496 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
498 self.requested_data = self.query.RequestedData()
499 self.names = self.query.RequestedNames()
501 # Sort only if no names were requested
502 self.sort_by_name = not self.names
504 self.do_locking = None
507 def _GetNames(self, lu, all_names, lock_level):
508 """Helper function to determine names asked for in the query.
512 names = lu.owned_locks(lock_level)
516 if self.wanted == locking.ALL_SET:
517 assert not self.names
518 # caller didn't specify names, so ordering is not important
519 return utils.NiceSort(names)
521 # caller specified names and we must keep the same order
523 assert not self.do_locking or lu.glm.is_owned(lock_level)
525 missing = set(self.wanted).difference(names)
527 raise errors.OpExecError("Some items were removed before retrieving"
528 " their data: %s" % missing)
530 # Return expanded names
533 def ExpandNames(self, lu):
534 """Expand names for this query.
536 See L{LogicalUnit.ExpandNames}.
539 raise NotImplementedError()
541 def DeclareLocks(self, lu, level):
542 """Declare locks for this query.
544 See L{LogicalUnit.DeclareLocks}.
547 raise NotImplementedError()
549 def _GetQueryData(self, lu):
550 """Collects all data for this query.
552 @return: Query data object
555 raise NotImplementedError()
557 def NewStyleQuery(self, lu):
558 """Collect data and execute query.
561 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
562 sort_by_name=self.sort_by_name)
564 def OldStyleQuery(self, lu):
565 """Collect data and execute query.
568 return self.query.OldStyleQuery(self._GetQueryData(lu),
569 sort_by_name=self.sort_by_name)
573 """Returns a dict declaring all lock levels shared.
576 return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary
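
  Example (illustrative parameter names)::

    _GetUpdatedParams({"vcpus": 2, "maxmem": 256},
                      {"maxmem": constants.VALUE_DEFAULT, "vcpus": 4})
    # => {"vcpus": 4}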

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if not value or value == [constants.VALUE_DEFAULT]:
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain
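
  Example (illustrative)::

    # keep only the node locks the instance still needs
    _ReleaseLocks(lu, locking.LEVEL_NODE, keep=instance.all_nodes)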

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep is not None:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)

  # Not owning any lock at this level, do nothing
  if not owned:
    return

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name
      as value
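
  Example result (illustrative names)::

    {("node1.example.com", "xenvg/disk0"): "inst1.example.com"}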

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _ComputeMinMaxSpec(name, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an element not meeting the criteria
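
  Example (illustrative values)::

    ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_DISK_SIZE: 128},
               constants.ISPECS_MAX: {constants.ISPEC_DISK_SIZE: 1024}}
    _ComputeMinMaxSpec(constants.ISPEC_DISK_SIZE, ipolicy, 512)   # => None
    _ComputeMinMaxSpec(constants.ISPEC_DISK_SIZE, ipolicy, 2048)  # => message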

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found
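
  Example (illustrative)::

    # a 1024MB-memory, 1-vcpu, 1-nic instance with a single 1024MB disk
    _ComputeIPolicySpecViolation(ipolicy, 1024, 1, 1, 1, [1024])
    # => [] when the policy allows such a spec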

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  return filter(None,
                (_compute_fn(name, ipolicy, value)
                 for (name, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyInstanceSpecViolation(
    ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not violate the old one

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found
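
  Example (illustrative)::

    _ExpandItemName(cfg.ExpandNodeName, "node1", "Node")
    # => "node1.example.com", or raises OpPrereqError if unknown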

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
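
  Typical use from an LU's CheckArguments (illustrative slot names)::

    _CheckIAllocatorOrNode(self, "iallocator", "node")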

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.
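
    Example output (illustrative)::

      ERROR:ECLUSTERCFG:cluster::duplicated instance names  # parseable form
      ERROR: cluster: duplicated instance names             # plain form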

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes

      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
1963 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1964 """Verifies the status of a node group.
1967 HPATH = "cluster-verify"
1968 HTYPE = constants.HTYPE_CLUSTER
1971 _HOOKS_INDENT_RE = re.compile("^", re.M)
1973 class NodeImage(object):
1974 """A class representing the logical and physical status of a node.
1977 @ivar name: the node name to which this object refers
1978 @ivar volumes: a structure as returned from
1979 L{ganeti.backend.GetVolumeList} (runtime)
1980 @ivar instances: a list of running instances (runtime)
1981 @ivar pinst: list of configured primary instances (config)
1982 @ivar sinst: list of configured secondary instances (config)
1983 @ivar sbp: dictionary of {primary-node: list of instances} for all
1984 instances for which this node is secondary (config)
1985 @ivar mfree: free memory, as reported by hypervisor (runtime)
1986 @ivar dfree: free disk, as reported by the node (runtime)
1987 @ivar offline: the offline status (config)
1988 @type rpc_fail: boolean
1989 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1990 not whether the individual keys were correct) (runtime)
1991 @type lvm_fail: boolean
1992 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1993 @type hyp_fail: boolean
1994 @ivar hyp_fail: whether the RPC call didn't return the instance list
1995 @type ghost: boolean
1996 @ivar ghost: whether this is a known node or not (config)
1997 @type os_fail: boolean
1998 @ivar os_fail: whether the RPC call didn't return valid OS data
2000 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2001 @type vm_capable: boolean
2002 @ivar vm_capable: whether the node can host instances
2004 """
2005 def __init__(self, offline=False, name=None, vm_capable=True):
2006 self.name = name
2007 self.volumes = {}
2008 self.instances = []
2009 self.pinst = []
2010 self.sinst = []
2011 self.sbp = {}
2012 self.mfree = 0
2013 self.dfree = 0
2014 self.offline = offline
2015 self.vm_capable = vm_capable
2016 self.rpc_fail = False
2017 self.lvm_fail = False
2018 self.hyp_fail = False
2019 self.ghost = False
2020 self.os_fail = False
2021 self.oslist = {}
2023 def ExpandNames(self):
2024 # This raises errors.OpPrereqError on its own:
2025 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2027 # Get instances in node group; this is unsafe and needs verification later
2028 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2030 self.needed_locks = {
2031 locking.LEVEL_INSTANCE: inst_names,
2032 locking.LEVEL_NODEGROUP: [self.group_uuid],
2033 locking.LEVEL_NODE: [],
2034 }
2036 self.share_locks = _ShareAll()
2038 def DeclareLocks(self, level):
2039 if level == locking.LEVEL_NODE:
2040 # Get members of node group; this is unsafe and needs verification later
2041 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2043 all_inst_info = self.cfg.GetAllInstancesInfo()
2045 # In Exec(), we warn about mirrored instances that have primary and
2046 # secondary living in separate node groups. To fully verify that
2047 # volumes for these instances are healthy, we will need to do an
2048 # extra call to their secondaries. We ensure here those nodes will
2049 # be locked.
2050 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2051 # Important: access only the instances whose lock is owned
2052 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2053 nodes.update(all_inst_info[inst].secondary_nodes)
2055 self.needed_locks[locking.LEVEL_NODE] = nodes
2057 def CheckPrereq(self):
2058 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2059 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2061 group_nodes = set(self.group_info.members)
2062 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2064 unlocked_nodes = \
2065 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2067 unlocked_instances = \
2068 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2071 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2072 utils.CommaJoin(unlocked_nodes))
2074 if unlocked_instances:
2075 raise errors.OpPrereqError("Missing lock for instances: %s" %
2076 utils.CommaJoin(unlocked_instances))
2078 self.all_node_info = self.cfg.GetAllNodesInfo()
2079 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2081 self.my_node_names = utils.NiceSort(group_nodes)
2082 self.my_inst_names = utils.NiceSort(group_instances)
2084 self.my_node_info = dict((name, self.all_node_info[name])
2085 for name in self.my_node_names)
2087 self.my_inst_info = dict((name, self.all_inst_info[name])
2088 for name in self.my_inst_names)
2090 # We detect here the nodes that will need the extra RPC calls for verifying
2091 # split LV volumes; they should be locked.
2092 extra_lv_nodes = set()
2094 for inst in self.my_inst_info.values():
2095 if inst.disk_template in constants.DTS_INT_MIRROR:
2096 group = self.my_node_info[inst.primary_node].group
2097 for nname in inst.secondary_nodes:
2098 if self.all_node_info[nname].group != group:
2099 extra_lv_nodes.add(nname)
2101 unlocked_lv_nodes = \
2102 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2104 if unlocked_lv_nodes:
2105 raise errors.OpPrereqError("these nodes could be locked: %s" %
2106 utils.CommaJoin(unlocked_lv_nodes))
2107 self.extra_lv_nodes = list(extra_lv_nodes)
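# CheckPrereq above follows a recurring pattern: locks are declared
# optimistically in ExpandNames/DeclareLocks, then re-checked here once the
# configuration can be read safely. A condensed sketch (hypothetical helper):
def _ExampleCheckOwnedLocks(needed, owned, what):
  """Raises OpPrereqError if any needed resource is not locked."""
  missing = set(needed) - set(owned)
  if missing:
    raise errors.OpPrereqError("Missing lock for %s: %s" %
                               (what, utils.CommaJoin(utils.NiceSort(missing))))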
2109 def _VerifyNode(self, ninfo, nresult):
2110 """Perform some basic validation on data returned from a node.
2112 - check the result data structure is well formed and has all the
2113 mandatory fields
2114 - check ganeti version
2116 @type ninfo: L{objects.Node}
2117 @param ninfo: the node to check
2118 @param nresult: the results from the node
2120 @return: whether overall this call was successful (and we can expect
2121 reasonable values in the response)
2123 """
2124 node = ninfo.name
2125 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2127 # main result, nresult should be a non-empty dict
2128 test = not nresult or not isinstance(nresult, dict)
2129 _ErrorIf(test, constants.CV_ENODERPC, node,
2130 "unable to verify node: no data returned")
2134 # compares ganeti version
2135 local_version = constants.PROTOCOL_VERSION
2136 remote_version = nresult.get("version", None)
2137 test = not (remote_version and
2138 isinstance(remote_version, (list, tuple)) and
2139 len(remote_version) == 2)
2140 _ErrorIf(test, constants.CV_ENODERPC, node,
2141 "connection to node returned invalid data")
2145 test = local_version != remote_version[0]
2146 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2147 "incompatible protocol versions: master %s,"
2148 " node %s", local_version, remote_version[0])
2152 # node seems compatible, we can actually try to look into its results
2154 # full package version
2155 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2156 constants.CV_ENODEVERSION, node,
2157 "software version mismatch: master %s, node %s",
2158 constants.RELEASE_VERSION, remote_version[1],
2159 code=self.ETYPE_WARNING)
2161 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2162 if ninfo.vm_capable and isinstance(hyp_result, dict):
2163 for hv_name, hv_result in hyp_result.iteritems():
2164 test = hv_result is not None
2165 _ErrorIf(test, constants.CV_ENODEHV, node,
2166 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2168 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2169 if ninfo.vm_capable and isinstance(hvp_result, list):
2170 for item, hv_name, hv_result in hvp_result:
2171 _ErrorIf(True, constants.CV_ENODEHV, node,
2172 "hypervisor %s parameter verify failure (source %s): %s",
2173 hv_name, item, hv_result)
2175 test = nresult.get(constants.NV_NODESETUP,
2176 ["Missing NODESETUP results"])
2177 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2178 "; ".join(test))
2180 return True
2182 def _VerifyNodeTime(self, ninfo, nresult,
2183 nvinfo_starttime, nvinfo_endtime):
2184 """Check the node time.
2186 @type ninfo: L{objects.Node}
2187 @param ninfo: the node to check
2188 @param nresult: the remote results for the node
2189 @param nvinfo_starttime: the start time of the RPC call
2190 @param nvinfo_endtime: the end time of the RPC call
2192 """
2193 node = ninfo.name
2194 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2196 ntime = nresult.get(constants.NV_TIME, None)
2197 try:
2198 ntime_merged = utils.MergeTime(ntime)
2199 except (ValueError, TypeError):
2200 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2201 return
2203 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2204 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2205 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2206 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2207 else:
2208 ntime_diff = None
2210 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2211 "Node time diverges by at least %s from master node time",
2214 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2215 """Check the node LVM results.
2217 @type ninfo: L{objects.Node}
2218 @param ninfo: the node to check
2219 @param nresult: the remote results for the node
2220 @param vg_name: the configured VG name
2222 """
2223 if vg_name is None:
2224 return
2226 node = ninfo.name
2227 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 # checks vg existence and size > 20G
2230 vglist = nresult.get(constants.NV_VGLIST, None)
2232 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2234 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2235 constants.MIN_VG_SIZE)
2236 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2239 pvlist = nresult.get(constants.NV_PVLIST, None)
2240 test = pvlist is None
2241 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2243 # check that ':' is not present in PV names, since it's a
2244 # special character for lvcreate (denotes the range of PEs to
2245 # allocate on)
2246 for _, pvname, owner_vg in pvlist:
2247 test = ":" in pvname
2248 _ErrorIf(test, constants.CV_ENODELVM, node,
2249 "Invalid character ':' in PV '%s' of VG '%s'",
2252 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2253 """Check the node bridges.
2255 @type ninfo: L{objects.Node}
2256 @param ninfo: the node to check
2257 @param nresult: the remote results for the node
2258 @param bridges: the expected list of bridges
2260 """
2261 if not bridges:
2262 return
2264 node = ninfo.name
2265 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2267 missing = nresult.get(constants.NV_BRIDGES, None)
2268 test = not isinstance(missing, list)
2269 _ErrorIf(test, constants.CV_ENODENET, node,
2270 "did not return valid bridge information")
2272 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2273 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2275 def _VerifyNodeUserScripts(self, ninfo, nresult):
2276 """Check the results of user scripts presence and executability on the node
2278 @type ninfo: L{objects.Node}
2279 @param ninfo: the node to check
2280 @param nresult: the remote results for the node
2282 """
2283 node = ninfo.name
2285 test = not constants.NV_USERSCRIPTS in nresult
2286 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2287 "did not return user scripts information")
2289 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2290 if broken_scripts:
2291 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2292 "user scripts not present or not executable: %s" %
2293 utils.CommaJoin(sorted(broken_scripts)))
2295 def _VerifyNodeNetwork(self, ninfo, nresult):
2296 """Check the node network connectivity results.
2298 @type ninfo: L{objects.Node}
2299 @param ninfo: the node to check
2300 @param nresult: the remote results for the node
2302 """
2303 node = ninfo.name
2304 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2306 test = constants.NV_NODELIST not in nresult
2307 _ErrorIf(test, constants.CV_ENODESSH, node,
2308 "node hasn't returned node ssh connectivity data")
2310 if nresult[constants.NV_NODELIST]:
2311 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2312 _ErrorIf(True, constants.CV_ENODESSH, node,
2313 "ssh communication with node '%s': %s", a_node, a_msg)
2315 test = constants.NV_NODENETTEST not in nresult
2316 _ErrorIf(test, constants.CV_ENODENET, node,
2317 "node hasn't returned node tcp connectivity data")
2319 if nresult[constants.NV_NODENETTEST]:
2320 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2321 for anode in nlist:
2322 _ErrorIf(True, constants.CV_ENODENET, node,
2323 "tcp communication with node '%s': %s",
2324 anode, nresult[constants.NV_NODENETTEST][anode])
2326 test = constants.NV_MASTERIP not in nresult
2327 _ErrorIf(test, constants.CV_ENODENET, node,
2328 "node hasn't returned node master IP reachability data")
2330 if not nresult[constants.NV_MASTERIP]:
2331 if node == self.master_node:
2332 msg = "the master node cannot reach the master IP (not configured?)"
2334 msg = "cannot reach the master IP"
2335 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2337 def _VerifyInstance(self, instance, instanceconfig, node_image,
2338 diskstatus):
2339 """Verify an instance.
2341 This function checks to see if the required block devices are
2342 available on the instance's node.
2344 """
2345 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2346 node_current = instanceconfig.primary_node
2348 node_vol_should = {}
2349 instanceconfig.MapLVsByNode(node_vol_should)
2351 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2352 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2353 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2355 for node in node_vol_should:
2356 n_img = node_image[node]
2357 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2358 # ignore missing volumes on offline or broken nodes
2359 continue
2360 for volume in node_vol_should[node]:
2361 test = volume not in n_img.volumes
2362 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2363 "volume %s missing on node %s", volume, node)
2365 if instanceconfig.admin_state == constants.ADMINST_UP:
2366 pri_img = node_image[node_current]
2367 test = instance not in pri_img.instances and not pri_img.offline
2368 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2369 "instance not running on its primary node %s",
2372 diskdata = [(nname, success, status, idx)
2373 for (nname, disks) in diskstatus.items()
2374 for idx, (success, status) in enumerate(disks)]
2376 for nname, success, bdev_status, idx in diskdata:
2377 # the 'ghost node' construction in Exec() ensures that we have a
2378 # node image entry here for every disk node
2379 snode = node_image[nname]
2380 bad_snode = snode.ghost or snode.offline
2381 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2382 not success and not bad_snode,
2383 constants.CV_EINSTANCEFAULTYDISK, instance,
2384 "couldn't retrieve status for disk/%s on %s: %s",
2385 idx, nname, bdev_status)
2386 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2387 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2388 constants.CV_EINSTANCEFAULTYDISK, instance,
2389 "disk/%s on %s is faulty", idx, nname)
2391 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2392 """Verify if there are any unknown volumes in the cluster.
2394 The .os, .swap and backup volumes are ignored. All other volumes are
2395 reported as unknown.
2397 @type reserved: L{ganeti.utils.FieldSet}
2398 @param reserved: a FieldSet of reserved volume names
2400 """
2401 for node, n_img in node_image.items():
2402 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2403 # skip non-healthy nodes
2404 continue
2405 for volume in n_img.volumes:
2406 test = ((node not in node_vol_should or
2407 volume not in node_vol_should[node]) and
2408 not reserved.Matches(volume))
2409 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2410 "volume %s is unknown", volume)
2412 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2413 """Verify N+1 Memory Resilience.
2415 Check that if one single node dies we can still start all the
2416 instances it was primary for.
2418 """
2419 cluster_info = self.cfg.GetClusterInfo()
2420 for node, n_img in node_image.items():
2421 # This code checks that every node which is now listed as
2422 # secondary has enough memory to host all instances it is
2423 # supposed to should a single other node in the cluster fail.
2424 # FIXME: not ready for failover to an arbitrary node
2425 # FIXME: does not support file-backed instances
2426 # WARNING: we currently take into account down instances as well
2427 # as up ones, considering that even if they're down someone
2428 # might want to start them even in the event of a node failure.
2429 if n_img.offline:
2430 # we're skipping offline nodes from the N+1 warning, since
2431 # most likely we don't have good memory information from them;
2432 # we already list instances living on such nodes, and that's
2433 # enough warning
2434 continue
2435 #TODO(dynmem): also consider ballooning out other instances
2436 for prinode, instances in n_img.sbp.items():
2437 needed_mem = 0
2438 for instance in instances:
2439 bep = cluster_info.FillBE(instance_cfg[instance])
2440 if bep[constants.BE_AUTO_BALANCE]:
2441 needed_mem += bep[constants.BE_MINMEM]
2442 test = n_img.mfree < needed_mem
2443 self._ErrorIf(test, constants.CV_ENODEN1, node,
2444 "not enough memory to accomodate instance failovers"
2445 " should node %s fail (%dMiB needed, %dMiB available)",
2446 prinode, needed_mem, n_img.mfree)
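# The N+1 computation for a single (node, failing-primary) pair, without
# cluster objects: sum the minimum memory of the auto-balanced instances
# that would fail over here and compare with this node's free memory.
# Illustrative sketch only:
def _ExampleNPlusOneCheck(mfree, min_mem_list):
  """min_mem_list: BE_MINMEM of auto-balanced instances of one primary."""
  needed_mem = sum(min_mem_list)
  return mfree >= needed_mem  # False means the node is not N+1 compliant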
2448 @classmethod
2449 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2450 (files_all, files_opt, files_mc, files_vm)):
2451 """Verifies file checksums collected from all nodes.
2453 @param errorif: Callback for reporting errors
2454 @param nodeinfo: List of L{objects.Node} objects
2455 @param master_node: Name of master node
2456 @param all_nvinfo: RPC results
2458 """
2459 # Define functions determining which nodes to consider for a file
2460 files2nodefn = [
2461 (files_all, None),
2462 (files_mc, lambda node: (node.master_candidate or
2463 node.name == master_node)),
2464 (files_vm, lambda node: node.vm_capable),
2465 ]
2467 # Build mapping from filename to list of nodes which should have the file
2468 nodefiles = {}
2469 for (files, fn) in files2nodefn:
2470 if fn is None:
2471 filenodes = nodeinfo
2472 else:
2473 filenodes = filter(fn, nodeinfo)
2474 nodefiles.update((filename,
2475 frozenset(map(operator.attrgetter("name"), filenodes)))
2476 for filename in files)
2478 assert set(nodefiles) == (files_all | files_mc | files_vm)
2480 fileinfo = dict((filename, {}) for filename in nodefiles)
2481 ignore_nodes = set()
2483 for node in nodeinfo:
2484 if node.offline:
2485 ignore_nodes.add(node.name)
2486 continue
2488 nresult = all_nvinfo[node.name]
2490 if nresult.fail_msg or not nresult.payload:
2491 node_files = None
2492 else:
2493 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2495 test = not (node_files and isinstance(node_files, dict))
2496 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2497 "Node did not return file checksum data")
2499 ignore_nodes.add(node.name)
2500 continue
2502 # Build per-checksum mapping from filename to nodes having it
2503 for (filename, checksum) in node_files.items():
2504 assert filename in nodefiles
2505 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2507 for (filename, checksums) in fileinfo.items():
2508 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2510 # Nodes having the file
2511 with_file = frozenset(node_name
2512 for nodes in fileinfo[filename].values()
2513 for node_name in nodes) - ignore_nodes
2515 expected_nodes = nodefiles[filename] - ignore_nodes
2517 # Nodes missing file
2518 missing_file = expected_nodes - with_file
2520 if filename in files_opt:
2521 # All or no nodes
2522 errorif(missing_file and missing_file != expected_nodes,
2523 constants.CV_ECLUSTERFILECHECK, None,
2524 "File %s is optional, but it must exist on all or no"
2525 " nodes (not found on %s)",
2526 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2527 else:
2528 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2529 "File %s is missing from node(s) %s", filename,
2530 utils.CommaJoin(utils.NiceSort(missing_file)))
2532 # Warn if a node has a file it shouldn't
2533 unexpected = with_file - expected_nodes
2534 errorif(unexpected,
2535 constants.CV_ECLUSTERFILECHECK, None,
2536 "File %s should not exist on node(s) %s",
2537 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2539 # See if there are multiple versions of the file
2540 test = len(checksums) > 1
2542 variants = ["variant %s on %s" %
2543 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2544 for (idx, (checksum, nodes)) in
2545 enumerate(sorted(checksums.items()))]
2547 else:
2548 variants = []
2549 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2550 "File %s found with %s different checksums (%s)",
2551 filename, len(checksums), "; ".join(variants))
2553 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2554 drbd_map):
2555 """Verifies and the node DRBD status.
2557 @type ninfo: L{objects.Node}
2558 @param ninfo: the node to check
2559 @param nresult: the remote results for the node
2560 @param instanceinfo: the dict of instances
2561 @param drbd_helper: the configured DRBD usermode helper
2562 @param drbd_map: the DRBD map as returned by
2563 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2565 """
2566 node = ninfo.name
2567 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2569 if drbd_helper:
2570 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2571 test = (helper_result == None)
2572 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2573 "no drbd usermode helper returned")
2575 status, payload = helper_result
2576 test = not status
2577 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2578 "drbd usermode helper check unsuccessful: %s", payload)
2579 test = status and (payload != drbd_helper)
2580 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2581 "wrong drbd usermode helper: %s", payload)
2583 # compute the DRBD minors
2584 node_drbd = {}
2585 for minor, instance in drbd_map[node].items():
2586 test = instance not in instanceinfo
2587 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2588 "ghost instance '%s' in temporary DRBD map", instance)
2589 # ghost instance should not be running, but otherwise we
2590 # don't give double warnings (both ghost instance and
2591 # unallocated minor in use)
2592 if test:
2593 node_drbd[minor] = (instance, False)
2594 else:
2595 instance = instanceinfo[instance]
2596 node_drbd[minor] = (instance.name,
2597 instance.admin_state == constants.ADMINST_UP)
2599 # and now check them
2600 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2601 test = not isinstance(used_minors, (tuple, list))
2602 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2603 "cannot parse drbd status file: %s", str(used_minors))
2605 # we cannot check drbd status
2606 return
2608 for minor, (iname, must_exist) in node_drbd.items():
2609 test = minor not in used_minors and must_exist
2610 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2611 "drbd minor %d of instance %s is not active", minor, iname)
2612 for minor in used_minors:
2613 test = minor not in node_drbd
2614 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2615 "unallocated drbd minor %d is in use", minor)
2617 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2618 """Builds the node OS structures.
2620 @type ninfo: L{objects.Node}
2621 @param ninfo: the node to check
2622 @param nresult: the remote results for the node
2623 @param nimg: the node image object
2625 """
2626 node = ninfo.name
2627 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2629 remote_os = nresult.get(constants.NV_OSLIST, None)
2630 test = (not isinstance(remote_os, list) or
2631 not compat.all(isinstance(v, list) and len(v) == 7
2632 for v in remote_os))
2634 _ErrorIf(test, constants.CV_ENODEOS, node,
2635 "node hasn't returned valid OS data")
2644 for (name, os_path, status, diagnose,
2645 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2647 if name not in os_dict:
2648 os_dict[name] = []
2650 # parameters is a list of lists instead of list of tuples due to
2651 # JSON lacking a real tuple type, fix it:
2652 parameters = [tuple(v) for v in parameters]
2653 os_dict[name].append((os_path, status, diagnose,
2654 set(variants), set(parameters), set(api_ver)))
2656 nimg.oslist = os_dict
2658 def _VerifyNodeOS(self, ninfo, nimg, base):
2659 """Verifies the node OS list.
2661 @type ninfo: L{objects.Node}
2662 @param ninfo: the node to check
2663 @param nimg: the node image object
2664 @param base: the 'template' node we match against (e.g. from the master)
2666 """
2667 node = ninfo.name
2668 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2670 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2672 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2673 for os_name, os_data in nimg.oslist.items():
2674 assert os_data, "Empty OS status for OS %s?!" % os_name
2675 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2676 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2677 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2678 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2679 "OS '%s' has multiple entries (first one shadows the rest): %s",
2680 os_name, utils.CommaJoin([v[0] for v in os_data]))
2681 # comparisons with the 'base' image
2682 test = os_name not in base.oslist
2683 _ErrorIf(test, constants.CV_ENODEOS, node,
2684 "Extra OS %s not present on reference node (%s)",
2688 assert base.oslist[os_name], "Base node has empty OS status?"
2689 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2690 if not b_status:
2691 # base OS is invalid, skipping
2692 continue
2693 for kind, a, b in [("API version", f_api, b_api),
2694 ("variants list", f_var, b_var),
2695 ("parameters", beautify_params(f_param),
2696 beautify_params(b_param))]:
2697 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2698 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2699 kind, os_name, base.name,
2700 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2702 # check any missing OSes
2703 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2704 _ErrorIf(missing, constants.CV_ENODEOS, node,
2705 "OSes present on reference node %s but missing on this node: %s",
2706 base.name, utils.CommaJoin(missing))
2708 def _VerifyOob(self, ninfo, nresult):
2709 """Verifies out of band functionality of a node.
2711 @type ninfo: L{objects.Node}
2712 @param ninfo: the node to check
2713 @param nresult: the remote results for the node
2715 """
2716 node = ninfo.name
2717 # We just have to verify the paths on master and/or master candidates
2718 # as the oob helper is invoked on the master
2719 if ((ninfo.master_candidate or ninfo.master_capable) and
2720 constants.NV_OOB_PATHS in nresult):
2721 for path_result in nresult[constants.NV_OOB_PATHS]:
2722 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2724 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2725 """Verifies and updates the node volume data.
2727 This function will update a L{NodeImage}'s internal structures
2728 with data from the remote call.
2730 @type ninfo: L{objects.Node}
2731 @param ninfo: the node to check
2732 @param nresult: the remote results for the node
2733 @param nimg: the node image object
2734 @param vg_name: the configured VG name
2736 """
2737 node = ninfo.name
2738 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2740 nimg.lvm_fail = True
2741 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2742 if vg_name is None:
2743 pass
2744 elif isinstance(lvdata, basestring):
2745 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2746 utils.SafeEncode(lvdata))
2747 elif not isinstance(lvdata, dict):
2748 _ErrorIf(True, constants.CV_ENODELVM, node,
2749 "rpc call to node failed (lvlist)")
2751 nimg.volumes = lvdata
2752 nimg.lvm_fail = False
2754 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2755 """Verifies and updates the node instance list.
2757 If the listing was successful, then updates this node's instance
2758 list. Otherwise, it marks the RPC call as failed for the instance
2761 @type ninfo: L{objects.Node}
2762 @param ninfo: the node to check
2763 @param nresult: the remote results for the node
2764 @param nimg: the node image object
2766 """
2767 idata = nresult.get(constants.NV_INSTANCELIST, None)
2768 test = not isinstance(idata, list)
2769 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2770 "rpc call to node failed (instancelist): %s",
2771 utils.SafeEncode(str(idata)))
2772 if test:
2773 nimg.hyp_fail = True
2774 else:
2775 nimg.instances = idata
2777 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2778 """Verifies and computes a node information map
2780 @type ninfo: L{objects.Node}
2781 @param ninfo: the node to check
2782 @param nresult: the remote results for the node
2783 @param nimg: the node image object
2784 @param vg_name: the configured VG name
2786 """
2787 node = ninfo.name
2788 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2790 # try to read free memory (from the hypervisor)
2791 hv_info = nresult.get(constants.NV_HVINFO, None)
2792 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2793 _ErrorIf(test, constants.CV_ENODEHV, node,
2794 "rpc call to node failed (hvinfo)")
2797 nimg.mfree = int(hv_info["memory_free"])
2798 except (ValueError, TypeError):
2799 _ErrorIf(True, constants.CV_ENODERPC, node,
2800 "node returned invalid nodeinfo, check hypervisor")
2802 # FIXME: devise a free space model for file based instances as well
2803 if vg_name is not None:
2804 test = (constants.NV_VGLIST not in nresult or
2805 vg_name not in nresult[constants.NV_VGLIST])
2806 _ErrorIf(test, constants.CV_ENODELVM, node,
2807 "node didn't return data for the volume group '%s'"
2808 " - it is either missing or broken", vg_name)
2811 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2812 except (ValueError, TypeError):
2813 _ErrorIf(True, constants.CV_ENODERPC, node,
2814 "node returned invalid LVM info, check LVM status")
2816 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2817 """Gets per-disk status information for all instances.
2819 @type nodelist: list of strings
2820 @param nodelist: Node names
2821 @type node_image: dict of (name, L{objects.Node})
2822 @param node_image: Node objects
2823 @type instanceinfo: dict of (name, L{objects.Instance})
2824 @param instanceinfo: Instance objects
2825 @rtype: {instance: {node: [(success, payload)]}}
2826 @return: a dictionary of per-instance dictionaries with nodes as
2827 keys and disk information as values; the disk information is a
2828 list of tuples (success, payload)
2830 """
2831 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2833 node_disks = {}
2834 node_disks_devonly = {}
2835 diskless_instances = set()
2836 diskless = constants.DT_DISKLESS
2838 for nname in nodelist:
2839 node_instances = list(itertools.chain(node_image[nname].pinst,
2840 node_image[nname].sinst))
2841 diskless_instances.update(inst for inst in node_instances
2842 if instanceinfo[inst].disk_template == diskless)
2843 disks = [(inst, disk)
2844 for inst in node_instances
2845 for disk in instanceinfo[inst].disks]
2847 if not disks:
2848 # No need to collect data
2849 continue
2851 node_disks[nname] = disks
2853 # Creating copies as SetDiskID below will modify the objects and that can
2854 # lead to incorrect data returned from nodes
2855 devonly = [dev.Copy() for (_, dev) in disks]
2857 for dev in devonly:
2858 self.cfg.SetDiskID(dev, nname)
2860 node_disks_devonly[nname] = devonly
2862 assert len(node_disks) == len(node_disks_devonly)
2864 # Collect data from all nodes with disks
2865 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2866 node_disks_devonly)
2868 assert len(result) == len(node_disks)
2870 instdisk = {}
2872 for (nname, nres) in result.items():
2873 disks = node_disks[nname]
2875 if nres.offline:
2876 # No data from this node
2877 data = len(disks) * [(False, "node offline")]
2880 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2881 "while getting disk information: %s", msg)
2883 # No data from this node
2884 data = len(disks) * [(False, msg)]
2885 else:
2886 data = []
2887 for idx, i in enumerate(nres.payload):
2888 if isinstance(i, (tuple, list)) and len(i) == 2:
2889 data.append(i)
2890 else:
2891 logging.warning("Invalid result from node %s, entry %d: %s",
2893 data.append((False, "Invalid result from the remote node"))
2895 for ((inst, _), status) in zip(disks, data):
2896 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2898 # Add empty entries for diskless instances.
2899 for inst in diskless_instances:
2900 assert inst not in instdisk
2901 instdisk[inst] = {}
2903 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2904 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2905 compat.all(isinstance(s, (tuple, list)) and
2906 len(s) == 2 for s in statuses)
2907 for inst, nnames in instdisk.items()
2908 for nname, statuses in nnames.items())
2909 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2911 return instdisk
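# Shape of the structure returned above: instdisk[instance][node] is a
# list of (success, payload) pairs, one per disk index. Rebuilding it from
# flat records, illustratively:
def _ExampleBuildInstdisk(records):
  """records: iterable of (node name, instance name, (success, payload))."""
  instdisk = {}
  for (nname, inst, status) in records:
    instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
  return instdisk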
2913 @staticmethod
2914 def _SshNodeSelector(group_uuid, all_nodes):
2915 """Create endless iterators for all potential SSH check hosts.
2918 nodes = [node for node in all_nodes
2919 if (node.group != group_uuid and
2920 not node.offline)]
2921 keyfunc = operator.attrgetter("group")
2923 return map(itertools.cycle,
2924 [sorted(map(operator.attrgetter("name"), names))
2925 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2926 keyfunc)])
2928 @classmethod
2929 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2930 """Choose which nodes should talk to which other nodes.
2932 We will make nodes contact all nodes in their group, and one node from
2933 every other group.
2935 @warning: This algorithm has a known issue if one node group is much
2936 smaller than others (e.g. just one node). In such a case all other
2937 nodes will talk to the single node.
2939 """
2940 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2941 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2943 return (online_nodes,
2944 dict((name, sorted([i.next() for i in sel]))
2945 for name in online_nodes))
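# Selection sketch: nodes outside the verified group are bucketed by group
# and cycled endlessly, so every online node gets one SSH peer from each
# foreign group. Standalone illustration with plain name lists:
def _ExampleSshTargets(online_nodes, foreign_groups):
  """foreign_groups: list of node-name lists, one per other group."""
  cyclers = [itertools.cycle(sorted(names)) for names in foreign_groups]
  return dict((name, sorted(c.next() for c in cyclers))
              for name in online_nodes)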
2947 def BuildHooksEnv(self):
2948 """Build hooks env.
2950 Cluster-Verify hooks just ran in the post phase and their failure makes
2951 the output be logged in the verify output and the verification to fail.
2953 """
2954 env = {
2955 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2958 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2959 for node in self.my_node_info.values())
2961 return env
2963 def BuildHooksNodes(self):
2964 """Build hooks nodes.
2967 return ([], self.my_node_names)
2969 def Exec(self, feedback_fn):
2970 """Verify integrity of the node group, performing various test on nodes.
2973 # This method has too many local variables. pylint: disable=R0914
2974 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2976 if not self.my_node_names:
2977 # empty node group
2978 feedback_fn("* Empty node group, skipping verification")
2982 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2983 verbose = self.op.verbose
2984 self._feedback_fn = feedback_fn
2986 vg_name = self.cfg.GetVGName()
2987 drbd_helper = self.cfg.GetDRBDHelper()
2988 cluster = self.cfg.GetClusterInfo()
2989 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2990 hypervisors = cluster.enabled_hypervisors
2991 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2993 i_non_redundant = [] # Non redundant instances
2994 i_non_a_balanced = [] # Non auto-balanced instances
2995 i_offline = 0 # Count of offline instances
2996 n_offline = 0 # Count of offline nodes
2997 n_drained = 0 # Count of nodes being drained
2998 node_vol_should = {}
3000 # FIXME: verify OS list
3002 # File verification
3003 filemap = _ComputeAncillaryFiles(cluster, False)
3005 # do local checksums
3006 master_node = self.master_node = self.cfg.GetMasterNode()
3007 master_ip = self.cfg.GetMasterIP()
3009 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3012 if self.cfg.GetUseExternalMipScript():
3013 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3015 node_verify_param = {
3016 constants.NV_FILELIST:
3017 utils.UniqueSequence(filename
3018 for files in filemap
3019 for filename in files),
3020 constants.NV_NODELIST:
3021 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3022 self.all_node_info.values()),
3023 constants.NV_HYPERVISOR: hypervisors,
3024 constants.NV_HVPARAMS:
3025 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3026 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3027 for node in node_data_list
3028 if not node.offline],
3029 constants.NV_INSTANCELIST: hypervisors,
3030 constants.NV_VERSION: None,
3031 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3032 constants.NV_NODESETUP: None,
3033 constants.NV_TIME: None,
3034 constants.NV_MASTERIP: (master_node, master_ip),
3035 constants.NV_OSLIST: None,
3036 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3037 constants.NV_USERSCRIPTS: user_scripts,
3038 }
3040 if vg_name is not None:
3041 node_verify_param[constants.NV_VGLIST] = None
3042 node_verify_param[constants.NV_LVLIST] = vg_name
3043 node_verify_param[constants.NV_PVLIST] = [vg_name]
3044 node_verify_param[constants.NV_DRBDLIST] = None
3046 if drbd_helper:
3047 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3049 # bridge checks
3050 # FIXME: this needs to be changed per node-group, not cluster-wide
3051 bridges = set()
3052 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3053 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3054 bridges.add(default_nicpp[constants.NIC_LINK])
3055 for instance in self.my_inst_info.values():
3056 for nic in instance.nics:
3057 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3058 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3059 bridges.add(full_nic[constants.NIC_LINK])
3061 if bridges:
3062 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3064 # Build our expected cluster state
3065 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3066 name=node.name,
3067 vm_capable=node.vm_capable))
3068 for node in node_data_list)
3070 oob_paths = []
3072 for node in self.all_node_info.values():
3073 path = _SupportsOob(self.cfg, node)
3074 if path and path not in oob_paths:
3075 oob_paths.append(path)
3077 if oob_paths:
3078 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3080 for instance in self.my_inst_names:
3081 inst_config = self.my_inst_info[instance]
3083 for nname in inst_config.all_nodes:
3084 if nname not in node_image:
3085 gnode = self.NodeImage(name=nname)
3086 gnode.ghost = (nname not in self.all_node_info)
3087 node_image[nname] = gnode
3089 inst_config.MapLVsByNode(node_vol_should)
3091 pnode = inst_config.primary_node
3092 node_image[pnode].pinst.append(instance)
3094 for snode in inst_config.secondary_nodes:
3095 nimg = node_image[snode]
3096 nimg.sinst.append(instance)
3097 if pnode not in nimg.sbp:
3098 nimg.sbp[pnode] = []
3099 nimg.sbp[pnode].append(instance)
3101 # At this point, we have the in-memory data structures complete,
3102 # except for the runtime information, which we'll gather next
3104 # Due to the way our RPC system works, exact response times cannot be
3105 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3106 # time before and after executing the request, we can at least have a time
3107 # window.
3108 nvinfo_starttime = time.time()
3109 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3110 node_verify_param,
3111 self.cfg.GetClusterName())
3112 nvinfo_endtime = time.time()
3114 if self.extra_lv_nodes and vg_name is not None:
3115 extra_lv_nvinfo = \
3116 self.rpc.call_node_verify(self.extra_lv_nodes,
3117 {constants.NV_LVLIST: vg_name},
3118 self.cfg.GetClusterName())
3119 else:
3120 extra_lv_nvinfo = {}
3122 all_drbd_map = self.cfg.ComputeDRBDMap()
3124 feedback_fn("* Gathering disk information (%s nodes)" %
3125 len(self.my_node_names))
3126 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3129 feedback_fn("* Verifying configuration file consistency")
3131 # If not all nodes are being checked, we need to make sure the master node
3132 # and a non-checked vm_capable node are in the list.
3133 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3134 if absent_nodes:
3135 vf_nvinfo = all_nvinfo.copy()
3136 vf_node_info = list(self.my_node_info.values())
3137 additional_nodes = []
3138 if master_node not in self.my_node_info:
3139 additional_nodes.append(master_node)
3140 vf_node_info.append(self.all_node_info[master_node])
3141 # Add the first vm_capable node we find which is not included
3142 for node in absent_nodes:
3143 nodeinfo = self.all_node_info[node]
3144 if nodeinfo.vm_capable and not nodeinfo.offline:
3145 additional_nodes.append(node)
3146 vf_node_info.append(self.all_node_info[node])
3148 key = constants.NV_FILELIST
3149 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3150 {key: node_verify_param[key]},
3151 self.cfg.GetClusterName()))
3152 else:
3153 vf_nvinfo = all_nvinfo
3154 vf_node_info = self.my_node_info.values()
3156 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3158 feedback_fn("* Verifying node status")
3162 for node_i in node_data_list:
3163 node = node_i.name
3164 nimg = node_image[node]
3166 if node_i.offline:
3167 if verbose:
3168 feedback_fn("* Skipping offline node %s" % (node,))
3172 if node == master_node:
3173 ntype = "master"
3174 elif node_i.master_candidate:
3175 ntype = "master candidate"
3176 elif node_i.drained:
3177 ntype = "drained"
3178 n_drained += 1
3179 else:
3180 ntype = "regular"
3181 if verbose:
3182 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3184 msg = all_nvinfo[node].fail_msg
3185 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3188 nimg.rpc_fail = True
3189 continue
3191 nresult = all_nvinfo[node].payload
3193 nimg.call_ok = self._VerifyNode(node_i, nresult)
3194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3195 self._VerifyNodeNetwork(node_i, nresult)
3196 self._VerifyNodeUserScripts(node_i, nresult)
3197 self._VerifyOob(node_i, nresult)
3199 if node_i.vm_capable:
3200 self._VerifyNodeLVM(node_i, nresult, vg_name)
3201 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3202 all_drbd_map)
3204 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3205 self._UpdateNodeInstances(node_i, nresult, nimg)
3206 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3207 self._UpdateNodeOS(node_i, nresult, nimg)
3209 if not nimg.os_fail:
3210 if refos_img is None:
3211 refos_img = nimg
3212 self._VerifyNodeOS(node_i, nimg, refos_img)
3213 self._VerifyNodeBridges(node_i, nresult, bridges)
3215 # Check whether all running instances are primary for the node. (This
3216 # can no longer be done from _VerifyInstance below, since some of the
3217 # wrong instances could be from other node groups.)
3218 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3220 for inst in non_primary_inst:
3221 # FIXME: investigate best way to handle offline insts
3222 if inst.admin_state == constants.ADMINST_OFFLINE:
3224 feedback_fn("* Skipping offline instance %s" % inst.name)
3227 test = inst in self.all_inst_info
3228 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3229 "instance should not run on node %s", node_i.name)
3230 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3231 "node is running unknown instance %s", inst)
3233 for node, result in extra_lv_nvinfo.items():
3234 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3235 node_image[node], vg_name)
3237 feedback_fn("* Verifying instance status")
3238 for instance in self.my_inst_names:
3239 if verbose:
3240 feedback_fn("* Verifying instance %s" % instance)
3241 inst_config = self.my_inst_info[instance]
3242 self._VerifyInstance(instance, inst_config, node_image,
3243 instdisk[instance])
3244 inst_nodes_offline = []
3246 pnode = inst_config.primary_node
3247 pnode_img = node_image[pnode]
3248 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3249 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3250 " primary node failed", instance)
3252 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3253 pnode_img.offline,
3254 constants.CV_EINSTANCEBADNODE, instance,
3255 "instance is marked as running and lives on offline node %s",
3256 inst_config.primary_node)
3258 # If the instance is non-redundant we cannot survive losing its primary
3259 # node, so we are not N+1 compliant. On the other hand we have no disk
3260 # templates with more than one secondary so that situation is not well
3261 # supported either.
3262 # FIXME: does not support file-backed instances
3263 if not inst_config.secondary_nodes:
3264 i_non_redundant.append(instance)
3266 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3267 constants.CV_EINSTANCELAYOUT,
3268 instance, "instance has multiple secondary nodes: %s",
3269 utils.CommaJoin(inst_config.secondary_nodes),
3270 code=self.ETYPE_WARNING)
3272 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3273 pnode = inst_config.primary_node
3274 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3275 instance_groups = {}
3277 for node in instance_nodes:
3278 instance_groups.setdefault(self.all_node_info[node].group,
3279 []).append(node)
3281 pretty_list = [
3282 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3283 # Sort so that we always list the primary node first.
3284 for group, nodes in sorted(instance_groups.items(),
3285 key=lambda (_, nodes): pnode in nodes,
3286 reverse=True)]
3288 self._ErrorIf(len(instance_groups) > 1,
3289 constants.CV_EINSTANCESPLITGROUPS,
3290 instance, "instance has primary and secondary nodes in"
3291 " different groups: %s", utils.CommaJoin(pretty_list),
3292 code=self.ETYPE_WARNING)
3294 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3295 i_non_a_balanced.append(instance)
3297 for snode in inst_config.secondary_nodes:
3298 s_img = node_image[snode]
3299 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3300 snode, "instance %s, connection to secondary node failed",
3304 inst_nodes_offline.append(snode)
3306 # warn that the instance lives on offline nodes
3307 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3308 "instance has offline secondary node(s) %s",
3309 utils.CommaJoin(inst_nodes_offline))
3310 # ... or ghost/non-vm_capable nodes
3311 for node in inst_config.all_nodes:
3312 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3313 instance, "instance lives on ghost node %s", node)
3314 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3315 instance, "instance lives on non-vm_capable node %s", node)
3317 feedback_fn("* Verifying orphan volumes")
3318 reserved = utils.FieldSet(*cluster.reserved_lvs)
3320 # We will get spurious "unknown volume" warnings if any node of this group
3321 # is secondary for an instance whose primary is in another group. To avoid
3322 # them, we find these instances and add their volumes to node_vol_should.
3323 for inst in self.all_inst_info.values():
3324 for secondary in inst.secondary_nodes:
3325 if (secondary in self.my_node_info
3326 and inst.name not in self.my_inst_info):
3327 inst.MapLVsByNode(node_vol_should)
3330 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3332 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3333 feedback_fn("* Verifying N+1 Memory redundancy")
3334 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3336 feedback_fn("* Other Notes")
3338 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3339 % len(i_non_redundant))
3341 if i_non_a_balanced:
3342 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3343 % len(i_non_a_balanced))
3345 if i_offline:
3346 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3349 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3352 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3356 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3357 """Analyze the post-hooks' result
3359 This method analyses the hook result, handles it, and sends some
3360 nicely-formatted feedback back to the user.
3362 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3363 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3364 @param hooks_results: the results of the multi-node hooks rpc call
3365 @param feedback_fn: function used send feedback back to the caller
3366 @param lu_result: previous Exec result
3367 @return: the new Exec result, based on the previous result
3369 """
3371 # We only really run POST phase hooks, only for non-empty groups,
3372 # and are only interested in their results
3373 if not self.my_node_names:
3374 # empty node group
3375 pass
3376 elif phase == constants.HOOKS_PHASE_POST:
3377 # Used to change hooks' output to proper indentation
3378 feedback_fn("* Hooks Results")
3379 assert hooks_results, "invalid result from hooks"
3381 for node_name in hooks_results:
3382 res = hooks_results[node_name]
3383 msg = res.fail_msg
3384 test = msg and not res.offline
3385 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3386 "Communication failure in hooks execution: %s", msg)
3387 if res.offline or msg:
3388 # No need to investigate payload if node is offline or gave
3389 # an error
3390 continue
3391 for script, hkr, output in res.payload:
3392 test = hkr == constants.HKR_FAIL
3393 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3394 "Script %s failed, output:", script)
3396 output = self._HOOKS_INDENT_RE.sub(" ", output)
3397 feedback_fn("%s" % output)
3403 class LUClusterVerifyDisks(NoHooksLU):
3404 """Verifies the cluster disks status.
3409 def ExpandNames(self):
3410 self.share_locks = _ShareAll()
3411 self.needed_locks = {
3412 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3413 }
3415 def Exec(self, feedback_fn):
3416 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3418 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3419 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3420 for group in group_names])
3423 class LUGroupVerifyDisks(NoHooksLU):
3424 """Verifies the status of all disks in a node group.
3429 def ExpandNames(self):
3430 # Raises errors.OpPrereqError on its own if group can't be found
3431 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3433 self.share_locks = _ShareAll()
3434 self.needed_locks = {
3435 locking.LEVEL_INSTANCE: [],
3436 locking.LEVEL_NODEGROUP: [],
3437 locking.LEVEL_NODE: [],
3438 }
3440 def DeclareLocks(self, level):
3441 if level == locking.LEVEL_INSTANCE:
3442 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3444 # Lock instances optimistically, needs verification once node and group
3445 # locks have been acquired
3446 self.needed_locks[locking.LEVEL_INSTANCE] = \
3447 self.cfg.GetNodeGroupInstances(self.group_uuid)
3449 elif level == locking.LEVEL_NODEGROUP:
3450 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3452 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3453 set([self.group_uuid] +
3454 # Lock all groups used by instances optimistically; this requires
3455 # going via the node before it's locked, requiring verification
3456 # later on
3457 [group_uuid
3458 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3459 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3461 elif level == locking.LEVEL_NODE:
3462 # This will only lock the nodes in the group to be verified which contain
3463 # actual instances
3464 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3465 self._LockInstancesNodes()
3467 # Lock all nodes in group to be verified
3468 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3469 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3470 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3472 def CheckPrereq(self):
3473 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3474 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3475 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3477 assert self.group_uuid in owned_groups
3479 # Check if locked instances are still correct
3480 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3482 # Get instance information
3483 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3485 # Check if node groups for locked instances are still correct
3486 for (instance_name, inst) in self.instances.items():
3487 assert owned_nodes.issuperset(inst.all_nodes), \
3488 "Instance %s's nodes changed while we kept the lock" % instance_name
3490 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3491 owned_groups)
3493 assert self.group_uuid in inst_groups, \
3494 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3496 def Exec(self, feedback_fn):
3497 """Verify integrity of cluster disks.
3499 @rtype: tuple of three items
3500 @return: a tuple of (dict of node-to-node_error, list of instances
3501 which need activate-disks, dict of instance: (node, volume) for
3502 missing volumes)
3504 """
3505 res_nodes = {}
3506 res_instances = set()
3507 res_missing = {}
3509 nv_dict = _MapInstanceDisksToNodes([inst
3510 for inst in self.instances.values()
3511 if inst.admin_state == constants.ADMINST_UP])
3513 if nv_dict:
3514 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3515 set(self.cfg.GetVmCapableNodeList()))
3517 node_lvs = self.rpc.call_lv_list(nodes, [])
3519 for (node, node_res) in node_lvs.items():
3520 if node_res.offline:
3521 continue
3523 msg = node_res.fail_msg
3524 if msg:
3525 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3526 res_nodes[node] = msg
3527 continue
3529 for lv_name, (_, _, lv_online) in node_res.payload.items():
3530 inst = nv_dict.pop((node, lv_name), None)
3531 if not (lv_online or inst is None):
3532 res_instances.add(inst)
3534 # any leftover items in nv_dict are missing LVs, let's arrange the data
3535 # better
3536 for key, inst in nv_dict.iteritems():
3537 res_missing.setdefault(inst, []).append(list(key))
3539 return (res_nodes, list(res_instances), res_missing)
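# The reconciliation above, reduced: start from every LV the configuration
# expects, discard each one a node actually reports, and whatever is left
# is missing. Sketch over plain dicts (hypothetical helper):
def _ExampleFindMissingLvs(expected, reported_keys):
  """expected: {(node, lv_name): instance}; reported_keys: keys seen live."""
  leftover = dict(expected)
  for key in reported_keys:
    leftover.pop(key, None)
  missing = {}
  for (node, lv_name), inst in leftover.iteritems():
    missing.setdefault(inst, []).append([node, lv_name])
  return missing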
3542 class LUClusterRepairDiskSizes(NoHooksLU):
3543 """Verifies the cluster disks sizes.
3548 def ExpandNames(self):
3549 if self.op.instances:
3550 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3551 self.needed_locks = {
3552 locking.LEVEL_NODE_RES: [],
3553 locking.LEVEL_INSTANCE: self.wanted_names,
3554 }
3555 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3556 else:
3557 self.wanted_names = None
3558 self.needed_locks = {
3559 locking.LEVEL_NODE_RES: locking.ALL_SET,
3560 locking.LEVEL_INSTANCE: locking.ALL_SET,
3561 }
3562 self.share_locks = {
3563 locking.LEVEL_NODE_RES: 1,
3564 locking.LEVEL_INSTANCE: 0,
3565 }
3567 def DeclareLocks(self, level):
3568 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3569 self._LockInstancesNodes(primary_only=True, level=level)
3571 def CheckPrereq(self):
3572 """Check prerequisites.
3574 This only checks the optional instance list against the existing names.
3576 """
3577 if self.wanted_names is None:
3578 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3580 self.wanted_instances = \
3581 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3583 def _EnsureChildSizes(self, disk):
3584 """Ensure children of the disk have the needed disk size.
3586 This is valid mainly for DRBD8 and fixes an issue where the
3587 children have smaller disk size.
3589 @param disk: an L{ganeti.objects.Disk} object
3591 """
3592 if disk.dev_type == constants.LD_DRBD8:
3593 assert disk.children, "Empty children for DRBD8?"
3594 fchild = disk.children[0]
3595 mismatch = fchild.size < disk.size
3597 self.LogInfo("Child disk has size %d, parent %d, fixing",
3598 fchild.size, disk.size)
3599 fchild.size = disk.size
3601 # and we recurse on this child only, not on the metadev
3602 return self._EnsureChildSizes(fchild) or mismatch
3603 else:
3604 return False
3606 def Exec(self, feedback_fn):
3607 """Verify the size of cluster disks.
3610 # TODO: check child disks too
3611 # TODO: check differences in size between primary/secondary nodes
3612 per_node_disks = {}
3613 for instance in self.wanted_instances:
3614 pnode = instance.primary_node
3615 if pnode not in per_node_disks:
3616 per_node_disks[pnode] = []
3617 for idx, disk in enumerate(instance.disks):
3618 per_node_disks[pnode].append((instance, idx, disk))
3620 assert not (frozenset(per_node_disks.keys()) -
3621 self.owned_locks(locking.LEVEL_NODE_RES)), \
3622 "Not owning correct locks"
3623 assert not self.owned_locks(locking.LEVEL_NODE)
3625 changed = []
3626 for node, dskl in per_node_disks.items():
3627 newl = [v[2].Copy() for v in dskl]
3628 for dsk in newl:
3629 self.cfg.SetDiskID(dsk, node)
3630 result = self.rpc.call_blockdev_getsize(node, newl)
3631 if result.fail_msg:
3632 self.LogWarning("Failure in blockdev_getsize call to node"
3633 " %s, ignoring", node)
3635 if len(result.payload) != len(dskl):
3636 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3637 " result.payload=%s", node, len(dskl), result.payload)
3638 self.LogWarning("Invalid result from node %s, ignoring node results",
3641 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3642 if size is None:
3643 self.LogWarning("Disk %d of instance %s did not return size"
3644 " information, ignoring", idx, instance.name)
3646 if not isinstance(size, (int, long)):
3647 self.LogWarning("Disk %d of instance %s did not return valid"
3648 " size information, ignoring", idx, instance.name)
3651 if size != disk.size:
3652 self.LogInfo("Disk %d of instance %s has mismatched size,"
3653 " correcting: recorded %d, actual %d", idx,
3654 instance.name, disk.size, size)
3655 disk.size = size
3656 self.cfg.Update(instance, feedback_fn)
3657 changed.append((instance.name, idx, size))
3658 if self._EnsureChildSizes(disk):
3659 self.cfg.Update(instance, feedback_fn)
3660 changed.append((instance.name, idx, disk.size))
3662 return changed
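# The comparison above works in mebibytes: blockdev_getsize reports bytes,
# hence the "size >> 20" conversion before matching the configured
# disk.size. A worked standalone example:
def _ExampleSizeMismatch(recorded_mib, actual_bytes):
  actual_mib = actual_bytes >> 20  # e.g. 10737418240 bytes -> 10240 MiB
  return (actual_mib != recorded_mib, actual_mib)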
3664 class LUClusterRename(LogicalUnit):
3665 """Rename the cluster.
3668 HPATH = "cluster-rename"
3669 HTYPE = constants.HTYPE_CLUSTER
3671 def BuildHooksEnv(self):
3672 """Build hooks env.
3674 """
3675 return {
3676 "OP_TARGET": self.cfg.GetClusterName(),
3677 "NEW_NAME": self.op.name,
3680 def BuildHooksNodes(self):
3681 """Build hooks nodes.
3684 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3686 def CheckPrereq(self):
3687 """Verify that the passed name is a valid one.
3690 hostname = netutils.GetHostname(name=self.op.name,
3691 family=self.cfg.GetPrimaryIPFamily())
3693 new_name = hostname.name
3694 self.ip = new_ip = hostname.ip
3695 old_name = self.cfg.GetClusterName()
3696 old_ip = self.cfg.GetMasterIP()
3697 if new_name == old_name and new_ip == old_ip:
3698 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3699 " cluster has changed",
3701 if new_ip != old_ip:
3702 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3703 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3704 " reachable on the network" %
3705 new_ip, errors.ECODE_NOTUNIQUE)
3707 self.op.name = new_name
3709 def Exec(self, feedback_fn):
3710 """Rename the cluster.
3713 clustername = self.op.name
3715 new_ip = self.ip
3716 # shutdown the master IP
3717 master_params = self.cfg.GetMasterNetworkParameters()
3718 ems = self.cfg.GetUseExternalMipScript()
3719 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3720 master_params, ems)
3721 result.Raise("Could not disable the master role")
3724 cluster = self.cfg.GetClusterInfo()
3725 cluster.cluster_name = clustername
3726 cluster.master_ip = new_ip
3727 self.cfg.Update(cluster, feedback_fn)
3729 # update the known hosts file
3730 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3731 node_list = self.cfg.GetOnlineNodeList()
3733 node_list.remove(master_params.name)
3736 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3738 master_params.ip = new_ip
3739 result = self.rpc.call_node_activate_master_ip(master_params.name,
3741 msg = result.fail_msg
3743 self.LogWarning("Could not re-enable the master role on"
3744 " the master, please restart manually: %s", msg)
3749 def _ValidateNetmask(cfg, netmask):
3750 """Checks if a netmask is valid.
3752 @type cfg: L{config.ConfigWriter}
3753 @param cfg: The cluster configuration
3754 @type netmask: int
3755 @param netmask: the netmask to be verified
3756 @raise errors.OpPrereqError: if the validation fails
3758 """
3759 ip_family = cfg.GetPrimaryIPFamily()
3760 try:
3761 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3762 except errors.ProgrammerError:
3763 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3764 ip_family, errors.ECODE_INVAL)
3765 if not ipcls.ValidateNetmask(netmask):
3766 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3767 (netmask), errors.ECODE_INVAL)
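# Illustrative sketch (not from the original source): the master netmask is
# handled as a CIDR prefix length, so for an IPv4 cluster the check above
# reduces to a range check against the address family, roughly:
#
#   ipcls = netutils.IPAddress.GetClassFromIpFamily(netutils.IP4Address.family)
#   ipcls.ValidateNetmask(24)   # True: /24 fits in 32 bits
#   ipcls.ValidateNetmask(33)   # False: _ValidateNetmask raises OpPrereqError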
3770 class LUClusterSetParams(LogicalUnit):
3771 """Change the parameters of the cluster.
3774 HPATH = "cluster-modify"
3775 HTYPE = constants.HTYPE_CLUSTER
3778 def CheckArguments(self):
3782 if self.op.uid_pool:
3783 uidpool.CheckUidPool(self.op.uid_pool)
3785 if self.op.add_uids:
3786 uidpool.CheckUidPool(self.op.add_uids)
3788 if self.op.remove_uids:
3789 uidpool.CheckUidPool(self.op.remove_uids)
3791 if self.op.master_netmask is not None:
3792 _ValidateNetmask(self.cfg, self.op.master_netmask)
3794 if self.op.diskparams:
3795 for dt_params in self.op.diskparams.values():
3796 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3798 def ExpandNames(self):
3799 # FIXME: in the future maybe other cluster params won't require checking on
3800 # all nodes to be modified.
3801 self.needed_locks = {
3802 locking.LEVEL_NODE: locking.ALL_SET,
3803 locking.LEVEL_INSTANCE: locking.ALL_SET,
3804 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3805 }
3806 self.share_locks = {
3807 locking.LEVEL_NODE: 1,
3808 locking.LEVEL_INSTANCE: 1,
3809 locking.LEVEL_NODEGROUP: 1,
3810 }
3812 def BuildHooksEnv(self):
3813 """Build hooks env.
3815 """
3816 return {
3817 "OP_TARGET": self.cfg.GetClusterName(),
3818 "NEW_VG_NAME": self.op.vg_name,
3819 }
3821 def BuildHooksNodes(self):
3822 """Build hooks nodes.
3824 """
3825 mn = self.cfg.GetMasterNode()
3826 return ([mn], [mn])
3828 def CheckPrereq(self):
3829 """Check prerequisites.
3831 This checks whether the given params don't conflict and
3832 if the given volume group is valid.
3834 """
3835 if self.op.vg_name is not None and not self.op.vg_name:
3836 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3837 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3838 " instances exist", errors.ECODE_INVAL)
3840 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3841 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3842 raise errors.OpPrereqError("Cannot disable drbd helper while"
3843 " drbd-based instances exist",
3844 errors.ECODE_INVAL)
3846 node_list = self.owned_locks(locking.LEVEL_NODE)
3848 # if vg_name not None, checks given volume group on all nodes
3849 if self.op.vg_name:
3850 vglist = self.rpc.call_vg_list(node_list)
3851 for node in node_list:
3852 msg = vglist[node].fail_msg
3853 if msg:
3854 # ignoring down node
3855 self.LogWarning("Error while gathering data on node %s"
3856 " (ignoring node): %s", node, msg)
3857 continue
3858 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3859 self.op.vg_name,
3860 constants.MIN_VG_SIZE)
3861 if vgstatus:
3862 raise errors.OpPrereqError("Error on node '%s': %s" %
3863 (node, vgstatus), errors.ECODE_ENVIRON)
3865 if self.op.drbd_helper:
3866 # checks given drbd helper on all nodes
3867 helpers = self.rpc.call_drbd_helper(node_list)
3868 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3869 if ninfo.offline:
3870 self.LogInfo("Not checking drbd helper on offline node %s", node)
3871 continue
3872 msg = helpers[node].fail_msg
3873 if msg:
3874 raise errors.OpPrereqError("Error checking drbd helper on node"
3875 " '%s': %s" % (node, msg),
3876 errors.ECODE_ENVIRON)
3877 node_helper = helpers[node].payload
3878 if node_helper != self.op.drbd_helper:
3879 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3880 (node, node_helper), errors.ECODE_ENVIRON)
3882 self.cluster = cluster = self.cfg.GetClusterInfo()
3883 # validate params changes
3884 if self.op.beparams:
3885 objects.UpgradeBeParams(self.op.beparams)
3886 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3887 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3889 if self.op.ndparams:
3890 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3891 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3893 # TODO: we need a more general way to handle resetting
3894 # cluster-level parameters to default values
3895 if self.new_ndparams["oob_program"] == "":
3896 self.new_ndparams["oob_program"] = \
3897 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3899 if self.op.hv_state:
3900 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3901 self.cluster.hv_state_static)
3902 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3903 for hv, values in new_hv_state.items())
3905 if self.op.disk_state:
3906 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3907 self.cluster.disk_state_static)
3908 self.new_disk_state = \
3909 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3910 for name, values in svalues.items()))
3911 for storage, svalues in new_disk_state.items())
3913 if self.op.ipolicy:
3914 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3915 group_policy=False)
3917 all_instances = self.cfg.GetAllInstancesInfo().values()
3918 violations = set()
3919 for group in self.cfg.GetAllNodeGroupsInfo().values():
3920 instances = frozenset([inst for inst in all_instances
3921 if compat.any(node in group.members
3922 for node in inst.all_nodes)])
3923 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3924 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3925 group),
3926 new_ipolicy, instances)
3928 violations.update(new)
3930 if violations:
3931 self.LogWarning("After the ipolicy change the following instances"
3932 " violate them: %s",
3933 utils.CommaJoin(violations))
3935 if self.op.nicparams:
3936 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3937 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3938 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3939 nic_errors = []
3941 # check all instances for consistency
3942 for instance in self.cfg.GetAllInstancesInfo().values():
3943 for nic_idx, nic in enumerate(instance.nics):
3944 params_copy = copy.deepcopy(nic.nicparams)
3945 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3947 # check parameter syntax
3948 try:
3949 objects.NIC.CheckParameterSyntax(params_filled)
3950 except errors.ConfigurationError, err:
3951 nic_errors.append("Instance %s, nic/%d: %s" %
3952 (instance.name, nic_idx, err))
3954 # if we're moving instances to routed, check that they have an ip
3955 target_mode = params_filled[constants.NIC_MODE]
3956 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3957 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3958 " address" % (instance.name, nic_idx))
3960 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3961 "\n".join(nic_errors))
3963 # hypervisor list/parameters
3964 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3965 if self.op.hvparams:
3966 for hv_name, hv_dict in self.op.hvparams.items():
3967 if hv_name not in self.new_hvparams:
3968 self.new_hvparams[hv_name] = hv_dict
3969 else:
3970 self.new_hvparams[hv_name].update(hv_dict)
3972 # disk template parameters
3973 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3974 if self.op.diskparams:
3975 for dt_name, dt_params in self.op.diskparams.items():
3976 if dt_name not in self.new_diskparams:
3977 self.new_diskparams[dt_name] = dt_params
3978 else:
3979 self.new_diskparams[dt_name].update(dt_params)
3981 # os hypervisor parameters
3982 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3983 if self.op.os_hvp:
3984 for os_name, hvs in self.op.os_hvp.items():
3985 if os_name not in self.new_os_hvp:
3986 self.new_os_hvp[os_name] = hvs
3987 else:
3988 for hv_name, hv_dict in hvs.items():
3989 if hv_name not in self.new_os_hvp[os_name]:
3990 self.new_os_hvp[os_name][hv_name] = hv_dict
3991 else:
3992 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3994 # os parameters
3995 self.new_osp = objects.FillDict(cluster.osparams, {})
3996 if self.op.osparams:
3997 for os_name, osp in self.op.osparams.items():
3998 if os_name not in self.new_osp:
3999 self.new_osp[os_name] = {}
4000 else:
4001 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4002 use_default=True)
4004 if not self.new_osp[os_name]:
4005 # we removed all parameters
4006 del self.new_osp[os_name]
4008 # check the parameter validity (remote check)
4009 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4010 os_name, self.new_osp[os_name])
4012 # changes to the hypervisor list
4013 if self.op.enabled_hypervisors is not None:
4014 self.hv_list = self.op.enabled_hypervisors
4015 for hv in self.hv_list:
4016 # if the hypervisor doesn't already exist in the cluster
4017 # hvparams, we initialize it to empty, and then (in both
4018 # cases) we make sure to fill the defaults, as we might not
4019 # have a complete defaults list if the hypervisor wasn't
4020 # enabled before
4021 if hv not in new_hvp:
4022 new_hvp[hv] = {}
4023 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4024 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4025 else:
4026 self.hv_list = cluster.enabled_hypervisors
4028 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4029 # either the enabled list has changed, or the parameters have, validate
4030 for hv_name, hv_params in self.new_hvparams.items():
4031 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4032 (self.op.enabled_hypervisors and
4033 hv_name in self.op.enabled_hypervisors)):
4034 # either this is a new hypervisor, or its parameters have changed
4035 hv_class = hypervisor.GetHypervisor(hv_name)
4036 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4037 hv_class.CheckParameterSyntax(hv_params)
4038 _CheckHVParams(self, node_list, hv_name, hv_params)
4040 if self.op.os_hvp:
4041 # no need to check any newly-enabled hypervisors, since the
4042 # defaults have already been checked in the above code-block
4043 for os_name, os_hvp in self.new_os_hvp.items():
4044 for hv_name, hv_params in os_hvp.items():
4045 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4046 # we need to fill in the new os_hvp on top of the actual hv_p
4047 cluster_defaults = self.new_hvparams.get(hv_name, {})
4048 new_osp = objects.FillDict(cluster_defaults, hv_params)
4049 hv_class = hypervisor.GetHypervisor(hv_name)
4050 hv_class.CheckParameterSyntax(new_osp)
4051 _CheckHVParams(self, node_list, hv_name, new_osp)
4053 if self.op.default_iallocator:
4054 alloc_script = utils.FindFile(self.op.default_iallocator,
4055 constants.IALLOCATOR_SEARCH_PATH,
4056 os.path.isfile)
4057 if alloc_script is None:
4058 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4059 " specified" % self.op.default_iallocator,
4060 errors.ECODE_INVAL)
4062 def Exec(self, feedback_fn):
4063 """Change the parameters of the cluster.
4065 """
4066 if self.op.vg_name is not None:
4067 new_volume = self.op.vg_name
4068 if not new_volume:
4069 new_volume = None
4070 if new_volume != self.cfg.GetVGName():
4071 self.cfg.SetVGName(new_volume)
4072 else:
4073 feedback_fn("Cluster LVM configuration already in desired"
4074 " state, not changing")
4075 if self.op.drbd_helper is not None:
4076 new_helper = self.op.drbd_helper
4077 if not new_helper:
4078 new_helper = None
4079 if new_helper != self.cfg.GetDRBDHelper():
4080 self.cfg.SetDRBDHelper(new_helper)
4081 else:
4082 feedback_fn("Cluster DRBD helper already in desired state,"
4083 " not changing")
4084 if self.op.hvparams:
4085 self.cluster.hvparams = self.new_hvparams
4086 if self.op.os_hvp:
4087 self.cluster.os_hvp = self.new_os_hvp
4088 if self.op.enabled_hypervisors is not None:
4089 self.cluster.hvparams = self.new_hvparams
4090 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4091 if self.op.beparams:
4092 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4093 if self.op.nicparams:
4094 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4095 if self.op.ipolicy:
4096 self.cluster.ipolicy = self.new_ipolicy
4097 if self.op.osparams:
4098 self.cluster.osparams = self.new_osp
4099 if self.op.ndparams:
4100 self.cluster.ndparams = self.new_ndparams
4101 if self.op.diskparams:
4102 self.cluster.diskparams = self.new_diskparams
4103 if self.op.hv_state:
4104 self.cluster.hv_state_static = self.new_hv_state
4105 if self.op.disk_state:
4106 self.cluster.disk_state_static = self.new_disk_state
4108 if self.op.candidate_pool_size is not None:
4109 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4110 # we need to update the pool size here, otherwise the save will fail
4111 _AdjustCandidatePool(self, [])
4113 if self.op.maintain_node_health is not None:
4114 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4115 feedback_fn("Note: CONFD was disabled at build time, node health"
4116 " maintenance is not useful (still enabling it)")
4117 self.cluster.maintain_node_health = self.op.maintain_node_health
4119 if self.op.prealloc_wipe_disks is not None:
4120 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4122 if self.op.add_uids is not None:
4123 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4125 if self.op.remove_uids is not None:
4126 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4128 if self.op.uid_pool is not None:
4129 self.cluster.uid_pool = self.op.uid_pool
4131 if self.op.default_iallocator is not None:
4132 self.cluster.default_iallocator = self.op.default_iallocator
4134 if self.op.reserved_lvs is not None:
4135 self.cluster.reserved_lvs = self.op.reserved_lvs
4137 if self.op.use_external_mip_script is not None:
4138 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4140 def helper_os(aname, mods, desc):
4141 desc += " OS list"
4142 lst = getattr(self.cluster, aname)
4143 for key, val in mods:
4144 if key == constants.DDM_ADD:
4145 if val in lst:
4146 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4147 else:
4148 lst.append(val)
4149 elif key == constants.DDM_REMOVE:
4150 if val in lst:
4151 lst.remove(val)
4152 else:
4153 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4154 else:
4155 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4157 if self.op.hidden_os:
4158 helper_os("hidden_os", self.op.hidden_os, "hidden")
4160 if self.op.blacklisted_os:
4161 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4163 if self.op.master_netdev:
4164 master_params = self.cfg.GetMasterNetworkParameters()
4165 ems = self.cfg.GetUseExternalMipScript()
4166 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4167 self.cluster.master_netdev)
4168 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4169 master_params, ems)
4170 result.Raise("Could not disable the master ip")
4171 feedback_fn("Changing master_netdev from %s to %s" %
4172 (master_params.netdev, self.op.master_netdev))
4173 self.cluster.master_netdev = self.op.master_netdev
4175 if self.op.master_netmask:
4176 master_params = self.cfg.GetMasterNetworkParameters()
4177 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4178 result = self.rpc.call_node_change_master_netmask(master_params.name,
4179 master_params.netmask,
4180 self.op.master_netmask,
4181 master_params.ip,
4182 master_params.netdev)
4183 if result.fail_msg:
4184 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4185 feedback_fn(msg)
4187 self.cluster.master_netmask = self.op.master_netmask
4189 self.cfg.Update(self.cluster, feedback_fn)
4191 if self.op.master_netdev:
4192 master_params = self.cfg.GetMasterNetworkParameters()
4193 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4194 self.op.master_netdev)
4195 ems = self.cfg.GetUseExternalMipScript()
4196 result = self.rpc.call_node_activate_master_ip(master_params.name,
4197 master_params, ems)
4198 if result.fail_msg:
4199 self.LogWarning("Could not re-enable the master ip on"
4200 " the master, please restart manually: %s",
4201 result.fail_msg)
4204 def _UploadHelper(lu, nodes, fname):
4205 """Helper for uploading a file and showing warnings.
4207 """
4208 if os.path.exists(fname):
4209 result = lu.rpc.call_upload_file(nodes, fname)
4210 for to_node, to_result in result.items():
4211 msg = to_result.fail_msg
4212 if msg:
4213 msg = ("Copy of file %s to node %s failed: %s" %
4214 (fname, to_node, msg))
4215 lu.proc.LogWarning(msg)
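# Usage sketch (mirrors the call in LUClusterRename above): replicating the
# known-hosts file to all online nodes except the master:
#
#   node_list = lu.cfg.GetOnlineNodeList()
#   _UploadHelper(lu, node_list, constants.SSH_KNOWN_HOSTS_FILE)
#
# Failures only produce warnings, so a single unreachable node cannot abort
# the whole redistribution; the upload is best-effort by design.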
4218 def _ComputeAncillaryFiles(cluster, redist):
4219 """Compute files external to Ganeti which need to be consistent.
4221 @type redist: boolean
4222 @param redist: Whether to include files which need to be redistributed
4224 """
4225 # Compute files for all nodes
4226 files_all = set([
4227 constants.SSH_KNOWN_HOSTS_FILE,
4228 constants.CONFD_HMAC_KEY,
4229 constants.CLUSTER_DOMAIN_SECRET_FILE,
4230 constants.SPICE_CERT_FILE,
4231 constants.SPICE_CACERT_FILE,
4232 constants.RAPI_USERS_FILE,
4233 ])
4235 if not redist:
4236 files_all.update(constants.ALL_CERT_FILES)
4237 files_all.update(ssconf.SimpleStore().GetFileList())
4238 else:
4239 # we need to ship at least the RAPI certificate
4240 files_all.add(constants.RAPI_CERT_FILE)
4242 if cluster.modify_etc_hosts:
4243 files_all.add(constants.ETC_HOSTS)
4245 # Files which are optional, these must:
4246 # - be present in one other category as well
4247 # - either exist or not exist on all nodes of that category (mc, vm all)
4248 files_opt = set([
4249 constants.RAPI_USERS_FILE,
4250 ])
4252 # Files which should only be on master candidates
4253 files_mc = set()
4255 if not redist:
4256 files_mc.add(constants.CLUSTER_CONF_FILE)
4258 # FIXME: this should also be replicated but Ganeti doesn't support
4259 # files_mc replication
4260 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4262 # Files which should only be on VM-capable nodes
4263 files_vm = set(filename
4264 for hv_name in cluster.enabled_hypervisors
4265 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4267 files_opt |= set(filename
4268 for hv_name in cluster.enabled_hypervisors
4269 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4271 # Filenames in each category must be unique
4272 all_files_set = files_all | files_mc | files_vm
4273 assert (len(all_files_set) ==
4274 sum(map(len, [files_all, files_mc, files_vm]))), \
4275 "Found file listed in more than one file list"
4277 # Optional files must be present in one other category
4278 assert all_files_set.issuperset(files_opt), \
4279 "Optional file not in a different required list"
4281 return (files_all, files_opt, files_mc, files_vm)
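# The four sets are consumed selectively by callers; for example,
# _RedistributeAncillaryFiles below ignores the optional files entirely:
#
#   (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
#
# With redist=True the ssconf file list and the cluster config stay out of
# the result, since ConfigWriter distributes those on its own.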
4284 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4285 """Distribute additional files which are part of the cluster configuration.
4287 ConfigWriter takes care of distributing the config and ssconf files, but
4288 there are more files which should be distributed to all nodes. This function
4289 makes sure those are copied.
4291 @param lu: calling logical unit
4292 @param additional_nodes: list of nodes not in the config to distribute to
4293 @type additional_vm: boolean
4294 @param additional_vm: whether the additional nodes are vm-capable or not
4296 """
4297 # Gather target nodes
4298 cluster = lu.cfg.GetClusterInfo()
4299 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4301 online_nodes = lu.cfg.GetOnlineNodeList()
4302 vm_nodes = lu.cfg.GetVmCapableNodeList()
4304 if additional_nodes is not None:
4305 online_nodes.extend(additional_nodes)
4306 if additional_vm:
4307 vm_nodes.extend(additional_nodes)
4309 # Never distribute to master node
4310 for nodelist in [online_nodes, vm_nodes]:
4311 if master_info.name in nodelist:
4312 nodelist.remove(master_info.name)
4314 # Gather file lists
4315 (files_all, _, files_mc, files_vm) = \
4316 _ComputeAncillaryFiles(cluster, True)
4318 # Never re-distribute configuration file from here
4319 assert not (constants.CLUSTER_CONF_FILE in files_all or
4320 constants.CLUSTER_CONF_FILE in files_vm)
4321 assert not files_mc, "Master candidates not handled in this function"
4323 filemap = [
4324 (online_nodes, files_all),
4325 (vm_nodes, files_vm),
4326 ]
4328 # Upload the files
4329 for (node_list, files) in filemap:
4330 for fname in files:
4331 _UploadHelper(lu, node_list, fname)
4334 class LUClusterRedistConf(NoHooksLU):
4335 """Force the redistribution of cluster configuration.
4337 This is a very simple LU.
4339 """
4340 REQ_BGL = False
4342 def ExpandNames(self):
4343 self.needed_locks = {
4344 locking.LEVEL_NODE: locking.ALL_SET,
4345 }
4346 self.share_locks[locking.LEVEL_NODE] = 1
4348 def Exec(self, feedback_fn):
4349 """Redistribute the configuration.
4351 """
4352 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4353 _RedistributeAncillaryFiles(self)
4356 class LUClusterActivateMasterIp(NoHooksLU):
4357 """Activate the master IP on the master node.
4359 """
4360 def Exec(self, feedback_fn):
4361 """Activate the master IP.
4363 """
4364 master_params = self.cfg.GetMasterNetworkParameters()
4365 ems = self.cfg.GetUseExternalMipScript()
4366 result = self.rpc.call_node_activate_master_ip(master_params.name,
4367 master_params, ems)
4368 result.Raise("Could not activate the master IP")
4371 class LUClusterDeactivateMasterIp(NoHooksLU):
4372 """Deactivate the master IP on the master node.
4374 """
4375 def Exec(self, feedback_fn):
4376 """Deactivate the master IP.
4378 """
4379 master_params = self.cfg.GetMasterNetworkParameters()
4380 ems = self.cfg.GetUseExternalMipScript()
4381 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4382 master_params, ems)
4383 result.Raise("Could not deactivate the master IP")
4386 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4387 """Sleep and poll for an instance's disk to sync.
4389 """
4390 if not instance.disks or disks is not None and not disks:
4391 return True
4393 disks = _ExpandCheckDisks(instance, disks)
4395 if not oneshot:
4396 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4398 node = instance.primary_node
4400 for dev in disks:
4401 lu.cfg.SetDiskID(dev, node)
4403 # TODO: Convert to utils.Retry
4405 retries = 0
4406 degr_retries = 10 # in seconds, as we sleep 1 second each time
4407 while True:
4408 max_time = 0
4409 done = True
4410 cumul_degraded = False
4411 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4412 msg = rstats.fail_msg
4413 if msg:
4414 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4415 retries += 1
4416 if retries >= 10:
4417 raise errors.RemoteError("Can't contact node %s for mirror data,"
4418 " aborting." % node)
4419 time.sleep(6)
4420 continue
4421 rstats = rstats.payload
4422 retries = 0
4423 for i, mstat in enumerate(rstats):
4424 if mstat is None:
4425 lu.LogWarning("Can't compute data for node %s/%s",
4426 node, disks[i].iv_name)
4427 continue
4429 cumul_degraded = (cumul_degraded or
4430 (mstat.is_degraded and mstat.sync_percent is None))
4431 if mstat.sync_percent is not None:
4432 done = False
4433 if mstat.estimated_time is not None:
4434 rem_time = ("%s remaining (estimated)" %
4435 utils.FormatSeconds(mstat.estimated_time))
4436 max_time = mstat.estimated_time
4437 else:
4438 rem_time = "no time estimate"
4439 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4440 (disks[i].iv_name, mstat.sync_percent, rem_time))
4442 # if we're done but degraded, let's do a few small retries, to
4443 # make sure we see a stable and not transient situation; therefore
4444 # we force restart of the loop
4445 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4446 logging.info("Degraded disks found, %d retries left", degr_retries)
4447 degr_retries -= 1
4448 time.sleep(1)
4449 continue
4451 if done or oneshot:
4452 break
4454 time.sleep(min(60, max_time))
4456 if done:
4457 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4458 return not cumul_degraded
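# A condensed sketch of the polling logic above (the TODO suggests moving it
# to utils.Retry); "poll_status" is a hypothetical stand-in for the
# call_blockdev_getmirrorstatus processing:
#
#   degr_retries = 10
#   while True:
#     (done, degraded, max_time) = poll_status()
#     if (done or oneshot) and degraded and degr_retries > 0:
#       degr_retries -= 1   # transiently degraded: re-check once a second
#       time.sleep(1)
#       continue
#     if done or oneshot:
#       break
#     time.sleep(min(60, max_time))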
4461 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4462 """Check that mirrors are not degraded.
4464 The ldisk parameter, if True, will change the test from the
4465 is_degraded attribute (which represents overall non-ok status for
4466 the device(s)) to the ldisk (representing the local storage status).
4468 """
4469 lu.cfg.SetDiskID(dev, node)
4471 result = True
4473 if on_primary or dev.AssembleOnSecondary():
4474 rstats = lu.rpc.call_blockdev_find(node, dev)
4475 msg = rstats.fail_msg
4476 if msg:
4477 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4478 result = False
4479 elif not rstats.payload:
4480 lu.LogWarning("Can't find disk on node %s", node)
4481 result = False
4482 else:
4483 if ldisk:
4484 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4485 else:
4486 result = result and not rstats.payload.is_degraded
4488 if dev.children:
4489 for child in dev.children:
4490 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4492 return result
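# Illustrative call patterns: overall mirror health on the primary versus
# only the local storage status (useful e.g. while replacing disks):
#
#   _CheckDiskConsistency(lu, dev, node, True)               # is_degraded test
#   _CheckDiskConsistency(lu, dev, node, False, ldisk=True)  # LDS_OKAY test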
4495 class LUOobCommand(NoHooksLU):
4496 """Logical unit for OOB handling.
4498 """
4499 REQ_BGL = False
4500 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4502 def ExpandNames(self):
4503 """Gather locks we need.
4505 """
4506 if self.op.node_names:
4507 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4508 lock_names = self.op.node_names
4509 else:
4510 lock_names = locking.ALL_SET
4512 self.needed_locks = {
4513 locking.LEVEL_NODE: lock_names,
4514 }
4516 def CheckPrereq(self):
4517 """Check prerequisites.
4519 This checks:
4520 - the node exists in the configuration
4521 - OOB is supported
4523 Any errors are signaled by raising errors.OpPrereqError.
4525 """
4526 self.nodes = []
4527 self.master_node = self.cfg.GetMasterNode()
4529 assert self.op.power_delay >= 0.0
4531 if self.op.node_names:
4532 if (self.op.command in self._SKIP_MASTER and
4533 self.master_node in self.op.node_names):
4534 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4535 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4537 if master_oob_handler:
4538 additional_text = ("run '%s %s %s' if you want to operate on the"
4539 " master regardless") % (master_oob_handler,
4540 self.op.command,
4541 self.master_node)
4542 else:
4543 additional_text = "it does not support out-of-band operations"
4545 raise errors.OpPrereqError(("Operating on the master node %s is not"
4546 " allowed for %s; %s") %
4547 (self.master_node, self.op.command,
4548 additional_text), errors.ECODE_INVAL)
4549 else:
4550 self.op.node_names = self.cfg.GetNodeList()
4551 if self.op.command in self._SKIP_MASTER:
4552 self.op.node_names.remove(self.master_node)
4554 if self.op.command in self._SKIP_MASTER:
4555 assert self.master_node not in self.op.node_names
4557 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4558 if node is None:
4559 raise errors.OpPrereqError("Node %s not found" % node_name,
4560 errors.ECODE_NOENT)
4561 else:
4562 self.nodes.append(node)
4564 if (not self.op.ignore_status and
4565 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4566 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4567 " not marked offline") % node_name,
4568 errors.ECODE_STATE)
4570 def Exec(self, feedback_fn):
4571 """Execute OOB and return result if we expect any.
4573 """
4574 master_node = self.master_node
4575 ret = []
4577 for idx, node in enumerate(utils.NiceSort(self.nodes,
4578 key=lambda node: node.name)):
4579 node_entry = [(constants.RS_NORMAL, node.name)]
4580 ret.append(node_entry)
4582 oob_program = _SupportsOob(self.cfg, node)
4584 if not oob_program:
4585 node_entry.append((constants.RS_UNAVAIL, None))
4586 continue
4588 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4589 self.op.command, oob_program, node.name)
4590 result = self.rpc.call_run_oob(master_node, oob_program,
4591 self.op.command, node.name,
4592 self.op.timeout)
4594 if result.fail_msg:
4595 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4596 node.name, result.fail_msg)
4597 node_entry.append((constants.RS_NODATA, None))
4598 else:
4599 try:
4600 self._CheckPayload(result)
4601 except errors.OpExecError, err:
4602 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4603 node.name, err)
4604 node_entry.append((constants.RS_NODATA, None))
4605 else:
4606 if self.op.command == constants.OOB_HEALTH:
4607 # For health we should log important events
4608 for item, status in result.payload:
4609 if status in [constants.OOB_STATUS_WARNING,
4610 constants.OOB_STATUS_CRITICAL]:
4611 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4612 item, node.name, status)
4614 if self.op.command == constants.OOB_POWER_ON:
4615 node.powered = True
4616 elif self.op.command == constants.OOB_POWER_OFF:
4617 node.powered = False
4618 elif self.op.command == constants.OOB_POWER_STATUS:
4619 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4620 if powered != node.powered:
4621 logging.warning(("Recorded power state (%s) of node '%s' does not"
4622 " match actual power state (%s)"), node.powered,
4623 node.name, powered)
4625 # For configuration changing commands we should update the node
4626 if self.op.command in (constants.OOB_POWER_ON,
4627 constants.OOB_POWER_OFF):
4628 self.cfg.Update(node, feedback_fn)
4630 node_entry.append((constants.RS_NORMAL, result.payload))
4632 if (self.op.command == constants.OOB_POWER_ON and
4633 idx < len(self.nodes) - 1):
4634 time.sleep(self.op.power_delay)
4636 return ret
4638 def _CheckPayload(self, result):
4639 """Checks if the payload is valid.
4641 @param result: RPC result
4642 @raises errors.OpExecError: If payload is not valid
4644 """
4645 errs = []
4646 if self.op.command == constants.OOB_HEALTH:
4647 if not isinstance(result.payload, list):
4648 errs.append("command 'health' is expected to return a list but got %s" %
4649 type(result.payload))
4650 else:
4651 for item, status in result.payload:
4652 if status not in constants.OOB_STATUSES:
4653 errs.append("health item '%s' has invalid status '%s'" %
4654 (item, status))
4656 if self.op.command == constants.OOB_POWER_STATUS:
4657 if not isinstance(result.payload, dict):
4658 errs.append("power-status is expected to return a dict but got %s" %
4659 type(result.payload))
4661 if self.op.command in [
4662 constants.OOB_POWER_ON,
4663 constants.OOB_POWER_OFF,
4664 constants.OOB_POWER_CYCLE,
4665 ]:
4666 if result.payload is not None:
4667 errs.append("%s is expected to not return payload but got '%s'" %
4668 (self.op.command, result.payload))
4670 if errs:
4671 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4672 utils.CommaJoin(errs))
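# Summary of the payload shapes enforced above, per OOB command:
#
#   OOB_HEALTH              -> list of (item, status), status in OOB_STATUSES
#   OOB_POWER_STATUS        -> dict containing OOB_POWER_STATUS_POWERED
#   OOB_POWER_ON/OFF/CYCLE  -> no payload (None)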
4675 class _OsQuery(_QueryBase):
4676 FIELDS = query.OS_FIELDS
4678 def ExpandNames(self, lu):
4679 # Lock all nodes in shared mode
4680 # Temporary removal of locks, should be reverted later
4681 # TODO: reintroduce locks when they are lighter-weight
4682 lu.needed_locks = {}
4683 #self.share_locks[locking.LEVEL_NODE] = 1
4684 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4686 # The following variables interact with _QueryBase._GetNames
4687 if self.names:
4688 self.wanted = self.names
4689 else:
4690 self.wanted = locking.ALL_SET
4692 self.do_locking = self.use_locking
4694 def DeclareLocks(self, lu, level):
4695 pass
4697 @staticmethod
4698 def _DiagnoseByOS(rlist):
4699 """Remaps a per-node return list into an a per-os per-node dictionary
4701 @param rlist: a map with node names as keys and OS objects as values
4703 @rtype: dict
4704 @return: a dictionary with osnames as keys and as value another
4705 map, with nodes as keys and tuples of (path, status, diagnose,
4706 variants, parameters, api_versions) as values, eg::
4708 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4709 (/srv/..., False, "invalid api")],
4710 "node2": [(/srv/..., True, "", [], [])]}
4715 # we build here the list of nodes that didn't fail the RPC (at RPC
4716 # level), so that nodes with a non-responding node daemon don't
4717 # make all OSes invalid
4718 good_nodes = [node_name for node_name in rlist
4719 if not rlist[node_name].fail_msg]
4720 for node_name, nr in rlist.items():
4721 if nr.fail_msg or not nr.payload:
4722 continue
4723 for (name, path, status, diagnose, variants,
4724 params, api_versions) in nr.payload:
4725 if name not in all_os:
4726 # build a list of nodes for this os containing empty lists
4727 # for each node in node_list
4728 all_os[name] = {}
4729 for nname in good_nodes:
4730 all_os[name][nname] = []
4731 # convert params from [name, help] to (name, help)
4732 params = [tuple(v) for v in params]
4733 all_os[name][node_name].append((path, status, diagnose,
4734 variants, params, api_versions))
4736 return all_os
4737 def _GetQueryData(self, lu):
4738 """Computes the list of nodes and their attributes.
4741 # Locking is not used
4742 assert not (compat.any(lu.glm.is_owned(level)
4743 for level in locking.LEVELS
4744 if level != locking.LEVEL_CLUSTER) or
4745 self.do_locking or self.use_locking)
4747 valid_nodes = [node.name
4748 for node in lu.cfg.GetAllNodesInfo().values()
4749 if not node.offline and node.vm_capable]
4750 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4751 cluster = lu.cfg.GetClusterInfo()
4753 data = {}
4755 for (os_name, os_data) in pol.items():
4756 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4757 hidden=(os_name in cluster.hidden_os),
4758 blacklisted=(os_name in cluster.blacklisted_os))
4760 variants = set()
4761 parameters = set()
4762 api_versions = set()
4764 for idx, osl in enumerate(os_data.values()):
4765 info.valid = bool(info.valid and osl and osl[0][1])
4766 if not info.valid:
4767 break
4769 (node_variants, node_params, node_api) = osl[0][3:6]
4770 if idx == 0:
4771 # First entry
4772 variants.update(node_variants)
4773 parameters.update(node_params)
4774 api_versions.update(node_api)
4775 else:
4776 # Filter out inconsistent values
4777 variants.intersection_update(node_variants)
4778 parameters.intersection_update(node_params)
4779 api_versions.intersection_update(node_api)
4781 info.variants = list(variants)
4782 info.parameters = list(parameters)
4783 info.api_versions = list(api_versions)
4785 data[os_name] = info
4787 # Prepare data in requested order
4788 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4789 if name in data]
4792 class LUOsDiagnose(NoHooksLU):
4793 """Logical unit for OS diagnose/query.
4799 def _BuildFilter(fields, names):
4800 """Builds a filter for querying OSes.
4803 name_filter = qlang.MakeSimpleFilter("name", names)
4805 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4806 # respective field is not requested
4807 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4808 for fname in ["hidden", "blacklisted"]
4809 if fname not in fields]
4810 if "valid" not in fields:
4811 status_filter.append([qlang.OP_TRUE, "valid"])
4813 if status_filter:
4814 status_filter.insert(0, qlang.OP_AND)
4815 else:
4816 status_filter = None
4818 if name_filter and status_filter:
4819 return [qlang.OP_AND, name_filter, status_filter]
4820 elif name_filter:
4821 return name_filter
4822 else:
4823 return status_filter
4825 def CheckArguments(self):
4826 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4827 self.op.output_fields, False)
4829 def ExpandNames(self):
4830 self.oq.ExpandNames(self)
4832 def Exec(self, feedback_fn):
4833 return self.oq.OldStyleQuery(self)
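# Illustrative example (assuming qlang.MakeSimpleFilter returns None for an
# empty name list): requesting only the "name" field makes _BuildFilter hide
# hidden, blacklisted and invalid OSes:
#
#   LUOsDiagnose._BuildFilter(["name"], [])
#   # -> [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#   #             [OP_NOT, [OP_TRUE, "blacklisted"]], [OP_TRUE, "valid"]]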
4836 class LUNodeRemove(LogicalUnit):
4837 """Logical unit for removing a node.
4840 HPATH = "node-remove"
4841 HTYPE = constants.HTYPE_NODE
4843 def BuildHooksEnv(self):
4844 """Build hooks env.
4846 This doesn't run on the target node in the pre phase as a failed
4847 node would then be impossible to remove.
4849 """
4850 return {
4851 "OP_TARGET": self.op.node_name,
4852 "NODE_NAME": self.op.node_name,
4853 }
4855 def BuildHooksNodes(self):
4856 """Build hooks nodes.
4859 all_nodes = self.cfg.GetNodeList()
4861 all_nodes.remove(self.op.node_name)
4863 logging.warning("Node '%s', which is about to be removed, was not found"
4864 " in the list of all nodes", self.op.node_name)
4865 return (all_nodes, all_nodes)
4867 def CheckPrereq(self):
4868 """Check prerequisites.
4871 - the node exists in the configuration
4872 - it does not have primary or secondary instances
4873 - it's not the master
4875 Any errors are signaled by raising errors.OpPrereqError.
4877 """
4878 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4879 node = self.cfg.GetNodeInfo(self.op.node_name)
4880 assert node is not None
4882 masternode = self.cfg.GetMasterNode()
4883 if node.name == masternode:
4884 raise errors.OpPrereqError("Node is the master node, failover to another"
4885 " node is required", errors.ECODE_INVAL)
4887 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4888 if node.name in instance.all_nodes:
4889 raise errors.OpPrereqError("Instance %s is still running on the node,"
4890 " please remove first" % instance_name,
4892 self.op.node_name = node.name
4895 def Exec(self, feedback_fn):
4896 """Removes the node from the cluster.
4900 logging.info("Stopping the node daemon and removing configs from node %s",
4903 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4905 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4906 "Not owning BGL"
4908 # Promote nodes to master candidate as needed
4909 _AdjustCandidatePool(self, exceptions=[node.name])
4910 self.context.RemoveNode(node.name)
4912 # Run post hooks on the node before it's removed
4913 _RunPostHook(self, node.name)
4915 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4916 msg = result.fail_msg
4918 self.LogWarning("Errors encountered on the remote node while leaving"
4919 " the cluster: %s", msg)
4921 # Remove node from our /etc/hosts
4922 if self.cfg.GetClusterInfo().modify_etc_hosts:
4923 master_node = self.cfg.GetMasterNode()
4924 result = self.rpc.call_etc_hosts_modify(master_node,
4925 constants.ETC_HOSTS_REMOVE,
4926 node.name, None)
4927 result.Raise("Can't update hosts file with new host data")
4928 _RedistributeAncillaryFiles(self)
4931 class _NodeQuery(_QueryBase):
4932 FIELDS = query.NODE_FIELDS
4934 def ExpandNames(self, lu):
4935 lu.needed_locks = {}
4936 lu.share_locks = _ShareAll()
4938 if self.names:
4939 self.wanted = _GetWantedNodes(lu, self.names)
4940 else:
4941 self.wanted = locking.ALL_SET
4943 self.do_locking = (self.use_locking and
4944 query.NQ_LIVE in self.requested_data)
4946 if self.do_locking:
4947 # If any non-static field is requested we need to lock the nodes
4948 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4950 def DeclareLocks(self, lu, level):
4951 pass
4953 def _GetQueryData(self, lu):
4954 """Computes the list of nodes and their attributes.
4957 all_info = lu.cfg.GetAllNodesInfo()
4959 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4961 # Gather data as requested
4962 if query.NQ_LIVE in self.requested_data:
4963 # filter out non-vm_capable nodes
4964 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4966 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4967 [lu.cfg.GetHypervisorType()])
4968 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4969 for (name, nresult) in node_data.items()
4970 if not nresult.fail_msg and nresult.payload)
4971 else:
4972 live_data = None
4974 if query.NQ_INST in self.requested_data:
4975 node_to_primary = dict([(name, set()) for name in nodenames])
4976 node_to_secondary = dict([(name, set()) for name in nodenames])
4978 inst_data = lu.cfg.GetAllInstancesInfo()
4980 for inst in inst_data.values():
4981 if inst.primary_node in node_to_primary:
4982 node_to_primary[inst.primary_node].add(inst.name)
4983 for secnode in inst.secondary_nodes:
4984 if secnode in node_to_secondary:
4985 node_to_secondary[secnode].add(inst.name)
4986 else:
4987 node_to_primary = None
4988 node_to_secondary = None
4990 if query.NQ_OOB in self.requested_data:
4991 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4992 for name, node in all_info.iteritems())
4993 else:
4994 oob_support = None
4996 if query.NQ_GROUP in self.requested_data:
4997 groups = lu.cfg.GetAllNodeGroupsInfo()
4998 else:
4999 groups = {}
5001 return query.NodeQueryData([all_info[name] for name in nodenames],
5002 live_data, lu.cfg.GetMasterNode(),
5003 node_to_primary, node_to_secondary, groups,
5004 oob_support, lu.cfg.GetClusterInfo())
5007 class LUNodeQuery(NoHooksLU):
5008 """Logical unit for querying nodes.
5011 # pylint: disable=W0142
5014 def CheckArguments(self):
5015 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5016 self.op.output_fields, self.op.use_locking)
5018 def ExpandNames(self):
5019 self.nq.ExpandNames(self)
5021 def DeclareLocks(self, level):
5022 self.nq.DeclareLocks(self, level)
5024 def Exec(self, feedback_fn):
5025 return self.nq.OldStyleQuery(self)
5028 class LUNodeQueryvols(NoHooksLU):
5029 """Logical unit for getting volumes on node(s).
5033 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5034 _FIELDS_STATIC = utils.FieldSet("node")
5036 def CheckArguments(self):
5037 _CheckOutputFields(static=self._FIELDS_STATIC,
5038 dynamic=self._FIELDS_DYNAMIC,
5039 selected=self.op.output_fields)
5041 def ExpandNames(self):
5042 self.share_locks = _ShareAll()
5043 self.needed_locks = {}
5045 if not self.op.nodes:
5046 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5047 else:
5048 self.needed_locks[locking.LEVEL_NODE] = \
5049 _GetWantedNodes(self, self.op.nodes)
5051 def Exec(self, feedback_fn):
5052 """Computes the list of nodes and their attributes.
5055 nodenames = self.owned_locks(locking.LEVEL_NODE)
5056 volumes = self.rpc.call_node_volumes(nodenames)
5058 ilist = self.cfg.GetAllInstancesInfo()
5059 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5061 output = []
5062 for node in nodenames:
5063 nresult = volumes[node]
5064 if nresult.offline:
5065 continue
5066 msg = nresult.fail_msg
5067 if msg:
5068 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5069 continue
5071 node_vols = sorted(nresult.payload,
5072 key=operator.itemgetter("dev"))
5074 for vol in node_vols:
5075 node_output = []
5076 for field in self.op.output_fields:
5077 if field == "node":
5078 val = node
5079 elif field == "phys":
5080 val = vol["dev"]
5081 elif field == "vg":
5082 val = vol["vg"]
5083 elif field == "name":
5084 val = vol["name"]
5085 elif field == "size":
5086 val = int(float(vol["size"]))
5087 elif field == "instance":
5088 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5089 else:
5090 raise errors.ParameterError(field)
5091 node_output.append(str(val))
5093 output.append(node_output)
5095 return output
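# Illustrative result shape for output_fields=["node", "name", "size"]; all
# values are stringified by the node_output.append(str(val)) above:
#
#   [["node1.example.com", "disk0", "10240"], ...]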
5098 class LUNodeQueryStorage(NoHooksLU):
5099 """Logical unit for getting information on storage units on node(s).
5102 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5105 def CheckArguments(self):
5106 _CheckOutputFields(static=self._FIELDS_STATIC,
5107 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5108 selected=self.op.output_fields)
5110 def ExpandNames(self):
5111 self.share_locks = _ShareAll()
5112 self.needed_locks = {}
5114 if self.op.nodes:
5115 self.needed_locks[locking.LEVEL_NODE] = \
5116 _GetWantedNodes(self, self.op.nodes)
5117 else:
5118 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5120 def Exec(self, feedback_fn):
5121 """Computes the list of nodes and their attributes.
5124 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5126 # Always get name to sort by
5127 if constants.SF_NAME in self.op.output_fields:
5128 fields = self.op.output_fields[:]
5129 else:
5130 fields = [constants.SF_NAME] + self.op.output_fields
5132 # Never ask for node or type as it's only known to the LU
5133 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5134 while extra in fields:
5135 fields.remove(extra)
5137 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5138 name_idx = field_idx[constants.SF_NAME]
5140 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5141 data = self.rpc.call_storage_list(self.nodes,
5142 self.op.storage_type, st_args,
5143 self.op.name, fields)
5145 result = []
5147 for node in utils.NiceSort(self.nodes):
5148 nresult = data[node]
5149 if nresult.offline:
5150 continue
5152 msg = nresult.fail_msg
5153 if msg:
5154 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5155 continue
5157 rows = dict([(row[name_idx], row) for row in nresult.payload])
5159 for name in utils.NiceSort(rows.keys()):
5160 row = rows[name]
5162 out = []
5164 for field in self.op.output_fields:
5165 if field == constants.SF_NODE:
5166 val = node
5167 elif field == constants.SF_TYPE:
5168 val = self.op.storage_type
5169 elif field in field_idx:
5170 val = row[field_idx[field]]
5172 raise errors.ParameterError(field)
5174 out.append(val)
5176 result.append(out)
5178 return result
5181 class _InstanceQuery(_QueryBase):
5182 FIELDS = query.INSTANCE_FIELDS
5184 def ExpandNames(self, lu):
5185 lu.needed_locks = {}
5186 lu.share_locks = _ShareAll()
5188 if self.names:
5189 self.wanted = _GetWantedInstances(lu, self.names)
5190 else:
5191 self.wanted = locking.ALL_SET
5193 self.do_locking = (self.use_locking and
5194 query.IQ_LIVE in self.requested_data)
5195 if self.do_locking:
5196 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5197 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5198 lu.needed_locks[locking.LEVEL_NODE] = []
5199 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5201 self.do_grouplocks = (self.do_locking and
5202 query.IQ_NODES in self.requested_data)
5204 def DeclareLocks(self, lu, level):
5205 if self.do_locking:
5206 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5207 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5209 # Lock all groups used by instances optimistically; this requires going
5210 # via the node before it's locked, requiring verification later on
5211 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5212 set(group_uuid
5213 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5214 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5215 elif level == locking.LEVEL_NODE:
5216 lu._LockInstancesNodes() # pylint: disable=W0212
5218 @staticmethod
5219 def _CheckGroupLocks(lu):
5220 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5221 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5223 # Check if node groups for locked instances are still correct
5224 for instance_name in owned_instances:
5225 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5227 def _GetQueryData(self, lu):
5228 """Computes the list of instances and their attributes.
5231 if self.do_grouplocks:
5232 self._CheckGroupLocks(lu)
5234 cluster = lu.cfg.GetClusterInfo()
5235 all_info = lu.cfg.GetAllInstancesInfo()
5237 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5239 instance_list = [all_info[name] for name in instance_names]
5240 nodes = frozenset(itertools.chain(*(inst.all_nodes
5241 for inst in instance_list)))
5242 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5243 bad_nodes = []
5244 offline_nodes = []
5245 wrongnode_inst = set()
5247 # Gather data as requested
5248 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5250 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5252 result = node_data[name]
5254 # offline nodes will be in both lists
5255 assert result.fail_msg
5256 offline_nodes.append(name)
5258 bad_nodes.append(name)
5259 elif result.payload:
5260 for inst in result.payload:
5261 if inst in all_info:
5262 if all_info[inst].primary_node == name:
5263 live_data.update(result.payload)
5264 else:
5265 wrongnode_inst.add(inst)
5266 else:
5267 # orphan instance; we don't list it here as we don't
5268 # handle this case yet in the output of instance listing
5269 logging.warning("Orphan instance '%s' found on node %s",
5270 inst, name)
5271 # else no instance is alive
5272 else:
5273 live_data = None
5275 if query.IQ_DISKUSAGE in self.requested_data:
5276 disk_usage = dict((inst.name,
5277 _ComputeDiskSize(inst.disk_template,
5278 [{constants.IDISK_SIZE: disk.size}
5279 for disk in inst.disks]))
5280 for inst in instance_list)
5281 else:
5282 disk_usage = None
5284 if query.IQ_CONSOLE in self.requested_data:
5285 consinfo = {}
5286 for inst in instance_list:
5287 if inst.name in live_data:
5288 # Instance is running
5289 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5290 else:
5291 consinfo[inst.name] = None
5292 assert set(consinfo.keys()) == set(instance_names)
5293 else:
5294 consinfo = None
5296 if query.IQ_NODES in self.requested_data:
5297 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5298 instance_list)))
5299 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5300 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5301 for uuid in set(map(operator.attrgetter("group"),
5307 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5308 disk_usage, offline_nodes, bad_nodes,
5309 live_data, wrongnode_inst, consinfo,
5310 nodes, groups)
5313 class LUQuery(NoHooksLU):
5314 """Query for resources/items of a certain kind.
5317 # pylint: disable=W0142
5320 def CheckArguments(self):
5321 qcls = _GetQueryImplementation(self.op.what)
5323 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5325 def ExpandNames(self):
5326 self.impl.ExpandNames(self)
5328 def DeclareLocks(self, level):
5329 self.impl.DeclareLocks(self, level)
5331 def Exec(self, feedback_fn):
5332 return self.impl.NewStyleQuery(self)
5335 class LUQueryFields(NoHooksLU):
5336 """Query for resources/items of a certain kind.
5339 # pylint: disable=W0142
5342 def CheckArguments(self):
5343 self.qcls = _GetQueryImplementation(self.op.what)
5345 def ExpandNames(self):
5346 self.needed_locks = {}
5348 def Exec(self, feedback_fn):
5349 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5352 class LUNodeModifyStorage(NoHooksLU):
5353 """Logical unit for modifying a storage volume on a node.
5358 def CheckArguments(self):
5359 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5361 storage_type = self.op.storage_type
5363 try:
5364 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5365 except KeyError:
5366 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5367 " modified" % storage_type,
5368 errors.ECODE_INVAL)
5370 diff = set(self.op.changes.keys()) - modifiable
5371 if diff:
5372 raise errors.OpPrereqError("The following fields can not be modified for"
5373 " storage units of type '%s': %r" %
5374 (storage_type, list(diff)),
5375 errors.ECODE_INVAL)
5377 def ExpandNames(self):
5378 self.needed_locks = {
5379 locking.LEVEL_NODE: self.op.node_name,
5380 }
5382 def Exec(self, feedback_fn):
5383 """Computes the list of nodes and their attributes.
5386 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5387 result = self.rpc.call_storage_modify(self.op.node_name,
5388 self.op.storage_type, st_args,
5389 self.op.name, self.op.changes)
5390 result.Raise("Failed to modify storage unit '%s' on %s" %
5391 (self.op.name, self.op.node_name))
5394 class LUNodeAdd(LogicalUnit):
5395 """Logical unit for adding node to the cluster.
5399 HTYPE = constants.HTYPE_NODE
5400 _NFLAGS = ["master_capable", "vm_capable"]
5402 def CheckArguments(self):
5403 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5404 # validate/normalize the node name
5405 self.hostname = netutils.GetHostname(name=self.op.node_name,
5406 family=self.primary_ip_family)
5407 self.op.node_name = self.hostname.name
5409 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5410 raise errors.OpPrereqError("Cannot readd the master node",
5413 if self.op.readd and self.op.group:
5414 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5415 " being readded", errors.ECODE_INVAL)
5417 def BuildHooksEnv(self):
5418 """Build hooks env.
5420 This will run on all nodes before, and on all nodes + the new node after.
5422 """
5423 return {
5424 "OP_TARGET": self.op.node_name,
5425 "NODE_NAME": self.op.node_name,
5426 "NODE_PIP": self.op.primary_ip,
5427 "NODE_SIP": self.op.secondary_ip,
5428 "MASTER_CAPABLE": str(self.op.master_capable),
5429 "VM_CAPABLE": str(self.op.vm_capable),
5432 def BuildHooksNodes(self):
5433 """Build hooks nodes.
5436 # Exclude added node
5437 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5438 post_nodes = pre_nodes + [self.op.node_name, ]
5440 return (pre_nodes, post_nodes)
5442 def CheckPrereq(self):
5443 """Check prerequisites.
5446 - the new node is not already in the config
5448 - its parameters (single/dual homed) matches the cluster
5450 Any errors are signaled by raising errors.OpPrereqError.
5454 hostname = self.hostname
5455 node = hostname.name
5456 primary_ip = self.op.primary_ip = hostname.ip
5457 if self.op.secondary_ip is None:
5458 if self.primary_ip_family == netutils.IP6Address.family:
5459 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5460 " IPv4 address must be given as secondary",
5462 self.op.secondary_ip = primary_ip
5464 secondary_ip = self.op.secondary_ip
5465 if not netutils.IP4Address.IsValid(secondary_ip):
5466 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5467 " address" % secondary_ip, errors.ECODE_INVAL)
5469 node_list = cfg.GetNodeList()
5470 if not self.op.readd and node in node_list:
5471 raise errors.OpPrereqError("Node %s is already in the configuration" %
5472 node, errors.ECODE_EXISTS)
5473 elif self.op.readd and node not in node_list:
5474 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5477 self.changed_primary_ip = False
5479 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5480 if self.op.readd and node == existing_node_name:
5481 if existing_node.secondary_ip != secondary_ip:
5482 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5483 " address configuration as before",
5485 if existing_node.primary_ip != primary_ip:
5486 self.changed_primary_ip = True
5488 continue
5490 if (existing_node.primary_ip == primary_ip or
5491 existing_node.secondary_ip == primary_ip or
5492 existing_node.primary_ip == secondary_ip or
5493 existing_node.secondary_ip == secondary_ip):
5494 raise errors.OpPrereqError("New node ip address(es) conflict with"
5495 " existing node %s" % existing_node.name,
5496 errors.ECODE_NOTUNIQUE)
5498 # After this 'if' block, None is no longer a valid value for the
5499 # _capable op attributes
5500 if self.op.readd:
5501 old_node = self.cfg.GetNodeInfo(node)
5502 assert old_node is not None, "Can't retrieve locked node %s" % node
5503 for attr in self._NFLAGS:
5504 if getattr(self.op, attr) is None:
5505 setattr(self.op, attr, getattr(old_node, attr))
5506 else:
5507 for attr in self._NFLAGS:
5508 if getattr(self.op, attr) is None:
5509 setattr(self.op, attr, True)
5511 if self.op.readd and not self.op.vm_capable:
5512 pri, sec = cfg.GetNodeInstances(node)
5513 if pri or sec:
5514 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5515 " flag set to false, but it already holds"
5516 " instances" % node,
5517 errors.ECODE_STATE)
5519 # check that the type of the node (single versus dual homed) is the
5520 # same as for the master
5521 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5522 master_singlehomed = myself.secondary_ip == myself.primary_ip
5523 newbie_singlehomed = secondary_ip == primary_ip
5524 if master_singlehomed != newbie_singlehomed:
5525 if master_singlehomed:
5526 raise errors.OpPrereqError("The master has no secondary ip but the"
5527 " new node has one",
5530 raise errors.OpPrereqError("The master has a secondary ip but the"
5531 " new node doesn't have one",
5534 # checks reachability
5535 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5536 raise errors.OpPrereqError("Node not reachable by ping",
5537 errors.ECODE_ENVIRON)
5539 if not newbie_singlehomed:
5540 # check reachability from my secondary ip to newbie's secondary ip
5541 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5542 source=myself.secondary_ip):
5543 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5544 " based ping to node daemon port",
5545 errors.ECODE_ENVIRON)
5547 if self.op.readd:
5548 exceptions = [node]
5549 else:
5550 exceptions = []
5552 if self.op.master_capable:
5553 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5554 else:
5555 self.master_candidate = False
5557 if self.op.readd:
5558 self.new_node = old_node
5559 else:
5560 node_group = cfg.LookupNodeGroup(self.op.group)
5561 self.new_node = objects.Node(name=node,
5562 primary_ip=primary_ip,
5563 secondary_ip=secondary_ip,
5564 master_candidate=self.master_candidate,
5565 offline=False, drained=False,
5566 group=node_group)
5568 if self.op.ndparams:
5569 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5571 if self.op.hv_state:
5572 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5574 if self.op.disk_state:
5575 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5577 def Exec(self, feedback_fn):
5578 """Adds the new node to the cluster.
5581 new_node = self.new_node
5582 node = new_node.name
5584 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5585 "Not owning BGL"
5587 # We adding a new node so we assume it's powered
5588 new_node.powered = True
5590 # for re-adds, reset the offline/drained/master-candidate flags;
5591 # we need to reset here, otherwise offline would prevent RPC calls
5592 # later in the procedure; this also means that if the re-add
5593 # fails, we are left with a non-offlined, broken node
5594 if self.op.readd:
5595 new_node.drained = new_node.offline = False # pylint: disable=W0201
5596 self.LogInfo("Readding a node, the offline/drained flags were reset")
5597 # if we demote the node, we do cleanup later in the procedure
5598 new_node.master_candidate = self.master_candidate
5599 if self.changed_primary_ip:
5600 new_node.primary_ip = self.op.primary_ip
5602 # copy the master/vm_capable flags
5603 for attr in self._NFLAGS:
5604 setattr(new_node, attr, getattr(self.op, attr))
5606 # notify the user about any possible mc promotion
5607 if new_node.master_candidate:
5608 self.LogInfo("Node will be a master candidate")
5610 if self.op.ndparams:
5611 new_node.ndparams = self.op.ndparams
5612 else:
5613 new_node.ndparams = {}
5615 if self.op.hv_state:
5616 new_node.hv_state_static = self.new_hv_state
5618 if self.op.disk_state:
5619 new_node.disk_state_static = self.new_disk_state
5621 # check connectivity
5622 result = self.rpc.call_version([node])[node]
5623 result.Raise("Can't get version information from node %s" % node)
5624 if constants.PROTOCOL_VERSION == result.payload:
5625 logging.info("Communication to node %s fine, sw version %s match",
5626 node, result.payload)
5628 raise errors.OpExecError("Version mismatch master version %s,"
5629 " node version %s" %
5630 (constants.PROTOCOL_VERSION, result.payload))
5632 # Add node to our /etc/hosts, and add key to known_hosts
5633 if self.cfg.GetClusterInfo().modify_etc_hosts:
5634 master_node = self.cfg.GetMasterNode()
5635 result = self.rpc.call_etc_hosts_modify(master_node,
5636 constants.ETC_HOSTS_ADD,
5637 self.hostname.name,
5638 self.hostname.ip)
5639 result.Raise("Can't update hosts file with new host data")
5641 if new_node.secondary_ip != new_node.primary_ip:
5642 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5643 False)
5645 node_verify_list = [self.cfg.GetMasterNode()]
5646 node_verify_param = {
5647 constants.NV_NODELIST: ([node], {}),
5648 # TODO: do a node-net-test as well?
5649 }
5651 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5652 self.cfg.GetClusterName())
5653 for verifier in node_verify_list:
5654 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5655 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5656 if nl_payload:
5657 for failed in nl_payload:
5658 feedback_fn("ssh/hostname verification failed"
5659 " (checking from %s): %s" %
5660 (verifier, nl_payload[failed]))
5661 raise errors.OpExecError("ssh/hostname verification failed")
5663 if self.op.readd:
5664 _RedistributeAncillaryFiles(self)
5665 self.context.ReaddNode(new_node)
5666 # make sure we redistribute the config
5667 self.cfg.Update(new_node, feedback_fn)
5668 # and make sure the new node will not have old files around
5669 if not new_node.master_candidate:
5670 result = self.rpc.call_node_demote_from_mc(new_node.name)
5671 msg = result.fail_msg
5672 if msg:
5673 self.LogWarning("Node failed to demote itself from master"
5674 " candidate status: %s" % msg)
5676 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5677 additional_vm=self.op.vm_capable)
5678 self.context.AddNode(new_node, self.proc.GetECId())
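# Editor's note (illustrative summary, not original code): the branch above
# distinguishes re-adds from first-time adds. A re-added node goes through
# self.context.ReaddNode() plus a config update, with an explicit
# node_demote_from_mc RPC when it is no longer a master candidate, while a
# genuinely new node is registered via self.context.AddNode() after the
# ancillary files have been redistributed to it.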
5681 class LUNodeSetParams(LogicalUnit):
5682 """Modifies the parameters of a node.
5684 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5685 to the node role (as _ROLE_*)
5686 @cvar _R2F: a dictionary from node role to tuples of flags
5687 @cvar _FLAGS: a list of attribute names corresponding to the flags
5690 HPATH = "node-modify"
5691 HTYPE = constants.HTYPE_NODE
5693 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5694 _F2R = {
5695 (True, False, False): _ROLE_CANDIDATE,
5696 (False, True, False): _ROLE_DRAINED,
5697 (False, False, True): _ROLE_OFFLINE,
5698 (False, False, False): _ROLE_REGULAR,
5699 }
5700 _R2F = dict((v, k) for k, v in _F2R.items())
5701 _FLAGS = ["master_candidate", "drained", "offline"]
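# Illustrative example (editor's addition): the tables above form a
# bijection between flag tuples and roles, e.g.
#
#   _F2R[(True, False, False)] == _ROLE_CANDIDATE
#   _R2F[_ROLE_REGULAR] == (False, False, False)
#
# and zip(_R2F[role], _FLAGS) pairs each flag value with the attribute
# name it belongs to.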
5703 def CheckArguments(self):
5704 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5705 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5706 self.op.master_capable, self.op.vm_capable,
5707 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5709 if all_mods.count(None) == len(all_mods):
5710 raise errors.OpPrereqError("Please pass at least one modification",
5712 if all_mods.count(True) > 1:
5713 raise errors.OpPrereqError("Can't set the node into more than one"
5714 " state at the same time",
5717 # Boolean value that tells us whether we might be demoting from MC
5718 self.might_demote = (self.op.master_candidate == False or
5719 self.op.offline == True or
5720 self.op.drained == True or
5721 self.op.master_capable == False)
5723 if self.op.secondary_ip:
5724 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5725 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5726 " address" % self.op.secondary_ip,
5729 self.lock_all = self.op.auto_promote and self.might_demote
5730 self.lock_instances = self.op.secondary_ip is not None
5732 def _InstanceFilter(self, instance):
5733 """Filter for getting affected instances.
5736 return (instance.disk_template in constants.DTS_INT_MIRROR and
5737 self.op.node_name in instance.all_nodes)
5739 def ExpandNames(self):
5741 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5743 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5745 # Since modifying a node can have severe effects on currently running
5746 # operations, the resource lock is acquired at least in shared mode
5747 self.needed_locks[locking.LEVEL_NODE_RES] = \
5748 self.needed_locks[locking.LEVEL_NODE]
5750 # Get node resource and instance locks in shared mode; they are not used
5751 # for anything but read-only access
5752 self.share_locks[locking.LEVEL_NODE_RES] = 1
5753 self.share_locks[locking.LEVEL_INSTANCE] = 1
5755 if self.lock_instances:
5756 self.needed_locks[locking.LEVEL_INSTANCE] = \
5757 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
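# Illustrative example (editor's addition): for node "node1",
# _InstanceFilter matches only internally mirrored instances that use the
# node, so a DRBD instance with nodes (node1, node2) gets locked here,
# while a plain-LVM instance on node1 does not, as its disks are not
# affected by a secondary IP change.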
5759 def BuildHooksEnv(self):
5762 This runs on the master node.
5766 "OP_TARGET": self.op.node_name,
5767 "MASTER_CANDIDATE": str(self.op.master_candidate),
5768 "OFFLINE": str(self.op.offline),
5769 "DRAINED": str(self.op.drained),
5770 "MASTER_CAPABLE": str(self.op.master_capable),
5771 "VM_CAPABLE": str(self.op.vm_capable),
5774 def BuildHooksNodes(self):
5775 """Build hooks nodes.
5778 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5781 def CheckPrereq(self):
5782 """Check prerequisites.
5784 This only checks the instance list against the existing names.
5787 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5789 if self.lock_instances:
5790 affected_instances = \
5791 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5793 # Verify instance locks
5794 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5795 wanted_instances = frozenset(affected_instances.keys())
5796 if wanted_instances - owned_instances:
5797 raise errors.OpPrereqError("Instances affected by changing node %s's"
5798 " secondary IP address have changed since"
5799 " locks were acquired, wanted '%s', have"
5800 " '%s'; retry the operation" %
5802 utils.CommaJoin(wanted_instances),
5803 utils.CommaJoin(owned_instances)),
5806 affected_instances = None
5808 if (self.op.master_candidate is not None or
5809 self.op.drained is not None or
5810 self.op.offline is not None):
5811 # we can't change the master's node flags
5812 if self.op.node_name == self.cfg.GetMasterNode():
5813 raise errors.OpPrereqError("The master role can be changed"
5814 " only via master-failover",
5817 if self.op.master_candidate and not node.master_capable:
5818 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5819 " it a master candidate" % node.name,
5822 if self.op.vm_capable == False:
5823 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5825 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5826 " the vm_capable flag" % node.name,
5829 if node.master_candidate and self.might_demote and not self.lock_all:
5830 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5831 # check if after removing the current node, we're missing master
5832 # candidates
5833 (mc_remaining, mc_should, _) = \
5834 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5835 if mc_remaining < mc_should:
5836 raise errors.OpPrereqError("Not enough master candidates, please"
5837 " pass auto promote option to allow"
5838 " promotion", errors.ECODE_STATE)
5840 self.old_flags = old_flags = (node.master_candidate,
5841 node.drained, node.offline)
5842 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5843 self.old_role = old_role = self._F2R[old_flags]
5845 # Check for ineffective changes
5846 for attr in self._FLAGS:
5847 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5848 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5849 setattr(self.op, attr, None)
5851 # Past this point, any flag change to False means a transition
5852 # away from the respective state, as only real changes are kept
5854 # TODO: We might query the real power state if it supports OOB
5855 if _SupportsOob(self.cfg, node):
5856 if self.op.offline is False and not (node.powered or
5857 self.op.powered == True):
5858 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5859 " offline status can be reset") %
5861 elif self.op.powered is not None:
5862 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5863 " as it does not support out-of-band"
5864 " handling") % self.op.node_name)
5866 # If we're being de-offlined or de-drained, we'll make ourselves a master candidate if needed
5867 if (self.op.drained == False or self.op.offline == False or
5868 (self.op.master_capable and not node.master_capable)):
5869 if _DecideSelfPromotion(self):
5870 self.op.master_candidate = True
5871 self.LogInfo("Auto-promoting node to master candidate")
5873 # If we're no longer master capable, we'll demote ourselves from MC
5874 if self.op.master_capable == False and node.master_candidate:
5875 self.LogInfo("Demoting from master candidate")
5876 self.op.master_candidate = False
5879 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5880 if self.op.master_candidate:
5881 new_role = self._ROLE_CANDIDATE
5882 elif self.op.drained:
5883 new_role = self._ROLE_DRAINED
5884 elif self.op.offline:
5885 new_role = self._ROLE_OFFLINE
5886 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5887 # False is still in new flags, which means we're un-setting (the
5888 # offline/drained/master-candidate) flag
5889 new_role = self._ROLE_REGULAR
5890 else: # no new flags, nothing, keep old role
5891 new_role = old_role
5893 self.new_role = new_role
5895 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5896 # Trying to transition out of offline status
5897 # TODO: Use standard RPC runner, but make sure it works when the node is
5898 # still marked offline
5899 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5900 if result.fail_msg:
5901 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5902 " to report its version: %s" %
5903 (node.name, result.fail_msg),
5904 errors.ECODE_STATE)
5905 else:
5906 self.LogWarning("Transitioning node from offline to online state"
5907 " without using re-add. Please make sure the node"
5910 if self.op.secondary_ip:
5911 # Ok even without locking, because this can't be changed by any LU
5912 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5913 master_singlehomed = master.secondary_ip == master.primary_ip
5914 if master_singlehomed and self.op.secondary_ip:
5915 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5916 " homed cluster", errors.ECODE_INVAL)
5918 assert not (frozenset(affected_instances) -
5919 self.owned_locks(locking.LEVEL_INSTANCE))
5921 if node.offline:
5922 if affected_instances:
5923 raise errors.OpPrereqError("Cannot change secondary IP address:"
5924 " offline node has instances (%s)"
5925 " configured to use it" %
5926 utils.CommaJoin(affected_instances.keys()))
5927 else:
5928 # On online nodes, check that no instances are running, and that
5929 # the node has the new ip and we can reach it.
5930 for instance in affected_instances.values():
5931 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5932 msg="cannot change secondary ip")
5934 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5935 if master.name != node.name:
5936 # check reachability from master secondary ip to new secondary ip
5937 if not netutils.TcpPing(self.op.secondary_ip,
5938 constants.DEFAULT_NODED_PORT,
5939 source=master.secondary_ip):
5940 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5941 " based ping to node daemon port",
5942 errors.ECODE_ENVIRON)
5944 if self.op.ndparams:
5945 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5946 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5947 self.new_ndparams = new_ndparams
5949 if self.op.hv_state:
5950 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5951 self.node.hv_state_static)
5953 if self.op.disk_state:
5954 self.new_disk_state = \
5955 _MergeAndVerifyDiskState(self.op.disk_state,
5956 self.node.disk_state_static)
5958 def Exec(self, feedback_fn):
5963 old_role = self.old_role
5964 new_role = self.new_role
5968 if self.op.ndparams:
5969 node.ndparams = self.new_ndparams
5971 if self.op.powered is not None:
5972 node.powered = self.op.powered
5974 if self.op.hv_state:
5975 node.hv_state_static = self.new_hv_state
5977 if self.op.disk_state:
5978 node.disk_state_static = self.new_disk_state
5980 for attr in ["master_capable", "vm_capable"]:
5981 val = getattr(self.op, attr)
5983 setattr(node, attr, val)
5984 result.append((attr, str(val)))
5986 if new_role != old_role:
5987 # Tell the node to demote itself, if no longer MC and not offline
5988 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5989 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5991 self.LogWarning("Node failed to demote itself: %s", msg)
5993 new_flags = self._R2F[new_role]
5994 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5996 result.append((desc, str(nf)))
5997 (node.master_candidate, node.drained, node.offline) = new_flags
5999 # we locked all nodes, we adjust the CP before updating this node
6001 _AdjustCandidatePool(self, [node.name])
6003 if self.op.secondary_ip:
6004 node.secondary_ip = self.op.secondary_ip
6005 result.append(("secondary_ip", self.op.secondary_ip))
6007 # this will trigger configuration file update, if needed
6008 self.cfg.Update(node, feedback_fn)
6010 # this will trigger job queue propagation or cleanup if the mc
6011 # flag is modified
6012 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6013 self.context.ReaddNode(node)
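# Editor's note (illustrative): the count() == 1 test above is a compact
# way of saying "exactly one of old_role and new_role is the candidate
# role", i.e. master candidate membership changed in either direction;
# only then does the node need to be re-added for job queue propagation
# or cleanup.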
6018 class LUNodePowercycle(NoHooksLU):
6019 """Powercycles a node.
6024 def CheckArguments(self):
6025 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6026 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6027 raise errors.OpPrereqError("The node is the master and the force"
6028 " parameter was not set",
6031 def ExpandNames(self):
6032 """Locking for PowercycleNode.
6034 This is a last-resort option and shouldn't block on other
6035 jobs. Therefore, we grab no locks.
6038 self.needed_locks = {}
6040 def Exec(self, feedback_fn):
6044 result = self.rpc.call_node_powercycle(self.op.node_name,
6045 self.cfg.GetHypervisorType())
6046 result.Raise("Failed to schedule the reboot")
6047 return result.payload
6050 class LUClusterQuery(NoHooksLU):
6051 """Query cluster configuration.
6056 def ExpandNames(self):
6057 self.needed_locks = {}
6059 def Exec(self, feedback_fn):
6060 """Return cluster config.
6063 cluster = self.cfg.GetClusterInfo()
6065 os_hvp = {}
6066 # Filter just for enabled hypervisors
6067 for os_name, hv_dict in cluster.os_hvp.items():
6068 os_hvp[os_name] = {}
6069 for hv_name, hv_params in hv_dict.items():
6070 if hv_name in cluster.enabled_hypervisors:
6071 os_hvp[os_name][hv_name] = hv_params
6073 # Convert ip_family to ip_version
6074 primary_ip_version = constants.IP4_VERSION
6075 if cluster.primary_ip_family == netutils.IP6Address.family:
6076 primary_ip_version = constants.IP6_VERSION
6078 result = {
6079 "software_version": constants.RELEASE_VERSION,
6080 "protocol_version": constants.PROTOCOL_VERSION,
6081 "config_version": constants.CONFIG_VERSION,
6082 "os_api_version": max(constants.OS_API_VERSIONS),
6083 "export_version": constants.EXPORT_VERSION,
6084 "architecture": (platform.architecture()[0], platform.machine()),
6085 "name": cluster.cluster_name,
6086 "master": cluster.master_node,
6087 "default_hypervisor": cluster.primary_hypervisor,
6088 "enabled_hypervisors": cluster.enabled_hypervisors,
6089 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6090 for hypervisor_name in cluster.enabled_hypervisors]),
6092 "beparams": cluster.beparams,
6093 "osparams": cluster.osparams,
6094 "ipolicy": cluster.ipolicy,
6095 "nicparams": cluster.nicparams,
6096 "ndparams": cluster.ndparams,
6097 "candidate_pool_size": cluster.candidate_pool_size,
6098 "master_netdev": cluster.master_netdev,
6099 "master_netmask": cluster.master_netmask,
6100 "use_external_mip_script": cluster.use_external_mip_script,
6101 "volume_group_name": cluster.volume_group_name,
6102 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6103 "file_storage_dir": cluster.file_storage_dir,
6104 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6105 "maintain_node_health": cluster.maintain_node_health,
6106 "ctime": cluster.ctime,
6107 "mtime": cluster.mtime,
6108 "uuid": cluster.uuid,
6109 "tags": list(cluster.GetTags()),
6110 "uid_pool": cluster.uid_pool,
6111 "default_iallocator": cluster.default_iallocator,
6112 "reserved_lvs": cluster.reserved_lvs,
6113 "primary_ip_version": primary_ip_version,
6114 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6115 "hidden_os": cluster.hidden_os,
6116 "blacklisted_os": cluster.blacklisted_os,
6122 class LUClusterConfigQuery(NoHooksLU):
6123 """Return configuration values.
6127 _FIELDS_DYNAMIC = utils.FieldSet()
6128 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6129 "watcher_pause", "volume_group_name")
6131 def CheckArguments(self):
6132 _CheckOutputFields(static=self._FIELDS_STATIC,
6133 dynamic=self._FIELDS_DYNAMIC,
6134 selected=self.op.output_fields)
6136 def ExpandNames(self):
6137 self.needed_locks = {}
6139 def Exec(self, feedback_fn):
6140 """Dump a representation of the cluster config to the standard output.
6143 values = []
6144 for field in self.op.output_fields:
6145 if field == "cluster_name":
6146 entry = self.cfg.GetClusterName()
6147 elif field == "master_node":
6148 entry = self.cfg.GetMasterNode()
6149 elif field == "drain_flag":
6150 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6151 elif field == "watcher_pause":
6152 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6153 elif field == "volume_group_name":
6154 entry = self.cfg.GetVGName()
6156 raise errors.ParameterError(field)
6157 values.append(entry)
6159 return values
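# Editor's sketch (not the module's actual style): the if/elif chain above
# could equivalently be written as a dict dispatch, e.g.
#
#   getters = {
#     "cluster_name": self.cfg.GetClusterName,
#     "master_node": self.cfg.GetMasterNode,
#     "volume_group_name": self.cfg.GetVGName,
#   }
#   entry = getters[field]()  # KeyError maps to errors.ParameterError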
6161 class LUInstanceActivateDisks(NoHooksLU):
6162 """Bring up an instance's disks.
6167 def ExpandNames(self):
6168 self._ExpandAndLockInstance()
6169 self.needed_locks[locking.LEVEL_NODE] = []
6170 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6172 def DeclareLocks(self, level):
6173 if level == locking.LEVEL_NODE:
6174 self._LockInstancesNodes()
6176 def CheckPrereq(self):
6177 """Check prerequisites.
6179 This checks that the instance is in the cluster.
6182 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6183 assert self.instance is not None, \
6184 "Cannot retrieve locked instance %s" % self.op.instance_name
6185 _CheckNodeOnline(self, self.instance.primary_node)
6187 def Exec(self, feedback_fn):
6188 """Activate the disks.
6191 disks_ok, disks_info = \
6192 _AssembleInstanceDisks(self, self.instance,
6193 ignore_size=self.op.ignore_size)
6195 raise errors.OpExecError("Cannot activate block devices")
6200 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6202 """Prepare the block devices for an instance.
6204 This sets up the block devices on all nodes.
6206 @type lu: L{LogicalUnit}
6207 @param lu: the logical unit on whose behalf we execute
6208 @type instance: L{objects.Instance}
6209 @param instance: the instance for whose disks we assemble
6210 @type disks: list of L{objects.Disk} or None
6211 @param disks: which disks to assemble (or all, if None)
6212 @type ignore_secondaries: boolean
6213 @param ignore_secondaries: if true, errors on secondary nodes
6214 won't result in an error return from the function
6215 @type ignore_size: boolean
6216 @param ignore_size: if true, the current known size of the disk
6217 will not be used during the disk activation, useful for cases
6218 when the size is wrong
6219 @return: False if the operation failed, otherwise a list of
6220 (host, instance_visible_name, node_visible_name)
6221 with the mapping from node devices to instance devices
6226 iname = instance.name
6227 disks = _ExpandCheckDisks(instance, disks)
6229 # With the two-pass mechanism we try to reduce the window of
6230 # opportunity for the race condition of switching DRBD to primary
6231 # before the handshake has occurred, but we do not eliminate it
6233 # The proper fix would be to wait (with some limits) until the
6234 # connection has been made and DRBD transitions from WFConnection
6235 # into any other network-connected state (Connected, SyncTarget,
6236 # SyncSource, etc.)
6238 # 1st pass, assemble on all nodes in secondary mode
6239 for idx, inst_disk in enumerate(disks):
6240 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6242 node_disk = node_disk.Copy()
6243 node_disk.UnsetSize()
6244 lu.cfg.SetDiskID(node_disk, node)
6245 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6246 msg = result.fail_msg
6247 if msg:
6248 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6249 " (is_primary=False, pass=1): %s",
6250 inst_disk.iv_name, node, msg)
6251 if not ignore_secondaries:
6254 # FIXME: race condition on drbd migration to primary
6256 # 2nd pass, do only the primary node
6257 for idx, inst_disk in enumerate(disks):
6260 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6261 if node != instance.primary_node:
6264 node_disk = node_disk.Copy()
6265 node_disk.UnsetSize()
6266 lu.cfg.SetDiskID(node_disk, node)
6267 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6268 msg = result.fail_msg
6269 if msg:
6270 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6271 " (is_primary=True, pass=2): %s",
6272 inst_disk.iv_name, node, msg)
6275 dev_path = result.payload
6277 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6279 # leave the disks configured for the primary node
6280 # this is a workaround that would be fixed better by
6281 # improving the logical/physical id handling
6283 lu.cfg.SetDiskID(disk, instance.primary_node)
6285 return disks_ok, device_info
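# Illustrative usage sketch (editor's addition), mirroring
# LUInstanceActivateDisks.Exec above: callers fail hard when disks_ok is
# false and otherwise consume the (node, iv_name, device_path) triples:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s: disk %s visible as %s" % (node, iv_name, dev_path))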
6288 def _StartInstanceDisks(lu, instance, force):
6289 """Start the disks of an instance.
6292 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6293 ignore_secondaries=force)
6295 _ShutdownInstanceDisks(lu, instance)
6296 if force is not None and not force:
6297 lu.proc.LogWarning("", hint="If the message above refers to a"
6299 " you can retry the operation using '--force'.")
6300 raise errors.OpExecError("Disk consistency error")
6303 class LUInstanceDeactivateDisks(NoHooksLU):
6304 """Shutdown an instance's disks.
6309 def ExpandNames(self):
6310 self._ExpandAndLockInstance()
6311 self.needed_locks[locking.LEVEL_NODE] = []
6312 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6314 def DeclareLocks(self, level):
6315 if level == locking.LEVEL_NODE:
6316 self._LockInstancesNodes()
6318 def CheckPrereq(self):
6319 """Check prerequisites.
6321 This checks that the instance is in the cluster.
6324 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6325 assert self.instance is not None, \
6326 "Cannot retrieve locked instance %s" % self.op.instance_name
6328 def Exec(self, feedback_fn):
6329 """Deactivate the disks
6332 instance = self.instance
6334 _ShutdownInstanceDisks(self, instance)
6336 _SafeShutdownInstanceDisks(self, instance)
6339 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6340 """Shutdown block devices of an instance.
6342 This function checks whether an instance is running before calling
6343 _ShutdownInstanceDisks.
6346 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6347 _ShutdownInstanceDisks(lu, instance, disks=disks)
6350 def _ExpandCheckDisks(instance, disks):
6351 """Return the instance disks selected by the disks list
6353 @type disks: list of L{objects.Disk} or None
6354 @param disks: selected disks
6355 @rtype: list of L{objects.Disk}
6356 @return: selected instance disks to act on
6360 return instance.disks
6362 if not set(disks).issubset(instance.disks):
6363 raise errors.ProgrammerError("Can only act on disks belonging to the"
6368 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6369 """Shutdown block devices of an instance.
6371 This does the shutdown on all nodes of the instance.
6373 If ignore_primary is false, errors on the primary node are
6374 ignored.
6378 disks = _ExpandCheckDisks(instance, disks)
6381 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6382 lu.cfg.SetDiskID(top_disk, node)
6383 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6384 msg = result.fail_msg
6386 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6387 disk.iv_name, node, msg)
6388 if ((node == instance.primary_node and not ignore_primary) or
6389 (node != instance.primary_node and not result.offline)):
6394 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6395 """Checks if a node has enough free memory.
6397 This function checks if a given node has the needed amount of free
6398 memory. In case the node has less memory or we cannot get the
6399 information from the node, this function raises an OpPrereqError
6402 @type lu: C{LogicalUnit}
6403 @param lu: a logical unit from which we get configuration data
6405 @param node: the node to check
6406 @type reason: C{str}
6407 @param reason: string to use in the error message
6408 @type requested: C{int}
6409 @param requested: the amount of memory in MiB to check for
6410 @type hypervisor_name: C{str}
6411 @param hypervisor_name: the hypervisor to ask for memory stats
6413 @return: node current free memory
6414 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6415 we cannot check the node
6418 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6419 nodeinfo[node].Raise("Can't get data from node %s" % node,
6420 prereq=True, ecode=errors.ECODE_ENVIRON)
6421 (_, _, (hv_info, )) = nodeinfo[node].payload
6423 free_mem = hv_info.get("memory_free", None)
6424 if not isinstance(free_mem, int):
6425 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6426 " was '%s'" % (node, free_mem),
6427 errors.ECODE_ENVIRON)
6428 if requested > free_mem:
6429 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6430 " needed %s MiB, available %s MiB" %
6431 (node, reason, requested, free_mem),
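# Illustrative usage (editor's addition), as in LUInstanceStartup further
# below:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
#
# i.e. the requested amount comes from the filled beparams, and failure is
# signalled by the OpPrereqError raised above rather than a return value.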
6436 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6437 """Checks if nodes have enough free disk space in the all VGs.
6439 This function check if all given nodes have the needed amount of
6440 free disk. In case any node has less disk or we cannot get the
6441 information from the node, this function raise an OpPrereqError
6444 @type lu: C{LogicalUnit}
6445 @param lu: a logical unit from which we get configuration data
6446 @type nodenames: C{list}
6447 @param nodenames: the list of node names to check
6448 @type req_sizes: C{dict}
6449 @param req_sizes: the hash of vg and corresponding amount of disk in
6451 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6452 or we cannot check the node
6455 for vg, req_size in req_sizes.items():
6456 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
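# Illustrative example (editor's addition): req_sizes maps each VG name to
# the disk space required on it in MiB, so a call such as
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1", "node2"],
#                            {"xenvg": 2048, "datavg": 512})
#
# verifies 2 GiB of free space in "xenvg" and 512 MiB in "datavg" on both
# nodes (the node and VG names here are hypothetical).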
6459 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6460 """Checks if nodes have enough free disk space in the specified VG.
6462 This function checks if all given nodes have the needed amount of
6463 free disk space. In case any node has less disk space or we cannot get
6464 the information from the node, this function raises an OpPrereqError
6467 @type lu: C{LogicalUnit}
6468 @param lu: a logical unit from which we get configuration data
6469 @type nodenames: C{list}
6470 @param nodenames: the list of node names to check
6472 @param vg: the volume group to check
6473 @type requested: C{int}
6474 @param requested: the amount of disk in MiB to check for
6475 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6476 or we cannot check the node
6479 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6480 for node in nodenames:
6481 info = nodeinfo[node]
6482 info.Raise("Cannot get current information from node %s" % node,
6483 prereq=True, ecode=errors.ECODE_ENVIRON)
6484 (_, (vg_info, ), _) = info.payload
6485 vg_free = vg_info.get("vg_free", None)
6486 if not isinstance(vg_free, int):
6487 raise errors.OpPrereqError("Can't compute free disk space on node"
6488 " %s for vg %s, result was '%s'" %
6489 (node, vg, vg_free), errors.ECODE_ENVIRON)
6490 if requested > vg_free:
6491 raise errors.OpPrereqError("Not enough disk space on target node %s"
6492 " vg %s: required %d MiB, available %d MiB" %
6493 (node, vg, requested, vg_free),
6497 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6498 """Checks if nodes have enough physical CPUs
6500 This function checks if all given nodes have the needed number of
6501 physical CPUs. In case any node has less CPUs or we cannot get the
6502 information from the node, this function raises an OpPrereqError
6505 @type lu: C{LogicalUnit}
6506 @param lu: a logical unit from which we get configuration data
6507 @type nodenames: C{list}
6508 @param nodenames: the list of node names to check
6509 @type requested: C{int}
6510 @param requested: the minimum acceptable number of physical CPUs
6511 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6512 or we cannot check the node
6515 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6516 for node in nodenames:
6517 info = nodeinfo[node]
6518 info.Raise("Cannot get current information from node %s" % node,
6519 prereq=True, ecode=errors.ECODE_ENVIRON)
6520 (_, _, (hv_info, )) = info.payload
6521 num_cpus = hv_info.get("cpu_total", None)
6522 if not isinstance(num_cpus, int):
6523 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6524 " on node %s, result was '%s'" %
6525 (node, num_cpus), errors.ECODE_ENVIRON)
6526 if requested > num_cpus:
6527 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6528 "required" % (node, num_cpus, requested),
6532 class LUInstanceStartup(LogicalUnit):
6533 """Starts an instance.
6536 HPATH = "instance-start"
6537 HTYPE = constants.HTYPE_INSTANCE
6540 def CheckArguments(self):
6542 if self.op.beparams:
6543 # fill the beparams dict
6544 objects.UpgradeBeParams(self.op.beparams)
6545 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
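# Editor's note (illustrative, based on objects.UpgradeBeParams): the
# upgrade call above rewrites legacy backend parameters in place, e.g. a
# single old-style memory value is split into the newer minimum/maximum
# memory pair before the type check runs.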
6547 def ExpandNames(self):
6548 self._ExpandAndLockInstance()
6549 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6551 def DeclareLocks(self, level):
6552 if level == locking.LEVEL_NODE_RES:
6553 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6555 def BuildHooksEnv(self):
6558 This runs on master, primary and secondary nodes of the instance.
6562 "FORCE": self.op.force,
6565 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6569 def BuildHooksNodes(self):
6570 """Build hooks nodes.
6573 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6576 def CheckPrereq(self):
6577 """Check prerequisites.
6579 This checks that the instance is in the cluster.
6582 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6583 assert self.instance is not None, \
6584 "Cannot retrieve locked instance %s" % self.op.instance_name
6587 if self.op.hvparams:
6588 # check hypervisor parameter syntax (locally)
6589 cluster = self.cfg.GetClusterInfo()
6590 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6591 filled_hvp = cluster.FillHV(instance)
6592 filled_hvp.update(self.op.hvparams)
6593 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6594 hv_type.CheckParameterSyntax(filled_hvp)
6595 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6597 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6599 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6601 if self.primary_offline and self.op.ignore_offline_nodes:
6602 self.proc.LogWarning("Ignoring offline primary node")
6604 if self.op.hvparams or self.op.beparams:
6605 self.proc.LogWarning("Overridden parameters are ignored")
6607 _CheckNodeOnline(self, instance.primary_node)
6609 bep = self.cfg.GetClusterInfo().FillBE(instance)
6610 bep.update(self.op.beparams)
6612 # check bridges existence
6613 _CheckInstanceBridgesExist(self, instance)
6615 remote_info = self.rpc.call_instance_info(instance.primary_node,
6617 instance.hypervisor)
6618 remote_info.Raise("Error checking node %s" % instance.primary_node,
6619 prereq=True, ecode=errors.ECODE_ENVIRON)
6620 if not remote_info.payload: # not running already
6621 _CheckNodeFreeMemory(self, instance.primary_node,
6622 "starting instance %s" % instance.name,
6623 bep[constants.BE_MINMEM], instance.hypervisor)
6625 def Exec(self, feedback_fn):
6626 """Start the instance.
6629 instance = self.instance
6630 force = self.op.force
6632 if not self.op.no_remember:
6633 self.cfg.MarkInstanceUp(instance.name)
6635 if self.primary_offline:
6636 assert self.op.ignore_offline_nodes
6637 self.proc.LogInfo("Primary node offline, marked instance as started")
6638 else:
6639 node_current = instance.primary_node
6641 _StartInstanceDisks(self, instance, force)
6644 self.rpc.call_instance_start(node_current,
6645 (instance, self.op.hvparams,
6647 self.op.startup_paused)
6648 msg = result.fail_msg
6649 if msg:
6650 _ShutdownInstanceDisks(self, instance)
6651 raise errors.OpExecError("Could not start instance: %s" % msg)
6654 class LUInstanceReboot(LogicalUnit):
6655 """Reboot an instance.
6658 HPATH = "instance-reboot"
6659 HTYPE = constants.HTYPE_INSTANCE
6662 def ExpandNames(self):
6663 self._ExpandAndLockInstance()
6665 def BuildHooksEnv(self):
6668 This runs on master, primary and secondary nodes of the instance.
6672 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6673 "REBOOT_TYPE": self.op.reboot_type,
6674 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6677 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6681 def BuildHooksNodes(self):
6682 """Build hooks nodes.
6685 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6688 def CheckPrereq(self):
6689 """Check prerequisites.
6691 This checks that the instance is in the cluster.
6694 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6695 assert self.instance is not None, \
6696 "Cannot retrieve locked instance %s" % self.op.instance_name
6697 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6698 _CheckNodeOnline(self, instance.primary_node)
6700 # check bridges existence
6701 _CheckInstanceBridgesExist(self, instance)
6703 def Exec(self, feedback_fn):
6704 """Reboot the instance.
6707 instance = self.instance
6708 ignore_secondaries = self.op.ignore_secondaries
6709 reboot_type = self.op.reboot_type
6711 remote_info = self.rpc.call_instance_info(instance.primary_node,
6713 instance.hypervisor)
6714 remote_info.Raise("Error checking node %s" % instance.primary_node)
6715 instance_running = bool(remote_info.payload)
6717 node_current = instance.primary_node
6719 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6720 constants.INSTANCE_REBOOT_HARD]:
6721 for disk in instance.disks:
6722 self.cfg.SetDiskID(disk, node_current)
6723 result = self.rpc.call_instance_reboot(node_current, instance,
6725 self.op.shutdown_timeout)
6726 result.Raise("Could not reboot instance")
6728 if instance_running:
6729 result = self.rpc.call_instance_shutdown(node_current, instance,
6730 self.op.shutdown_timeout)
6731 result.Raise("Could not shutdown instance for full reboot")
6732 _ShutdownInstanceDisks(self, instance)
6733 else:
6734 self.LogInfo("Instance %s was already stopped, starting now",
6736 _StartInstanceDisks(self, instance, ignore_secondaries)
6737 result = self.rpc.call_instance_start(node_current,
6738 (instance, None, None), False)
6739 msg = result.fail_msg
6740 if msg:
6741 _ShutdownInstanceDisks(self, instance)
6742 raise errors.OpExecError("Could not start instance for"
6743 " full reboot: %s" % msg)
6745 self.cfg.MarkInstanceUp(instance.name)
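# Editor's summary of the dispatch above (illustrative): soft and hard
# reboots of a running instance are delegated to the node daemon via
# call_instance_reboot, while any other case (a full reboot, or an
# instance that is not running) is emulated as instance shutdown plus
# disk deactivation, followed by disk activation plus instance start.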
6748 class LUInstanceShutdown(LogicalUnit):
6749 """Shutdown an instance.
6752 HPATH = "instance-stop"
6753 HTYPE = constants.HTYPE_INSTANCE
6756 def ExpandNames(self):
6757 self._ExpandAndLockInstance()
6759 def BuildHooksEnv(self):
6762 This runs on master, primary and secondary nodes of the instance.
6765 env = _BuildInstanceHookEnvByObject(self, self.instance)
6766 env["TIMEOUT"] = self.op.timeout
6769 def BuildHooksNodes(self):
6770 """Build hooks nodes.
6773 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6776 def CheckPrereq(self):
6777 """Check prerequisites.
6779 This checks that the instance is in the cluster.
6782 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6783 assert self.instance is not None, \
6784 "Cannot retrieve locked instance %s" % self.op.instance_name
6786 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6788 self.primary_offline = \
6789 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6791 if self.primary_offline and self.op.ignore_offline_nodes:
6792 self.proc.LogWarning("Ignoring offline primary node")
6794 _CheckNodeOnline(self, self.instance.primary_node)
6796 def Exec(self, feedback_fn):
6797 """Shutdown the instance.
6800 instance = self.instance
6801 node_current = instance.primary_node
6802 timeout = self.op.timeout
6804 if not self.op.no_remember:
6805 self.cfg.MarkInstanceDown(instance.name)
6807 if self.primary_offline:
6808 assert self.op.ignore_offline_nodes
6809 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6810 else:
6811 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6812 msg = result.fail_msg
6813 if msg:
6814 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6816 _ShutdownInstanceDisks(self, instance)
6819 class LUInstanceReinstall(LogicalUnit):
6820 """Reinstall an instance.
6823 HPATH = "instance-reinstall"
6824 HTYPE = constants.HTYPE_INSTANCE
6827 def ExpandNames(self):
6828 self._ExpandAndLockInstance()
6830 def BuildHooksEnv(self):
6833 This runs on master, primary and secondary nodes of the instance.
6836 return _BuildInstanceHookEnvByObject(self, self.instance)
6838 def BuildHooksNodes(self):
6839 """Build hooks nodes.
6842 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6845 def CheckPrereq(self):
6846 """Check prerequisites.
6848 This checks that the instance is in the cluster and is not running.
6851 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6852 assert instance is not None, \
6853 "Cannot retrieve locked instance %s" % self.op.instance_name
6854 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6855 " offline, cannot reinstall")
6856 for node in instance.secondary_nodes:
6857 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6858 " cannot reinstall")
6860 if instance.disk_template == constants.DT_DISKLESS:
6861 raise errors.OpPrereqError("Instance '%s' has no disks" %
6862 self.op.instance_name,
6864 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6866 if self.op.os_type is not None:
6868 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6869 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6870 instance_os = self.op.os_type
6872 instance_os = instance.os
6874 nodelist = list(instance.all_nodes)
6876 if self.op.osparams:
6877 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6878 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6879 self.os_inst = i_osdict # the new dict (without defaults)
6883 self.instance = instance
6885 def Exec(self, feedback_fn):
6886 """Reinstall the instance.
6889 inst = self.instance
6891 if self.op.os_type is not None:
6892 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6893 inst.os = self.op.os_type
6894 # Write to configuration
6895 self.cfg.Update(inst, feedback_fn)
6897 _StartInstanceDisks(self, inst, None)
6899 feedback_fn("Running the instance OS create scripts...")
6900 # FIXME: pass debug option from opcode to backend
6901 result = self.rpc.call_instance_os_add(inst.primary_node,
6902 (inst, self.os_inst), True,
6903 self.op.debug_level)
6904 result.Raise("Could not install OS for instance %s on node %s" %
6905 (inst.name, inst.primary_node))
6907 _ShutdownInstanceDisks(self, inst)
6910 class LUInstanceRecreateDisks(LogicalUnit):
6911 """Recreate an instance's missing disks.
6914 HPATH = "instance-recreate-disks"
6915 HTYPE = constants.HTYPE_INSTANCE
6918 _MODIFYABLE = frozenset([
6919 constants.IDISK_SIZE,
6920 constants.IDISK_MODE,
6923 # New or changed disk parameters may have different semantics
6924 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6925 constants.IDISK_ADOPT,
6927 # TODO: Implement support changing VG while recreating
6929 constants.IDISK_METAVG,
6932 def CheckArguments(self):
6933 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6934 # Normalize and convert deprecated list of disk indices
6935 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
6937 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6939 raise errors.OpPrereqError("Some disks have been specified more than"
6940 " once: %s" % utils.CommaJoin(duplicates),
6943 for (idx, params) in self.op.disks:
6944 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6945 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6947 raise errors.OpPrereqError("Parameters for disk %s try to change"
6948 " unmodifyable parameter(s): %s" %
6949 (idx, utils.CommaJoin(unsupported)),
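# Illustrative example (editor's addition): a deprecated plain index list
# such as [2, 0, 2] is normalized above to [(0, {}), (2, {})], i.e. sorted,
# de-duplicated (index, params) pairs with empty parameter overrides, after
# which only IDISK_SIZE and IDISK_MODE may appear in params.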
6952 def ExpandNames(self):
6953 self._ExpandAndLockInstance()
6954 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6955 if self.op.nodes:
6956 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6957 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6958 else:
6959 self.needed_locks[locking.LEVEL_NODE] = []
6960 self.needed_locks[locking.LEVEL_NODE_RES] = []
6962 def DeclareLocks(self, level):
6963 if level == locking.LEVEL_NODE:
6964 # if we replace the nodes, we only need to lock the old primary,
6965 # otherwise we need to lock all nodes for disk re-creation
6966 primary_only = bool(self.op.nodes)
6967 self._LockInstancesNodes(primary_only=primary_only)
6968 elif level == locking.LEVEL_NODE_RES:
6970 self.needed_locks[locking.LEVEL_NODE_RES] = \
6971 self.needed_locks[locking.LEVEL_NODE][:]
6973 def BuildHooksEnv(self):
6976 This runs on master, primary and secondary nodes of the instance.
6979 return _BuildInstanceHookEnvByObject(self, self.instance)
6981 def BuildHooksNodes(self):
6982 """Build hooks nodes.
6985 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6988 def CheckPrereq(self):
6989 """Check prerequisites.
6991 This checks that the instance is in the cluster and is not running.
6994 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6995 assert instance is not None, \
6996 "Cannot retrieve locked instance %s" % self.op.instance_name
6998 if len(self.op.nodes) != len(instance.all_nodes):
6999 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7000 " %d replacement nodes were specified" %
7001 (instance.name, len(instance.all_nodes),
7002 len(self.op.nodes)),
7004 assert instance.disk_template != constants.DT_DRBD8 or \
7005 len(self.op.nodes) == 2
7006 assert instance.disk_template != constants.DT_PLAIN or \
7007 len(self.op.nodes) == 1
7008 primary_node = self.op.nodes[0]
7010 primary_node = instance.primary_node
7011 _CheckNodeOnline(self, primary_node)
7013 if instance.disk_template == constants.DT_DISKLESS:
7014 raise errors.OpPrereqError("Instance '%s' has no disks" %
7015 self.op.instance_name, errors.ECODE_INVAL)
7017 # if we replace nodes *and* the old primary is offline, we don't
7018 # check the instance state
7019 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7020 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7021 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7022 if not (self.op.nodes and old_pnode.offline):
7023 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7024 msg="cannot recreate disks")
7026 if self.op.disks:
7027 self.disks = dict(self.op.disks)
7028 else:
7029 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7031 maxidx = max(self.disks.keys())
7032 if maxidx >= len(instance.disks):
7033 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7036 if (self.op.nodes and
7037 sorted(self.disks.keys()) != range(len(instance.disks))):
7038 raise errors.OpPrereqError("Can't recreate disks partially and"
7039 " change the nodes at the same time",
7042 self.instance = instance
7044 def Exec(self, feedback_fn):
7045 """Recreate the disks.
7048 instance = self.instance
7050 assert (self.owned_locks(locking.LEVEL_NODE) ==
7051 self.owned_locks(locking.LEVEL_NODE_RES))
7054 mods = [] # keeps track of needed changes
7056 for idx, disk in enumerate(instance.disks):
7058 changes = self.disks[idx]
7060 # Disk should not be recreated
7064 # update secondaries for disks, if needed
7065 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7066 # need to update the nodes and minors
7067 assert len(self.op.nodes) == 2
7068 assert len(disk.logical_id) == 6 # otherwise disk internals
7070 (_, _, old_port, _, _, old_secret) = disk.logical_id
7071 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7072 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7073 new_minors[0], new_minors[1], old_secret)
7074 assert len(disk.logical_id) == len(new_id)
7078 mods.append((idx, new_id, changes))
7080 # now that we have passed all asserts above, we can apply the mods
7081 # in a single run (to avoid partial changes)
7082 for idx, new_id, changes in mods:
7083 disk = instance.disks[idx]
7084 if new_id is not None:
7085 assert disk.dev_type == constants.LD_DRBD8
7086 disk.logical_id = new_id
7088 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7089 mode=changes.get(constants.IDISK_MODE, None))
7091 # change primary node, if needed
7093 instance.primary_node = self.op.nodes[0]
7094 self.LogWarning("Changing the instance's nodes, you will have to"
7095 " remove any disks left on the older nodes manually")
7098 self.cfg.Update(instance, feedback_fn)
7100 _CreateDisks(self, instance, to_skip=to_skip)
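# Editor's sketch of the DRBD8 logical_id rewrite above (hypothetical
# values): given logical_id = ("nodeA", "nodeB", 11000, 0, 1, "secret"),
# recreating on nodes ("nodeC", "nodeD") with freshly allocated minors
# (2, 3) yields ("nodeC", "nodeD", 11000, 2, 3, "secret"); the port and
# shared secret survive while the nodes and minors are replaced.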
7103 class LUInstanceRename(LogicalUnit):
7104 """Rename an instance.
7107 HPATH = "instance-rename"
7108 HTYPE = constants.HTYPE_INSTANCE
7110 def CheckArguments(self):
7114 if self.op.ip_check and not self.op.name_check:
7115 # TODO: make the ip check more flexible and not depend on the name check
7116 raise errors.OpPrereqError("IP address check requires a name check",
7119 def BuildHooksEnv(self):
7122 This runs on master, primary and secondary nodes of the instance.
7125 env = _BuildInstanceHookEnvByObject(self, self.instance)
7126 env["INSTANCE_NEW_NAME"] = self.op.new_name
7129 def BuildHooksNodes(self):
7130 """Build hooks nodes.
7133 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7136 def CheckPrereq(self):
7137 """Check prerequisites.
7139 This checks that the instance is in the cluster and is not running.
7142 self.op.instance_name = _ExpandInstanceName(self.cfg,
7143 self.op.instance_name)
7144 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7145 assert instance is not None
7146 _CheckNodeOnline(self, instance.primary_node)
7147 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7148 msg="cannot rename")
7149 self.instance = instance
7151 new_name = self.op.new_name
7152 if self.op.name_check:
7153 hostname = netutils.GetHostname(name=new_name)
7154 if hostname.name != new_name:
7155 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7157 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7158 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7159 " same as given hostname '%s'") %
7160 (hostname.name, self.op.new_name),
7162 new_name = self.op.new_name = hostname.name
7163 if (self.op.ip_check and
7164 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7165 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7166 (hostname.ip, new_name),
7167 errors.ECODE_NOTUNIQUE)
7169 instance_list = self.cfg.GetInstanceList()
7170 if new_name in instance_list and new_name != instance.name:
7171 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7172 new_name, errors.ECODE_EXISTS)
7174 def Exec(self, feedback_fn):
7175 """Rename the instance.
7178 inst = self.instance
7179 old_name = inst.name
7181 rename_file_storage = False
7182 if (inst.disk_template in constants.DTS_FILEBASED and
7183 self.op.new_name != inst.name):
7184 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7185 rename_file_storage = True
7187 self.cfg.RenameInstance(inst.name, self.op.new_name)
7188 # Change the instance lock. This is definitely safe while we hold the BGL.
7189 # Otherwise the new lock would have to be added in acquired mode.
7191 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7192 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7194 # re-read the instance from the configuration after rename
7195 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7197 if rename_file_storage:
7198 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7199 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7200 old_file_storage_dir,
7201 new_file_storage_dir)
7202 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7203 " (but the instance has been renamed in Ganeti)" %
7204 (inst.primary_node, old_file_storage_dir,
7205 new_file_storage_dir))
7207 _StartInstanceDisks(self, inst, None)
7209 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7210 old_name, self.op.debug_level)
7211 msg = result.fail_msg
7212 if msg:
7213 msg = ("Could not run OS rename script for instance %s on node %s"
7214 " (but the instance has been renamed in Ganeti): %s" %
7215 (inst.name, inst.primary_node, msg))
7216 self.proc.LogWarning(msg)
7218 _ShutdownInstanceDisks(self, inst)
7223 class LUInstanceRemove(LogicalUnit):
7224 """Remove an instance.
7227 HPATH = "instance-remove"
7228 HTYPE = constants.HTYPE_INSTANCE
7231 def ExpandNames(self):
7232 self._ExpandAndLockInstance()
7233 self.needed_locks[locking.LEVEL_NODE] = []
7234 self.needed_locks[locking.LEVEL_NODE_RES] = []
7235 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7237 def DeclareLocks(self, level):
7238 if level == locking.LEVEL_NODE:
7239 self._LockInstancesNodes()
7240 elif level == locking.LEVEL_NODE_RES:
7242 self.needed_locks[locking.LEVEL_NODE_RES] = \
7243 self.needed_locks[locking.LEVEL_NODE][:]
7245 def BuildHooksEnv(self):
7248 This runs on master, primary and secondary nodes of the instance.
7251 env = _BuildInstanceHookEnvByObject(self, self.instance)
7252 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7255 def BuildHooksNodes(self):
7256 """Build hooks nodes.
7259 nl = [self.cfg.GetMasterNode()]
7260 nl_post = list(self.instance.all_nodes) + nl
7261 return (nl, nl_post)
7263 def CheckPrereq(self):
7264 """Check prerequisites.
7266 This checks that the instance is in the cluster.
7269 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7270 assert self.instance is not None, \
7271 "Cannot retrieve locked instance %s" % self.op.instance_name
7273 def Exec(self, feedback_fn):
7274 """Remove the instance.
7277 instance = self.instance
7278 logging.info("Shutting down instance %s on node %s",
7279 instance.name, instance.primary_node)
7281 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7282 self.op.shutdown_timeout)
7283 msg = result.fail_msg
7284 if msg:
7285 if self.op.ignore_failures:
7286 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7288 raise errors.OpExecError("Could not shutdown instance %s on"
7290 (instance.name, instance.primary_node, msg))
7292 assert (self.owned_locks(locking.LEVEL_NODE) ==
7293 self.owned_locks(locking.LEVEL_NODE_RES))
7294 assert not (set(instance.all_nodes) -
7295 self.owned_locks(locking.LEVEL_NODE)), \
7296 "Not owning correct locks"
7298 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7301 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7302 """Utility function to remove an instance.
7305 logging.info("Removing block devices for instance %s", instance.name)
7307 if not _RemoveDisks(lu, instance):
7308 if not ignore_failures:
7309 raise errors.OpExecError("Can't remove instance's disks")
7310 feedback_fn("Warning: can't remove instance's disks")
7312 logging.info("Removing instance %s out of cluster config", instance.name)
7314 lu.cfg.RemoveInstance(instance.name)
7316 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7317 "Instance lock removal conflict"
7319 # Remove lock for the instance
7320 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7323 class LUInstanceQuery(NoHooksLU):
7324 """Logical unit for querying instances.
7327 # pylint: disable=W0142
7330 def CheckArguments(self):
7331 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7332 self.op.output_fields, self.op.use_locking)
7334 def ExpandNames(self):
7335 self.iq.ExpandNames(self)
7337 def DeclareLocks(self, level):
7338 self.iq.DeclareLocks(self, level)
7340 def Exec(self, feedback_fn):
7341 return self.iq.OldStyleQuery(self)
7344 class LUInstanceFailover(LogicalUnit):
7345 """Failover an instance.
7348 HPATH = "instance-failover"
7349 HTYPE = constants.HTYPE_INSTANCE
7352 def CheckArguments(self):
7353 """Check the arguments.
7356 self.iallocator = getattr(self.op, "iallocator", None)
7357 self.target_node = getattr(self.op, "target_node", None)
7359 def ExpandNames(self):
7360 self._ExpandAndLockInstance()
7362 if self.op.target_node is not None:
7363 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7365 self.needed_locks[locking.LEVEL_NODE] = []
7366 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7368 self.needed_locks[locking.LEVEL_NODE_RES] = []
7369 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7371 ignore_consistency = self.op.ignore_consistency
7372 shutdown_timeout = self.op.shutdown_timeout
7373 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7376 ignore_consistency=ignore_consistency,
7377 shutdown_timeout=shutdown_timeout,
7378 ignore_ipolicy=self.op.ignore_ipolicy)
7379 self.tasklets = [self._migrater]
7381 def DeclareLocks(self, level):
7382 if level == locking.LEVEL_NODE:
7383 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7384 if instance.disk_template in constants.DTS_EXT_MIRROR:
7385 if self.op.target_node is None:
7386 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7388 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7389 self.op.target_node]
7390 del self.recalculate_locks[locking.LEVEL_NODE]
7392 self._LockInstancesNodes()
7393 elif level == locking.LEVEL_NODE_RES:
7395 self.needed_locks[locking.LEVEL_NODE_RES] = \
7396 self.needed_locks[locking.LEVEL_NODE][:]
7398 def BuildHooksEnv(self):
7401 This runs on master, primary and secondary nodes of the instance.
7404 instance = self._migrater.instance
7405 source_node = instance.primary_node
7406 target_node = self.op.target_node
7408 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7409 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7410 "OLD_PRIMARY": source_node,
7411 "NEW_PRIMARY": target_node,
7414 if instance.disk_template in constants.DTS_INT_MIRROR:
7415 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7416 env["NEW_SECONDARY"] = source_node
7418 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7420 env.update(_BuildInstanceHookEnvByObject(self, instance))
7424 def BuildHooksNodes(self):
7425 """Build hooks nodes.
7428 instance = self._migrater.instance
7429 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7430 return (nl, nl + [instance.primary_node])
7433 class LUInstanceMigrate(LogicalUnit):
7434 """Migrate an instance.
7436 This is migration without shutting down, compared to the failover,
7437 which is done with shutdown.
7440 HPATH = "instance-migrate"
7441 HTYPE = constants.HTYPE_INSTANCE
7444 def ExpandNames(self):
7445 self._ExpandAndLockInstance()
7447 if self.op.target_node is not None:
7448 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7450 self.needed_locks[locking.LEVEL_NODE] = []
7451 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7453 self.needed_locks[locking.LEVEL_NODE_RES] = []
7454 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7456 self._migrater = \
7457 TLMigrateInstance(self, self.op.instance_name,
7458 cleanup=self.op.cleanup,
7460 fallback=self.op.allow_failover,
7461 allow_runtime_changes=self.op.allow_runtime_changes,
7462 ignore_ipolicy=self.op.ignore_ipolicy)
7463 self.tasklets = [self._migrater]
7465 def DeclareLocks(self, level):
7466 if level == locking.LEVEL_NODE:
7467 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7468 if instance.disk_template in constants.DTS_EXT_MIRROR:
7469 if self.op.target_node is None:
7470 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7472 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7473 self.op.target_node]
7474 del self.recalculate_locks[locking.LEVEL_NODE]
7476 self._LockInstancesNodes()
7477 elif level == locking.LEVEL_NODE_RES:
7479 self.needed_locks[locking.LEVEL_NODE_RES] = \
7480 self.needed_locks[locking.LEVEL_NODE][:]
7482 def BuildHooksEnv(self):
7485 This runs on master, primary and secondary nodes of the instance.
7488 instance = self._migrater.instance
7489 source_node = instance.primary_node
7490 target_node = self.op.target_node
7491 env = _BuildInstanceHookEnvByObject(self, instance)
7493 "MIGRATE_LIVE": self._migrater.live,
7494 "MIGRATE_CLEANUP": self.op.cleanup,
7495 "OLD_PRIMARY": source_node,
7496 "NEW_PRIMARY": target_node,
7497 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7500 if instance.disk_template in constants.DTS_INT_MIRROR:
7501 env["OLD_SECONDARY"] = target_node
7502 env["NEW_SECONDARY"] = source_node
7504 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7508 def BuildHooksNodes(self):
7509 """Build hooks nodes.
7512 instance = self._migrater.instance
7513 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7514 return (nl, nl + [instance.primary_node])
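# Editor's note (illustrative): for a DRBD instance with primary P and
# secondary S migrating onto its secondary, the hooks environment built
# above reads OLD_PRIMARY=P, NEW_PRIMARY=S, OLD_SECONDARY=S and
# NEW_SECONDARY=P, i.e. a live migration simply swaps the two roles.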
7517 class LUInstanceMove(LogicalUnit):
7518 """Move an instance by data-copying.
7521 HPATH = "instance-move"
7522 HTYPE = constants.HTYPE_INSTANCE
7525 def ExpandNames(self):
7526 self._ExpandAndLockInstance()
7527 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7528 self.op.target_node = target_node
7529 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7530 self.needed_locks[locking.LEVEL_NODE_RES] = []
7531 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
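# LOCKS_APPEND keeps the explicit target node lock acquired above and
# lets DeclareLocks add the instance's primary node to the set.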
7533 def DeclareLocks(self, level):
7534 if level == locking.LEVEL_NODE:
7535 self._LockInstancesNodes(primary_only=True)
7536 elif level == locking.LEVEL_NODE_RES:
7538 self.needed_locks[locking.LEVEL_NODE_RES] = \
7539 self.needed_locks[locking.LEVEL_NODE][:]
7541 def BuildHooksEnv(self):
7544 This runs on master, primary and secondary nodes of the instance.
7548 "TARGET_NODE": self.op.target_node,
7549 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7551 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7554 def BuildHooksNodes(self):
7555 """Build hooks nodes.
7559 self.cfg.GetMasterNode(),
7560 self.instance.primary_node,
7561 self.op.target_node,
7565 def CheckPrereq(self):
7566 """Check prerequisites.
7568 This checks that the instance is in the cluster.
7571 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7572 assert self.instance is not None, \
7573 "Cannot retrieve locked instance %s" % self.op.instance_name
7575 node = self.cfg.GetNodeInfo(self.op.target_node)
7576 assert node is not None, \
7577 "Cannot retrieve locked node %s" % self.op.target_node
7579 self.target_node = target_node = node.name
7581 if target_node == instance.primary_node:
7582 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7583 (instance.name, target_node),
7586 bep = self.cfg.GetClusterInfo().FillBE(instance)
7588 for idx, dsk in enumerate(instance.disks):
7589 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7590 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7591 " cannot copy" % idx, errors.ECODE_STATE)
7593 _CheckNodeOnline(self, target_node)
7594 _CheckNodeNotDrained(self, target_node)
7595 _CheckNodeVmCapable(self, target_node)
7596 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7597 self.cfg.GetNodeGroup(node.group))
7598 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7599 ignore=self.op.ignore_ipolicy)
7601 if instance.admin_state == constants.ADMINST_UP:
7602 # check memory requirements on the target node
7603 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7604 instance.name, bep[constants.BE_MAXMEM],
7605 instance.hypervisor)
7607 self.LogInfo("Not checking memory on the target node as"
7608 " instance will not be started")
7610 # check bridge existence
7611 _CheckInstanceBridgesExist(self, instance, node=target_node)
7613 def Exec(self, feedback_fn):
7614 """Move an instance.
7616 The move is done by shutting it down on its present node, copying
7617 the data over (slow) and starting it on the new node.
7620 instance = self.instance
7622 source_node = instance.primary_node
7623 target_node = self.target_node
7625 self.LogInfo("Shutting down instance %s on source node %s",
7626 instance.name, source_node)
7628 assert (self.owned_locks(locking.LEVEL_NODE) ==
7629 self.owned_locks(locking.LEVEL_NODE_RES))
7631 result = self.rpc.call_instance_shutdown(source_node, instance,
7632 self.op.shutdown_timeout)
7633 msg = result.fail_msg
7635 if self.op.ignore_consistency:
7636 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7637 " Proceeding anyway. Please make sure node"
7638 " %s is down. Error details: %s",
7639 instance.name, source_node, source_node, msg)
7641 raise errors.OpExecError("Could not shutdown instance %s on"
7643 (instance.name, source_node, msg))
7645 # create the target disks
7647 _CreateDisks(self, instance, target_node=target_node)
7648 except errors.OpExecError:
7649 self.LogWarning("Device creation failed, reverting...")
7651 _RemoveDisks(self, instance, target_node=target_node)
7653 self.cfg.ReleaseDRBDMinors(instance.name)
7656 cluster_name = self.cfg.GetClusterInfo().cluster_name
7659 # activate, get path, copy the data over
7660 for idx, disk in enumerate(instance.disks):
7661 self.LogInfo("Copying data for disk %d", idx)
7662 result = self.rpc.call_blockdev_assemble(target_node, disk,
7663 instance.name, True, idx)
7665 self.LogWarning("Can't assemble newly created disk %d: %s",
7666 idx, result.fail_msg)
7667 errs.append(result.fail_msg)
7669 dev_path = result.payload
7670 result = self.rpc.call_blockdev_export(source_node, disk,
7671 target_node, dev_path,
7674 self.LogWarning("Can't copy data over for disk %d: %s",
7675 idx, result.fail_msg)
7676 errs.append(result.fail_msg)
7680 self.LogWarning("Some disks failed to copy, aborting")
7682 _RemoveDisks(self, instance, target_node=target_node)
7684 self.cfg.ReleaseDRBDMinors(instance.name)
7685 raise errors.OpExecError("Errors during disk copy: %s" %
7688 instance.primary_node = target_node
7689 self.cfg.Update(instance, feedback_fn)
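# The configuration is updated (target_node becomes the primary) before
# the old disks are removed, so an interruption between these steps
# leaves a usable instance on the new node, at worst with leftover disks
# on the old one.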
7691 self.LogInfo("Removing the disks on the original node")
7692 _RemoveDisks(self, instance, target_node=source_node)
7694 # Only start the instance if it's marked as up
7695 if instance.admin_state == constants.ADMINST_UP:
7696 self.LogInfo("Starting instance %s on node %s",
7697 instance.name, target_node)
7699 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7700 ignore_secondaries=True)
7702 _ShutdownInstanceDisks(self, instance)
7703 raise errors.OpExecError("Can't activate the instance's disks")
7705 result = self.rpc.call_instance_start(target_node,
7706 (instance, None, None), False)
7707 msg = result.fail_msg
7709 _ShutdownInstanceDisks(self, instance)
7710 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7711 (instance.name, target_node, msg))
7714 class LUNodeMigrate(LogicalUnit):
7715 """Migrate all instances from a node.
7718 HPATH = "node-migrate"
7719 HTYPE = constants.HTYPE_NODE
7722 def CheckArguments(self):
7725 def ExpandNames(self):
7726 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7728 self.share_locks = _ShareAll()
7729 self.needed_locks = {
7730 locking.LEVEL_NODE: [self.op.node_name],
7733 def BuildHooksEnv(self):
7736 This runs on the master, the primary and all the secondaries.
7740 "NODE_NAME": self.op.node_name,
7741 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7744 def BuildHooksNodes(self):
7745 """Build hooks nodes.
7748 nl = [self.cfg.GetMasterNode()]
7751 def CheckPrereq(self):
7754 def Exec(self, feedback_fn):
7755 # Prepare jobs for migration instances
7756 allow_runtime_changes = self.op.allow_runtime_changes
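# One single-opcode job is built per primary instance on the node;
# submitting them as separate jobs lets each migration be scheduled and
# fail independently of the others.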
7758 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7761 iallocator=self.op.iallocator,
7762 target_node=self.op.target_node,
7763 allow_runtime_changes=allow_runtime_changes,
7764 ignore_ipolicy=self.op.ignore_ipolicy)]
7765 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7768 # TODO: Run iallocator in this opcode and pass correct placement options to
7769 # OpInstanceMigrate. Since other jobs can modify the cluster between
7770 # running the iallocator and the actual migration, a good consistency model
7771 # will have to be found.
7773 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7774 frozenset([self.op.node_name]))
7776 return ResultWithJobs(jobs)
7779 class TLMigrateInstance(Tasklet):
7780 """Tasklet class for instance migration.
7783 @ivar live: whether the migration will be done live or non-live;
7784 this variable is initialized only after CheckPrereq has run
7785 @type cleanup: boolean
7786 @ivar cleanup: Whether we clean up after a failed migration
7787 @type iallocator: string
7788 @ivar iallocator: The iallocator used to determine target_node
7789 @type target_node: string
7790 @ivar target_node: If given, the target_node to reallocate the instance to
7791 @type failover: boolean
7792 @ivar failover: Whether operation results in failover or migration
7793 @type fallback: boolean
7794 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7796 @type ignore_consistency: boolean
7797 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
7799 @type shutdown_timeout: int
7800 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7801 @type ignore_ipolicy: bool
7802 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7807 _MIGRATION_POLL_INTERVAL = 1 # seconds
7808 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
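# A minimal usage sketch (mirroring LUInstanceMigrate above): the owning
# LU creates the tasklet in ExpandNames and registers it, after which
# CheckPrereq() and Exec() below run on its behalf:
#
#   self._migrater = TLMigrateInstance(self, self.op.instance_name,
#                                      cleanup=self.op.cleanup)
#   self.tasklets = [self._migrater]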
7810 def __init__(self, lu, instance_name, cleanup=False,
7811 failover=False, fallback=False,
7812 ignore_consistency=False,
7813 allow_runtime_changes=True,
7814 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7815 ignore_ipolicy=False):
7816 """Initializes this class.
7819 Tasklet.__init__(self, lu)
7822 self.instance_name = instance_name
7823 self.cleanup = cleanup
7824 self.live = False # will be overridden later
7825 self.failover = failover
7826 self.fallback = fallback
7827 self.ignore_consistency = ignore_consistency
7828 self.shutdown_timeout = shutdown_timeout
7829 self.ignore_ipolicy = ignore_ipolicy
7830 self.allow_runtime_changes = allow_runtime_changes
7832 def CheckPrereq(self):
7833 """Check prerequisites.
7835 This checks that the instance is in the cluster.
7838 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7839 instance = self.cfg.GetInstanceInfo(instance_name)
7840 assert instance is not None
7841 self.instance = instance
7842 cluster = self.cfg.GetClusterInfo()
7844 if (not self.cleanup and
7845 instance.admin_state != constants.ADMINST_UP and
7846 not self.failover and self.fallback):
7847 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7848 " switching to failover")
7849 self.failover = True
7851 if instance.disk_template not in constants.DTS_MIRRORED:
7856 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7857 " %s" % (instance.disk_template, text),
7860 if instance.disk_template in constants.DTS_EXT_MIRROR:
7861 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7863 if self.lu.op.iallocator:
7864 self._RunAllocator()
7866 # We set self.target_node, as it is required by BuildHooksEnv
7868 self.target_node = self.lu.op.target_node
7870 # Check that the target node is correct in terms of instance policy
7871 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7872 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7873 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7874 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7875 ignore=self.ignore_ipolicy)
7877 # self.target_node is already populated, either directly or by the iallocator run
7879 target_node = self.target_node
7880 if self.target_node == instance.primary_node:
7881 raise errors.OpPrereqError("Cannot migrate instance %s"
7882 " to its primary (%s)" %
7883 (instance.name, instance.primary_node),
errors.ECODE_STATE)
7885 if len(self.lu.tasklets) == 1:
7886 # It is safe to release locks only when we're the only tasklet
7888 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7889 keep=[instance.primary_node, self.target_node])
7892 secondary_nodes = instance.secondary_nodes
7893 if not secondary_nodes:
7894 raise errors.ConfigurationError("No secondary node but using"
7895 " %s disk template" %
7896 instance.disk_template)
7897 target_node = secondary_nodes[0]
7898 if self.lu.op.iallocator or (self.lu.op.target_node and
7899 self.lu.op.target_node != target_node):
7901 text = "failed over"
7904 raise errors.OpPrereqError("Instances with disk template %s cannot"
7905 " be %s to arbitrary nodes"
7906 " (neither an iallocator nor a target"
7907 " node can be passed)" %
7908 (instance.disk_template, text),
7910 nodeinfo = self.cfg.GetNodeInfo(target_node)
7911 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7912 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7913 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7914 ignore=self.ignore_ipolicy)
7916 i_be = cluster.FillBE(instance)
7918 # check memory requirements on the secondary node
7919 if (not self.cleanup and
7920 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7921 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7922 "migrating instance %s" %
7924 i_be[constants.BE_MINMEM],
7925 instance.hypervisor)
7927 self.lu.LogInfo("Not checking memory on the secondary node as"
7928 " instance will not be started")
7930 # check if failover must be forced instead of migration
7931 if (not self.cleanup and not self.failover and
7932 i_be[constants.BE_ALWAYS_FAILOVER]):
7934 self.lu.LogInfo("Instance configured to always failover; fallback"
7936 self.failover = True
7938 raise errors.OpPrereqError("This instance has been configured to"
7939 " always failover, please allow failover",
7942 # check bridge existence
7943 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7945 if not self.cleanup:
7946 _CheckNodeNotDrained(self.lu, target_node)
7947 if not self.failover:
7948 result = self.rpc.call_instance_migratable(instance.primary_node,
7950 if result.fail_msg and self.fallback:
7951 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7953 self.failover = True
7955 result.Raise("Can't migrate, please use failover",
7956 prereq=True, ecode=errors.ECODE_STATE)
7958 assert not (self.failover and self.cleanup)
7960 if not self.failover:
7961 if self.lu.op.live is not None and self.lu.op.mode is not None:
7962 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7963 " parameters are accepted",
7965 if self.lu.op.live is not None:
7967 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7969 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7970 # reset the 'live' parameter to None so that repeated
7971 # invocations of CheckPrereq do not raise an exception
7972 self.lu.op.live = None
7973 elif self.lu.op.mode is None:
7974 # read the default value from the hypervisor
7975 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7976 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7978 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
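# To summarize the mode selection above: an explicit "live" flag wins,
# then an explicit "mode" parameter, and finally the hypervisor's
# HV_MIGRATION_MODE default; self.live is True only for
# HT_MIGRATION_LIVE.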
7980 # Failover is never live
7983 if not (self.failover or self.cleanup):
7984 remote_info = self.rpc.call_instance_info(instance.primary_node,
7986 instance.hypervisor)
7987 remote_info.Raise("Error checking instance on node %s" %
7988 instance.primary_node)
7989 instance_running = bool(remote_info.payload)
7990 if instance_running:
7991 self.current_mem = int(remote_info.payload["memory"])
7993 def _RunAllocator(self):
7994 """Run the allocator based on input opcode.
7997 # FIXME: add a self.ignore_ipolicy option
7998 ial = IAllocator(self.cfg, self.rpc,
7999 mode=constants.IALLOCATOR_MODE_RELOC,
8000 name=self.instance_name,
8001 # TODO See why hail breaks with a single node below
8002 relocate_from=[self.instance.primary_node,
8003 self.instance.primary_node],
8006 ial.Run(self.lu.op.iallocator)
8009 raise errors.OpPrereqError("Can't compute nodes using"
8010 " iallocator '%s': %s" %
8011 (self.lu.op.iallocator, ial.info),
8013 if len(ial.result) != ial.required_nodes:
8014 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8015 " of nodes (%s), required %s" %
8016 (self.lu.op.iallocator, len(ial.result),
8017 ial.required_nodes), errors.ECODE_FAULT)
8018 self.target_node = ial.result[0]
8019 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8020 self.instance_name, self.lu.op.iallocator,
8021 utils.CommaJoin(ial.result))
8023 def _WaitUntilSync(self):
8024 """Poll with custom rpc for disk sync.
8026 This uses our own step-based rpc call.
8029 self.feedback_fn("* wait until resync is done")
8033 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8035 self.instance.disks)
8037 for node, nres in result.items():
8038 nres.Raise("Cannot resync disks on node %s" % node)
8039 node_done, node_percent = nres.payload
8040 all_done = all_done and node_done
8041 if node_percent is not None:
8042 min_percent = min(min_percent, node_percent)
8044 if min_percent < 100:
8045 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8048 def _EnsureSecondary(self, node):
8049 """Demote a node to secondary.
8052 self.feedback_fn("* switching node %s to secondary mode" % node)
8054 for dev in self.instance.disks:
8055 self.cfg.SetDiskID(dev, node)
8057 result = self.rpc.call_blockdev_close(node, self.instance.name,
8058 self.instance.disks)
8059 result.Raise("Cannot change disk to secondary on node %s" % node)
8061 def _GoStandalone(self):
8062 """Disconnect from the network.
8065 self.feedback_fn("* changing into standalone mode")
8066 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8067 self.instance.disks)
8068 for node, nres in result.items():
8069 nres.Raise("Cannot disconnect disks node %s" % node)
8071 def _GoReconnect(self, multimaster):
8072 """Reconnect to the network.
8078 msg = "single-master"
8079 self.feedback_fn("* changing disks into %s mode" % msg)
8080 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8081 self.instance.disks,
8082 self.instance.name, multimaster)
8083 for node, nres in result.items():
8084 nres.Raise("Cannot change disks config on node %s" % node)
8086 def _ExecCleanup(self):
8087 """Try to cleanup after a failed migration.
8089 The cleanup is done by:
8090 - check that the instance is running only on one node
8091 (and update the config if needed)
8092 - change disks on its secondary node to secondary
8093 - wait until disks are fully synchronized
8094 - disconnect from the network
8095 - change disks into single-master mode
8096 - wait again until disks are fully synchronized
8099 instance = self.instance
8100 target_node = self.target_node
8101 source_node = self.source_node
8103 # check running on only one node
8104 self.feedback_fn("* checking where the instance actually runs"
8105 " (if this hangs, the hypervisor might be in"
8107 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8108 for node, result in ins_l.items():
8109 result.Raise("Can't contact node %s" % node)
8111 runningon_source = instance.name in ins_l[source_node].payload
8112 runningon_target = instance.name in ins_l[target_node].payload
8114 if runningon_source and runningon_target:
8115 raise errors.OpExecError("Instance seems to be running on two nodes,"
8116 " or the hypervisor is confused; you will have"
8117 " to ensure manually that it runs only on one"
8118 " and restart this operation")
8120 if not (runningon_source or runningon_target):
8121 raise errors.OpExecError("Instance does not seem to be running at all;"
8122 " in this case it's safer to repair by"
8123 " running 'gnt-instance stop' to ensure disk"
8124 " shutdown, and then restarting it")
8126 if runningon_target:
8127 # the migration has actually succeeded, we need to update the config
8128 self.feedback_fn("* instance running on secondary node (%s),"
8129 " updating config" % target_node)
8130 instance.primary_node = target_node
8131 self.cfg.Update(instance, self.feedback_fn)
8132 demoted_node = source_node
8134 self.feedback_fn("* instance confirmed to be running on its"
8135 " primary node (%s)" % source_node)
8136 demoted_node = target_node
8138 if instance.disk_template in constants.DTS_INT_MIRROR:
8139 self._EnsureSecondary(demoted_node)
8141 self._WaitUntilSync()
8142 except errors.OpExecError:
8143 # we ignore errors here, since if the device is standalone, it
8144 # won't be able to sync
8146 self._GoStandalone()
8147 self._GoReconnect(False)
8148 self._WaitUntilSync()
8150 self.feedback_fn("* done")
8152 def _RevertDiskStatus(self):
8153 """Try to revert the disk status after a failed migration.
8156 target_node = self.target_node
8157 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8161 self._EnsureSecondary(target_node)
8162 self._GoStandalone()
8163 self._GoReconnect(False)
8164 self._WaitUntilSync()
8165 except errors.OpExecError, err:
8166 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8167 " please try to recover the instance manually;"
8168 " error '%s'" % str(err))
8170 def _AbortMigration(self):
8171 """Call the hypervisor code to abort a started migration.
8174 instance = self.instance
8175 target_node = self.target_node
8176 source_node = self.source_node
8177 migration_info = self.migration_info
8179 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8183 abort_msg = abort_result.fail_msg
8185 logging.error("Aborting migration failed on target node %s: %s",
8186 target_node, abort_msg)
8187 # Don't raise an exception here, as we still have to try to revert the
8188 # disk status, even if this step failed.
8190 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8191 instance, False, self.live)
8192 abort_msg = abort_result.fail_msg
8194 logging.error("Aborting migration failed on source node %s: %s",
8195 source_node, abort_msg)
8197 def _ExecMigration(self):
8198 """Migrate an instance.
8200 The migrate is done by:
8201 - change the disks into dual-master mode
8202 - wait until disks are fully synchronized again
8203 - migrate the instance
8204 - change disks on the new secondary node (the old primary) to secondary
8205 - wait until disks are fully synchronized
8206 - change disks into single-master mode
8209 instance = self.instance
8210 target_node = self.target_node
8211 source_node = self.source_node
8213 # Check for hypervisor version mismatch and warn the user.
8214 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8215 None, [self.instance.hypervisor])
8216 for ninfo in nodeinfo.values():
8217 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8219 (_, _, (src_info, )) = nodeinfo[source_node].payload
8220 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8222 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8223 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8224 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8225 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8226 if src_version != dst_version:
8227 self.feedback_fn("* warning: hypervisor version mismatch between"
8228 " source (%s) and target (%s) node" %
8229 (src_version, dst_version))
8231 self.feedback_fn("* checking disk consistency between source and target")
8232 for (idx, dev) in enumerate(instance.disks):
8233 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8234 raise errors.OpExecError("Disk %s is degraded or not fully"
8235 " synchronized on target node,"
8236 " aborting migration" % idx)
8238 if self.current_mem > self.tgt_free_mem:
8239 if not self.allow_runtime_changes:
8240 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8241 " free memory to fit instance %s on target"
8242 " node %s (have %dMB, need %dMB)" %
8243 (instance.name, target_node,
8244 self.tgt_free_mem, self.current_mem))
8245 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8246 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8249 rpcres.Raise("Cannot modify instance runtime memory")
8251 # First get the migration information from the remote node
8252 result = self.rpc.call_migration_info(source_node, instance)
8253 msg = result.fail_msg
8255 log_err = ("Failed fetching source migration information from %s: %s" %
8257 logging.error(log_err)
8258 raise errors.OpExecError(log_err)
8260 self.migration_info = migration_info = result.payload
8262 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8263 # Then switch the disks to master/master mode
8264 self._EnsureSecondary(target_node)
8265 self._GoStandalone()
8266 self._GoReconnect(True)
8267 self._WaitUntilSync()
8269 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8270 result = self.rpc.call_accept_instance(target_node,
8273 self.nodes_ip[target_node])
8275 msg = result.fail_msg
8277 logging.error("Instance pre-migration failed, trying to revert"
8278 " disk status: %s", msg)
8279 self.feedback_fn("Pre-migration failed, aborting")
8280 self._AbortMigration()
8281 self._RevertDiskStatus()
8282 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8283 (instance.name, msg))
8285 self.feedback_fn("* migrating instance to %s" % target_node)
8286 result = self.rpc.call_instance_migrate(source_node, instance,
8287 self.nodes_ip[target_node],
8289 msg = result.fail_msg
8291 logging.error("Instance migration failed, trying to revert"
8292 " disk status: %s", msg)
8293 self.feedback_fn("Migration failed, aborting")
8294 self._AbortMigration()
8295 self._RevertDiskStatus()
8296 raise errors.OpExecError("Could not migrate instance %s: %s" %
8297 (instance.name, msg))
8299 self.feedback_fn("* starting memory transfer")
8300 last_feedback = time.time()
8302 result = self.rpc.call_instance_get_migration_status(source_node,
8304 msg = result.fail_msg
8305 ms = result.payload # MigrationStatus instance
8306 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8307 logging.error("Instance migration failed, trying to revert"
8308 " disk status: %s", msg)
8309 self.feedback_fn("Migration failed, aborting")
8310 self._AbortMigration()
8311 self._RevertDiskStatus()
8312 raise errors.OpExecError("Could not migrate instance %s: %s" %
8313 (instance.name, msg))
8315 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8316 self.feedback_fn("* memory transfer complete")
8319 if (utils.TimeoutExpired(last_feedback,
8320 self._MIGRATION_FEEDBACK_INTERVAL) and
8321 ms.transferred_ram is not None):
8322 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8323 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8324 last_feedback = time.time()
8326 time.sleep(self._MIGRATION_POLL_INTERVAL)
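# Polling cadence: the migration status is queried every
# _MIGRATION_POLL_INTERVAL (1 s), while progress is only reported every
# _MIGRATION_FEEDBACK_INTERVAL (10 s) so the job log is not flooded.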
8328 result = self.rpc.call_instance_finalize_migration_src(source_node,
8332 msg = result.fail_msg
8334 logging.error("Instance migration succeeded, but finalization failed"
8335 " on the source node: %s", msg)
8336 raise errors.OpExecError("Could not finalize instance migration: %s" %
8339 instance.primary_node = target_node
8341 # distribute new instance config to the other nodes
8342 self.cfg.Update(instance, self.feedback_fn)
8344 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8348 msg = result.fail_msg
8350 logging.error("Instance migration succeeded, but finalization failed"
8351 " on the target node: %s", msg)
8352 raise errors.OpExecError("Could not finalize instance migration: %s" %
8355 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8356 self._EnsureSecondary(source_node)
8357 self._WaitUntilSync()
8358 self._GoStandalone()
8359 self._GoReconnect(False)
8360 self._WaitUntilSync()
8362 # If the instance's disk template is `rbd' and there was a successful
8363 # migration, unmap the device from the source node.
8364 if self.instance.disk_template == constants.DT_RBD:
8365 disks = _ExpandCheckDisks(instance, instance.disks)
8366 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8368 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8369 msg = result.fail_msg
8371 logging.error("Migration was successful, but couldn't unmap the"
8372 " block device %s on source node %s: %s",
8373 disk.iv_name, source_node, msg)
8374 logging.error("You need to unmap the device %s manually on %s",
8375 disk.iv_name, source_node)
8377 self.feedback_fn("* done")
8379 def _ExecFailover(self):
8380 """Failover an instance.
8382 The failover is done by shutting it down on its present node and
8383 starting it on the secondary.
8386 instance = self.instance
8387 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8389 source_node = instance.primary_node
8390 target_node = self.target_node
8392 if instance.admin_state == constants.ADMINST_UP:
8393 self.feedback_fn("* checking disk consistency between source and target")
8394 for (idx, dev) in enumerate(instance.disks):
8395 # for drbd, these are drbd over lvm
8396 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8397 if primary_node.offline:
8398 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8400 (primary_node.name, idx, target_node))
8401 elif not self.ignore_consistency:
8402 raise errors.OpExecError("Disk %s is degraded on target node,"
8403 " aborting failover" % idx)
8405 self.feedback_fn("* not checking disk consistency as instance is not"
8408 self.feedback_fn("* shutting down instance on source node")
8409 logging.info("Shutting down instance %s on node %s",
8410 instance.name, source_node)
8412 result = self.rpc.call_instance_shutdown(source_node, instance,
8413 self.shutdown_timeout)
8414 msg = result.fail_msg
8416 if self.ignore_consistency or primary_node.offline:
8417 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8418 " proceeding anyway; please make sure node"
8419 " %s is down; error details: %s",
8420 instance.name, source_node, source_node, msg)
8422 raise errors.OpExecError("Could not shutdown instance %s on"
8424 (instance.name, source_node, msg))
8426 self.feedback_fn("* deactivating the instance's disks on source node")
8427 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8428 raise errors.OpExecError("Can't shut down the instance's disks")
8430 instance.primary_node = target_node
8431 # distribute new instance config to the other nodes
8432 self.cfg.Update(instance, self.feedback_fn)
8434 # Only start the instance if it's marked as up
8435 if instance.admin_state == constants.ADMINST_UP:
8436 self.feedback_fn("* activating the instance's disks on target node %s" %
8438 logging.info("Starting instance %s on node %s",
8439 instance.name, target_node)
8441 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8442 ignore_secondaries=True)
8444 _ShutdownInstanceDisks(self.lu, instance)
8445 raise errors.OpExecError("Can't activate the instance's disks")
8447 self.feedback_fn("* starting the instance on the target node %s" %
8449 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8451 msg = result.fail_msg
8453 _ShutdownInstanceDisks(self.lu, instance)
8454 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8455 (instance.name, target_node, msg))
8457 def Exec(self, feedback_fn):
8458 """Perform the migration.
8461 self.feedback_fn = feedback_fn
8462 self.source_node = self.instance.primary_node
8464 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8465 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8466 self.target_node = self.instance.secondary_nodes[0]
8467 # Otherwise self.target_node has been populated either
8468 # directly, or through an iallocator.
8470 self.all_nodes = [self.source_node, self.target_node]
8471 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8472 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8475 feedback_fn("Failover instance %s" % self.instance.name)
8476 self._ExecFailover()
8478 feedback_fn("Migrating instance %s" % self.instance.name)
8481 return self._ExecCleanup()
8483 return self._ExecMigration()
8486 def _CreateBlockDev(lu, node, instance, device, force_create,
8488 """Create a tree of block devices on a given node.
8490 If this device type has to be created on secondaries, create it and
all its children.
8493 If not, just recurse to children keeping the same 'force' value.
8495 @param lu: the lu on whose behalf we execute
8496 @param node: the node on which to create the device
8497 @type instance: L{objects.Instance}
8498 @param instance: the instance which owns the device
8499 @type device: L{objects.Disk}
8500 @param device: the device to create
8501 @type force_create: boolean
8502 @param force_create: whether to force creation of this device; this
8503 will be changed to True whenever we find a device whose
8504 CreateOnSecondary() method returns True
8505 @param info: the extra 'metadata' we should attach to the device
8506 (this will be represented as a LVM tag)
8507 @type force_open: boolean
8508 @param force_open: this parameter will be passed to the
8509 L{backend.BlockdevCreate} function where it specifies
8510 whether we run on primary or not, and it affects both
8511 the child assembly and the device's own Open() execution
8514 if device.CreateOnSecondary():
8518 for child in device.children:
8519 _CreateBlockDev(lu, node, instance, child, force_create,
8522 if not force_create:
8525 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8528 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8529 """Create a single block device on a given node.
8531 This will not recurse over children of the device, so they must be
created in advance.
8534 @param lu: the lu on whose behalf we execute
8535 @param node: the node on which to create the device
8536 @type instance: L{objects.Instance}
8537 @param instance: the instance which owns the device
8538 @type device: L{objects.Disk}
8539 @param device: the device to create
8540 @param info: the extra 'metadata' we should attach to the device
8541 (this will be represented as a LVM tag)
8542 @type force_open: boolean
8543 @param force_open: this parameter will be passed to the
8544 L{backend.BlockdevCreate} function where it specifies
8545 whether we run on primary or not, and it affects both
8546 the child assembly and the device's own Open() execution
8549 lu.cfg.SetDiskID(device, node)
8550 result = lu.rpc.call_blockdev_create(node, device, device.size,
8551 instance.name, force_open, info)
8552 result.Raise("Can't create block device %s on"
8553 " node %s for instance %s" % (device, node, instance.name))
8554 if device.physical_id is None:
8555 device.physical_id = result.payload
8558 def _GenerateUniqueNames(lu, exts):
8559 """Generate a suitable LV name.
8561 This will generate a logical volume name for the given instance.
8566 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8567 results.append("%s%s" % (new_id, val))
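# Example: called with exts [".disk0", ".disk1"], this returns names of
# the form "<unique-id>.disk0", "<unique-id>.disk1", where each unique
# id is allocated from the cluster configuration (a UUID-style string).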
8571 def _ComputeLDParams(disk_template, disk_params):
8572 """Computes Logical Disk parameters from Disk Template parameters.
8574 @type disk_template: string
8575 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8576 @type disk_params: dict
8577 @param disk_params: disk template parameters; dict(template_name -> parameters)
8579 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8580 contains the LD parameters of the node. The tree is flattened in-order.
8583 if disk_template not in constants.DISK_TEMPLATES:
8584 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8587 dt_params = disk_params[disk_template]
8588 if disk_template == constants.DT_DRBD8:
8590 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8591 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8592 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8593 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8594 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8595 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8596 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8597 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8598 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8599 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8600 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8601 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8605 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8608 result.append(drbd_params)
8612 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8615 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8617 result.append(data_params)
8621 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8624 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8626 result.append(meta_params)
8628 elif (disk_template == constants.DT_FILE or
8629 disk_template == constants.DT_SHARED_FILE):
8630 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8632 elif disk_template == constants.DT_PLAIN:
8634 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8637 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8639 result.append(params)
8641 elif disk_template == constants.DT_BLOCK:
8642 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8644 elif disk_template == constants.DT_RBD:
8646 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8649 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8651 result.append(params)
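# Summary of the return shape: DT_PLAIN yields a single dict (the LV
# defaults overlaid with the configured stripe count); DT_DRBD8 yields
# three dicts (DRBD device, data LV, meta LV), matching the in-order
# flattening of the disk hierarchy described in the docstring.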
8656 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8657 iv_name, p_minor, s_minor, drbd_params, data_params,
8659 """Generate a drbd8 device complete with its children.
8662 assert len(vgnames) == len(names) == 2
8663 port = lu.cfg.AllocatePort()
8664 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8666 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8667 logical_id=(vgnames[0], names[0]),
8669 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8670 logical_id=(vgnames[1], names[1]),
8672 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8673 logical_id=(primary, secondary, port,
8676 children=[dev_data, dev_meta],
8677 iv_name=iv_name, params=drbd_params)
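# The returned object is a three-node tree: the DRBD8 device on top
# and, as its children, a data LV of the requested size plus a small
# metadata LV of DRBD_META_SIZE.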
8681 _DISK_TEMPLATE_NAME_PREFIX = {
8682 constants.DT_PLAIN: "",
8683 constants.DT_RBD: ".rbd",
8687 _DISK_TEMPLATE_DEVICE_TYPE = {
8688 constants.DT_PLAIN: constants.LD_LV,
8689 constants.DT_FILE: constants.LD_FILE,
8690 constants.DT_SHARED_FILE: constants.LD_FILE,
8691 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8692 constants.DT_RBD: constants.LD_RBD,
8696 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8697 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8698 feedback_fn, disk_params,
8699 _req_file_storage=opcodes.RequireFileStorage,
8700 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8701 """Generate the entire disk layout for a given template type.
8704 # TODO: compute space requirements
8706 vgname = lu.cfg.GetVGName()
8707 disk_count = len(disk_info)
8709 ld_params = _ComputeLDParams(template_name, disk_params)
8711 if template_name == constants.DT_DISKLESS:
8713 elif template_name == constants.DT_DRBD8:
8714 drbd_params, data_params, meta_params = ld_params
8715 if len(secondary_nodes) != 1:
8716 raise errors.ProgrammerError("Wrong template configuration")
8717 remote_node = secondary_nodes[0]
8718 minors = lu.cfg.AllocateDRBDMinor(
8719 [primary_node, remote_node] * len(disk_info), instance_name)
8722 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8723 for i in range(disk_count)]):
8724 names.append(lv_prefix + "_data")
8725 names.append(lv_prefix + "_meta")
8726 for idx, disk in enumerate(disk_info):
8727 disk_index = idx + base_index
8728 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8729 data_vg = disk.get(constants.IDISK_VG, vgname)
8730 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8731 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8732 disk[constants.IDISK_SIZE],
8734 names[idx * 2:idx * 2 + 2],
8735 "disk/%d" % disk_index,
8736 minors[idx * 2], minors[idx * 2 + 1],
8737 drbd_params, data_params, meta_params)
8738 disk_dev.mode = disk[constants.IDISK_MODE]
8739 disks.append(disk_dev)
8742 raise errors.ProgrammerError("Wrong template configuration")
8744 if template_name == constants.DT_FILE:
8746 elif template_name == constants.DT_SHARED_FILE:
8747 _req_shr_file_storage()
8749 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8750 if name_prefix is None:
8753 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8754 (name_prefix, base_index + i)
8755 for i in range(disk_count)])
8757 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8759 if template_name == constants.DT_PLAIN:
8760 def logical_id_fn(idx, _, disk):
8761 vg = disk.get(constants.IDISK_VG, vgname)
8762 return (vg, names[idx])
8763 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8765 lambda _, disk_index, disk: (file_driver,
8766 "%s/disk%d" % (file_storage_dir,
8768 elif template_name == constants.DT_BLOCK:
8770 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8771 disk[constants.IDISK_ADOPT])
8772 elif template_name == constants.DT_RBD:
8773 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8775 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8777 for idx, disk in enumerate(disk_info):
8778 disk_index = idx + base_index
8779 size = disk[constants.IDISK_SIZE]
8780 feedback_fn("* disk %s, size %s" %
8781 (disk_index, utils.FormatUnit(size, "h")))
8782 disks.append(objects.Disk(dev_type=dev_type, size=size,
8783 logical_id=logical_id_fn(idx, disk_index, disk),
8784 iv_name="disk/%d" % disk_index,
8785 mode=disk[constants.IDISK_MODE],
8786 params=ld_params[0]))
8791 def _GetInstanceInfoText(instance):
8792 """Compute that text that should be added to the disk's metadata.
8795 return "originstname+%s" % instance.name
8798 def _CalcEta(time_taken, written, total_size):
8799 """Calculates the ETA based on size written and total size.
8801 @param time_taken: The time taken so far
8802 @param written: amount written so far
8803 @param total_size: The total size of data to be written
8804 @return: The remaining time in seconds
8807 avg_time = time_taken / float(written)
8808 return (total_size - written) * avg_time
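# Worked example: after 30 s and 256 MiB written out of 1024 MiB,
# _CalcEta(30.0, 256, 1024) returns (1024 - 256) * (30.0 / 256) == 90.0,
# i.e. roughly a minute and a half remaining.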
8811 def _WipeDisks(lu, instance):
8812 """Wipes instance disks.
8814 @type lu: L{LogicalUnit}
8815 @param lu: the logical unit on whose behalf we execute
8816 @type instance: L{objects.Instance}
8817 @param instance: the instance whose disks we should wipe
8818 @return: the success of the wipe
8821 node = instance.primary_node
8823 for device in instance.disks:
8824 lu.cfg.SetDiskID(device, node)
8826 logging.info("Pause sync of instance %s disks", instance.name)
8827 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8829 for idx, success in enumerate(result.payload):
8831 logging.warn("pause-sync of instance %s for disks %d failed",
8835 for idx, device in enumerate(instance.disks):
8836 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8837 # MAX_WIPE_CHUNK at max
8838 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8839 constants.MIN_WIPE_CHUNK_PERCENT)
8840 # we _must_ make this an int, otherwise rounding errors will
# creep into the offset arithmetic below
8842 wipe_chunk_size = int(wipe_chunk_size)
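# Example, assuming the usual defaults of MAX_WIPE_CHUNK = 1024 (MiB)
# and MIN_WIPE_CHUNK_PERCENT = 10: a 20480 MiB disk gives
# min(1024, 20480 / 100.0 * 10) == 1024, so it is wiped in 1 GiB chunks.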
8844 lu.LogInfo("* Wiping disk %d", idx)
8845 logging.info("Wiping disk %d for instance %s, node %s using"
8846 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8851 start_time = time.time()
8853 while offset < size:
8854 wipe_size = min(wipe_chunk_size, size - offset)
8855 logging.debug("Wiping disk %d, offset %s, chunk %s",
8856 idx, offset, wipe_size)
8857 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8858 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8859 (idx, offset, wipe_size))
8862 if now - last_output >= 60:
8863 eta = _CalcEta(now - start_time, offset, size)
8864 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8865 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8868 logging.info("Resume sync of instance %s disks", instance.name)
8870 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8872 for idx, success in enumerate(result.payload):
8874 lu.LogWarning("Resume sync of disk %d failed, please have a"
8875 " look at the status and troubleshoot the issue", idx)
8876 logging.warn("resume-sync of instance %s for disks %d failed",
8880 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8881 """Create all disks for an instance.
8883 This abstracts away some work from AddInstance.
8885 @type lu: L{LogicalUnit}
8886 @param lu: the logical unit on whose behalf we execute
8887 @type instance: L{objects.Instance}
8888 @param instance: the instance whose disks we should create
8890 @param to_skip: list of indices to skip
8891 @type target_node: string
8892 @param target_node: if passed, overrides the target node for creation
8894 @return: the success of the creation
8897 info = _GetInstanceInfoText(instance)
8898 if target_node is None:
8899 pnode = instance.primary_node
8900 all_nodes = instance.all_nodes
8905 if instance.disk_template in constants.DTS_FILEBASED:
8906 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8907 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8909 result.Raise("Failed to create directory '%s' on"
8910 " node %s" % (file_storage_dir, pnode))
8912 # Note: this needs to be kept in sync with adding of disks in
8913 # LUInstanceSetParams
8914 for idx, device in enumerate(instance.disks):
8915 if to_skip and idx in to_skip:
8917 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8919 for node in all_nodes:
8920 f_create = node == pnode
8921 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8924 def _RemoveDisks(lu, instance, target_node=None):
8925 """Remove all disks for an instance.
8927 This abstracts away some work from `AddInstance()` and
8928 `RemoveInstance()`. Note that in case some of the devices couldn't
8929 be removed, the removal will continue with the other ones (compare
8930 with `_CreateDisks()`).
8932 @type lu: L{LogicalUnit}
8933 @param lu: the logical unit on whose behalf we execute
8934 @type instance: L{objects.Instance}
8935 @param instance: the instance whose disks we should remove
8936 @type target_node: string
8937 @param target_node: used to override the node on which to remove the disks
8939 @return: the success of the removal
8942 logging.info("Removing block devices for instance %s", instance.name)
8945 for (idx, device) in enumerate(instance.disks):
8947 edata = [(target_node, device)]
8949 edata = device.ComputeNodeTree(instance.primary_node)
8950 for node, disk in edata:
8951 lu.cfg.SetDiskID(disk, node)
8952 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8954 lu.LogWarning("Could not remove disk %s on node %s,"
8955 " continuing anyway: %s", idx, node, msg)
8958 # if this is a DRBD disk, return its port to the pool
8959 if device.dev_type in constants.LDS_DRBD:
8960 tcp_port = device.logical_id[2]
8961 lu.cfg.AddTcpUdpPort(tcp_port)
8963 if instance.disk_template == constants.DT_FILE:
8964 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8968 tgt = instance.primary_node
8969 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8971 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8972 file_storage_dir, instance.primary_node, result.fail_msg)
8978 def _ComputeDiskSizePerVG(disk_template, disks):
8979 """Compute disk size requirements in the volume group
8982 def _compute(disks, payload):
8983 """Universal algorithm.
8988 vgs[disk[constants.IDISK_VG]] = \
8989 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8993 # Required free disk space as a function of disk and swap space
8995 constants.DT_DISKLESS: {},
8996 constants.DT_PLAIN: _compute(disks, 0),
8997 # 128 MB are added for drbd metadata for each disk
8998 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8999 constants.DT_FILE: {},
9000 constants.DT_SHARED_FILE: {},
9003 if disk_template not in req_size_dict:
9004 raise errors.ProgrammerError("Disk template '%s' size requirement"
9005 " is unknown" % disk_template)
9007 return req_size_dict[disk_template]
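# Worked example (sketch): for DT_DRBD8 and two disks of 1024 and
# 2048 MiB in volume group "xenvg", the result is
#   {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}
# since DRBD_META_SIZE (128 MiB) is added per disk, as noted above.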
9010 def _ComputeDiskSize(disk_template, disks):
9011 """Compute disk size requirements in the volume group
9014 # Required free disk space as a function of disk and swap space
9016 constants.DT_DISKLESS: None,
9017 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9018 # 128 MB are added for drbd metadata for each disk
9020 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9021 constants.DT_FILE: None,
9022 constants.DT_SHARED_FILE: 0,
9023 constants.DT_BLOCK: 0,
9024 constants.DT_RBD: 0,
9027 if disk_template not in req_size_dict:
9028 raise errors.ProgrammerError("Disk template '%s' size requirement"
9029 " is unknown" % disk_template)
9031 return req_size_dict[disk_template]
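# Worked example: the same two DRBD disks (1024 and 2048 MiB) yield a
# flat requirement of 1024 + 128 + 2048 + 128 == 3328 MiB, with no
# per-VG split.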
9034 def _FilterVmNodes(lu, nodenames):
9035 """Filters out non-vm_capable nodes from a list.
9037 @type lu: L{LogicalUnit}
9038 @param lu: the logical unit for which we check
9039 @type nodenames: list
9040 @param nodenames: the list of nodes on which we should check
9042 @return: the list of vm-capable nodes
9045 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9046 return [name for name in nodenames if name not in non_vm_nodes]
9049 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9050 """Hypervisor parameter validation.
9052 This function abstracts the hypervisor parameter validation to be
9053 used in both instance create and instance modify.
9055 @type lu: L{LogicalUnit}
9056 @param lu: the logical unit for which we check
9057 @type nodenames: list
9058 @param nodenames: the list of nodes on which we should check
9059 @type hvname: string
9060 @param hvname: the name of the hypervisor we should use
9061 @type hvparams: dict
9062 @param hvparams: the parameters which we need to check
9063 @raise errors.OpPrereqError: if the parameters are not valid
9066 nodenames = _FilterVmNodes(lu, nodenames)
9068 cluster = lu.cfg.GetClusterInfo()
9069 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9071 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9072 for node in nodenames:
9076 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9079 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9080 """OS parameters validation.
9082 @type lu: L{LogicalUnit}
9083 @param lu: the logical unit for which we check
9084 @type required: boolean
9085 @param required: whether the validation should fail if the OS is not
found
9087 @type nodenames: list
9088 @param nodenames: the list of nodes on which we should check
9089 @type osname: string
9090 @param osname: the name of the OS we should check
9091 @type osparams: dict
9092 @param osparams: the parameters which we need to check
9093 @raise errors.OpPrereqError: if the parameters are not valid
9096 nodenames = _FilterVmNodes(lu, nodenames)
9097 result = lu.rpc.call_os_validate(nodenames, required, osname,
9098 [constants.OS_VALIDATE_PARAMETERS],
9100 for node, nres in result.items():
9101 # we don't check for offline cases since this should be run only
9102 # against the master node and/or an instance's nodes
9103 nres.Raise("OS Parameters validation failed on node %s" % node)
9104 if not nres.payload:
9105 lu.LogInfo("OS %s not found on node %s, validation skipped",
9109 class LUInstanceCreate(LogicalUnit):
9110 """Create an instance.
9113 HPATH = "instance-add"
9114 HTYPE = constants.HTYPE_INSTANCE
9117 def CheckArguments(self):
9121 # do not require name_check to ease forward/backward compatibility
9123 if self.op.no_install and self.op.start:
9124 self.LogInfo("No-installation mode selected, disabling startup")
9125 self.op.start = False
9126 # validate/normalize the instance name
9127 self.op.instance_name = \
9128 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9130 if self.op.ip_check and not self.op.name_check:
9131 # TODO: make the ip check more flexible and not depend on the name check
9132 raise errors.OpPrereqError("Cannot do IP address check without a name"
9133 " check", errors.ECODE_INVAL)
9135 # check nics' parameter names
9136 for nic in self.op.nics:
9137 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9139 # check disks: parameter names and consistent adopt/no-adopt strategy
9140 has_adopt = has_no_adopt = False
9141 for disk in self.op.disks:
9142 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9143 if constants.IDISK_ADOPT in disk:
9147 if has_adopt and has_no_adopt:
9148 raise errors.OpPrereqError("Either all disks are adopted or none is",
9151 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9152 raise errors.OpPrereqError("Disk adoption is not supported for the"
9153 " '%s' disk template" %
9154 self.op.disk_template,
9156 if self.op.iallocator is not None:
9157 raise errors.OpPrereqError("Disk adoption not allowed with an"
9158 " iallocator script", errors.ECODE_INVAL)
9159 if self.op.mode == constants.INSTANCE_IMPORT:
9160 raise errors.OpPrereqError("Disk adoption not allowed for"
9161 " instance import", errors.ECODE_INVAL)
9163 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9164 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9165 " but no 'adopt' parameter given" %
9166 self.op.disk_template,
9169 self.adopt_disks = has_adopt
9171 # instance name verification
9172 if self.op.name_check:
9173 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9174 self.op.instance_name = self.hostname1.name
9175 # used in CheckPrereq for ip ping check
9176 self.check_ip = self.hostname1.ip
9178 self.check_ip = None
9180 # file storage checks
9181 if (self.op.file_driver and
9182 not self.op.file_driver in constants.FILE_DRIVER):
9183 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9184 self.op.file_driver, errors.ECODE_INVAL)
9186 if self.op.disk_template == constants.DT_FILE:
9187 opcodes.RequireFileStorage()
9188 elif self.op.disk_template == constants.DT_SHARED_FILE:
9189 opcodes.RequireSharedFileStorage()
9191 ### Node/iallocator related checks
9192 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9194 if self.op.pnode is not None:
9195 if self.op.disk_template in constants.DTS_INT_MIRROR:
9196 if self.op.snode is None:
9197 raise errors.OpPrereqError("The networked disk templates need"
9198 " a mirror node", errors.ECODE_INVAL)
9200 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9202 self.op.snode = None
9204 self._cds = _GetClusterDomainSecret()
9206 if self.op.mode == constants.INSTANCE_IMPORT:
9207 # On import force_variant must be True, because if we forced it at
9208 # initial install, our only chance when importing it back is that it
# works again
9210 self.op.force_variant = True
9212 if self.op.no_install:
9213 self.LogInfo("No-installation mode has no effect during import")
9215 elif self.op.mode == constants.INSTANCE_CREATE:
9216 if self.op.os_type is None:
9217 raise errors.OpPrereqError("No guest OS specified",
9219 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9220 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9221 " installation" % self.op.os_type,
9223 if self.op.disk_template is None:
9224 raise errors.OpPrereqError("No disk template specified",
9227 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9228 # Check handshake to ensure both clusters have the same domain secret
9229 src_handshake = self.op.source_handshake
9230 if not src_handshake:
9231 raise errors.OpPrereqError("Missing source handshake",
9234 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9237 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9240 # Load and check source CA
9241 self.source_x509_ca_pem = self.op.source_x509_ca
9242 if not self.source_x509_ca_pem:
9243 raise errors.OpPrereqError("Missing source X509 CA",
9247 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9249 except OpenSSL.crypto.Error, err:
9250 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9251 (err, ), errors.ECODE_INVAL)
9253 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9254 if errcode is not None:
9255 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9258 self.source_x509_ca = cert
9260 src_instance_name = self.op.source_instance_name
9261 if not src_instance_name:
9262 raise errors.OpPrereqError("Missing source instance name",
9265 self.source_instance_name = \
9266 netutils.GetHostname(name=src_instance_name).name
9269 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9270 self.op.mode, errors.ECODE_INVAL)
9272 def ExpandNames(self):
9273 """ExpandNames for CreateInstance.
9275 Figure out the right locks for instance creation.
9278 self.needed_locks = {}
9280 instance_name = self.op.instance_name
9281 # this is just a preventive check, but someone might still add this
9282 # instance in the meantime, and creation will fail at lock-add time
9283 if instance_name in self.cfg.GetInstanceList():
9284 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9285 instance_name, errors.ECODE_EXISTS)
9287 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9289 if self.op.iallocator:
9290 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9291 # specifying a group on instance creation and then selecting nodes from
# that group
9293 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9294 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9295 else:
9296 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9297 nodelist = [self.op.pnode]
9298 if self.op.snode is not None:
9299 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9300 nodelist.append(self.op.snode)
9301 self.needed_locks[locking.LEVEL_NODE] = nodelist
9302 # Lock resources of instance's primary and secondary nodes (copy to
9303 # prevent accidental modification)
9304 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9306 # in case of import lock the source node too
9307 if self.op.mode == constants.INSTANCE_IMPORT:
9308 src_node = self.op.src_node
9309 src_path = self.op.src_path
9311 if src_path is None:
9312 self.op.src_path = src_path = self.op.instance_name
9314 if src_node is None:
9315 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9316 self.op.src_node = None
9317 if os.path.isabs(src_path):
9318 raise errors.OpPrereqError("Importing an instance from a path"
9319 " requires a source node option",
9322 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9323 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9324 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9325 if not os.path.isabs(src_path):
9326 self.op.src_path = src_path = \
9327 utils.PathJoin(constants.EXPORT_DIR, src_path)
9329 def _RunAllocator(self):
9330 """Run the allocator based on input opcode.
9333 nics = [n.ToDict() for n in self.nics]
9334 ial = IAllocator(self.cfg, self.rpc,
9335 mode=constants.IALLOCATOR_MODE_ALLOC,
9336 name=self.op.instance_name,
9337 disk_template=self.op.disk_template,
9338 tags=self.op.tags,
9339 os=self.op.os_type,
9340 vcpus=self.be_full[constants.BE_VCPUS],
9341 memory=self.be_full[constants.BE_MAXMEM],
9342 disks=self.disks,
9343 nics=nics,
9344 hypervisor=self.op.hypervisor,
9345 )
9347 ial.Run(self.op.iallocator)
9349 if not ial.success:
9350 raise errors.OpPrereqError("Can't compute nodes using"
9351 " iallocator '%s': %s" %
9352 (self.op.iallocator, ial.info),
9353 errors.ECODE_NORES)
9354 if len(ial.result) != ial.required_nodes:
9355 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9356 " of nodes (%s), required %s" %
9357 (self.op.iallocator, len(ial.result),
9358 ial.required_nodes), errors.ECODE_FAULT)
9359 self.op.pnode = ial.result[0]
9360 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9361 self.op.instance_name, self.op.iallocator,
9362 utils.CommaJoin(ial.result))
9363 if ial.required_nodes == 2:
9364 self.op.snode = ial.result[1]
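# Illustrative only: for a two-node (DRBD) allocation, ial.result is a
# list of node names such as ["node1.example.com", "node2.example.com"];
# the first entry becomes the primary and the second the secondary.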
9366 def BuildHooksEnv(self):
9367 """Build hooks env.
9369 This runs on master, primary and secondary nodes of the instance.
9371 """
9372 env = {
9373 "ADD_MODE": self.op.mode,
9374 }
9375 if self.op.mode == constants.INSTANCE_IMPORT:
9376 env["SRC_NODE"] = self.op.src_node
9377 env["SRC_PATH"] = self.op.src_path
9378 env["SRC_IMAGES"] = self.src_images
9380 env.update(_BuildInstanceHookEnv(
9381 name=self.op.instance_name,
9382 primary_node=self.op.pnode,
9383 secondary_nodes=self.secondaries,
9384 status=self.op.start,
9385 os_type=self.op.os_type,
9386 minmem=self.be_full[constants.BE_MINMEM],
9387 maxmem=self.be_full[constants.BE_MAXMEM],
9388 vcpus=self.be_full[constants.BE_VCPUS],
9389 nics=_NICListToTuple(self, self.nics),
9390 disk_template=self.op.disk_template,
9391 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9392 for d in self.disks],
9393 bep=self.be_full,
9394 hvp=self.hv_full,
9395 hypervisor_name=self.op.hypervisor,
9396 tags=self.op.tags,
9397 ))
9399 return env
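# As a rough usage note (names illustrative): the keys built above are
# exposed to hook scripts as environment variables with a "GANETI_"
# prefix by the hooks runner, e.g. ADD_MODE becomes GANETI_ADD_MODE.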
9401 def BuildHooksNodes(self):
9402 """Build hooks nodes.
9405 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9408 def _ReadExportInfo(self):
9409 """Reads the export information from disk.
9411 It will override the opcode source node and path with the actual
9412 information, if these two were not specified before.
9414 @return: the export information
9416 """
9417 assert self.op.mode == constants.INSTANCE_IMPORT
9419 src_node = self.op.src_node
9420 src_path = self.op.src_path
9422 if src_node is None:
9423 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9424 exp_list = self.rpc.call_export_list(locked_nodes)
9425 found = False
9426 for node in exp_list:
9427 if exp_list[node].fail_msg:
9428 continue
9429 if src_path in exp_list[node].payload:
9430 found = True
9431 self.op.src_node = src_node = node
9432 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9433 src_path)
9434 break
9435 if not found:
9436 raise errors.OpPrereqError("No export found for relative path %s" %
9437 src_path, errors.ECODE_INVAL)
9439 _CheckNodeOnline(self, src_node)
9440 result = self.rpc.call_export_info(src_node, src_path)
9441 result.Raise("No export or invalid export found in dir %s" % src_path)
9443 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9444 if not export_info.has_section(constants.INISECT_EXP):
9445 raise errors.ProgrammerError("Corrupted export config",
9446 errors.ECODE_ENVIRON)
9448 ei_version = export_info.get(constants.INISECT_EXP, "version")
9449 if (int(ei_version) != constants.EXPORT_VERSION):
9450 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9451 (ei_version, constants.EXPORT_VERSION),
9452 errors.ECODE_ENVIRON)
9454 return export_info
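# (Editor's sketch of the data handled above; values are hypothetical,
# only the section/option names checked in this function are real.)
# The export info is a plain INI file along the lines of:
#   [export]
#   version = 0
#   os = debootstrap+default
#   [instance]
#   name = inst1.example.com
#   disk0_size = 10240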
9455 def _ReadExportParams(self, einfo):
9456 """Use export parameters as defaults.
9458 In case the opcode doesn't specify (as in override) some instance
9459 parameters, then try to use them from the export information, if
9460 that declares them.
9462 """
9463 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9465 if self.op.disk_template is None:
9466 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9467 self.op.disk_template = einfo.get(constants.INISECT_INS,
9468 "disk_template")
9469 if self.op.disk_template not in constants.DISK_TEMPLATES:
9470 raise errors.OpPrereqError("Disk template specified in configuration"
9471 " file is not one of the allowed values:"
9472 " %s" % " ".join(constants.DISK_TEMPLATES))
9474 raise errors.OpPrereqError("No disk template specified and the export"
9475 " is missing the disk_template information",
9478 if not self.op.disks:
9480 # TODO: import the disk iv_name too
9481 for idx in range(constants.MAX_DISKS):
9482 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9483 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9484 disks.append({constants.IDISK_SIZE: disk_sz})
9485 self.op.disks = disks
9486 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9487 raise errors.OpPrereqError("No disk info specified and the export"
9488 " is missing the disk information",
9491 if not self.op.nics:
9493 for idx in range(constants.MAX_NICS):
9494 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9495 ndict = {}
9496 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9497 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9498 ndict[name] = v
9499 nics.append(ndict)
9500 else:
9501 break
9502 self.op.nics = nics
9504 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9505 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9507 if (self.op.hypervisor is None and
9508 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9509 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9511 if einfo.has_section(constants.INISECT_HYP):
9512 # use the export parameters but do not override the ones
9513 # specified by the user
9514 for name, value in einfo.items(constants.INISECT_HYP):
9515 if name not in self.op.hvparams:
9516 self.op.hvparams[name] = value
9518 if einfo.has_section(constants.INISECT_BEP):
9519 # use the parameters, without overriding
9520 for name, value in einfo.items(constants.INISECT_BEP):
9521 if name not in self.op.beparams:
9522 self.op.beparams[name] = value
9523 # Compatibility for the old "memory" be param
9524 if name == constants.BE_MEMORY:
9525 if constants.BE_MAXMEM not in self.op.beparams:
9526 self.op.beparams[constants.BE_MAXMEM] = value
9527 if constants.BE_MINMEM not in self.op.beparams:
9528 self.op.beparams[constants.BE_MINMEM] = value
9529 else:
9530 # try to read the parameters old style, from the main section
9531 for name in constants.BES_PARAMETERS:
9532 if (name not in self.op.beparams and
9533 einfo.has_option(constants.INISECT_INS, name)):
9534 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9536 if einfo.has_section(constants.INISECT_OSP):
9537 # use the parameters, without overriding
9538 for name, value in einfo.items(constants.INISECT_OSP):
9539 if name not in self.op.osparams:
9540 self.op.osparams[name] = value
9542 def _RevertToDefaults(self, cluster):
9543 """Revert the instance parameters to the default values.
9547 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9548 for name in self.op.hvparams.keys():
9549 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9550 del self.op.hvparams[name]
9551 # beparams
9552 be_defs = cluster.SimpleFillBE({})
9553 for name in self.op.beparams.keys():
9554 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9555 del self.op.beparams[name]
9556 # nicparams
9557 nic_defs = cluster.SimpleFillNIC({})
9558 for nic in self.op.nics:
9559 for name in constants.NICS_PARAMETERS:
9560 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9561 del nic[name]
9562 # osparams
9563 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9564 for name in self.op.osparams.keys():
9565 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9566 del self.op.osparams[name]
9568 def _CalculateFileStorageDir(self):
9569 """Calculate final instance file storage dir.
9572 # file storage dir calculation/check
9573 self.instance_file_storage_dir = None
9574 if self.op.disk_template in constants.DTS_FILEBASED:
9575 # build the full file storage dir path
9576 joinargs = []
9578 if self.op.disk_template == constants.DT_SHARED_FILE:
9579 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9580 else:
9581 get_fsd_fn = self.cfg.GetFileStorageDir
9583 cfg_storagedir = get_fsd_fn()
9584 if not cfg_storagedir:
9585 raise errors.OpPrereqError("Cluster file storage dir not defined")
9586 joinargs.append(cfg_storagedir)
9588 if self.op.file_storage_dir is not None:
9589 joinargs.append(self.op.file_storage_dir)
9591 joinargs.append(self.op.instance_name)
9593 # pylint: disable=W0142
9594 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
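# Illustrative example (all values configuration-dependent): with a
# cluster storage dir of "/srv/ganeti/file-storage", an opcode
# file_storage_dir of "mysubdir" and instance "inst1.example.com", the
# result is "/srv/ganeti/file-storage/mysubdir/inst1.example.com".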
9596 def CheckPrereq(self): # pylint: disable=R0914
9597 """Check prerequisites.
9600 self._CalculateFileStorageDir()
9602 if self.op.mode == constants.INSTANCE_IMPORT:
9603 export_info = self._ReadExportInfo()
9604 self._ReadExportParams(export_info)
9606 if (not self.cfg.GetVGName() and
9607 self.op.disk_template not in constants.DTS_NOT_LVM):
9608 raise errors.OpPrereqError("Cluster does not support lvm-based"
9609 " instances", errors.ECODE_STATE)
9611 if (self.op.hypervisor is None or
9612 self.op.hypervisor == constants.VALUE_AUTO):
9613 self.op.hypervisor = self.cfg.GetHypervisorType()
9615 cluster = self.cfg.GetClusterInfo()
9616 enabled_hvs = cluster.enabled_hypervisors
9617 if self.op.hypervisor not in enabled_hvs:
9618 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9619 " cluster (%s)" % (self.op.hypervisor,
9620 ",".join(enabled_hvs)),
9623 # Check tag validity
9624 for tag in self.op.tags:
9625 objects.TaggableObject.ValidateTag(tag)
9627 # check hypervisor parameter syntax (locally)
9628 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9629 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9630 self.op.hvparams)
9631 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9632 hv_type.CheckParameterSyntax(filled_hvp)
9633 self.hv_full = filled_hvp
9634 # check that we don't specify global parameters on an instance
9635 _CheckGlobalHvParams(self.op.hvparams)
9637 # fill and remember the beparams dict
9638 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9639 for param, value in self.op.beparams.iteritems():
9640 if value == constants.VALUE_AUTO:
9641 self.op.beparams[param] = default_beparams[param]
9642 objects.UpgradeBeParams(self.op.beparams)
9643 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9644 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9646 # build os parameters
9647 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9649 # now that hvp/bep are in final format, let's reset to defaults,
9650 # if requested
9651 if self.op.identify_defaults:
9652 self._RevertToDefaults(cluster)
9654 # NIC buildup
9655 self.nics = []
9656 for idx, nic in enumerate(self.op.nics):
9657 nic_mode_req = nic.get(constants.INIC_MODE, None)
9658 nic_mode = nic_mode_req
9659 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9660 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9662 # in routed mode, for the first nic, the default ip is 'auto'
9663 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9664 default_ip_mode = constants.VALUE_AUTO
9665 else:
9666 default_ip_mode = constants.VALUE_NONE
9668 # ip validity checks
9669 ip = nic.get(constants.INIC_IP, default_ip_mode)
9670 if ip is None or ip.lower() == constants.VALUE_NONE:
9671 nic_ip = None
9672 elif ip.lower() == constants.VALUE_AUTO:
9673 if not self.op.name_check:
9674 raise errors.OpPrereqError("IP address set to auto but name checks"
9675 " have been skipped",
9677 nic_ip = self.hostname1.ip
9679 if not netutils.IPAddress.IsValid(ip):
9680 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9684 # TODO: check the ip address for uniqueness
9685 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9686 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9689 # MAC address verification
9690 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9691 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9692 mac = utils.NormalizeAndValidateMac(mac)
9694 try:
9695 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9696 except errors.ReservationError:
9697 raise errors.OpPrereqError("MAC address %s already in use"
9698 " in cluster" % mac,
9699 errors.ECODE_NOTUNIQUE)
9701 # Build nic parameters
9702 link = nic.get(constants.INIC_LINK, None)
9703 if link == constants.VALUE_AUTO:
9704 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9705 nicparams = {}
9706 if nic_mode_req:
9707 nicparams[constants.NIC_MODE] = nic_mode
9708 if link:
9709 nicparams[constants.NIC_LINK] = link
9711 check_params = cluster.SimpleFillNIC(nicparams)
9712 objects.NIC.CheckParameterSyntax(check_params)
9713 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9715 # disk checks/pre-build
9716 default_vg = self.cfg.GetVGName()
9717 self.disks = []
9718 for disk in self.op.disks:
9719 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9720 if mode not in constants.DISK_ACCESS_SET:
9721 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9722 mode, errors.ECODE_INVAL)
9723 size = disk.get(constants.IDISK_SIZE, None)
9724 if size is None:
9725 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9726 try:
9727 size = int(size)
9728 except (TypeError, ValueError):
9729 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9730 errors.ECODE_INVAL)
9732 data_vg = disk.get(constants.IDISK_VG, default_vg)
9733 new_disk = {
9734 constants.IDISK_SIZE: size,
9735 constants.IDISK_MODE: mode,
9736 constants.IDISK_VG: data_vg,
9737 }
9738 if constants.IDISK_METAVG in disk:
9739 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9740 if constants.IDISK_ADOPT in disk:
9741 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9742 self.disks.append(new_disk)
9744 if self.op.mode == constants.INSTANCE_IMPORT:
9745 disk_images = []
9746 for idx in range(len(self.disks)):
9747 option = "disk%d_dump" % idx
9748 if export_info.has_option(constants.INISECT_INS, option):
9749 # FIXME: are the old os-es, disk sizes, etc. useful?
9750 export_name = export_info.get(constants.INISECT_INS, option)
9751 image = utils.PathJoin(self.op.src_path, export_name)
9752 disk_images.append(image)
9753 else:
9754 disk_images.append(False)
9756 self.src_images = disk_images
9758 old_name = export_info.get(constants.INISECT_INS, "name")
9759 if self.op.instance_name == old_name:
9760 for idx, nic in enumerate(self.nics):
9761 if nic.mac == constants.VALUE_AUTO:
9762 nic_mac_ini = "nic%d_mac" % idx
9763 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9765 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9767 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9768 if self.op.ip_check:
9769 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9770 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9771 (self.check_ip, self.op.instance_name),
9772 errors.ECODE_NOTUNIQUE)
9774 #### mac address generation
9775 # By generating the MAC address here, both the allocator and the hooks get
9776 # the real final MAC address rather than the 'auto' or 'generate' value.
9777 # There is a race condition between the generation and the instance object
9778 # creation, which means that we know the mac is valid now, but we're not
9779 # sure it will be when we actually add the instance. If things go bad
9780 # adding the instance will abort because of a duplicate mac, and the
9781 # creation job will fail.
9782 for nic in self.nics:
9783 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9784 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9786 #### allocator run
9788 if self.op.iallocator is not None:
9789 self._RunAllocator()
9791 # Release all unneeded node locks
9792 _ReleaseLocks(self, locking.LEVEL_NODE,
9793 keep=filter(None, [self.op.pnode, self.op.snode,
9794 self.op.src_node]))
9795 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9796 keep=filter(None, [self.op.pnode, self.op.snode,
9797 self.op.src_node]))
9799 #### node related checks
9801 # check primary node
9802 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9803 assert self.pnode is not None, \
9804 "Cannot retrieve locked node %s" % self.op.pnode
9806 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9807 pnode.name, errors.ECODE_STATE)
9809 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9810 pnode.name, errors.ECODE_STATE)
9811 if not pnode.vm_capable:
9812 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9813 " '%s'" % pnode.name, errors.ECODE_STATE)
9815 self.secondaries = []
9817 # mirror node verification
9818 if self.op.disk_template in constants.DTS_INT_MIRROR:
9819 if self.op.snode == pnode.name:
9820 raise errors.OpPrereqError("The secondary node cannot be the"
9821 " primary node", errors.ECODE_INVAL)
9822 _CheckNodeOnline(self, self.op.snode)
9823 _CheckNodeNotDrained(self, self.op.snode)
9824 _CheckNodeVmCapable(self, self.op.snode)
9825 self.secondaries.append(self.op.snode)
9827 snode = self.cfg.GetNodeInfo(self.op.snode)
9828 if pnode.group != snode.group:
9829 self.LogWarning("The primary and secondary nodes are in two"
9830 " different node groups; the disk parameters"
9831 " from the first disk's node group will be"
9834 nodenames = [pnode.name] + self.secondaries
9836 # Verify instance specs
9837 ispec = {
9838 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9839 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9840 constants.ISPEC_DISK_COUNT: len(self.disks),
9841 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9842 constants.ISPEC_NIC_COUNT: len(self.nics),
9843 }
9845 group_info = self.cfg.GetNodeGroup(pnode.group)
9846 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9847 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9848 if not self.op.ignore_ipolicy and res:
9849 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9850 " policy: %s") % (pnode.group,
9851 utils.CommaJoin(res)),
9852 errors.ECODE_INVAL)
9854 # disk parameters (not customizable at instance or node level)
9855 # just use the primary node parameters, ignoring the secondary.
9856 self.diskparams = group_info.diskparams
9858 if not self.adopt_disks:
9859 if self.op.disk_template == constants.DT_RBD:
9860 # _CheckRADOSFreeSpace() is just a placeholder.
9861 # Any function that checks prerequisites can be placed here.
9862 # Check if there is enough space on the RADOS cluster.
9863 _CheckRADOSFreeSpace()
9865 # Check lv size requirements, if not adopting
9866 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9867 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9869 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9870 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9871 disk[constants.IDISK_ADOPT])
9872 for disk in self.disks])
9873 if len(all_lvs) != len(self.disks):
9874 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9876 for lv_name in all_lvs:
9878 # FIXME: lv_name here is "vg/lv"; we need to ensure that other calls
9879 # to ReserveLV use the same syntax
9880 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9881 except errors.ReservationError:
9882 raise errors.OpPrereqError("LV named %s used by another instance" %
9883 lv_name, errors.ECODE_NOTUNIQUE)
9885 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9886 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9888 node_lvs = self.rpc.call_lv_list([pnode.name],
9889 vg_names.payload.keys())[pnode.name]
9890 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9891 node_lvs = node_lvs.payload
9893 delta = all_lvs.difference(node_lvs.keys())
9895 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9896 utils.CommaJoin(delta),
9897 errors.ECODE_INVAL)
9898 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9899 if online_lvs:
9900 raise errors.OpPrereqError("Online logical volumes found, cannot"
9901 " adopt: %s" % utils.CommaJoin(online_lvs),
9902 errors.ECODE_STATE)
9903 # update the size of disk based on what is found
9904 for dsk in self.disks:
9905 dsk[constants.IDISK_SIZE] = \
9906 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9907 dsk[constants.IDISK_ADOPT])][0]))
9909 elif self.op.disk_template == constants.DT_BLOCK:
9910 # Normalize and de-duplicate device paths
9911 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9912 for disk in self.disks])
9913 if len(all_disks) != len(self.disks):
9914 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9916 baddisks = [d for d in all_disks
9917 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9919 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9920 " cannot be adopted" %
9921 (", ".join(baddisks),
9922 constants.ADOPTABLE_BLOCKDEV_ROOT),
9923 errors.ECODE_INVAL)
9925 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9926 list(all_disks))[pnode.name]
9927 node_disks.Raise("Cannot get block device information from node %s" %
9928 pnode.name)
9929 node_disks = node_disks.payload
9930 delta = all_disks.difference(node_disks.keys())
9932 raise errors.OpPrereqError("Missing block device(s): %s" %
9933 utils.CommaJoin(delta),
9935 for dsk in self.disks:
9936 dsk[constants.IDISK_SIZE] = \
9937 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
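# Illustrative only (device names hypothetical): adoption input for the
# two branches above looks like disks=[{"vg": "xenvg", "adopt": "mylv"}]
# for DT_PLAIN and disks=[{"adopt": "/dev/disk/by-id/wwn-0x5000..."}]
# for DT_BLOCK; disk sizes are then read back from the node rather than
# taken from the opcode.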
9939 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9941 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9942 # check OS parameters (remotely)
9943 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9945 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9947 # memory check on primary node
9948 #TODO(dynmem): use MINMEM for checking
9949 if self.op.start:
9950 _CheckNodeFreeMemory(self, self.pnode.name,
9951 "creating instance %s" % self.op.instance_name,
9952 self.be_full[constants.BE_MAXMEM],
9953 self.op.hypervisor)
9955 self.dry_run_result = list(nodenames)
9957 def Exec(self, feedback_fn):
9958 """Create and add the instance to the cluster.
9961 instance = self.op.instance_name
9962 pnode_name = self.pnode.name
9964 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9965 self.owned_locks(locking.LEVEL_NODE)), \
9966 "Node locks differ from node resource locks"
9968 ht_kind = self.op.hypervisor
9969 if ht_kind in constants.HTS_REQ_PORT:
9970 network_port = self.cfg.AllocatePort()
9971 else:
9972 network_port = None
9974 disks = _GenerateDiskTemplate(self,
9975 self.op.disk_template,
9976 instance, pnode_name,
9977 self.secondaries,
9978 self.disks,
9979 self.instance_file_storage_dir,
9980 self.op.file_driver,
9981 0,
9982 feedback_fn,
9983 self.diskparams)
9985 iobj = objects.Instance(name=instance, os=self.op.os_type,
9986 primary_node=pnode_name,
9987 nics=self.nics, disks=disks,
9988 disk_template=self.op.disk_template,
9989 admin_state=constants.ADMINST_DOWN,
9990 network_port=network_port,
9991 beparams=self.op.beparams,
9992 hvparams=self.op.hvparams,
9993 hypervisor=self.op.hypervisor,
9994 osparams=self.op.osparams,
9995 )
9998 for tag in self.op.tags:
9999 iobj.AddTag(tag)
10001 if self.adopt_disks:
10002 if self.op.disk_template == constants.DT_PLAIN:
10003 # rename LVs to the newly-generated names; we need to construct
10004 # 'fake' LV disks with the old data, plus the new unique_id
10005 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10006 rename_to = []
10007 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10008 rename_to.append(t_dsk.logical_id)
10009 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10010 self.cfg.SetDiskID(t_dsk, pnode_name)
10011 result = self.rpc.call_blockdev_rename(pnode_name,
10012 zip(tmp_disks, rename_to))
10013 result.Raise("Failed to rename adopted LVs")
10014 else:
10015 feedback_fn("* creating instance disks...")
10016 try:
10017 _CreateDisks(self, iobj)
10018 except errors.OpExecError:
10019 self.LogWarning("Device creation failed, reverting...")
10020 try:
10021 _RemoveDisks(self, iobj)
10022 finally:
10023 self.cfg.ReleaseDRBDMinors(instance)
10024 raise
10026 feedback_fn("adding instance %s to cluster config" % instance)
10028 self.cfg.AddInstance(iobj, self.proc.GetECId())
10030 # Declare that we don't want to remove the instance lock anymore, as we've
10031 # added the instance to the config
10032 del self.remove_locks[locking.LEVEL_INSTANCE]
10034 if self.op.mode == constants.INSTANCE_IMPORT:
10035 # Release unused nodes
10036 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10037 else:
10038 # Release all nodes
10039 _ReleaseLocks(self, locking.LEVEL_NODE)
10041 disk_abort = False
10042 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10043 feedback_fn("* wiping instance disks...")
10044 try:
10045 _WipeDisks(self, iobj)
10046 except errors.OpExecError, err:
10047 logging.exception("Wiping disks failed")
10048 self.LogWarning("Wiping instance disks failed (%s)", err)
10049 disk_abort = True
10051 if disk_abort:
10052 # Something is already wrong with the disks, don't do anything else
10053 pass
10054 elif self.op.wait_for_sync:
10055 disk_abort = not _WaitForSync(self, iobj)
10056 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10057 # make sure the disks are not degraded (still sync-ing is ok)
10058 feedback_fn("* checking mirrors status")
10059 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10060 else:
10061 disk_abort = False
10063 if disk_abort:
10064 _RemoveDisks(self, iobj)
10065 self.cfg.RemoveInstance(iobj.name)
10066 # Make sure the instance lock gets removed
10067 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10068 raise errors.OpExecError("There are some degraded disks for"
10071 # Release all node resource locks
10072 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10074 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10075 if self.op.mode == constants.INSTANCE_CREATE:
10076 if not self.op.no_install:
10077 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10078 not self.op.wait_for_sync)
10080 feedback_fn("* pausing disk sync to install instance OS")
10081 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10083 for idx, success in enumerate(result.payload):
10085 logging.warn("pause-sync of instance %s for disk %d failed",
10088 feedback_fn("* running the instance OS create scripts...")
10089 # FIXME: pass debug option from opcode to backend
10090 os_add_result = \
10091 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10092 self.op.debug_level)
10094 feedback_fn("* resuming disk sync")
10095 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10097 for idx, success in enumerate(result.payload):
10099 logging.warn("resume-sync of instance %s for disk %d failed",
10102 os_add_result.Raise("Could not add os for instance %s"
10103 " on node %s" % (instance, pnode_name))
10105 elif self.op.mode == constants.INSTANCE_IMPORT:
10106 feedback_fn("* running the instance OS import scripts...")
10108 transfers = []
10110 for idx, image in enumerate(self.src_images):
10111 if not image:
10112 continue
10114 # FIXME: pass debug option from opcode to backend
10115 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10116 constants.IEIO_FILE, (image, ),
10117 constants.IEIO_SCRIPT,
10118 (iobj.disks[idx], idx),
10119 None)
10120 transfers.append(dt)
10122 import_result = \
10123 masterd.instance.TransferInstanceData(self, feedback_fn,
10124 self.op.src_node, pnode_name,
10125 self.pnode.secondary_ip,
10126 iobj, transfers)
10127 if not compat.all(import_result):
10128 self.LogWarning("Some disks for instance %s on node %s were not"
10129 " imported successfully" % (instance, pnode_name))
10131 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10132 feedback_fn("* preparing remote import...")
10133 # The source cluster will stop the instance before attempting to make a
10134 # connection. In some cases stopping an instance can take a long time,
10135 # hence the shutdown timeout is added to the connection timeout.
10136 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10137 self.op.source_shutdown_timeout)
10138 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10140 assert iobj.primary_node == self.pnode.name
10141 disk_results = \
10142 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10143 self.source_x509_ca,
10144 self._cds, timeouts)
10145 if not compat.all(disk_results):
10146 # TODO: Should the instance still be started, even if some disks
10147 # failed to import (valid for local imports, too)?
10148 self.LogWarning("Some disks for instance %s on node %s were not"
10149 " imported successfully" % (instance, pnode_name))
10151 # Run rename script on newly imported instance
10152 assert iobj.name == instance
10153 feedback_fn("Running rename script for %s" % instance)
10154 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10155 self.source_instance_name,
10156 self.op.debug_level)
10157 if result.fail_msg:
10158 self.LogWarning("Failed to run rename script for %s on node"
10159 " %s: %s" % (instance, pnode_name, result.fail_msg))
10161 else:
10162 # also checked in the prereq part
10163 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10164 % self.op.mode)
10166 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10168 if self.op.start:
10169 iobj.admin_state = constants.ADMINST_UP
10170 self.cfg.Update(iobj, feedback_fn)
10171 logging.info("Starting instance %s on node %s", instance, pnode_name)
10172 feedback_fn("* starting instance...")
10173 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10174 False)
10175 result.Raise("Could not start instance")
10177 return list(iobj.all_nodes)
10180 def _CheckRADOSFreeSpace():
10181 """Compute disk size requirements inside the RADOS cluster.
10184 # For the RADOS cluster we assume there is always enough space.
10188 class LUInstanceConsole(NoHooksLU):
10189 """Connect to an instance's console.
10191 This is somewhat special in that it returns the command line that
10192 you need to run on the master node in order to connect to the
10193 console.
10195 """
10196 REQ_BGL = False
10198 def ExpandNames(self):
10199 self.share_locks = _ShareAll()
10200 self._ExpandAndLockInstance()
10202 def CheckPrereq(self):
10203 """Check prerequisites.
10205 This checks that the instance is in the cluster.
10207 """
10208 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10209 assert self.instance is not None, \
10210 "Cannot retrieve locked instance %s" % self.op.instance_name
10211 _CheckNodeOnline(self, self.instance.primary_node)
10213 def Exec(self, feedback_fn):
10214 """Connect to the console of an instance
10217 instance = self.instance
10218 node = instance.primary_node
10220 node_insts = self.rpc.call_instance_list([node],
10221 [instance.hypervisor])[node]
10222 node_insts.Raise("Can't get node information from %s" % node)
10224 if instance.name not in node_insts.payload:
10225 if instance.admin_state == constants.ADMINST_UP:
10226 state = constants.INSTST_ERRORDOWN
10227 elif instance.admin_state == constants.ADMINST_DOWN:
10228 state = constants.INSTST_ADMINDOWN
10230 state = constants.INSTST_ADMINOFFLINE
10231 raise errors.OpExecError("Instance %s is not running (state %s)" %
10232 (instance.name, state))
10234 logging.debug("Connecting to console of %s on %s", instance.name, node)
10236 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10239 def _GetInstanceConsole(cluster, instance):
10240 """Returns console information for an instance.
10242 @type cluster: L{objects.Cluster}
10243 @type instance: L{objects.Instance}
10244 @rtype: dict
10246 """
10247 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10248 # beparams and hvparams are passed separately, to avoid editing the
10249 # instance and then saving the defaults in the instance itself.
10250 hvparams = cluster.FillHV(instance)
10251 beparams = cluster.FillBE(instance)
10252 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10254 assert console.instance == instance.name
10255 assert console.Validate()
10257 return console.ToDict()
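# For orientation (hedged, hypervisor-dependent): the returned dict is
# the serialized objects.InstanceConsole, e.g. roughly
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "user": "root", "command": [...]}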
10260 class LUInstanceReplaceDisks(LogicalUnit):
10261 """Replace the disks of an instance.
10264 HPATH = "mirrors-replace"
10265 HTYPE = constants.HTYPE_INSTANCE
10266 REQ_BGL = False
10268 def CheckArguments(self):
10269 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10270 self.op.iallocator)
10272 def ExpandNames(self):
10273 self._ExpandAndLockInstance()
10275 assert locking.LEVEL_NODE not in self.needed_locks
10276 assert locking.LEVEL_NODE_RES not in self.needed_locks
10277 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10279 assert self.op.iallocator is None or self.op.remote_node is None, \
10280 "Conflicting options"
10282 if self.op.remote_node is not None:
10283 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10285 # Warning: do not remove the locking of the new secondary here
10286 # unless DRBD8.AddChildren is changed to work in parallel;
10287 # currently it doesn't since parallel invocations of
10288 # FindUnusedMinor will conflict
10289 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10290 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10291 else:
10292 self.needed_locks[locking.LEVEL_NODE] = []
10293 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10295 if self.op.iallocator is not None:
10296 # iallocator will select a new node in the same group
10297 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10299 self.needed_locks[locking.LEVEL_NODE_RES] = []
10301 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10302 self.op.iallocator, self.op.remote_node,
10303 self.op.disks, False, self.op.early_release,
10304 self.op.ignore_ipolicy)
10306 self.tasklets = [self.replacer]
10308 def DeclareLocks(self, level):
10309 if level == locking.LEVEL_NODEGROUP:
10310 assert self.op.remote_node is None
10311 assert self.op.iallocator is not None
10312 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10314 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10315 # Lock all groups used by instance optimistically; this requires going
10316 # via the node before it's locked, requiring verification later on
10317 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10318 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10320 elif level == locking.LEVEL_NODE:
10321 if self.op.iallocator is not None:
10322 assert self.op.remote_node is None
10323 assert not self.needed_locks[locking.LEVEL_NODE]
10325 # Lock member nodes of all locked groups
10326 self.needed_locks[locking.LEVEL_NODE] = [node_name
10327 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10328 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10329 else:
10330 self._LockInstancesNodes()
10331 elif level == locking.LEVEL_NODE_RES:
10332 # Reuse node locks
10333 self.needed_locks[locking.LEVEL_NODE_RES] = \
10334 self.needed_locks[locking.LEVEL_NODE]
10336 def BuildHooksEnv(self):
10337 """Build hooks env.
10339 This runs on the master, the primary and all the secondaries.
10341 """
10342 instance = self.replacer.instance
10343 env = {
10344 "MODE": self.op.mode,
10345 "NEW_SECONDARY": self.op.remote_node,
10346 "OLD_SECONDARY": instance.secondary_nodes[0],
10347 }
10348 env.update(_BuildInstanceHookEnvByObject(self, instance))
10349 return env
10351 def BuildHooksNodes(self):
10352 """Build hooks nodes.
10354 """
10355 instance = self.replacer.instance
10356 nl = [
10357 self.cfg.GetMasterNode(),
10358 instance.primary_node,
10359 ]
10360 if self.op.remote_node is not None:
10361 nl.append(self.op.remote_node)
10363 return nl, nl
10364 def CheckPrereq(self):
10365 """Check prerequisites.
10368 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10369 self.op.iallocator is None)
10371 # Verify if node group locks are still correct
10372 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10373 if owned_groups:
10374 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10376 return LogicalUnit.CheckPrereq(self)
10379 class TLReplaceDisks(Tasklet):
10380 """Replaces disks for an instance.
10382 Note: Locking is not within the scope of this class.
10384 """
10385 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10386 disks, delay_iallocator, early_release, ignore_ipolicy):
10387 """Initializes this class.
10390 Tasklet.__init__(self, lu)
10392 # Parameters
10393 self.instance_name = instance_name
10394 self.mode = mode
10395 self.iallocator_name = iallocator_name
10396 self.remote_node = remote_node
10397 self.disks = disks
10398 self.delay_iallocator = delay_iallocator
10399 self.early_release = early_release
10400 self.ignore_ipolicy = ignore_ipolicy
10402 # Runtime data
10403 self.instance = None
10404 self.new_node = None
10405 self.target_node = None
10406 self.other_node = None
10407 self.remote_node_info = None
10408 self.node_secondary_ip = None
10410 @staticmethod
10411 def CheckArguments(mode, remote_node, iallocator):
10412 """Helper function for users of this class.
10415 # check for valid parameter combination
10416 if mode == constants.REPLACE_DISK_CHG:
10417 if remote_node is None and iallocator is None:
10418 raise errors.OpPrereqError("When changing the secondary either an"
10419 " iallocator script must be used or the"
10420 " new node given", errors.ECODE_INVAL)
10422 if remote_node is not None and iallocator is not None:
10423 raise errors.OpPrereqError("Give either the iallocator or the new"
10424 " secondary, not both", errors.ECODE_INVAL)
10426 elif remote_node is not None or iallocator is not None:
10427 # Not replacing the secondary
10428 raise errors.OpPrereqError("The iallocator and new node options can"
10429 " only be used when changing the"
10430 " secondary node", errors.ECODE_INVAL)
10432 @staticmethod
10433 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10434 """Compute a new secondary node using an IAllocator.
10437 ial = IAllocator(lu.cfg, lu.rpc,
10438 mode=constants.IALLOCATOR_MODE_RELOC,
10439 name=instance_name,
10440 relocate_from=list(relocate_from))
10442 ial.Run(iallocator_name)
10444 if not ial.success:
10445 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10446 " %s" % (iallocator_name, ial.info),
10447 errors.ECODE_NORES)
10449 if len(ial.result) != ial.required_nodes:
10450 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10451 " of nodes (%s), required %s" %
10452 (iallocator_name,
10453 len(ial.result), ial.required_nodes),
10454 errors.ECODE_FAULT)
10456 remote_node_name = ial.result[0]
10458 lu.LogInfo("Selected new secondary for instance '%s': %s",
10459 instance_name, remote_node_name)
10461 return remote_node_name
10463 def _FindFaultyDisks(self, node_name):
10464 """Wrapper for L{_FindFaultyInstanceDisks}.
10467 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10470 def _CheckDisksActivated(self, instance):
10471 """Checks if the instance disks are activated.
10473 @param instance: The instance to check disks
10474 @return: True if they are activated, False otherwise
10476 """
10477 nodes = instance.all_nodes
10479 for idx, dev in enumerate(instance.disks):
10481 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10482 self.cfg.SetDiskID(dev, node)
10484 result = self.rpc.call_blockdev_find(node, dev)
10486 if result.offline:
10487 continue
10488 elif result.fail_msg or not result.payload:
10489 return False
10491 return True
10493 def CheckPrereq(self):
10494 """Check prerequisites.
10496 This checks that the instance is in the cluster.
10498 """
10499 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10500 assert instance is not None, \
10501 "Cannot retrieve locked instance %s" % self.instance_name
10503 if instance.disk_template != constants.DT_DRBD8:
10504 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10505 " instances", errors.ECODE_INVAL)
10507 if len(instance.secondary_nodes) != 1:
10508 raise errors.OpPrereqError("The instance has a strange layout,"
10509 " expected one secondary but found %d" %
10510 len(instance.secondary_nodes),
10511 errors.ECODE_FAULT)
10513 if not self.delay_iallocator:
10514 self._CheckPrereq2()
10516 def _CheckPrereq2(self):
10517 """Check prerequisites, second part.
10519 This function should always be part of CheckPrereq. It was separated and is
10520 now called from Exec because during node evacuation iallocator was only
10521 called with an unmodified cluster model, not taking planned changes into
10522 account.
10524 """
10525 instance = self.instance
10526 secondary_node = instance.secondary_nodes[0]
10528 if self.iallocator_name is None:
10529 remote_node = self.remote_node
10530 else:
10531 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10532 instance.name, instance.secondary_nodes)
10534 if remote_node is None:
10535 self.remote_node_info = None
10536 else:
10537 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10538 "Remote node '%s' is not locked" % remote_node
10540 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10541 assert self.remote_node_info is not None, \
10542 "Cannot retrieve locked node %s" % remote_node
10544 if remote_node == self.instance.primary_node:
10545 raise errors.OpPrereqError("The specified node is the primary node of"
10546 " the instance", errors.ECODE_INVAL)
10548 if remote_node == secondary_node:
10549 raise errors.OpPrereqError("The specified node is already the"
10550 " secondary node of the instance",
10551 errors.ECODE_INVAL)
10553 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10554 constants.REPLACE_DISK_CHG):
10555 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10556 errors.ECODE_INVAL)
10558 if self.mode == constants.REPLACE_DISK_AUTO:
10559 if not self._CheckDisksActivated(instance):
10560 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10561 " first" % self.instance_name,
10562 errors.ECODE_STATE)
10563 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10564 faulty_secondary = self._FindFaultyDisks(secondary_node)
10566 if faulty_primary and faulty_secondary:
10567 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10568 " one node and can not be repaired"
10569 " automatically" % self.instance_name,
10570 errors.ECODE_STATE)
10572 if faulty_primary:
10573 self.disks = faulty_primary
10574 self.target_node = instance.primary_node
10575 self.other_node = secondary_node
10576 check_nodes = [self.target_node, self.other_node]
10577 elif faulty_secondary:
10578 self.disks = faulty_secondary
10579 self.target_node = secondary_node
10580 self.other_node = instance.primary_node
10581 check_nodes = [self.target_node, self.other_node]
10582 else:
10583 self.disks = []
10584 check_nodes = []
10586 else:
10587 # Non-automatic modes
10588 if self.mode == constants.REPLACE_DISK_PRI:
10589 self.target_node = instance.primary_node
10590 self.other_node = secondary_node
10591 check_nodes = [self.target_node, self.other_node]
10593 elif self.mode == constants.REPLACE_DISK_SEC:
10594 self.target_node = secondary_node
10595 self.other_node = instance.primary_node
10596 check_nodes = [self.target_node, self.other_node]
10598 elif self.mode == constants.REPLACE_DISK_CHG:
10599 self.new_node = remote_node
10600 self.other_node = instance.primary_node
10601 self.target_node = secondary_node
10602 check_nodes = [self.new_node, self.other_node]
10604 _CheckNodeNotDrained(self.lu, remote_node)
10605 _CheckNodeVmCapable(self.lu, remote_node)
10607 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10608 assert old_node_info is not None
10609 if old_node_info.offline and not self.early_release:
10610 # doesn't make sense to delay the release
10611 self.early_release = True
10612 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10613 " early-release mode", secondary_node)
10616 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10619 # If not specified all disks should be replaced
10621 self.disks = range(len(self.instance.disks))
10623 # TODO: This is ugly, but right now we can't distinguish between internal
10624 # submitted opcode and external one. We should fix that.
10625 if self.remote_node_info:
10626 # We change the node, lets verify it still meets instance policy
10627 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10628 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10629 new_group_info)
10630 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10631 ignore=self.ignore_ipolicy)
10633 # TODO: compute disk parameters
10634 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10635 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10636 if primary_node_info.group != secondary_node_info.group:
10637 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10638 " different node groups; the disk parameters of the"
10639 " primary node's group will be applied.")
10641 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10643 for node in check_nodes:
10644 _CheckNodeOnline(self.lu, node)
10646 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10647 self.other_node,
10648 self.target_node]
10649 if node_name is not None)
10651 # Release unneeded node and node resource locks
10652 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10653 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10655 # Release any owned node group
10656 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10657 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10659 # Check whether disks are valid
10660 for disk_idx in self.disks:
10661 instance.FindDisk(disk_idx)
10663 # Get secondary node IP addresses
10664 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10665 in self.cfg.GetMultiNodeInfo(touched_nodes))
10667 def Exec(self, feedback_fn):
10668 """Execute disk replacement.
10670 This dispatches the disk replacement to the appropriate handler.
10672 """
10673 if self.delay_iallocator:
10674 self._CheckPrereq2()
10676 if __debug__:
10677 # Verify owned locks before starting operation
10678 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10679 assert set(owned_nodes) == set(self.node_secondary_ip), \
10680 ("Incorrect node locks, owning %s, expected %s" %
10681 (owned_nodes, self.node_secondary_ip.keys()))
10682 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10683 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10685 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10686 assert list(owned_instances) == [self.instance_name], \
10687 "Instance '%s' not locked" % self.instance_name
10689 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10690 "Should not own any node group lock at this point"
10693 feedback_fn("No disks need replacement")
10696 feedback_fn("Replacing disk(s) %s for %s" %
10697 (utils.CommaJoin(self.disks), self.instance.name))
10699 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10701 # Activate the instance disks if we're replacing them on a down instance
10702 if activate_disks:
10703 _StartInstanceDisks(self.lu, self.instance, True)
10705 try:
10706 # Should we replace the secondary node?
10707 if self.new_node is not None:
10708 fn = self._ExecDrbd8Secondary
10709 else:
10710 fn = self._ExecDrbd8DiskOnly
10712 result = fn(feedback_fn)
10713 finally:
10714 # Deactivate the instance disks if we're replacing them on a
10715 # down instance
10716 if activate_disks:
10717 _SafeShutdownInstanceDisks(self.lu, self.instance)
10719 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10721 if __debug__:
10722 # Verify owned locks
10723 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10724 nodes = frozenset(self.node_secondary_ip)
10725 assert ((self.early_release and not owned_nodes) or
10726 (not self.early_release and not (set(owned_nodes) - nodes))), \
10727 ("Not owning the correct locks, early_release=%s, owned=%r,"
10728 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10732 def _CheckVolumeGroup(self, nodes):
10733 self.lu.LogInfo("Checking volume groups")
10735 vgname = self.cfg.GetVGName()
10737 # Make sure volume group exists on all involved nodes
10738 results = self.rpc.call_vg_list(nodes)
10740 raise errors.OpExecError("Can't list volume groups on the nodes")
10743 res = results[node]
10744 res.Raise("Error checking node %s" % node)
10745 if vgname not in res.payload:
10746 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10749 def _CheckDisksExistence(self, nodes):
10750 # Check disk existence
10751 for idx, dev in enumerate(self.instance.disks):
10752 if idx not in self.disks:
10753 continue
10755 for node in nodes:
10756 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10757 self.cfg.SetDiskID(dev, node)
10759 result = self.rpc.call_blockdev_find(node, dev)
10761 msg = result.fail_msg
10762 if msg or not result.payload:
10764 msg = "disk not found"
10765 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10768 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10769 for idx, dev in enumerate(self.instance.disks):
10770 if idx not in self.disks:
10771 continue
10773 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10774 (idx, node_name))
10776 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10777 ldisk=ldisk):
10778 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10779 " replace disks for instance %s" %
10780 (node_name, self.instance.name))
10782 def _CreateNewStorage(self, node_name):
10783 """Create new storage on the primary or secondary node.
10785 This is only used for same-node replaces, not for changing the
10786 secondary node, hence we don't want to modify the existing disk.
10788 """
10789 iv_names = {}
10791 for idx, dev in enumerate(self.instance.disks):
10792 if idx not in self.disks:
10793 continue
10795 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10797 self.cfg.SetDiskID(dev, node_name)
10799 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10800 names = _GenerateUniqueNames(self.lu, lv_names)
10802 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10804 vg_data = dev.children[0].logical_id[0]
10805 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10806 logical_id=(vg_data, names[0]), params=data_p)
10807 vg_meta = dev.children[1].logical_id[0]
10808 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10809 logical_id=(vg_meta, names[1]), params=meta_p)
10811 new_lvs = [lv_data, lv_meta]
10812 old_lvs = [child.Copy() for child in dev.children]
10813 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10815 # we pass force_create=True to force the LVM creation
10816 for new_lv in new_lvs:
10817 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10818 _GetInstanceInfoText(self.instance), False)
10820 return iv_names
10822 def _CheckDevices(self, node_name, iv_names):
10823 for name, (dev, _, _) in iv_names.iteritems():
10824 self.cfg.SetDiskID(dev, node_name)
10826 result = self.rpc.call_blockdev_find(node_name, dev)
10828 msg = result.fail_msg
10829 if msg or not result.payload:
10831 msg = "disk not found"
10832 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10835 if result.payload.is_degraded:
10836 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10838 def _RemoveOldStorage(self, node_name, iv_names):
10839 for name, (_, old_lvs, _) in iv_names.iteritems():
10840 self.lu.LogInfo("Remove logical volumes for %s" % name)
10842 for lv in old_lvs:
10843 self.cfg.SetDiskID(lv, node_name)
10845 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10846 if msg:
10847 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10848 hint="remove unused LVs manually")
10850 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10851 """Replace a disk on the primary or secondary for DRBD 8.
10853 The algorithm for replace is quite complicated:
10855 1. for each disk to be replaced:
10857 1. create new LVs on the target node with unique names
10858 1. detach old LVs from the drbd device
10859 1. rename old LVs to name_replaced.<time_t>
10860 1. rename new LVs to old LVs
10861 1. attach the new LVs (with the old names now) to the drbd device
10863 1. wait for sync across all devices
10865 1. for each modified disk:
10867 1. remove old LVs (which have the name name_replaces.<time_t>)
10869 Failures are not very well handled.
10871 """
10873 steps_total = 6
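# (Editor's sketch of the rename dance below; LV names are
# hypothetical, the "_replaced-<time_t>" suffix matches ren_fn.)
#   before : the drbd device sits on old, possibly faulty data/meta LVs
#   step 4a: create fresh, uniquely-named LVs on the target node
#   step 4b: detach the old LVs and rename them to <name>_replaced-<ts>
#   step 4c: rename the new LVs to the old names and re-attach them
# The renamed old LVs are deleted in the final "remove old storage" step.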
10874 # Step: check device activation
10875 self.lu.LogStep(1, steps_total, "Check device existence")
10876 self._CheckDisksExistence([self.other_node, self.target_node])
10877 self._CheckVolumeGroup([self.target_node, self.other_node])
10879 # Step: check other node consistency
10880 self.lu.LogStep(2, steps_total, "Check peer consistency")
10881 self._CheckDisksConsistency(self.other_node,
10882 self.other_node == self.instance.primary_node,
10883 False)
10885 # Step: create new storage
10886 self.lu.LogStep(3, steps_total, "Allocate new storage")
10887 iv_names = self._CreateNewStorage(self.target_node)
10889 # Step: for each lv, detach+rename*2+attach
10890 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10891 for dev, old_lvs, new_lvs in iv_names.itervalues():
10892 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10894 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10895 old_lvs)
10896 result.Raise("Can't detach drbd from local storage on node"
10897 " %s for device %s" % (self.target_node, dev.iv_name))
10899 #cfg.Update(instance)
10901 # ok, we created the new LVs, so now we know we have the needed
10902 # storage; as such, we proceed on the target node to rename
10903 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10904 # using the assumption that logical_id == physical_id (which in
10905 # turn is the unique_id on that node)
10907 # FIXME(iustin): use a better name for the replaced LVs
10908 temp_suffix = int(time.time())
10909 ren_fn = lambda d, suff: (d.physical_id[0],
10910 d.physical_id[1] + "_replaced-%s" % suff)
10912 # Build the rename list based on what LVs exist on the node
10913 rename_old_to_new = []
10914 for to_ren in old_lvs:
10915 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10916 if not result.fail_msg and result.payload:
10917 # device exists
10918 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10920 self.lu.LogInfo("Renaming the old LVs on the target node")
10921 result = self.rpc.call_blockdev_rename(self.target_node,
10922 rename_old_to_new)
10923 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10925 # Now we rename the new LVs to the old LVs
10926 self.lu.LogInfo("Renaming the new LVs on the target node")
10927 rename_new_to_old = [(new, old.physical_id)
10928 for old, new in zip(old_lvs, new_lvs)]
10929 result = self.rpc.call_blockdev_rename(self.target_node,
10930 rename_new_to_old)
10931 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10933 # Intermediate steps of in memory modifications
10934 for old, new in zip(old_lvs, new_lvs):
10935 new.logical_id = old.logical_id
10936 self.cfg.SetDiskID(new, self.target_node)
10938 # We need to modify old_lvs so that removal later removes the
10939 # right LVs, not the newly added ones; note that old_lvs is a
10940 # copy here
10941 for disk in old_lvs:
10942 disk.logical_id = ren_fn(disk, temp_suffix)
10943 self.cfg.SetDiskID(disk, self.target_node)
10945 # Now that the new lvs have the old name, we can add them to the device
10946 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10947 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10948 new_lvs)
10949 msg = result.fail_msg
10950 if msg:
10951 for new_lv in new_lvs:
10952 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10953 new_lv).fail_msg
10954 if msg2:
10955 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10956 hint=("cleanup manually the unused logical"
10958 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10960 cstep = itertools.count(5)
10962 if self.early_release:
10963 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10964 self._RemoveOldStorage(self.target_node, iv_names)
10965 # TODO: Check if releasing locks early still makes sense
10966 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10967 else:
10968 # Release all resource locks except those used by the instance
10969 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10970 keep=self.node_secondary_ip.keys())
10972 # Release all node locks while waiting for sync
10973 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10975 # TODO: Can the instance lock be downgraded here? Take the optional disk
10976 # shutdown in the caller into consideration.
10978 # Wait for sync
10979 # This can fail as the old devices are degraded and _WaitForSync
10980 # does a combined result over all disks, so we don't check its return value
10981 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10982 _WaitForSync(self.lu, self.instance)
10984 # Check all devices manually
10985 self._CheckDevices(self.instance.primary_node, iv_names)
10987 # Step: remove old storage
10988 if not self.early_release:
10989 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10990 self._RemoveOldStorage(self.target_node, iv_names)
10992 def _ExecDrbd8Secondary(self, feedback_fn):
10993 """Replace the secondary node for DRBD 8.
10995 The algorithm for replace is quite complicated:
10996 - for all disks of the instance:
10997 - create new LVs on the new node with same names
10998 - shutdown the drbd device on the old secondary
10999 - disconnect the drbd network on the primary
11000 - create the drbd device on the new secondary
11001 - network attach the drbd on the primary, using an artifice:
11002 the drbd code for Attach() will connect to the network if it
11003 finds a device which is connected to the good local disks but
11004 not network enabled
11005 - wait for sync across all devices
11006 - remove all disks from the old secondary
11008 Failures are not very well handled.
11010 """
11011 steps_total = 6
11013 pnode = self.instance.primary_node
11015 # Step: check device activation
11016 self.lu.LogStep(1, steps_total, "Check device existence")
11017 self._CheckDisksExistence([self.instance.primary_node])
11018 self._CheckVolumeGroup([self.instance.primary_node])
11020 # Step: check other node consistency
11021 self.lu.LogStep(2, steps_total, "Check peer consistency")
11022 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11024 # Step: create new storage
11025 self.lu.LogStep(3, steps_total, "Allocate new storage")
11026 for idx, dev in enumerate(self.instance.disks):
11027 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11028 (self.new_node, idx))
11029 # we pass force_create=True to force LVM creation
11030 for new_lv in dev.children:
11031 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11032 _GetInstanceInfoText(self.instance), False)
11034 # Step 4: drbd minors and drbd setup changes
11035 # after this, we must manually remove the drbd minors on both the
11036 # error and the success paths
11037 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11038 minors = self.cfg.AllocateDRBDMinor([self.new_node
11039 for dev in self.instance.disks],
11040 self.instance.name)
11041 logging.debug("Allocated minors %r", minors)
11043 iv_names = {}
11044 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11045 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11046 (self.new_node, idx))
11047 # create new devices on new_node; note that we create two IDs:
11048 # one without port, so the drbd will be activated without
11049 # networking information on the new node at this stage, and one
11050 # with network, for the latter activation in step 4
11051 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11052 if self.instance.primary_node == o_node1:
11053 p_minor = o_minor1
11054 else:
11055 assert self.instance.primary_node == o_node2, "Three-node instance?"
11056 p_minor = o_minor2
11058 new_alone_id = (self.instance.primary_node, self.new_node, None,
11059 p_minor, new_minor, o_secret)
11060 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11061 p_minor, new_minor, o_secret)
11063 iv_names[idx] = (dev, dev.children, new_net_id)
11064 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11065 new_net_id)
11066 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11067 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11068 logical_id=new_alone_id,
11069 children=dev.children,
11070 size=dev.size,
11071 params=drbd_params)
11072 try:
11073 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11074 _GetInstanceInfoText(self.instance), False)
11075 except errors.GenericError:
11076 self.cfg.ReleaseDRBDMinors(self.instance.name)
11077 raise
11079 # We have new devices, shutdown the drbd on the old secondary
11080 for idx, dev in enumerate(self.instance.disks):
11081 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11082 self.cfg.SetDiskID(dev, self.target_node)
11083 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11084 if msg:
11085 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11086 " node: %s" % (idx, msg),
11087 hint=("Please cleanup this device manually as"
11088 " soon as possible"))
11090 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11091 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11092 self.instance.disks)[pnode]
11094 msg = result.fail_msg
11095 if msg:
11096 # detaches didn't succeed (unlikely)
11097 self.cfg.ReleaseDRBDMinors(self.instance.name)
11098 raise errors.OpExecError("Can't detach the disks from the network on"
11099 " old node: %s" % (msg,))
11101 # if we managed to detach at least one, we update all the disks of
11102 # the instance to point to the new secondary
11103 self.lu.LogInfo("Updating instance configuration")
11104 for dev, _, new_logical_id in iv_names.itervalues():
11105 dev.logical_id = new_logical_id
11106 self.cfg.SetDiskID(dev, self.instance.primary_node)
11108 self.cfg.Update(self.instance, feedback_fn)
11110 # Release all node locks (the configuration has been updated)
11111 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11113 # and now perform the drbd attach
11114 self.lu.LogInfo("Attaching primary drbds to new secondary"
11115 " (standalone => connected)")
11116 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11117 self.new_node],
11118 self.node_secondary_ip,
11119 self.instance.disks,
11120 self.instance.name,
11121 False)
11122 for to_node, to_result in result.items():
11123 msg = to_result.fail_msg
11124 if msg:
11125 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11126 to_node, msg,
11127 hint=("please do a gnt-instance info to see the"
11128 " status of disks"))
11130 cstep = itertools.count(5)
11132 if self.early_release:
11133 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11134 self._RemoveOldStorage(self.target_node, iv_names)
11135 # TODO: Check if releasing locks early still makes sense
11136 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11138 # Release all resource locks except those used by the instance
11139 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11140 keep=self.node_secondary_ip.keys())
11142 # TODO: Can the instance lock be downgraded here? Take the optional disk
11143 # shutdown in the caller into consideration.
11145 # Wait for sync
11146 # This can fail as the old devices are degraded and _WaitForSync
11147 # does a combined result over all disks, so we don't check its return value
11148 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11149 _WaitForSync(self.lu, self.instance)
11151 # Check all devices manually
11152 self._CheckDevices(self.instance.primary_node, iv_names)
11154 # Step: remove old storage
11155 if not self.early_release:
11156 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11157 self._RemoveOldStorage(self.target_node, iv_names)
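# A minimal sketch (editor addition, not part of the original module) of how
# the DRBD logical_id tuples above are rebuilt when the secondary changes.
# The function name and call shape are hypothetical; only the 6-tuple layout
# (node1, node2, port, minor1, minor2, secret) comes from the code above.
def _SketchRebuildDrbdIds(primary, new_secondary, logical_id, new_minor):
  """Return (alone_id, net_id) for a replaced secondary, as built above."""
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = logical_id
  # keep the primary's existing minor; the other slot gets the new minor
  if primary == o_node1:
    p_minor = o_minor1
  else:
    assert primary == o_node2
    p_minor = o_minor2
  # the "alone" id has no port, so the device activates without networking
  alone_id = (primary, new_secondary, None, p_minor, new_minor, o_secret)
  net_id = (primary, new_secondary, o_port, p_minor, new_minor, o_secret)
  return (alone_id, net_id)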
11160 class LURepairNodeStorage(NoHooksLU):
11161 """Repairs the volume group on a node.
11166 def CheckArguments(self):
11167 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11169 storage_type = self.op.storage_type
11171 if (constants.SO_FIX_CONSISTENCY not in
11172 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11173 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11174 " repaired" % storage_type,
11175 errors.ECODE_INVAL)
11177 def ExpandNames(self):
11178 self.needed_locks = {
11179 locking.LEVEL_NODE: [self.op.node_name],
11180 }
11182 def _CheckFaultyDisks(self, instance, node_name):
11183 """Ensure faulty disks abort the opcode or at least warn."""
11185 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11187 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11188 " node '%s'" % (instance.name, node_name),
11189 errors.ECODE_STATE)
11190 except errors.OpPrereqError, err:
11191 if self.op.ignore_consistency:
11192 self.proc.LogWarning(str(err.args[0]))
11196 def CheckPrereq(self):
11197 """Check prerequisites.
11200 # Check whether any instance on this node has faulty disks
11201 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11202 if inst.admin_state != constants.ADMINST_UP:
11203 continue
11204 check_nodes = set(inst.all_nodes)
11205 check_nodes.discard(self.op.node_name)
11206 for inst_node_name in check_nodes:
11207 self._CheckFaultyDisks(inst, inst_node_name)
11209 def Exec(self, feedback_fn):
11210 feedback_fn("Repairing storage unit '%s' on %s ..." %
11211 (self.op.name, self.op.node_name))
11213 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11214 result = self.rpc.call_storage_execute(self.op.node_name,
11215 self.op.storage_type, st_args,
11216 self.op.name,
11217 constants.SO_FIX_CONSISTENCY)
11218 result.Raise("Failed to repair storage unit '%s' on %s" %
11219 (self.op.name, self.op.node_name))
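# A hedged usage sketch (editor addition): the opcode consumed by
# LURepairNodeStorage above. The field names mirror the self.op attributes
# read in CheckArguments/Exec; the node and volume group names are invented.
def _SketchRepairStorageOp():
  """Build an OpRepairNodeStorage request for an LVM volume group."""
  return opcodes.OpRepairNodeStorage(node_name="node1.example.com",
                                     storage_type=constants.ST_LVM_VG,
                                     name="xenvg",
                                     ignore_consistency=False)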
11222 class LUNodeEvacuate(NoHooksLU):
11223 """Evacuates instances off a list of nodes.
11228 _MODE2IALLOCATOR = {
11229 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11230 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11231 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11232 }
11233 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11234 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11235 constants.IALLOCATOR_NEVAC_MODES)
11237 def CheckArguments(self):
11238 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11240 def ExpandNames(self):
11241 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11243 if self.op.remote_node is not None:
11244 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11245 assert self.op.remote_node
11247 if self.op.remote_node == self.op.node_name:
11248 raise errors.OpPrereqError("Can not use evacuated node as a new"
11249 " secondary node", errors.ECODE_INVAL)
11251 if self.op.mode != constants.NODE_EVAC_SEC:
11252 raise errors.OpPrereqError("Without the use of an iallocator only"
11253 " secondary instances can be evacuated",
11254 errors.ECODE_INVAL)
11257 self.share_locks = _ShareAll()
11258 self.needed_locks = {
11259 locking.LEVEL_INSTANCE: [],
11260 locking.LEVEL_NODEGROUP: [],
11261 locking.LEVEL_NODE: [],
11262 }
11264 # Determine nodes (via group) optimistically, needs verification once locks
11265 # have been acquired
11266 self.lock_nodes = self._DetermineNodes()
11268 def _DetermineNodes(self):
11269 """Gets the list of nodes to operate on.
11272 if self.op.remote_node is None:
11273 # Iallocator will choose any node(s) in the same group
11274 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11275 else:
11276 group_nodes = frozenset([self.op.remote_node])
11278 # Determine nodes to be locked
11279 return set([self.op.node_name]) | group_nodes
11281 def _DetermineInstances(self):
11282 """Builds list of instances to operate on.
11285 assert self.op.mode in constants.NODE_EVAC_MODES
11287 if self.op.mode == constants.NODE_EVAC_PRI:
11288 # Primary instances only
11289 inst_fn = _GetNodePrimaryInstances
11290 assert self.op.remote_node is None, \
11291 "Evacuating primary instances requires iallocator"
11292 elif self.op.mode == constants.NODE_EVAC_SEC:
11293 # Secondary instances only
11294 inst_fn = _GetNodeSecondaryInstances
11295 else:
11296 # All instances
11297 assert self.op.mode == constants.NODE_EVAC_ALL
11298 inst_fn = _GetNodeInstances
11299 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11301 raise errors.OpPrereqError("Due to an issue with the iallocator"
11302 " interface it is not possible to evacuate"
11303 " all instances at once; specify explicitly"
11304 " whether to evacuate primary or secondary"
11306 errors.ECODE_INVAL)
11308 return inst_fn(self.cfg, self.op.node_name)
11310 def DeclareLocks(self, level):
11311 if level == locking.LEVEL_INSTANCE:
11312 # Lock instances optimistically, needs verification once node and group
11313 # locks have been acquired
11314 self.needed_locks[locking.LEVEL_INSTANCE] = \
11315 set(i.name for i in self._DetermineInstances())
11317 elif level == locking.LEVEL_NODEGROUP:
11318 # Lock node groups for all potential target nodes optimistically, needs
11319 # verification once nodes have been acquired
11320 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11321 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11323 elif level == locking.LEVEL_NODE:
11324 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11326 def CheckPrereq(self):
11327 # Verify locks
11328 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11329 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11330 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11332 need_nodes = self._DetermineNodes()
11334 if not owned_nodes.issuperset(need_nodes):
11335 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11336 " locks were acquired, current nodes are"
11337 " are '%s', used to be '%s'; retry the"
11339 (self.op.node_name,
11340 utils.CommaJoin(need_nodes),
11341 utils.CommaJoin(owned_nodes)),
11342 errors.ECODE_STATE)
11344 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11345 if owned_groups != wanted_groups:
11346 raise errors.OpExecError("Node groups changed since locks were acquired,"
11347 " current groups are '%s', used to be '%s';"
11348 " retry the operation" %
11349 (utils.CommaJoin(wanted_groups),
11350 utils.CommaJoin(owned_groups)))
11352 # Determine affected instances
11353 self.instances = self._DetermineInstances()
11354 self.instance_names = [i.name for i in self.instances]
11356 if set(self.instance_names) != owned_instances:
11357 raise errors.OpExecError("Instances on node '%s' changed since locks"
11358 " were acquired, current instances are '%s',"
11359 " used to be '%s'; retry the operation" %
11360 (self.op.node_name,
11361 utils.CommaJoin(self.instance_names),
11362 utils.CommaJoin(owned_instances)))
11364 if self.instance_names:
11365 self.LogInfo("Evacuating instances from node '%s': %s",
11366 self.op.node_name,
11367 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11368 else:
11369 self.LogInfo("No instances to evacuate from node '%s'",
11370 self.op.node_name)
11372 if self.op.remote_node is not None:
11373 for i in self.instances:
11374 if i.primary_node == self.op.remote_node:
11375 raise errors.OpPrereqError("Node %s is the primary node of"
11376 " instance %s, cannot use it as"
11378 (self.op.remote_node, i.name),
11379 errors.ECODE_INVAL)
11381 def Exec(self, feedback_fn):
11382 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11384 if not self.instance_names:
11385 # No instances to evacuate
11386 jobs = []
11388 elif self.op.iallocator is not None:
11389 # TODO: Implement relocation to other group
11390 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11391 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11392 instances=list(self.instance_names))
11394 ial.Run(self.op.iallocator)
11396 if not ial.success:
11397 raise errors.OpPrereqError("Can't compute node evacuation using"
11398 " iallocator '%s': %s" %
11399 (self.op.iallocator, ial.info),
11400 errors.ECODE_NORES)
11402 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11404 elif self.op.remote_node is not None:
11405 assert self.op.mode == constants.NODE_EVAC_SEC
11406 jobs = [
11407 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11408 remote_node=self.op.remote_node,
11409 disks=[],
11410 mode=constants.REPLACE_DISK_CHG,
11411 early_release=self.op.early_release)]
11412 for instance_name in self.instance_names
11413 ]
11415 else:
11416 raise errors.ProgrammerError("No iallocator or remote node")
11418 return ResultWithJobs(jobs)
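# A sketch (editor addition) of the jobs structure handed to ResultWithJobs
# above: a list of jobs, each a list of opcodes executed sequentially, while
# the jobs themselves are submitted as separate parallel jobs by the
# processor. This mirrors the remote_node branch of Exec; names are made up.
def _SketchNodeEvacJobs(instance_names, remote_node, early_release):
  """Build one single-opcode job per instance, as Exec does above."""
  return [
    [opcodes.OpInstanceReplaceDisks(instance_name=name,
                                    remote_node=remote_node,
                                    disks=[],
                                    mode=constants.REPLACE_DISK_CHG,
                                    early_release=early_release)]
    for name in instance_names]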
11421 def _SetOpEarlyRelease(early_release, op):
11422 """Sets C{early_release} flag on opcodes if available.
11426 op.early_release = early_release
11427 except AttributeError:
11428 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11433 def _NodeEvacDest(use_nodes, group, nodes):
11434 """Returns group or nodes depending on caller's choice.
11438 return utils.CommaJoin(nodes)
11443 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11444 """Unpacks the result of change-group and node-evacuate iallocator requests.
11446 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11447 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11449 @type lu: L{LogicalUnit}
11450 @param lu: Logical unit instance
11451 @type alloc_result: tuple/list
11452 @param alloc_result: Result from iallocator
11453 @type early_release: bool
11454 @param early_release: Whether to release locks early if possible
11455 @type use_nodes: bool
11456 @param use_nodes: Whether to display node names instead of groups
11458 """
11459 (moved, failed, jobs) = alloc_result
11461 if failed:
11462 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11463 for (name, reason) in failed)
11464 lu.LogWarning("Unable to evacuate instances %s", failreason)
11465 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11467 if moved:
11468 lu.LogInfo("Instances to be moved: %s",
11469 utils.CommaJoin("%s (to %s)" %
11470 (name, _NodeEvacDest(use_nodes, group, nodes))
11471 for (name, group, nodes) in moved))
11473 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11474 map(opcodes.OpCode.LoadOpCode, ops))
11475 for ops in jobs]
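# Illustrative data (editor addition) for the alloc_result consumed above: a
# (moved, failed, jobs) triple in which "jobs" holds serialized opcodes that
# are revived via opcodes.OpCode.LoadOpCode. All names below are invented:
#
#   moved = [("inst1.example.com", "group1", ["node2.example.com"])]
#   failed = [("inst2.example.com", "instance has no secondary node")]
#   jobs = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}], ...]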
11478 class LUInstanceGrowDisk(LogicalUnit):
11479 """Grow a disk of an instance.
11482 HPATH = "disk-grow"
11483 HTYPE = constants.HTYPE_INSTANCE
11486 def ExpandNames(self):
11487 self._ExpandAndLockInstance()
11488 self.needed_locks[locking.LEVEL_NODE] = []
11489 self.needed_locks[locking.LEVEL_NODE_RES] = []
11490 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11491 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11493 def DeclareLocks(self, level):
11494 if level == locking.LEVEL_NODE:
11495 self._LockInstancesNodes()
11496 elif level == locking.LEVEL_NODE_RES:
11497 # Copying node locks
11498 self.needed_locks[locking.LEVEL_NODE_RES] = \
11499 self.needed_locks[locking.LEVEL_NODE][:]
11501 def BuildHooksEnv(self):
11502 """Build hooks env.
11504 This runs on the master, the primary and all the secondaries.
11506 """
11507 env = {
11508 "DISK": self.op.disk,
11509 "AMOUNT": self.op.amount,
11510 }
11511 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11512 return env
11514 def BuildHooksNodes(self):
11515 """Build hooks nodes.
11518 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11521 def CheckPrereq(self):
11522 """Check prerequisites.
11524 This checks that the instance is in the cluster.
11526 """
11527 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11528 assert instance is not None, \
11529 "Cannot retrieve locked instance %s" % self.op.instance_name
11530 nodenames = list(instance.all_nodes)
11531 for node in nodenames:
11532 _CheckNodeOnline(self, node)
11534 self.instance = instance
11536 if instance.disk_template not in constants.DTS_GROWABLE:
11537 raise errors.OpPrereqError("Instance's disk layout does not support"
11538 " growing", errors.ECODE_INVAL)
11540 self.disk = instance.FindDisk(self.op.disk)
11542 if instance.disk_template not in (constants.DT_FILE,
11543 constants.DT_SHARED_FILE,
11544 constants.DT_RBD):
11545 # TODO: check the free disk space for file, when that feature will be
11546 # supported
11547 _CheckNodesFreeDiskPerVG(self, nodenames,
11548 self.disk.ComputeGrowth(self.op.amount))
11550 def Exec(self, feedback_fn):
11551 """Execute disk grow.
11554 instance = self.instance
11557 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11558 assert (self.owned_locks(locking.LEVEL_NODE) ==
11559 self.owned_locks(locking.LEVEL_NODE_RES))
11561 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11562 if not disks_ok:
11563 raise errors.OpExecError("Cannot activate block device to grow")
11565 feedback_fn("Growing disk %s of instance '%s' by %s" %
11566 (self.op.disk, instance.name,
11567 utils.FormatUnit(self.op.amount, "h")))
11569 # First run all grow ops in dry-run mode
11570 for node in instance.all_nodes:
11571 self.cfg.SetDiskID(disk, node)
11572 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11573 result.Raise("Grow request failed to node %s" % node)
11575 # We know that (as far as we can test) operations across different
11576 # nodes will succeed, time to run it for real
11577 for node in instance.all_nodes:
11578 self.cfg.SetDiskID(disk, node)
11579 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11580 result.Raise("Grow request failed to node %s" % node)
11582 # TODO: Rewrite code to work properly
11583 # DRBD goes into sync mode for a short amount of time after executing the
11584 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11585 # calling "resize" in sync mode fails. Sleeping for a short amount of
11586 # time is a work-around.
11587 time.sleep(5)
11589 disk.RecordGrow(self.op.amount)
11590 self.cfg.Update(instance, feedback_fn)
11592 # Changes have been recorded, release node lock
11593 _ReleaseLocks(self, locking.LEVEL_NODE)
11595 # Downgrade lock while waiting for sync
11596 self.glm.downgrade(locking.LEVEL_INSTANCE)
11598 if self.op.wait_for_sync:
11599 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11600 if disk_abort:
11601 self.proc.LogWarning("Disk sync-ing has not returned a good"
11602 " status; please check the instance")
11603 if instance.admin_state != constants.ADMINST_UP:
11604 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11605 elif instance.admin_state != constants.ADMINST_UP:
11606 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11607 " not supposed to be running because no wait for"
11608 " sync mode was requested")
11610 assert self.owned_locks(locking.LEVEL_NODE_RES)
11611 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
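# A hedged usage sketch (editor addition): the opcode consumed by
# LUInstanceGrowDisk. The field names mirror the self.op attributes used
# above; the amount is in MiB and the instance name is invented.
def _SketchGrowDiskOp():
  """Build an OpInstanceGrowDisk growing disk 0 by 1 GiB."""
  return opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
                                    disk=0,
                                    amount=1024,
                                    wait_for_sync=True)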
11614 class LUInstanceQueryData(NoHooksLU):
11615 """Query runtime instance data.
11620 def ExpandNames(self):
11621 self.needed_locks = {}
11623 # Use locking if requested or when non-static information is wanted
11624 if not (self.op.static or self.op.use_locking):
11625 self.LogWarning("Non-static data requested, locks need to be acquired")
11626 self.op.use_locking = True
11628 if self.op.instances or not self.op.use_locking:
11629 # Expand instance names right here
11630 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11631 else:
11632 # Will use acquired locks
11633 self.wanted_names = None
11635 if self.op.use_locking:
11636 self.share_locks = _ShareAll()
11638 if self.wanted_names is None:
11639 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11640 else:
11641 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11643 self.needed_locks[locking.LEVEL_NODE] = []
11644 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11646 def DeclareLocks(self, level):
11647 if self.op.use_locking and level == locking.LEVEL_NODE:
11648 self._LockInstancesNodes()
11650 def CheckPrereq(self):
11651 """Check prerequisites.
11653 This only checks the optional instance list against the existing names.
11655 """
11656 if self.wanted_names is None:
11657 assert self.op.use_locking, "Locking was not used"
11658 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11660 self.wanted_instances = \
11661 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11663 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11664 """Returns the status of a block device
11667 if self.op.static or not node:
11670 self.cfg.SetDiskID(dev, node)
11672 result = self.rpc.call_blockdev_find(node, dev)
11676 result.Raise("Can't compute disk status for %s" % instance_name)
11678 status = result.payload
11682 return (status.dev_path, status.major, status.minor,
11683 status.sync_percent, status.estimated_time,
11684 status.is_degraded, status.ldisk_status)
11686 def _ComputeDiskStatus(self, instance, snode, dev):
11687 """Compute block device status.
11690 if dev.dev_type in constants.LDS_DRBD:
11691 # we change the snode then (otherwise we use the one passed in)
11692 if dev.logical_id[0] == instance.primary_node:
11693 snode = dev.logical_id[1]
11694 else:
11695 snode = dev.logical_id[0]
11697 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11698 instance.name, dev)
11699 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11701 if dev.children:
11702 dev_children = map(compat.partial(self._ComputeDiskStatus,
11703 instance, snode),
11704 dev.children)
11705 else:
11706 dev_children = []
11708 return {
11709 "iv_name": dev.iv_name,
11710 "dev_type": dev.dev_type,
11711 "logical_id": dev.logical_id,
11712 "physical_id": dev.physical_id,
11713 "pstatus": dev_pstatus,
11714 "sstatus": dev_sstatus,
11715 "children": dev_children,
11720 def Exec(self, feedback_fn):
11721 """Gather and return data"""
11724 cluster = self.cfg.GetClusterInfo()
11726 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11727 for i in self.wanted_instances)
11728 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11729 if self.op.static or pnode.offline:
11730 remote_state = None
11731 if pnode.offline:
11732 self.LogWarning("Primary node %s is marked offline, returning static"
11733 " information only for instance %s" %
11734 (pnode.name, instance.name))
11735 else:
11736 remote_info = self.rpc.call_instance_info(instance.primary_node,
11737 instance.name,
11738 instance.hypervisor)
11739 remote_info.Raise("Error checking node %s" % instance.primary_node)
11740 remote_info = remote_info.payload
11741 if remote_info and "state" in remote_info:
11742 remote_state = "up"
11743 else:
11744 if instance.admin_state == constants.ADMINST_UP:
11745 remote_state = "down"
11746 else:
11747 remote_state = instance.admin_state
11749 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11750 instance.disks)
11752 result[instance.name] = {
11753 "name": instance.name,
11754 "config_state": instance.admin_state,
11755 "run_state": remote_state,
11756 "pnode": instance.primary_node,
11757 "snodes": instance.secondary_nodes,
11759 # this happens to be the same format used for hooks
11760 "nics": _NICListToTuple(self, instance.nics),
11761 "disk_template": instance.disk_template,
11763 "hypervisor": instance.hypervisor,
11764 "network_port": instance.network_port,
11765 "hv_instance": instance.hvparams,
11766 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11767 "be_instance": instance.beparams,
11768 "be_actual": cluster.FillBE(instance),
11769 "os_instance": instance.osparams,
11770 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11771 "serial_no": instance.serial_no,
11772 "mtime": instance.mtime,
11773 "ctime": instance.ctime,
11774 "uuid": instance.uuid,
11780 def PrepareContainerMods(mods, private_fn):
11781 """Prepares a list of container modifications by adding a private data field.
11783 @type mods: list of tuples; (operation, index, parameters)
11784 @param mods: List of modifications
11785 @type private_fn: callable or None
11786 @param private_fn: Callable for constructing a private data field for a
11787 modification
11788 @rtype: list
11790 """
11791 if private_fn is None:
11792 fn = lambda: None
11793 else:
11794 fn = private_fn
11796 return [(op, idx, params, fn()) for (op, idx, params) in mods]
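# A small self-contained example (editor addition) of PrepareContainerMods:
# every (op, index, params) tuple gains a fourth, private slot produced by
# the factory (or None when no factory is given). The parameters are invented.
def _SketchPrepareMods():
  """Show the 4-tuple shape produced by PrepareContainerMods."""
  mods = [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})]
  prepared = PrepareContainerMods(mods, None)
  # prepared == [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}, None)]
  return prepared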
11799 #: Type description for changes as returned by L{ApplyContainerMods}'s
11800 #: callbacks
11801 _TApplyContModsCbChanges = \
11802 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11803 ht.TNonEmptyString,
11804 ht.TAny,
11805 ])))
11808 def ApplyContainerMods(kind, container, chgdesc, mods,
11809 create_fn, modify_fn, remove_fn):
11810 """Applies descriptions in C{mods} to C{container}.
11812 @type kind: string
11813 @param kind: One-word item description
11814 @type container: list
11815 @param container: Container to modify
11816 @type chgdesc: None or list
11817 @param chgdesc: List of applied changes
11818 @type mods: list
11819 @param mods: Modifications as returned by L{PrepareContainerMods}
11820 @type create_fn: callable
11821 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11822 receives absolute item index, parameters and private data object as added
11823 by L{PrepareContainerMods}, returns tuple containing new item and changes
11824 as list
11825 @type modify_fn: callable
11826 @param modify_fn: Callback for modifying an existing item
11827 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11828 and private data object as added by L{PrepareContainerMods}, returns
11829 changes as list
11830 @type remove_fn: callable
11831 @param remove_fn: Callback on removing item; receives absolute item index,
11832 item and private data object as added by L{PrepareContainerMods}
11834 """
11835 for (op, idx, params, private) in mods:
11836 if idx == -1:
11837 # Append
11838 absidx = len(container) - 1
11839 elif idx < 0:
11840 raise IndexError("Not accepting negative indices other than -1")
11841 else:
11842 absidx = idx
11844 changes = None
11846 if op == constants.DDM_ADD:
11847 if create_fn is None:
11848 item = params
11849 else:
11850 (item, changes) = create_fn(absidx + 1, params, private)
11852 if idx == -1:
11853 container.append(item)
11854 else:
11856 # list.insert does so before the specified index
11857 container.insert(idx, item)
11858 else:
11859 # Retrieve existing item
11860 try:
11861 item = container[absidx]
11862 except IndexError:
11863 raise IndexError("Invalid %s index %s" % (kind, idx))
11865 if op == constants.DDM_REMOVE:
11866 assert not params
11868 if remove_fn is not None:
11869 remove_fn(absidx, item, private)
11871 changes = [("%s/%s" % (kind, absidx), "remove")]
11873 assert container[absidx] == item
11874 del container[absidx]
11875 elif op == constants.DDM_MODIFY:
11876 if modify_fn is not None:
11877 changes = modify_fn(absidx, item, params, private)
11878 else:
11879 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11881 assert _TApplyContModsCbChanges(changes)
11883 if not (chgdesc is None or changes is None):
11884 chgdesc.extend(changes)
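# A self-contained sketch (editor addition) driving ApplyContainerMods on a
# plain list with no callbacks, to make the add/remove protocol above
# concrete; with create_fn=None the ADD parameters are stored as the item.
def _SketchApplyMods():
  """Append an item, then remove the first one, in a toy container."""
  container = ["a", "b"]
  chgdesc = []
  mods = PrepareContainerMods([(constants.DDM_ADD, -1, "c"),
                               (constants.DDM_REMOVE, 0, {})], None)
  ApplyContainerMods("item", container, chgdesc, mods, None, None, None)
  # container is now ["b", "c"] and chgdesc records [("item/0", "remove")]
  return (container, chgdesc)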
11887 class _InstNicModPrivate:
11888 """Data structure for network interface modifications.
11890 Used by L{LUInstanceSetParams}.
11892 """
11893 def __init__(self):
11894 self.params = None
11895 self.filled = None
11898 class LUInstanceSetParams(LogicalUnit):
11899 """Modifies an instances's parameters.
11902 HPATH = "instance-modify"
11903 HTYPE = constants.HTYPE_INSTANCE
11906 @staticmethod
11907 def _UpgradeDiskNicMods(kind, mods, verify_fn):
11908 assert ht.TList(mods)
11909 assert not mods or len(mods[0]) in (2, 3)
11911 if mods and len(mods[0]) == 2:
11912 result = []
11914 addremove = 0
11915 for op, params in mods:
11916 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11917 result.append((op, -1, params))
11918 addremove += 1
11920 if addremove > 1:
11921 raise errors.OpPrereqError("Only one %s add or remove operation is"
11922 " supported at a time" % kind,
11923 errors.ECODE_INVAL)
11924 else:
11925 result.append((constants.DDM_MODIFY, op, params))
11927 assert verify_fn(result)
11928 else:
11929 result = mods
11931 return result
11933 @staticmethod
11934 def _CheckMods(kind, mods, key_types, item_fn):
11935 """Ensures requested disk/NIC modifications are valid.
11937 """
11938 for (op, _, params) in mods:
11939 assert ht.TDict(params)
11941 utils.ForceDictType(params, key_types)
11943 if op == constants.DDM_REMOVE:
11944 if params:
11945 raise errors.OpPrereqError("No settings should be passed when"
11946 " removing a %s" % kind,
11947 errors.ECODE_INVAL)
11948 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11949 item_fn(op, params)
11951 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11953 @staticmethod
11954 def _VerifyDiskModification(op, params):
11955 """Verifies a disk modification.
11957 """
11958 if op == constants.DDM_ADD:
11959 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11960 if mode not in constants.DISK_ACCESS_SET:
11961 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11962 errors.ECODE_INVAL)
11964 size = params.get(constants.IDISK_SIZE, None)
11965 if size is None:
11966 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
11967 constants.IDISK_SIZE, errors.ECODE_INVAL)
11969 try:
11970 size = int(size)
11971 except (TypeError, ValueError), err:
11972 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
11973 errors.ECODE_INVAL)
11975 params[constants.IDISK_SIZE] = size
11977 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
11978 raise errors.OpPrereqError("Disk size change not possible, use"
11979 " grow-disk", errors.ECODE_INVAL)
11982 def _VerifyNicModification(op, params):
11983 """Verifies a network interface modification.
11986 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
11987 ip = params.get(constants.INIC_IP, None)
11988 if ip is None:
11989 pass
11990 elif ip.lower() == constants.VALUE_NONE:
11991 params[constants.INIC_IP] = None
11992 elif not netutils.IPAddress.IsValid(ip):
11993 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
11994 errors.ECODE_INVAL)
11996 bridge = params.get("bridge", None)
11997 link = params.get(constants.INIC_LINK, None)
11998 if bridge and link:
11999 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12000 " at the same time", errors.ECODE_INVAL)
12001 elif bridge and bridge.lower() == constants.VALUE_NONE:
12002 params["bridge"] = None
12003 elif link and link.lower() == constants.VALUE_NONE:
12004 params[constants.INIC_LINK] = None
12006 if op == constants.DDM_ADD:
12007 macaddr = params.get(constants.INIC_MAC, None)
12008 if macaddr is None:
12009 params[constants.INIC_MAC] = constants.VALUE_AUTO
12011 if constants.INIC_MAC in params:
12012 macaddr = params[constants.INIC_MAC]
12013 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12014 macaddr = utils.NormalizeAndValidateMac(macaddr)
12016 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12017 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12018 " modifying an existing NIC",
12019 errors.ECODE_INVAL)
12021 def CheckArguments(self):
12022 if not (self.op.nics or self.op.disks or self.op.disk_template or
12023 self.op.hvparams or self.op.beparams or self.op.os_name or
12024 self.op.offline is not None or self.op.runtime_mem):
12025 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12027 if self.op.hvparams:
12028 _CheckGlobalHvParams(self.op.hvparams)
12030 self.op.disks = \
12031 self._UpgradeDiskNicMods("disk", self.op.disks,
12032 opcodes.OpInstanceSetParams.TestDiskModifications)
12033 self.op.nics = \
12034 self._UpgradeDiskNicMods("NIC", self.op.nics,
12035 opcodes.OpInstanceSetParams.TestNicModifications)
12037 # Check disk modifications
12038 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12039 self._VerifyDiskModification)
12041 if self.op.disks and self.op.disk_template is not None:
12042 raise errors.OpPrereqError("Disk template conversion and other disk"
12043 " changes not supported at the same time",
12044 errors.ECODE_INVAL)
12046 if (self.op.disk_template and
12047 self.op.disk_template in constants.DTS_INT_MIRROR and
12048 self.op.remote_node is None):
12049 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12050 " one requires specifying a secondary node",
12051 errors.ECODE_INVAL)
12053 # Check NIC modifications
12054 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12055 self._VerifyNicModification)
12057 def ExpandNames(self):
12058 self._ExpandAndLockInstance()
12059 # Can't even acquire node locks in shared mode as upcoming changes in
12060 # Ganeti 2.6 will start to modify the node object on disk conversion
12061 self.needed_locks[locking.LEVEL_NODE] = []
12062 self.needed_locks[locking.LEVEL_NODE_RES] = []
12063 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12065 def DeclareLocks(self, level):
12066 # TODO: Acquire group lock in shared mode (disk parameters)
12067 if level == locking.LEVEL_NODE:
12068 self._LockInstancesNodes()
12069 if self.op.disk_template and self.op.remote_node:
12070 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12071 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12072 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12073 # Copy node locks
12074 self.needed_locks[locking.LEVEL_NODE_RES] = \
12075 self.needed_locks[locking.LEVEL_NODE][:]
12077 def BuildHooksEnv(self):
12078 """Build hooks env.
12080 This runs on the master, primary and secondaries.
12082 """
12083 args = {}
12084 if constants.BE_MINMEM in self.be_new:
12085 args["minmem"] = self.be_new[constants.BE_MINMEM]
12086 if constants.BE_MAXMEM in self.be_new:
12087 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12088 if constants.BE_VCPUS in self.be_new:
12089 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12090 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12091 # information at all.
12093 if self._new_nics is not None:
12094 nics = []
12096 for nic in self._new_nics:
12097 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12098 mode = nicparams[constants.NIC_MODE]
12099 link = nicparams[constants.NIC_LINK]
12100 nics.append((nic.ip, nic.mac, mode, link))
12102 args["nics"] = nics
12104 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12105 if self.op.disk_template:
12106 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12107 if self.op.runtime_mem:
12108 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12112 def BuildHooksNodes(self):
12113 """Build hooks nodes.
12116 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12117 return (nl, nl)
12119 def _PrepareNicModification(self, params, private, old_ip, old_params,
12120 cluster, pnode):
12121 update_params_dict = dict([(key, params[key])
12122 for key in constants.NICS_PARAMETERS
12123 if key in params])
12125 if "bridge" in params:
12126 update_params_dict[constants.NIC_LINK] = params["bridge"]
12128 new_params = _GetUpdatedParams(old_params, update_params_dict)
12129 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12131 new_filled_params = cluster.SimpleFillNIC(new_params)
12132 objects.NIC.CheckParameterSyntax(new_filled_params)
12134 new_mode = new_filled_params[constants.NIC_MODE]
12135 if new_mode == constants.NIC_MODE_BRIDGED:
12136 bridge = new_filled_params[constants.NIC_LINK]
12137 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12138 if msg:
12139 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12140 if self.op.force:
12141 self.warn.append(msg)
12142 else:
12143 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12145 elif new_mode == constants.NIC_MODE_ROUTED:
12146 ip = params.get(constants.INIC_IP, old_ip)
12147 if ip is None:
12148 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12149 " on a routed NIC", errors.ECODE_INVAL)
12151 if constants.INIC_MAC in params:
12152 mac = params[constants.INIC_MAC]
12153 if mac is None:
12154 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12155 errors.ECODE_INVAL)
12156 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12157 # otherwise generate the MAC address
12158 params[constants.INIC_MAC] = \
12159 self.cfg.GenerateMAC(self.proc.GetECId())
12160 else:
12161 # or validate/reserve the current one
12162 try:
12163 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12164 except errors.ReservationError:
12165 raise errors.OpPrereqError("MAC address '%s' already in use"
12166 " in cluster" % mac,
12167 errors.ECODE_NOTUNIQUE)
12169 private.params = new_params
12170 private.filled = new_filled_params
12172 return (None, None)
12174 def CheckPrereq(self):
12175 """Check prerequisites.
12177 This only checks the instance list against the existing names.
12179 """
12180 # checking the new params on the primary/secondary nodes
12182 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12183 cluster = self.cluster = self.cfg.GetClusterInfo()
12184 assert self.instance is not None, \
12185 "Cannot retrieve locked instance %s" % self.op.instance_name
12186 pnode = instance.primary_node
12187 nodelist = list(instance.all_nodes)
12188 pnode_info = self.cfg.GetNodeInfo(pnode)
12189 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12191 # Prepare disk/NIC modifications
12192 self.diskmod = PrepareContainerMods(self.op.disks, None)
12193 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12195 # OS change
12196 if self.op.os_name and not self.op.force:
12197 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12198 self.op.force_variant)
12199 instance_os = self.op.os_name
12200 else:
12201 instance_os = instance.os
12203 assert not (self.op.disk_template and self.op.disks), \
12204 "Can't modify disk template and apply disk changes at the same time"
12206 if self.op.disk_template:
12207 if instance.disk_template == self.op.disk_template:
12208 raise errors.OpPrereqError("Instance already has disk template %s" %
12209 instance.disk_template, errors.ECODE_INVAL)
12211 if (instance.disk_template,
12212 self.op.disk_template) not in self._DISK_CONVERSIONS:
12213 raise errors.OpPrereqError("Unsupported disk template conversion from"
12214 " %s to %s" % (instance.disk_template,
12215 self.op.disk_template),
12216 errors.ECODE_INVAL)
12217 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12218 msg="cannot change disk template")
12219 if self.op.disk_template in constants.DTS_INT_MIRROR:
12220 if self.op.remote_node == pnode:
12221 raise errors.OpPrereqError("Given new secondary node %s is the same"
12222 " as the primary node of the instance" %
12223 self.op.remote_node, errors.ECODE_STATE)
12224 _CheckNodeOnline(self, self.op.remote_node)
12225 _CheckNodeNotDrained(self, self.op.remote_node)
12226 # FIXME: here we assume that the old instance type is DT_PLAIN
12227 assert instance.disk_template == constants.DT_PLAIN
12228 disks = [{constants.IDISK_SIZE: d.size,
12229 constants.IDISK_VG: d.logical_id[0]}
12230 for d in instance.disks]
12231 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12232 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12234 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12235 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12236 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12237 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12238 ignore=self.op.ignore_ipolicy)
12239 if pnode_info.group != snode_info.group:
12240 self.LogWarning("The primary and secondary nodes are in two"
12241 " different node groups; the disk parameters"
12242 " from the first disk's node group will be"
12245 # hvparams processing
12246 if self.op.hvparams:
12247 hv_type = instance.hypervisor
12248 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12249 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12250 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12252 # local check
12253 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12254 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12255 self.hv_proposed = self.hv_new = hv_new # the new actual values
12256 self.hv_inst = i_hvdict # the new dict (without defaults)
12257 else:
12258 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12259 instance.hvparams)
12260 self.hv_new = self.hv_inst = {}
12262 # beparams processing
12263 if self.op.beparams:
12264 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12265 use_none=True)
12266 objects.UpgradeBeParams(i_bedict)
12267 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12268 be_new = cluster.SimpleFillBE(i_bedict)
12269 self.be_proposed = self.be_new = be_new # the new actual values
12270 self.be_inst = i_bedict # the new dict (without defaults)
12271 else:
12272 self.be_new = self.be_inst = {}
12273 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12274 be_old = cluster.FillBE(instance)
12276 # CPU param validation -- checking every time a parameter is
12277 # changed to cover all cases where either CPU mask or vcpus have
12278 # changed
12279 if (constants.BE_VCPUS in self.be_proposed and
12280 constants.HV_CPU_MASK in self.hv_proposed):
12281 cpu_list = \
12282 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12283 # Verify mask is consistent with number of vCPUs. Can skip this
12284 # test if only 1 entry in the CPU mask, which means same mask
12285 # is applied to all vCPUs.
12286 if (len(cpu_list) > 1 and
12287 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12288 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12289 " CPU mask [%s]" %
12290 (self.be_proposed[constants.BE_VCPUS],
12291 self.hv_proposed[constants.HV_CPU_MASK]),
12292 errors.ECODE_INVAL)
12294 # Only perform this test if a new CPU mask is given
12295 if constants.HV_CPU_MASK in self.hv_new:
12296 # Calculate the largest CPU number requested
12297 max_requested_cpu = max(map(max, cpu_list))
12298 # Check that all of the instance's nodes have enough physical CPUs to
12299 # satisfy the requested CPU mask
12300 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12301 max_requested_cpu + 1, instance.hypervisor)
12303 # osparams processing
12304 if self.op.osparams:
12305 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12306 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12307 self.os_inst = i_osdict # the new dict (without defaults)
12308 else:
12309 self.os_inst = {}
12311 self.warn = []
12313 #TODO(dynmem): do the appropriate check involving MINMEM
12314 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12315 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12316 mem_check_list = [pnode]
12317 if be_new[constants.BE_AUTO_BALANCE]:
12318 # either we changed auto_balance to yes or it was from before
12319 mem_check_list.extend(instance.secondary_nodes)
12320 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12321 instance.hypervisor)
12322 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12323 [instance.hypervisor])
12324 pninfo = nodeinfo[pnode]
12325 msg = pninfo.fail_msg
12327 # Assume the primary node is unreachable and go ahead
12328 self.warn.append("Can't get info from primary node %s: %s" %
12329 (pnode, msg))
12330 else:
12331 (_, _, (pnhvinfo, )) = pninfo.payload
12332 if not isinstance(pnhvinfo.get("memory_free", None), int):
12333 self.warn.append("Node data from primary node %s doesn't contain"
12334 " free memory information" % pnode)
12335 elif instance_info.fail_msg:
12336 self.warn.append("Can't get instance runtime information: %s" %
12337 instance_info.fail_msg)
12338 else:
12339 if instance_info.payload:
12340 current_mem = int(instance_info.payload["memory"])
12341 else:
12342 # Assume instance not running
12343 # (there is a slight race condition here, but it's not very
12344 # probable, and we have no other way to check)
12345 # TODO: Describe race condition
12346 current_mem = 0
12347 #TODO(dynmem): do the appropriate check involving MINMEM
12348 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12349 pnhvinfo["memory_free"])
12350 if miss_mem > 0:
12351 raise errors.OpPrereqError("This change will prevent the instance"
12352 " from starting, due to %d MB of memory"
12353 " missing on its primary node" %
12355 errors.ECODE_NORES)
12357 if be_new[constants.BE_AUTO_BALANCE]:
12358 for node, nres in nodeinfo.items():
12359 if node not in instance.secondary_nodes:
12360 continue
12361 nres.Raise("Can't get info from secondary node %s" % node,
12362 prereq=True, ecode=errors.ECODE_STATE)
12363 (_, _, (nhvinfo, )) = nres.payload
12364 if not isinstance(nhvinfo.get("memory_free", None), int):
12365 raise errors.OpPrereqError("Secondary node %s didn't return free"
12366 " memory information" % node,
12367 errors.ECODE_STATE)
12368 #TODO(dynmem): do the appropriate check involving MINMEM
12369 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12370 raise errors.OpPrereqError("This change will prevent the instance"
12371 " from failover to its secondary node"
12372 " %s, due to not enough memory" % node,
12373 errors.ECODE_STATE)
12375 if self.op.runtime_mem:
12376 remote_info = self.rpc.call_instance_info(instance.primary_node,
12377 instance.name,
12378 instance.hypervisor)
12379 remote_info.Raise("Error checking node %s" % instance.primary_node)
12380 if not remote_info.payload: # not running already
12381 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12382 errors.ECODE_STATE)
12384 current_memory = remote_info.payload["memory"]
12385 if (not self.op.force and
12386 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12387 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12388 raise errors.OpPrereqError("Instance %s must have memory between %d"
12389 " and %d MB of memory unless --force is"
12390 " given" % (instance.name,
12391 self.be_proposed[constants.BE_MINMEM],
12392 self.be_proposed[constants.BE_MAXMEM]),
12393 errors.ECODE_INVAL)
12395 if self.op.runtime_mem > current_memory:
12396 _CheckNodeFreeMemory(self, instance.primary_node,
12397 "ballooning memory for instance %s" %
12399 self.op.memory - current_memory,
12400 instance.hypervisor)
12402 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12403 raise errors.OpPrereqError("Disk operations not supported for"
12404 " diskless instances",
12405 errors.ECODE_INVAL)
12407 def _PrepareNicCreate(_, params, private):
12408 return self._PrepareNicModification(params, private, None, {},
12409 cluster, pnode)
12411 def _PrepareNicMod(_, nic, params, private):
12412 return self._PrepareNicModification(params, private, nic.ip,
12413 nic.nicparams, cluster, pnode)
12415 # Verify NIC changes (operating on copy)
12416 nics = instance.nics[:]
12417 ApplyContainerMods("NIC", nics, None, self.nicmod,
12418 _PrepareNicCreate, _PrepareNicMod, None)
12419 if len(nics) > constants.MAX_NICS:
12420 raise errors.OpPrereqError("Instance has too many network interfaces"
12421 " (%d), cannot add more" % constants.MAX_NICS,
12422 errors.ECODE_STATE)
12424 # Verify disk changes (operating on a copy)
12425 disks = instance.disks[:]
12426 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12427 if len(disks) > constants.MAX_DISKS:
12428 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12429 " more" % constants.MAX_DISKS,
12430 errors.ECODE_STATE)
12432 if self.op.offline is not None:
12433 if self.op.offline:
12434 msg = "can't change to offline"
12436 msg = "can't change to online"
12437 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12439 # Pre-compute NIC changes (necessary to use result in hooks)
12440 self._nic_chgdesc = []
12442 # Operate on copies as this is still in prereq
12443 nics = [nic.Copy() for nic in instance.nics]
12444 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12445 self._CreateNewNic, self._ApplyNicMods, None)
12446 self._new_nics = nics
12448 self._new_nics = None
12450 def _ConvertPlainToDrbd(self, feedback_fn):
12451 """Converts an instance from plain to drbd.
12454 feedback_fn("Converting template to drbd")
12455 instance = self.instance
12456 pnode = instance.primary_node
12457 snode = self.op.remote_node
12459 assert instance.disk_template == constants.DT_PLAIN
12461 # create a fake disk info for _GenerateDiskTemplate
12462 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12463 constants.IDISK_VG: d.logical_id[0]}
12464 for d in instance.disks]
12465 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12466 instance.name, pnode, [snode],
12467 disk_info, None, None, 0, feedback_fn,
12468 self.diskparams)
12469 info = _GetInstanceInfoText(instance)
12470 feedback_fn("Creating additional volumes...")
12471 # first, create the missing data and meta devices
12472 for disk in new_disks:
12473 # unfortunately this is... not too nice
12474 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12475 info, True)
12476 for child in disk.children:
12477 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12478 # at this stage, all new LVs have been created, we can rename the
12479 # old ones
12480 feedback_fn("Renaming original volumes...")
12481 rename_list = [(o, n.children[0].logical_id)
12482 for (o, n) in zip(instance.disks, new_disks)]
12483 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12484 result.Raise("Failed to rename original LVs")
12486 feedback_fn("Initializing DRBD devices...")
12487 # all child devices are in place, we can now create the DRBD devices
12488 for disk in new_disks:
12489 for node in [pnode, snode]:
12490 f_create = node == pnode
12491 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12493 # at this point, the instance has been modified
12494 instance.disk_template = constants.DT_DRBD8
12495 instance.disks = new_disks
12496 self.cfg.Update(instance, feedback_fn)
12498 # Release node locks while waiting for sync
12499 _ReleaseLocks(self, locking.LEVEL_NODE)
12501 # disks are created, waiting for sync
12502 disk_abort = not _WaitForSync(self, instance,
12503 oneshot=not self.op.wait_for_sync)
12504 if disk_abort:
12505 raise errors.OpExecError("There are some degraded disks for"
12506 " this instance, please cleanup manually")
12508 # Node resource locks will be released by caller
12510 def _ConvertDrbdToPlain(self, feedback_fn):
12511 """Converts an instance from drbd to plain.
12514 instance = self.instance
12516 assert len(instance.secondary_nodes) == 1
12517 assert instance.disk_template == constants.DT_DRBD8
12519 pnode = instance.primary_node
12520 snode = instance.secondary_nodes[0]
12521 feedback_fn("Converting template to plain")
12523 old_disks = instance.disks
12524 new_disks = [d.children[0] for d in old_disks]
12526 # copy over size and mode
12527 for parent, child in zip(old_disks, new_disks):
12528 child.size = parent.size
12529 child.mode = parent.mode
12531 # update instance structure
12532 instance.disks = new_disks
12533 instance.disk_template = constants.DT_PLAIN
12534 self.cfg.Update(instance, feedback_fn)
12536 # Release locks in case removing disks takes a while
12537 _ReleaseLocks(self, locking.LEVEL_NODE)
12539 feedback_fn("Removing volumes on the secondary node...")
12540 for disk in old_disks:
12541 self.cfg.SetDiskID(disk, snode)
12542 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12543 if msg:
12544 self.LogWarning("Could not remove block device %s on node %s,"
12545 " continuing anyway: %s", disk.iv_name, snode, msg)
12547 feedback_fn("Removing unneeded volumes on the primary node...")
12548 for idx, disk in enumerate(old_disks):
12549 meta = disk.children[1]
12550 self.cfg.SetDiskID(meta, pnode)
12551 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12552 if msg:
12553 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12554 " continuing anyway: %s", idx, pnode, msg)
12556 # this is a DRBD disk, return its port to the pool
12557 for disk in old_disks:
12558 tcp_port = disk.logical_id[2]
12559 self.cfg.AddTcpUdpPort(tcp_port)
12561 # Node resource locks will be released by caller
12563 def _CreateNewDisk(self, idx, params, _):
12564 """Creates a new disk.
12567 instance = self.instance
12570 if instance.disk_template in constants.DTS_FILEBASED:
12571 (file_driver, file_path) = instance.disks[0].logical_id
12572 file_path = os.path.dirname(file_path)
12574 file_driver = file_path = None
12576 disk = \
12577 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12578 instance.primary_node, instance.secondary_nodes,
12579 [params], file_path, file_driver, idx,
12580 self.Log, self.diskparams)[0]
12582 info = _GetInstanceInfoText(instance)
12584 logging.info("Creating volume %s for instance %s",
12585 disk.iv_name, instance.name)
12586 # Note: this needs to be kept in sync with _CreateDisks
12588 for node in instance.all_nodes:
12589 f_create = (node == instance.primary_node)
12590 try:
12591 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12592 except errors.OpExecError, err:
12593 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12594 disk.iv_name, disk, node, err)
12597 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12600 @staticmethod
12601 def _ModifyDisk(idx, disk, params, _):
12602 """Modifies a disk.
12604 """
12605 disk.mode = params[constants.IDISK_MODE]
12608 ("disk.mode/%d" % idx, disk.mode),
12611 def _RemoveDisk(self, idx, root, _):
12612 """Removes a disk.
12614 """
12615 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12616 self.cfg.SetDiskID(disk, node)
12617 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12619 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12620 " continuing anyway", idx, node, msg)
12622 # if this is a DRBD disk, return its port to the pool
12623 if root.dev_type in constants.LDS_DRBD:
12624 self.cfg.AddTcpUdpPort(root.logical_id[2])
12626 @staticmethod
12627 def _CreateNewNic(idx, params, private):
12628 """Creates data structure for a new network interface.
12630 """
12631 mac = params[constants.INIC_MAC]
12632 ip = params.get(constants.INIC_IP, None)
12633 nicparams = private.params
12635 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12636 ("nic.%d" % idx,
12637 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12638 (mac, ip, private.filled[constants.NIC_MODE],
12639 private.filled[constants.NIC_LINK])),
12640 ])
12642 @staticmethod
12643 def _ApplyNicMods(idx, nic, params, private):
12644 """Modifies a network interface.
12646 """
12647 changes = []
12649 for key in [constants.INIC_MAC, constants.INIC_IP]:
12650 if key in params:
12651 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12652 setattr(nic, key, params[key])
12654 if private.params:
12655 nic.nicparams = private.params
12657 for (key, val) in params.items():
12658 changes.append(("nic.%s/%d" % (key, idx), val))
12660 return changes
12662 def Exec(self, feedback_fn):
12663 """Modifies an instance.
12665 All parameters take effect only at the next restart of the instance.
12668 # Process here the warnings from CheckPrereq, as we don't have a
12669 # feedback_fn there.
12670 # TODO: Replace with self.LogWarning
12671 for warn in self.warn:
12672 feedback_fn("WARNING: %s" % warn)
12674 assert ((self.op.disk_template is None) ^
12675 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12676 "Not owning any node resource locks"
12679 instance = self.instance
12682 if self.op.runtime_mem:
12683 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12684 instance,
12685 self.op.runtime_mem)
12686 rpcres.Raise("Cannot modify instance runtime memory")
12687 result.append(("runtime_memory", self.op.runtime_mem))
12689 # Apply disk changes
12690 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12691 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12693 if self.op.disk_template:
12694 if __debug__:
12695 check_nodes = set(instance.all_nodes)
12696 if self.op.remote_node:
12697 check_nodes.add(self.op.remote_node)
12698 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12699 owned = self.owned_locks(level)
12700 assert not (check_nodes - owned), \
12701 ("Not owning the correct locks, owning %r, expected at least %r" %
12702 (owned, check_nodes))
12704 r_shut = _ShutdownInstanceDisks(self, instance)
12706 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12707 " proceed with disk template conversion")
12708 mode = (instance.disk_template, self.op.disk_template)
12709 try:
12710 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12711 except:
12712 self.cfg.ReleaseDRBDMinors(instance.name)
12713 raise
12714 result.append(("disk_template", self.op.disk_template))
12716 assert instance.disk_template == self.op.disk_template, \
12717 ("Expected disk template '%s', found '%s'" %
12718 (self.op.disk_template, instance.disk_template))
12720 # Release node and resource locks if there are any (they might already have
12721 # been released during disk conversion)
12722 _ReleaseLocks(self, locking.LEVEL_NODE)
12723 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12725 # Apply NIC changes
12726 if self._new_nics is not None:
12727 instance.nics = self._new_nics
12728 result.extend(self._nic_chgdesc)
12731 if self.op.hvparams:
12732 instance.hvparams = self.hv_inst
12733 for key, val in self.op.hvparams.iteritems():
12734 result.append(("hv/%s" % key, val))
12737 if self.op.beparams:
12738 instance.beparams = self.be_inst
12739 for key, val in self.op.beparams.iteritems():
12740 result.append(("be/%s" % key, val))
12743 if self.op.os_name:
12744 instance.os = self.op.os_name
12747 if self.op.osparams:
12748 instance.osparams = self.os_inst
12749 for key, val in self.op.osparams.iteritems():
12750 result.append(("os/%s" % key, val))
12752 if self.op.offline is None:
12753 # Ignore
12754 pass
12755 elif self.op.offline:
12756 # Mark instance as offline
12757 self.cfg.MarkInstanceOffline(instance.name)
12758 result.append(("admin_state", constants.ADMINST_OFFLINE))
12759 else:
12760 # Mark instance as online, but stopped
12761 self.cfg.MarkInstanceDown(instance.name)
12762 result.append(("admin_state", constants.ADMINST_DOWN))
12764 self.cfg.Update(instance, feedback_fn)
12766 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12767 self.owned_locks(locking.LEVEL_NODE)), \
12768 "All node locks should have been released by now"
12770 return result
12772 _DISK_CONVERSIONS = {
12773 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12774 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12775 }
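# Example (illustrative only): converting a disk template is a plain
# dictionary dispatch keyed on the (current, requested) pair, e.g.
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)  # -> _ConvertPlainToDrbd
# Pairs not listed here are expected to be rejected during the prerequisite
# checks before Exec() runs.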
12778 class LUInstanceChangeGroup(LogicalUnit):
12779 HPATH = "instance-change-group"
12780 HTYPE = constants.HTYPE_INSTANCE
12783 def ExpandNames(self):
12784 self.share_locks = _ShareAll()
12785 self.needed_locks = {
12786 locking.LEVEL_NODEGROUP: [],
12787 locking.LEVEL_NODE: [],
12788 }
12790 self._ExpandAndLockInstance()
12792 if self.op.target_groups:
12793 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12794 self.op.target_groups)
12795 else:
12796 self.req_target_uuids = None
12798 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12800 def DeclareLocks(self, level):
12801 if level == locking.LEVEL_NODEGROUP:
12802 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12804 if self.req_target_uuids:
12805 lock_groups = set(self.req_target_uuids)
12807 # Lock all groups used by instance optimistically; this requires going
12808 # via the node before it's locked, requiring verification later on
12809 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12810 lock_groups.update(instance_groups)
12811 else:
12812 # No target groups, need to lock all of them
12813 lock_groups = locking.ALL_SET
12815 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12817 elif level == locking.LEVEL_NODE:
12818 if self.req_target_uuids:
12819 # Lock all nodes used by instances
12820 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12821 self._LockInstancesNodes()
12823 # Lock all nodes in all potential target groups
12824 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12825 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12826 member_nodes = [node_name
12827 for group in lock_groups
12828 for node_name in self.cfg.GetNodeGroup(group).members]
12829 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12830 else:
12831 # Lock all nodes as all groups are potential targets
12832 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12834 def CheckPrereq(self):
12835 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12836 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12837 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12839 assert (self.req_target_uuids is None or
12840 owned_groups.issuperset(self.req_target_uuids))
12841 assert owned_instances == set([self.op.instance_name])
12843 # Get instance information
12844 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12846 # Check if node groups for locked instance are still correct
12847 assert owned_nodes.issuperset(self.instance.all_nodes), \
12848 ("Instance %s's nodes changed while we kept the lock" %
12849 self.op.instance_name)
12851 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12852 owned_groups)
12854 if self.req_target_uuids:
12855 # User requested specific target groups
12856 self.target_uuids = frozenset(self.req_target_uuids)
12857 else:
12858 # All groups except those used by the instance are potential targets
12859 self.target_uuids = owned_groups - inst_groups
12861 conflicting_groups = self.target_uuids & inst_groups
12862 if conflicting_groups:
12863 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12864 " used by the instance '%s'" %
12865 (utils.CommaJoin(conflicting_groups),
12866 self.op.instance_name),
12867 errors.ECODE_INVAL)
12869 if not self.target_uuids:
12870 raise errors.OpPrereqError("There are no possible target groups",
12871 errors.ECODE_INVAL)
12873 def BuildHooksEnv(self):
12874 """Build hooks env.
12877 assert self.target_uuids
12879 env = {
12880 "TARGET_GROUPS": " ".join(self.target_uuids),
12881 }
12883 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12885 return env
12887 def BuildHooksNodes(self):
12888 """Build hooks nodes.
12891 mn = self.cfg.GetMasterNode()
12892 return ([mn], [mn])
12894 def Exec(self, feedback_fn):
12895 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12897 assert instances == [self.op.instance_name], "Instance not locked"
12899 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12900 instances=instances, target_groups=list(self.target_uuids))
12902 ial.Run(self.op.iallocator)
12904 if not ial.success:
12905 raise errors.OpPrereqError("Can't compute solution for changing group of"
12906 " instance '%s' using iallocator '%s': %s" %
12907 (self.op.instance_name, self.op.iallocator,
12908 ial.info),
12909 errors.ECODE_NORES)
12911 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12913 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12914 " instance '%s'", len(jobs), self.op.instance_name)
12916 return ResultWithJobs(jobs)
12919 class LUBackupQuery(NoHooksLU):
12920 """Query the exports list
12925 def ExpandNames(self):
12926 self.needed_locks = {}
12927 self.share_locks[locking.LEVEL_NODE] = 1
12928 if not self.op.nodes:
12929 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12930 else:
12931 self.needed_locks[locking.LEVEL_NODE] = \
12932 _GetWantedNodes(self, self.op.nodes)
12934 def Exec(self, feedback_fn):
12935 """Compute the list of all the exported system images.
12938 @return: a dictionary with the structure node->(export-list)
12939 where export-list is a list of the instances exported on
12940 that node.
12943 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12944 rpcresult = self.rpc.call_export_list(self.nodes)
12945 result = {}
12946 for node in rpcresult:
12947 if rpcresult[node].fail_msg:
12948 result[node] = False
12949 else:
12950 result[node] = rpcresult[node].payload
12952 return result
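# Example of the mapping returned above (hypothetical names):
#   {
#     "node1.example.com": ["inst1.example.com", "inst2.example.com"],
#     "node2.example.com": False,  # the export_list RPC to this node failed
#   }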
12955 class LUBackupPrepare(NoHooksLU):
12956 """Prepares an instance for an export and returns useful information.
12961 def ExpandNames(self):
12962 self._ExpandAndLockInstance()
12964 def CheckPrereq(self):
12965 """Check prerequisites.
12968 instance_name = self.op.instance_name
12970 self.instance = self.cfg.GetInstanceInfo(instance_name)
12971 assert self.instance is not None, \
12972 "Cannot retrieve locked instance %s" % self.op.instance_name
12973 _CheckNodeOnline(self, self.instance.primary_node)
12975 self._cds = _GetClusterDomainSecret()
12977 def Exec(self, feedback_fn):
12978 """Prepares an instance for an export.
12981 instance = self.instance
12983 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12984 salt = utils.GenerateSecret(8)
12986 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12987 result = self.rpc.call_x509_cert_create(instance.primary_node,
12988 constants.RIE_CERT_VALIDITY)
12989 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12991 (name, cert_pem) = result.payload
12993 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12994 cert_pem)
12996 return {
12997 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12998 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12999 salt),
13000 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13001 }
13003 return None
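# Illustrative note (assumption, not from the original source): every element
# of the dictionary above is tied to the cluster domain secret (self._cds), so
# the remote side can check the key name against its HMAC/salt pair and verify
# the signed X509 CA before any import connection is opened.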
13006 class LUBackupExport(LogicalUnit):
13007 """Export an instance to an image in the cluster.
13010 HPATH = "instance-export"
13011 HTYPE = constants.HTYPE_INSTANCE
13014 def CheckArguments(self):
13015 """Check the arguments.
13018 self.x509_key_name = self.op.x509_key_name
13019 self.dest_x509_ca_pem = self.op.destination_x509_ca
13021 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13022 if not self.x509_key_name:
13023 raise errors.OpPrereqError("Missing X509 key name for encryption",
13024 errors.ECODE_INVAL)
13026 if not self.dest_x509_ca_pem:
13027 raise errors.OpPrereqError("Missing destination X509 CA",
13028 errors.ECODE_INVAL)
13030 def ExpandNames(self):
13031 self._ExpandAndLockInstance()
13033 # Lock all nodes for local exports
13034 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13035 # FIXME: lock only instance primary and destination node
13037 # Sad but true, for now we have to lock all nodes, as we don't know where
13038 # the previous export might be, and in this LU we search for it and
13039 # remove it from its current node. In the future we could fix this by:
13040 # - making a tasklet to search (share-lock all), then create the
13041 # new one, then one to remove, after
13042 # - removing the removal operation altogether
13043 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13045 def DeclareLocks(self, level):
13046 """Last minute lock declaration."""
13047 # All nodes are locked anyway, so nothing to do here.
13049 def BuildHooksEnv(self):
13050 """Build hooks env.
13052 This will run on the master, primary node and target node.
13055 env = {
13056 "EXPORT_MODE": self.op.mode,
13057 "EXPORT_NODE": self.op.target_node,
13058 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13059 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13060 # TODO: Generic function for boolean env variables
13061 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13062 }
13064 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13066 return env
13068 def BuildHooksNodes(self):
13069 """Build hooks nodes.
13072 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13074 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13075 nl.append(self.op.target_node)
13077 return (nl, nl)
13079 def CheckPrereq(self):
13080 """Check prerequisites.
13082 This checks that the instance and node names are valid.
13085 instance_name = self.op.instance_name
13087 self.instance = self.cfg.GetInstanceInfo(instance_name)
13088 assert self.instance is not None, \
13089 "Cannot retrieve locked instance %s" % self.op.instance_name
13090 _CheckNodeOnline(self, self.instance.primary_node)
13092 if (self.op.remove_instance and
13093 self.instance.admin_state == constants.ADMINST_UP and
13094 not self.op.shutdown):
13095 raise errors.OpPrereqError("Can not remove instance without shutting it"
13096 " down before", errors.ECODE_STATE)
13098 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13099 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13100 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13101 assert self.dst_node is not None
13103 _CheckNodeOnline(self, self.dst_node.name)
13104 _CheckNodeNotDrained(self, self.dst_node.name)
13106 self._cds = None
13107 self.dest_disk_info = None
13108 self.dest_x509_ca = None
13110 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13111 self.dst_node = None
13113 if len(self.op.target_node) != len(self.instance.disks):
13114 raise errors.OpPrereqError(("Received destination information for %s"
13115 " disks, but instance %s has %s disks") %
13116 (len(self.op.target_node), instance_name,
13117 len(self.instance.disks)),
13118 errors.ECODE_INVAL)
13120 cds = _GetClusterDomainSecret()
13122 # Check X509 key name
13123 try:
13124 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13125 except (TypeError, ValueError), err:
13126 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13128 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13129 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13130 errors.ECODE_INVAL)
13132 # Load and verify CA
13133 try:
13134 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13135 except OpenSSL.crypto.Error, err:
13136 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13137 (err, ), errors.ECODE_INVAL)
13139 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13140 if errcode is not None:
13141 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13142 (msg, ), errors.ECODE_INVAL)
13144 self.dest_x509_ca = cert
13146 # Verify target information
13147 disk_info = []
13148 for idx, disk_data in enumerate(self.op.target_node):
13149 try:
13150 (host, port, magic) = \
13151 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13152 except errors.GenericError, err:
13153 raise errors.OpPrereqError("Target info for disk %s: %s" %
13154 (idx, err), errors.ECODE_INVAL)
13156 disk_info.append((host, port, magic))
13158 assert len(disk_info) == len(self.op.target_node)
13159 self.dest_disk_info = disk_info
13161 else:
13162 raise errors.ProgrammerError("Unhandled export mode %r" %
13163 self.op.mode)
13165 # instance disk type verification
13166 # TODO: Implement export support for file-based disks
13167 for disk in self.instance.disks:
13168 if disk.dev_type == constants.LD_FILE:
13169 raise errors.OpPrereqError("Export not supported for instances with"
13170 " file-based disks", errors.ECODE_INVAL)
13172 def _CleanupExports(self, feedback_fn):
13173 """Removes exports of current instance from all other nodes.
13175 If an instance in a cluster with nodes A..D was exported to node C, its
13176 exports will be removed from the nodes A, B and D.
13179 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13181 nodelist = self.cfg.GetNodeList()
13182 nodelist.remove(self.dst_node.name)
13184 # on one-node clusters nodelist will be empty after the removal
13185 # if we proceeded, the backup would be removed because OpBackupQuery
13186 # substitutes an empty list with the full cluster node list.
13187 iname = self.instance.name
13188 if nodelist:
13189 feedback_fn("Removing old exports for instance %s" % iname)
13190 exportlist = self.rpc.call_export_list(nodelist)
13191 for node in exportlist:
13192 if exportlist[node].fail_msg:
13193 continue
13194 if iname in exportlist[node].payload:
13195 msg = self.rpc.call_export_remove(node, iname).fail_msg
13196 if msg:
13197 self.LogWarning("Could not remove older export for instance %s"
13198 " on node %s: %s", iname, node, msg)
13200 def Exec(self, feedback_fn):
13201 """Export an instance to an image in the cluster.
13204 assert self.op.mode in constants.EXPORT_MODES
13206 instance = self.instance
13207 src_node = instance.primary_node
13209 if self.op.shutdown:
13210 # shutdown the instance, but not the disks
13211 feedback_fn("Shutting down instance %s" % instance.name)
13212 result = self.rpc.call_instance_shutdown(src_node, instance,
13213 self.op.shutdown_timeout)
13214 # TODO: Maybe ignore failures if ignore_remove_failures is set
13215 result.Raise("Could not shutdown instance %s on"
13216 " node %s" % (instance.name, src_node))
13218 # set the disks ID correctly since call_instance_start needs the
13219 # correct drbd minor to create the symlinks
13220 for disk in instance.disks:
13221 self.cfg.SetDiskID(disk, src_node)
13223 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13225 if activate_disks:
13226 # Activate the instance disks if we're exporting a stopped instance
13227 feedback_fn("Activating disks for %s" % instance.name)
13228 _StartInstanceDisks(self, instance, None)
13231 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13232 instance)
13234 helper.CreateSnapshots()
13235 try:
13236 if (self.op.shutdown and
13237 instance.admin_state == constants.ADMINST_UP and
13238 not self.op.remove_instance):
13239 assert not activate_disks
13240 feedback_fn("Starting instance %s" % instance.name)
13241 result = self.rpc.call_instance_start(src_node,
13242 (instance, None, None), False)
13243 msg = result.fail_msg
13244 if msg:
13245 feedback_fn("Failed to start instance: %s" % msg)
13246 _ShutdownInstanceDisks(self, instance)
13247 raise errors.OpExecError("Could not start instance: %s" % msg)
13249 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13250 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13251 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13252 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13253 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13255 (key_name, _, _) = self.x509_key_name
13257 dest_ca_pem = \
13258 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13259 self.dest_x509_ca)
13261 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13262 key_name, dest_ca_pem,
13263 timeouts)
13264 finally:
13265 helper.Cleanup()
13267 # Check for backwards compatibility
13268 assert len(dresults) == len(instance.disks)
13269 assert compat.all(isinstance(i, bool) for i in dresults), \
13270 "Not all results are boolean: %r" % dresults
13272 if activate_disks:
13274 feedback_fn("Deactivating disks for %s" % instance.name)
13275 _ShutdownInstanceDisks(self, instance)
13277 if not (compat.all(dresults) and fin_resu):
13278 failures = []
13279 if not fin_resu:
13280 failures.append("export finalization")
13281 if not compat.all(dresults):
13282 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13283 if not dsk)
13284 failures.append("disk export: disk(s) %s" % fdsk)
13286 raise errors.OpExecError("Export failed, errors in %s" %
13287 utils.CommaJoin(failures))
13289 # At this point, the export was successful, we can cleanup/finish
13291 # Remove instance if requested
13292 if self.op.remove_instance:
13293 feedback_fn("Removing instance %s" % instance.name)
13294 _RemoveInstance(self, feedback_fn, instance,
13295 self.op.ignore_remove_failures)
13297 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13298 self._CleanupExports(feedback_fn)
13300 return fin_resu, dresults
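# Example of the value returned above (illustrative): for a two-disk instance
# whose export finalized but failed on the second disk:
#   (True, [True, False])
# i.e. the finalization status first, then one boolean per disk, matching the
# asserts on dresults earlier in this method.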
13303 class LUBackupRemove(NoHooksLU):
13304 """Remove exports related to the named instance.
13309 def ExpandNames(self):
13310 self.needed_locks = {}
13311 # We need all nodes to be locked in order for RemoveExport to work, but we
13312 # don't need to lock the instance itself, as nothing will happen to it (and
13313 # we can remove exports also for a removed instance)
13314 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13316 def Exec(self, feedback_fn):
13317 """Remove any export.
13320 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13321 # If the instance was not found we'll try with the name that was passed in.
13322 # This will only work if it was an FQDN, though.
13323 fqdn_warn = False
13324 if not instance_name:
13325 fqdn_warn = True
13326 instance_name = self.op.instance_name
13328 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13329 exportlist = self.rpc.call_export_list(locked_nodes)
13330 found = False
13331 for node in exportlist:
13332 msg = exportlist[node].fail_msg
13333 if msg:
13334 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13335 continue
13336 if instance_name in exportlist[node].payload:
13337 found = True
13338 result = self.rpc.call_export_remove(node, instance_name)
13339 msg = result.fail_msg
13340 if msg:
13341 logging.error("Could not remove export for instance %s"
13342 " on node %s: %s", instance_name, node, msg)
13344 if fqdn_warn and not found:
13345 feedback_fn("Export not found. If trying to remove an export belonging"
13346 " to a deleted instance please use its Fully Qualified"
13347 " Domain Name.")
13350 class LUGroupAdd(LogicalUnit):
13351 """Logical unit for creating node groups.
13354 HPATH = "group-add"
13355 HTYPE = constants.HTYPE_GROUP
13358 def ExpandNames(self):
13359 # We need the new group's UUID here so that we can create and acquire the
13360 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13361 # that it should not check whether the UUID exists in the configuration.
13362 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13363 self.needed_locks = {}
13364 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13366 def CheckPrereq(self):
13367 """Check prerequisites.
13369 This checks that the given group name is not an existing node group
13370 already.
13373 try:
13374 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13375 except errors.OpPrereqError:
13376 pass
13377 else:
13378 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13379 " node group (UUID: %s)" %
13380 (self.op.group_name, existing_uuid),
13381 errors.ECODE_EXISTS)
13383 if self.op.ndparams:
13384 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13386 if self.op.hv_state:
13387 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13388 else:
13389 self.new_hv_state = None
13391 if self.op.disk_state:
13392 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13393 else:
13394 self.new_disk_state = None
13396 if self.op.diskparams:
13397 for templ in constants.DISK_TEMPLATES:
13398 if templ not in self.op.diskparams:
13399 self.op.diskparams[templ] = {}
13400 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13401 else:
13402 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13404 if self.op.ipolicy:
13405 cluster = self.cfg.GetClusterInfo()
13406 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13408 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13409 except errors.ConfigurationError, err:
13410 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13411 errors.ECODE_INVAL)
13413 def BuildHooksEnv(self):
13414 """Build hooks env.
13417 return {
13418 "GROUP_NAME": self.op.group_name,
13419 }
13421 def BuildHooksNodes(self):
13422 """Build hooks nodes.
13425 mn = self.cfg.GetMasterNode()
13426 return ([mn], [mn])
13428 def Exec(self, feedback_fn):
13429 """Add the node group to the cluster.
13432 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13433 uuid=self.group_uuid,
13434 alloc_policy=self.op.alloc_policy,
13435 ndparams=self.op.ndparams,
13436 diskparams=self.op.diskparams,
13437 ipolicy=self.op.ipolicy,
13438 hv_state_static=self.new_hv_state,
13439 disk_state_static=self.new_disk_state)
13441 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13442 del self.remove_locks[locking.LEVEL_NODEGROUP]
13445 class LUGroupAssignNodes(NoHooksLU):
13446 """Logical unit for assigning nodes to groups.
13451 def ExpandNames(self):
13452 # These raise errors.OpPrereqError on their own:
13453 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13454 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13456 # We want to lock all the affected nodes and groups. We have readily
13457 # available the list of nodes, and the *destination* group. To gather the
13458 # list of "source" groups, we need to fetch node information later on.
13459 self.needed_locks = {
13460 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13461 locking.LEVEL_NODE: self.op.nodes,
13462 }
13464 def DeclareLocks(self, level):
13465 if level == locking.LEVEL_NODEGROUP:
13466 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13468 # Try to get all affected nodes' groups without having the group or node
13469 # lock yet. Needs verification later in the code flow.
13470 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13472 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13474 def CheckPrereq(self):
13475 """Check prerequisites.
13478 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13479 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13480 frozenset(self.op.nodes))
13482 expected_locks = (set([self.group_uuid]) |
13483 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13484 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13485 if actual_locks != expected_locks:
13486 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13487 " current groups are '%s', used to be '%s'" %
13488 (utils.CommaJoin(expected_locks),
13489 utils.CommaJoin(actual_locks)))
13491 self.node_data = self.cfg.GetAllNodesInfo()
13492 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13493 instance_data = self.cfg.GetAllInstancesInfo()
13495 if self.group is None:
13496 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13497 (self.op.group_name, self.group_uuid))
13499 (new_splits, previous_splits) = \
13500 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13501 for node in self.op.nodes],
13502 self.node_data, instance_data)
13504 if new_splits:
13505 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13507 if not self.op.force:
13508 raise errors.OpExecError("The following instances get split by this"
13509 " change and --force was not given: %s" %
13510 fmt_new_splits)
13511 else:
13512 self.LogWarning("This operation will split the following instances: %s",
13513 fmt_new_splits)
13515 if previous_splits:
13516 self.LogWarning("In addition, these already-split instances continue"
13517 " to be split across groups: %s",
13518 utils.CommaJoin(utils.NiceSort(previous_splits)))
13520 def Exec(self, feedback_fn):
13521 """Assign nodes to a new group.
13524 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13526 self.cfg.AssignGroupNodes(mods)
13528 @staticmethod
13529 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13530 """Check for split instances after a node assignment.
13532 This method considers a series of node assignments as an atomic operation,
13533 and returns information about split instances after applying the set of
13536 In particular, it returns information about newly split instances, and
13537 instances that were already split, and remain so after the change.
13539 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13540 considered.
13542 @type changes: list of (node_name, new_group_uuid) pairs.
13543 @param changes: list of node assignments to consider.
13544 @param node_data: a dict with data for all nodes
13545 @param instance_data: a dict with all instances to consider
13546 @rtype: a two-tuple
13547 @return: a list of instances that were previously okay and result split as a
13548 consequence of this change, and a list of instances that were previously
13549 split and this change does not fix.
13552 changed_nodes = dict((node, group) for node, group in changes
13553 if node_data[node].group != group)
13555 all_split_instances = set()
13556 previously_split_instances = set()
13558 def InstanceNodes(instance):
13559 return [instance.primary_node] + list(instance.secondary_nodes)
13561 for inst in instance_data.values():
13562 if inst.disk_template not in constants.DTS_INT_MIRROR:
13563 continue
13565 instance_nodes = InstanceNodes(inst)
13567 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13568 previously_split_instances.add(inst.name)
13570 if len(set(changed_nodes.get(node, node_data[node].group)
13571 for node in instance_nodes)) > 1:
13572 all_split_instances.add(inst.name)
13574 return (list(all_split_instances - previously_split_instances),
13575 list(previously_split_instances & all_split_instances))
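# Worked example (hypothetical names): with nodes n1, n2 in group g1 and n3 in
# g2, a DRBD instance "inst" on (n1, n2) and changes = [("n2", "g2")] returns
#   (["inst"], [])   # inst becomes newly split between g1 and g2
# while an instance already living on (n1, n3) with the same changes returns
#   ([], ["inst"])   # it was split before and remains split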
13578 class _GroupQuery(_QueryBase):
13579 FIELDS = query.GROUP_FIELDS
13581 def ExpandNames(self, lu):
13582 lu.needed_locks = {}
13584 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13585 self._cluster = lu.cfg.GetClusterInfo()
13586 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13588 if not self.names:
13589 self.wanted = [name_to_uuid[name]
13590 for name in utils.NiceSort(name_to_uuid.keys())]
13591 else:
13592 # Accept names to be either names or UUIDs.
13593 missing = []
13594 self.wanted = []
13595 all_uuid = frozenset(self._all_groups.keys())
13597 for name in self.names:
13598 if name in all_uuid:
13599 self.wanted.append(name)
13600 elif name in name_to_uuid:
13601 self.wanted.append(name_to_uuid[name])
13602 else:
13603 missing.append(name)
13605 if missing:
13606 raise errors.OpPrereqError("Some groups do not exist: %s" %
13607 utils.CommaJoin(missing),
13608 errors.ECODE_NOENT)
13610 def DeclareLocks(self, lu, level):
13611 pass
13613 def _GetQueryData(self, lu):
13614 """Computes the list of node groups and their attributes.
13617 do_nodes = query.GQ_NODE in self.requested_data
13618 do_instances = query.GQ_INST in self.requested_data
13620 group_to_nodes = None
13621 group_to_instances = None
13623 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13624 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13625 # latter GetAllInstancesInfo() is not enough, for we have to go through
13626 # instance->node. Hence, we will need to process nodes even if we only need
13627 # instance information.
13628 if do_nodes or do_instances:
13629 all_nodes = lu.cfg.GetAllNodesInfo()
13630 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13631 node_to_group = {}
13633 for node in all_nodes.values():
13634 if node.group in group_to_nodes:
13635 group_to_nodes[node.group].append(node.name)
13636 node_to_group[node.name] = node.group
13638 if do_instances:
13639 all_instances = lu.cfg.GetAllInstancesInfo()
13640 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13642 for instance in all_instances.values():
13643 node = instance.primary_node
13644 if node in node_to_group:
13645 group_to_instances[node_to_group[node]].append(instance.name)
13647 if not do_nodes:
13648 # Do not pass on node information if it was not requested.
13649 group_to_nodes = None
13651 return query.GroupQueryData(self._cluster,
13652 [self._all_groups[uuid]
13653 for uuid in self.wanted],
13654 group_to_nodes, group_to_instances)
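# Example shapes of the two optional mappings built above (hypothetical UUIDs):
#   group_to_nodes     = {"uuid-g1": ["node1", "node2"], "uuid-g2": []}
#   group_to_instances = {"uuid-g1": ["inst1"], "uuid-g2": []}
# Either one stays None when the corresponding field set (GQ_NODE or GQ_INST)
# was not requested.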
13657 class LUGroupQuery(NoHooksLU):
13658 """Logical unit for querying node groups.
13663 def CheckArguments(self):
13664 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13665 self.op.output_fields, False)
13667 def ExpandNames(self):
13668 self.gq.ExpandNames(self)
13670 def DeclareLocks(self, level):
13671 self.gq.DeclareLocks(self, level)
13673 def Exec(self, feedback_fn):
13674 return self.gq.OldStyleQuery(self)
13677 class LUGroupSetParams(LogicalUnit):
13678 """Modifies the parameters of a node group.
13681 HPATH = "group-modify"
13682 HTYPE = constants.HTYPE_GROUP
13685 def CheckArguments(self):
13686 all_changes = [
13687 self.op.ndparams,
13688 self.op.diskparams,
13689 self.op.alloc_policy,
13690 self.op.hv_state,
13691 self.op.disk_state,
13692 self.op.ipolicy,
13693 ]
13695 if all_changes.count(None) == len(all_changes):
13696 raise errors.OpPrereqError("Please pass at least one modification",
13697 errors.ECODE_INVAL)
13699 def ExpandNames(self):
13700 # This raises errors.OpPrereqError on its own:
13701 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13703 self.needed_locks = {
13704 locking.LEVEL_INSTANCE: [],
13705 locking.LEVEL_NODEGROUP: [self.group_uuid],
13706 }
13708 self.share_locks[locking.LEVEL_INSTANCE] = 1
13710 def DeclareLocks(self, level):
13711 if level == locking.LEVEL_INSTANCE:
13712 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13714 # Lock instances optimistically, needs verification once group lock has
13715 # been acquired
13716 self.needed_locks[locking.LEVEL_INSTANCE] = \
13717 self.cfg.GetNodeGroupInstances(self.group_uuid)
13719 def CheckPrereq(self):
13720 """Check prerequisites.
13723 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13725 # Check if locked instances are still correct
13726 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13728 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13729 cluster = self.cfg.GetClusterInfo()
13731 if self.group is None:
13732 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13733 (self.op.group_name, self.group_uuid))
13735 if self.op.ndparams:
13736 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13737 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13738 self.new_ndparams = new_ndparams
13740 if self.op.diskparams:
13741 self.new_diskparams = dict()
13742 for templ in constants.DISK_TEMPLATES:
13743 if templ not in self.op.diskparams:
13744 self.op.diskparams[templ] = {}
13745 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13746 self.op.diskparams[templ])
13747 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13748 self.new_diskparams[templ] = new_templ_params
13750 if self.op.hv_state:
13751 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13752 self.group.hv_state_static)
13754 if self.op.disk_state:
13755 self.new_disk_state = \
13756 _MergeAndVerifyDiskState(self.op.disk_state,
13757 self.group.disk_state_static)
13759 if self.op.ipolicy:
13760 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13761 self.op.ipolicy,
13762 group_policy=True)
13764 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13765 inst_filter = lambda inst: inst.name in owned_instances
13766 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13767 violations = \
13768 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13769 self.group),
13770 new_ipolicy, instances)
13772 if violations:
13773 self.LogWarning("After the ipolicy change the following instances"
13774 " violate them: %s",
13775 utils.CommaJoin(violations))
13777 def BuildHooksEnv(self):
13778 """Build hooks env.
13781 return {
13782 "GROUP_NAME": self.op.group_name,
13783 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13784 }
13786 def BuildHooksNodes(self):
13787 """Build hooks nodes.
13790 mn = self.cfg.GetMasterNode()
13791 return ([mn], [mn])
13793 def Exec(self, feedback_fn):
13794 """Modifies the node group.
13797 result = []
13799 if self.op.ndparams:
13800 self.group.ndparams = self.new_ndparams
13801 result.append(("ndparams", str(self.group.ndparams)))
13803 if self.op.diskparams:
13804 self.group.diskparams = self.new_diskparams
13805 result.append(("diskparams", str(self.group.diskparams)))
13807 if self.op.alloc_policy:
13808 self.group.alloc_policy = self.op.alloc_policy
13810 if self.op.hv_state:
13811 self.group.hv_state_static = self.new_hv_state
13813 if self.op.disk_state:
13814 self.group.disk_state_static = self.new_disk_state
13816 if self.op.ipolicy:
13817 self.group.ipolicy = self.new_ipolicy
13819 self.cfg.Update(self.group, feedback_fn)
13821 return result
13823 class LUGroupRemove(LogicalUnit):
13824 HPATH = "group-remove"
13825 HTYPE = constants.HTYPE_GROUP
13828 def ExpandNames(self):
13829 # This raises errors.OpPrereqError on its own:
13830 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13831 self.needed_locks = {
13832 locking.LEVEL_NODEGROUP: [self.group_uuid],
13833 }
13835 def CheckPrereq(self):
13836 """Check prerequisites.
13838 This checks that the given group name exists as a node group, that it is
13839 empty (i.e., contains no nodes), and that it is not the last group of the
13840 cluster.
13843 # Verify that the group is empty.
13844 group_nodes = [node.name
13845 for node in self.cfg.GetAllNodesInfo().values()
13846 if node.group == self.group_uuid]
13848 if group_nodes:
13849 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13850 " nodes: %s" %
13851 (self.op.group_name,
13852 utils.CommaJoin(utils.NiceSort(group_nodes))),
13853 errors.ECODE_STATE)
13855 # Verify the cluster would not be left group-less.
13856 if len(self.cfg.GetNodeGroupList()) == 1:
13857 raise errors.OpPrereqError("Group '%s' is the only group,"
13858 " cannot be removed" %
13859 self.op.group_name,
13860 errors.ECODE_STATE)
13862 def BuildHooksEnv(self):
13863 """Build hooks env.
13866 return {
13867 "GROUP_NAME": self.op.group_name,
13868 }
13870 def BuildHooksNodes(self):
13871 """Build hooks nodes.
13874 mn = self.cfg.GetMasterNode()
13875 return ([mn], [mn])
13877 def Exec(self, feedback_fn):
13878 """Remove the node group.
13881 try:
13882 self.cfg.RemoveNodeGroup(self.group_uuid)
13883 except errors.ConfigurationError:
13884 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13885 (self.op.group_name, self.group_uuid))
13887 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13890 class LUGroupRename(LogicalUnit):
13891 HPATH = "group-rename"
13892 HTYPE = constants.HTYPE_GROUP
13895 def ExpandNames(self):
13896 # This raises errors.OpPrereqError on its own:
13897 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13899 self.needed_locks = {
13900 locking.LEVEL_NODEGROUP: [self.group_uuid],
13901 }
13903 def CheckPrereq(self):
13904 """Check prerequisites.
13906 Ensures the requested new name is not yet used.
13909 try:
13910 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13911 except errors.OpPrereqError:
13912 pass
13913 else:
13914 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13915 " node group (UUID: %s)" %
13916 (self.op.new_name, new_name_uuid),
13917 errors.ECODE_EXISTS)
13919 def BuildHooksEnv(self):
13920 """Build hooks env.
13923 return {
13924 "OLD_NAME": self.op.group_name,
13925 "NEW_NAME": self.op.new_name,
13926 }
13928 def BuildHooksNodes(self):
13929 """Build hooks nodes.
13932 mn = self.cfg.GetMasterNode()
13934 all_nodes = self.cfg.GetAllNodesInfo()
13935 all_nodes.pop(mn, None)
13937 run_nodes = [mn]
13938 run_nodes.extend(node.name for node in all_nodes.values()
13939 if node.group == self.group_uuid)
13941 return (run_nodes, run_nodes)
13943 def Exec(self, feedback_fn):
13944 """Rename the node group.
13947 group = self.cfg.GetNodeGroup(self.group_uuid)
13949 if group is None:
13950 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13951 (self.op.group_name, self.group_uuid))
13953 group.name = self.op.new_name
13954 self.cfg.Update(group, feedback_fn)
13956 return self.op.new_name
13959 class LUGroupEvacuate(LogicalUnit):
13960 HPATH = "group-evacuate"
13961 HTYPE = constants.HTYPE_GROUP
13964 def ExpandNames(self):
13965 # This raises errors.OpPrereqError on its own:
13966 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13968 if self.op.target_groups:
13969 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13970 self.op.target_groups)
13971 else:
13972 self.req_target_uuids = []
13974 if self.group_uuid in self.req_target_uuids:
13975 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13976 " as a target group (targets are %s)" %
13977 (self.group_uuid,
13978 utils.CommaJoin(self.req_target_uuids)),
13979 errors.ECODE_INVAL)
13981 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13983 self.share_locks = _ShareAll()
13984 self.needed_locks = {
13985 locking.LEVEL_INSTANCE: [],
13986 locking.LEVEL_NODEGROUP: [],
13987 locking.LEVEL_NODE: [],
13988 }
13990 def DeclareLocks(self, level):
13991 if level == locking.LEVEL_INSTANCE:
13992 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13994 # Lock instances optimistically, needs verification once node and group
13995 # locks have been acquired
13996 self.needed_locks[locking.LEVEL_INSTANCE] = \
13997 self.cfg.GetNodeGroupInstances(self.group_uuid)
13999 elif level == locking.LEVEL_NODEGROUP:
14000 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14002 if self.req_target_uuids:
14003 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14004 else:
14005 # Lock all groups used by instances optimistically; this requires going
14006 # via the node before it's locked, requiring verification later on
14007 lock_groups.update(group_uuid
14008 for instance_name in
14009 self.owned_locks(locking.LEVEL_INSTANCE)
14010 for group_uuid in
14011 self.cfg.GetInstanceNodeGroups(instance_name))
14012 else:
14013 # No target groups, need to lock all of them
14014 lock_groups = locking.ALL_SET
14016 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14018 elif level == locking.LEVEL_NODE:
14019 # This will only lock the nodes in the group to be evacuated which
14020 # contain actual instances
14021 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14022 self._LockInstancesNodes()
14024 # Lock all nodes in group to be evacuated and target groups
14025 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14026 assert self.group_uuid in owned_groups
14027 member_nodes = [node_name
14028 for group in owned_groups
14029 for node_name in self.cfg.GetNodeGroup(group).members]
14030 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14032 def CheckPrereq(self):
14033 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14034 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14035 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14037 assert owned_groups.issuperset(self.req_target_uuids)
14038 assert self.group_uuid in owned_groups
14040 # Check if locked instances are still correct
14041 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14043 # Get instance information
14044 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14046 # Check if node groups for locked instances are still correct
14047 for instance_name in owned_instances:
14048 inst = self.instances[instance_name]
14049 assert owned_nodes.issuperset(inst.all_nodes), \
14050 "Instance %s's nodes changed while we kept the lock" % instance_name
14052 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
14053 owned_groups)
14055 assert self.group_uuid in inst_groups, \
14056 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
14058 if self.req_target_uuids:
14059 # User requested specific target groups
14060 self.target_uuids = self.req_target_uuids
14061 else:
14062 # All groups except the one to be evacuated are potential targets
14063 self.target_uuids = [group_uuid for group_uuid in owned_groups
14064 if group_uuid != self.group_uuid]
14066 if not self.target_uuids:
14067 raise errors.OpPrereqError("There are no possible target groups",
14068 errors.ECODE_INVAL)
14070 def BuildHooksEnv(self):
14071 """Build hooks env.
14074 return {
14075 "GROUP_NAME": self.op.group_name,
14076 "TARGET_GROUPS": " ".join(self.target_uuids),
14077 }
14079 def BuildHooksNodes(self):
14080 """Build hooks nodes.
14083 mn = self.cfg.GetMasterNode()
14085 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14087 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14089 return (run_nodes, run_nodes)
14091 def Exec(self, feedback_fn):
14092 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14094 assert self.group_uuid not in self.target_uuids
14096 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14097 instances=instances, target_groups=self.target_uuids)
14099 ial.Run(self.op.iallocator)
14101 if not ial.success:
14102 raise errors.OpPrereqError("Can't compute group evacuation using"
14103 " iallocator '%s': %s" %
14104 (self.op.iallocator, ial.info),
14105 errors.ECODE_NORES)
14107 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14109 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14110 len(jobs), self.op.group_name)
14112 return ResultWithJobs(jobs)
14115 class TagsLU(NoHooksLU): # pylint: disable=W0223
14116 """Generic tags LU.
14118 This is an abstract class which is the parent of all the other tags LUs.
14121 def ExpandNames(self):
14122 self.group_uuid = None
14123 self.needed_locks = {}
14124 if self.op.kind == constants.TAG_NODE:
14125 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14126 self.needed_locks[locking.LEVEL_NODE] = self.op.name
14127 elif self.op.kind == constants.TAG_INSTANCE:
14128 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14129 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14130 elif self.op.kind == constants.TAG_NODEGROUP:
14131 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14133 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14134 # not possible to acquire the BGL based on opcode parameters)
14136 def CheckPrereq(self):
14137 """Check prerequisites.
14140 if self.op.kind == constants.TAG_CLUSTER:
14141 self.target = self.cfg.GetClusterInfo()
14142 elif self.op.kind == constants.TAG_NODE:
14143 self.target = self.cfg.GetNodeInfo(self.op.name)
14144 elif self.op.kind == constants.TAG_INSTANCE:
14145 self.target = self.cfg.GetInstanceInfo(self.op.name)
14146 elif self.op.kind == constants.TAG_NODEGROUP:
14147 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14148 else:
14149 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14150 str(self.op.kind), errors.ECODE_INVAL)
14153 class LUTagsGet(TagsLU):
14154 """Returns the tags of a given object.
14159 def ExpandNames(self):
14160 TagsLU.ExpandNames(self)
14162 # Share locks as this is only a read operation
14163 self.share_locks = _ShareAll()
14165 def Exec(self, feedback_fn):
14166 """Returns the tag list.
14169 return list(self.target.GetTags())
14172 class LUTagsSearch(NoHooksLU):
14173 """Searches the tags for a given pattern.
14178 def ExpandNames(self):
14179 self.needed_locks = {}
14181 def CheckPrereq(self):
14182 """Check prerequisites.
14184 This checks the pattern passed for validity by compiling it.
14187 try:
14188 self.re = re.compile(self.op.pattern)
14189 except re.error, err:
14190 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14191 (self.op.pattern, err), errors.ECODE_INVAL)
14193 def Exec(self, feedback_fn):
14194 """Returns the tag list.
14197 cfg = self.cfg
14198 tgts = [("/cluster", cfg.GetClusterInfo())]
14199 ilist = cfg.GetAllInstancesInfo().values()
14200 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14201 nlist = cfg.GetAllNodesInfo().values()
14202 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14203 tgts.extend(("/nodegroup/%s" % n.name, n)
14204 for n in cfg.GetAllNodeGroupsInfo().values())
14205 results = []
14206 for path, target in tgts:
14207 for tag in target.GetTags():
14208 if self.re.search(tag):
14209 results.append((path, tag))
14210 return results
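# Example of the list returned above (hypothetical tags): a search for the
# pattern "^web" could yield
#   [("/cluster", "web-cluster"), ("/instances/inst1.example.com", "webserver")]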
14213 class LUTagsSet(TagsLU):
14214 """Sets a tag on a given object.
14219 def CheckPrereq(self):
14220 """Check prerequisites.
14222 This checks the type and length of the tag name and value.
14225 TagsLU.CheckPrereq(self)
14226 for tag in self.op.tags:
14227 objects.TaggableObject.ValidateTag(tag)
14229 def Exec(self, feedback_fn):
14230 """Sets the tag.
14233 try:
14234 for tag in self.op.tags:
14235 self.target.AddTag(tag)
14236 except errors.TagError, err:
14237 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14238 self.cfg.Update(self.target, feedback_fn)
14241 class LUTagsDel(TagsLU):
14242 """Delete a list of tags from a given object.
14247 def CheckPrereq(self):
14248 """Check prerequisites.
14250 This checks that we have the given tag.
14253 TagsLU.CheckPrereq(self)
14254 for tag in self.op.tags:
14255 objects.TaggableObject.ValidateTag(tag)
14256 del_tags = frozenset(self.op.tags)
14257 cur_tags = self.target.GetTags()
14259 diff_tags = del_tags - cur_tags
14260 if diff_tags:
14261 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14262 raise errors.OpPrereqError("Tag(s) %s not found" %
14263 (utils.CommaJoin(diff_names), ),
14264 errors.ECODE_NOENT)
14266 def Exec(self, feedback_fn):
14267 """Remove the tag from the object.
14270 for tag in self.op.tags:
14271 self.target.RemoveTag(tag)
14272 self.cfg.Update(self.target, feedback_fn)
14275 class LUTestDelay(NoHooksLU):
14276 """Sleep for a specified amount of time.
14278 This LU sleeps on the master and/or nodes for a specified amount of
14279 time.
14284 def ExpandNames(self):
14285 """Expand names and set required locks.
14287 This expands the node list, if any.
14290 self.needed_locks = {}
14291 if self.op.on_nodes:
14292 # _GetWantedNodes can be used here, but is not always appropriate to use
14293 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14294 # more information.
14295 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14296 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14298 def _TestDelay(self):
14299 """Do the actual sleep.
14302 if self.op.on_master:
14303 if not utils.TestDelay(self.op.duration):
14304 raise errors.OpExecError("Error during master delay test")
14305 if self.op.on_nodes:
14306 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14307 for node, node_result in result.items():
14308 node_result.Raise("Failure during rpc call to node %s" % node)
14310 def Exec(self, feedback_fn):
14311 """Execute the test delay opcode, with the wanted repetitions.
14314 if self.op.repeat == 0:
14315 self._TestDelay()
14316 else:
14317 top_value = self.op.repeat - 1
14318 for i in range(self.op.repeat):
14319 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14320 self._TestDelay()
14323 class LUTestJqueue(NoHooksLU):
14324 """Utility LU to test some aspects of the job queue.
14329 # Must be lower than default timeout for WaitForJobChange to see whether it
14330 # notices changed jobs
14331 _CLIENT_CONNECT_TIMEOUT = 20.0
14332 _CLIENT_CONFIRM_TIMEOUT = 60.0
14334 @classmethod
14335 def _NotifyUsingSocket(cls, cb, errcls):
14336 """Opens a Unix socket and waits for another program to connect.
14339 @param cb: Callback to send socket name to client
14340 @type errcls: class
14341 @param errcls: Exception class to use for errors
14344 # Using a temporary directory as there's no easy way to create temporary
14345 # sockets without writing a custom loop around tempfile.mktemp and
14347 tmpdir = tempfile.mkdtemp()
14348 try:
14349 tmpsock = utils.PathJoin(tmpdir, "sock")
14351 logging.debug("Creating temporary socket at %s", tmpsock)
14352 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14353 try:
14354 sock.bind(tmpsock)
14355 sock.listen(1)
14357 # Send details to client
14358 cb(tmpsock)
14360 # Wait for client to connect before continuing
14361 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14362 try:
14363 (conn, _) = sock.accept()
14364 except socket.error, err:
14365 raise errcls("Client didn't connect in time (%s)" % err)
14366 finally:
14367 sock.close()
14368 finally:
14369 # Remove as soon as client is connected
14370 shutil.rmtree(tmpdir)
14372 # Wait for client to close
14374 try:
14375 # pylint: disable=E1101
14376 # Instance of '_socketobject' has no ... member
14377 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14378 conn.recv(1)
14379 except socket.error, err:
14380 raise errcls("Client failed to confirm notification (%s)" % err)
14381 finally:
14382 conn.close()
14384 def _SendNotification(self, test, arg, sockname):
14385 """Sends a notification to the client.
14388 @param test: Test name
14389 @param arg: Test argument (depends on test)
14390 @type sockname: string
14391 @param sockname: Socket path
14394 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14396 def _Notify(self, prereq, test, arg):
14397 """Notifies the client of a test.
14400 @param prereq: Whether this is a prereq-phase test
14402 @param test: Test name
14403 @param arg: Test argument (depends on test)
14406 if prereq:
14407 errcls = errors.OpPrereqError
14408 else:
14409 errcls = errors.OpExecError
14411 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14412 test, arg),
14413 errcls)
14415 def CheckArguments(self):
14416 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14417 self.expandnames_calls = 0
14419 def ExpandNames(self):
14420 checkargs_calls = getattr(self, "checkargs_calls", 0)
14421 if checkargs_calls < 1:
14422 raise errors.ProgrammerError("CheckArguments was not called")
14424 self.expandnames_calls += 1
14426 if self.op.notify_waitlock:
14427 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14429 self.LogInfo("Expanding names")
14431 # Get lock on master node (just to get a lock, not for a particular reason)
14432 self.needed_locks = {
14433 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14434 }
14436 def Exec(self, feedback_fn):
14437 if self.expandnames_calls < 1:
14438 raise errors.ProgrammerError("ExpandNames was not called")
14440 if self.op.notify_exec:
14441 self._Notify(False, constants.JQT_EXEC, None)
14443 self.LogInfo("Executing")
14445 if self.op.log_messages:
14446 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14447 for idx, msg in enumerate(self.op.log_messages):
14448 self.LogInfo("Sending log message %s", idx + 1)
14449 feedback_fn(constants.JQT_MSGPREFIX + msg)
14450 # Report how many test messages have been sent
14451 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14453 if self.op.fail:
14454 raise errors.OpExecError("Opcode failure was requested")
14456 return True
14459 class IAllocator(object):
14460 """IAllocator framework.
14462 An IAllocator instance has three sets of attributes:
14463 - cfg that is needed to query the cluster
14464 - input data (all members of the _KEYS class attribute are required)
14465 - four buffer attributes (in|out_data|text), that represent the
14466 input (to the external script) in text and data structure format,
14467 and the output from it, again in two formats
14468 - the result variables from the script (success, info, nodes) for
14469 easy usage
14472 # pylint: disable=R0902
14473 # lots of instance attributes
14475 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14476 self.cfg = cfg
14477 self.rpc = rpc_runner
14478 # init buffer variables
14479 self.in_text = self.out_text = self.in_data = self.out_data = None
14480 # init all input fields so that pylint is happy
14481 self.mode = mode
14482 self.memory = self.disks = self.disk_template = None
14483 self.os = self.tags = self.nics = self.vcpus = None
14484 self.hypervisor = None
14485 self.relocate_from = None
14486 self.name = None
14487 self.instances = None
14488 self.evac_mode = None
14489 self.target_groups = []
14491 self.required_nodes = None
14492 # init result fields
14493 self.success = self.info = self.result = None
14495 try:
14496 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14497 except KeyError:
14498 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14499 " IAllocator" % self.mode)
14501 keyset = [n for (n, _) in keydata]
14503 for key in kwargs:
14504 if key not in keyset:
14505 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14506 " IAllocator" % key)
14507 setattr(self, key, kwargs[key])
14509 for key in keyset:
14510 if key not in kwargs:
14511 raise errors.ProgrammerError("Missing input parameter '%s' to"
14512 " IAllocator" % key)
14513 self._BuildInputData(compat.partial(fn, self), keydata)
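# Illustrative usage (mirrors LUInstanceChangeGroup.Exec above): the keyword
# arguments must exactly match the key list registered for the mode in
# _MODE_DATA, e.g.
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=[instance_name], target_groups=group_uuids)
#   ial.Run(self.op.iallocator)
# Unknown or missing keys raise ProgrammerError in the loops above.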
14515 def _ComputeClusterData(self):
14516 """Compute the generic allocator input data.
14518 This is the data that is independent of the actual operation.
14521 cfg = self.cfg
14522 cluster_info = cfg.GetClusterInfo()
14524 data = {
14525 "version": constants.IALLOCATOR_VERSION,
14526 "cluster_name": cfg.GetClusterName(),
14527 "cluster_tags": list(cluster_info.GetTags()),
14528 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14529 "ipolicy": cluster_info.ipolicy,
14530 }
14531 ninfo = cfg.GetAllNodesInfo()
14532 iinfo = cfg.GetAllInstancesInfo().values()
14533 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14536 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14538 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14539 hypervisor_name = self.hypervisor
14540 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14541 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14542 else:
14543 hypervisor_name = cluster_info.primary_hypervisor
14545 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14546 [hypervisor_name])
14547 node_iinfo = \
14548 self.rpc.call_all_instances_info(node_list,
14549 cluster_info.enabled_hypervisors)
14551 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14553 config_ndata = self._ComputeBasicNodeData(ninfo)
14554 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14555 i_list, config_ndata)
14556 assert len(data["nodes"]) == len(ninfo), \
14557 "Incomplete node data computed"
14559 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14561 self.in_data = data
14563 @staticmethod
14564 def _ComputeNodeGroupData(cfg):
14565 """Compute node groups data.
14568 cluster = cfg.GetClusterInfo()
14569 ng = dict((guuid, {
14570 "name": gdata.name,
14571 "alloc_policy": gdata.alloc_policy,
14572 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14573 })
14574 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14576 return ng
14578 @staticmethod
14579 def _ComputeBasicNodeData(node_cfg):
14580 """Compute global node data.
14583 @returns: a dict mapping each node name to its static (config-derived) data
14584 dict
14586 # fill in static (config-based) values
14587 node_results = dict((ninfo.name, {
14588 "tags": list(ninfo.GetTags()),
14589 "primary_ip": ninfo.primary_ip,
14590 "secondary_ip": ninfo.secondary_ip,
14591 "offline": ninfo.offline,
14592 "drained": ninfo.drained,
14593 "master_candidate": ninfo.master_candidate,
14594 "group": ninfo.group,
14595 "master_capable": ninfo.master_capable,
14596 "vm_capable": ninfo.vm_capable,
14597 })
14598 for ninfo in node_cfg.values())
14600 return node_results
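# Example of one entry in the returned dict (hypothetical node): this is the
# static, config-only view which _ComputeDynamicNodeData later extends with
# live memory/disk/CPU data:
#   "node1.example.com": {
#     "tags": [], "primary_ip": "192.0.2.1", "secondary_ip": "198.51.100.1",
#     "offline": False, "drained": False, "master_candidate": True,
#     "group": "uuid-g1", "master_capable": True, "vm_capable": True,
#   }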
14602 @staticmethod
14603 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14604 node_results):
14605 """Compute global node data.
14607 @param node_results: the basic node structures as filled from the config
14610 #TODO(dynmem): compute the right data on MAX and MIN memory
14611 # make a copy of the current dict
14612 node_results = dict(node_results)
14613 for nname, nresult in node_data.items():
14614 assert nname in node_results, "Missing basic data for node %s" % nname
14615 ninfo = node_cfg[nname]
14617 if not (ninfo.offline or ninfo.drained):
14618 nresult.Raise("Can't get data for node %s" % nname)
14619 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14620 nname)
14621 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14623 for attr in ["memory_total", "memory_free", "memory_dom0",
14624 "vg_size", "vg_free", "cpu_total"]:
14625 if attr not in remote_info:
14626 raise errors.OpExecError("Node '%s' didn't return attribute"
14627 " '%s'" % (nname, attr))
14628 if not isinstance(remote_info[attr], int):
14629 raise errors.OpExecError("Node '%s' returned invalid value"
14630 " for '%s': %s" %
14631 (nname, attr, remote_info[attr]))
14632 # compute memory used by primary instances
14633 i_p_mem = i_p_up_mem = 0
14634 for iinfo, beinfo in i_list:
14635 if iinfo.primary_node == nname:
14636 i_p_mem += beinfo[constants.BE_MAXMEM]
14637 if iinfo.name not in node_iinfo[nname].payload:
14640 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14641 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14642 remote_info["memory_free"] -= max(0, i_mem_diff)
14644 if iinfo.admin_state == constants.ADMINST_UP:
14645 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14647 # compute memory used by instances
14649 "total_memory": remote_info["memory_total"],
14650 "reserved_memory": remote_info["memory_dom0"],
14651 "free_memory": remote_info["memory_free"],
14652 "total_disk": remote_info["vg_size"],
14653 "free_disk": remote_info["vg_free"],
14654 "total_cpus": remote_info["cpu_total"],
14655 "i_pri_memory": i_p_mem,
14656 "i_pri_up_memory": i_p_up_mem,
14658 pnr_dyn.update(node_results[nname])
14659 node_results[nname] = pnr_dyn
14661 return node_results
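
  # Memory accounting above, by example (numbers are illustrative): a primary
  # instance with BE_MAXMEM=1024 MiB that currently reports 768 MiB in use
  # causes max(0, 1024 - 768) = 256 MiB to be subtracted from the node's
  # free memory, so the allocator sees the node as if every primary instance
  # could grow back to its configured maximum.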

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
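
  # A single entry of the returned mapping looks roughly like this
  # (illustrative values only):
  #   "web1.example.com": {
  #     "tags": [], "admin_state": "up", "vcpus": 2, "memory": 1024,
  #     "os": "debootstrap+default",
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [{"mac": "aa:00:00:31:73:52", "ip": None,
  #               "mode": "bridged", "link": "xen-br0",
  #               "bridge": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd", "hypervisor": "xen-pvm",
  #     "disk_space_total": 10368,  # disk size plus DRBD metadata overhead
  #   }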

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
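
  # ``fn`` is one of the _Add* request builders above and ``keydata`` the
  # matching key list from _MODE_DATA below; for an allocation this amounts,
  # roughly, to a call such as (sketch of the caller's side):
  #   self._BuildInputData(compat.partial(self._AddNewInstance),
  #                        [("name", ht.TString), ("memory", ht.TInt), ...])
  # Each key is checked for presence and validated with its ht type check
  # before the request is serialized.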

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
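
  # A node-evacuation result is a (moved, failed, jobs) triple; roughly:
  #   moved:  [["inst1", "group1", ["node3"]], ...]
  #   failed: [["inst2", "not enough memory"], ...]
  #   jobs:   [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}], ...]
  # _MODE_DATA below maps each mode to its (request builder, request key
  # checks, result check) triple.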

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
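
  # A well-formed iallocator reply is a JSON object like (illustrative):
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  # "result" is further checked by the mode-specific _result_check above.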

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
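
  # For example (illustrative), with node2group={"n1": "u1", "n2": "u2"} and
  # groups={"u1": {"name": "default"}}, passing nodes=["n1", "n2", "unknown"]
  # returns ["default", "u2"]: the unknown node is skipped and the missing
  # group falls back to its UUID.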


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
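
# Usage sketch (illustrative): query opcodes resolve their resource type to
# one of the implementations above, e.g.
#   impl = _GetQueryImplementation(constants.QR_NODE)  # -> _NodeQuery
# while an unknown resource name raises OpPrereqError.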