4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
#: Size of DRBD meta block device
# NOTE(review): the constant this epydoc comment describes (the DRBD meta
# size value) is not present in this excerpt -- confirm against the original.

#: Admin states in which an instance is considered down
INSTANCE_DOWN = [constants.ADMINST_DOWN]
#: Admin states in which an instance is known to the hypervisor layer
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
#: Admin states in which an instance is not expected to be running
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
# NOTE(review): this expression looks truncated -- the closing brackets of
# the frozenset union are missing; confirm against the original file.
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # NOTE(review): the class-level attributes usually defined here (HPATH,
  # HTYPE, REQ_BGL -- REQ_BGL is referenced by ExpandNames below) are missing
  # from this excerpt.

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    # NOTE(review): a "self.op = op" assignment appears to be missing here;
    # self.op is read further down in this constructor.
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias for querying which locks this LU currently owns
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    # NOTE(review): a "self.add_locks = {}" initializer appears to be
    # missing here -- confirm against the original file.
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging shortcuts provided by the processor
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # NOTE(review): a "self.tasklets = None" initializer appears to be
    # missing here (self.tasklets is read by CheckPrereq and Exec below).

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    # NOTE(review): an "if self.REQ_BGL:" / "else:" pair appears to be
    # missing around the two statements below -- as written the raise is
    # unreachable only by accident of the lost lines.
    self.needed_locks = {} # Exclusive LUs don't need locks.
    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        # NOTE(review): the per-tasklet "tl.CheckPrereq()" call and the
        # "else:" fallback appear to be missing from this excerpt.

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        # NOTE(review): the "tl.Exec(feedback_fn)" call and the "else:"
        # introducing the raise below appear to be missing from this excerpt.
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    # NOTE(review): the "return lu_result" statement appears to be missing
    # from this excerpt.

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    # NOTE(review): an "else:" introducing the assertion below appears to be
    # missing from this excerpt.
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    If should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    # NOTE(review): a "wanted_nodes = []" initializer and the
    # "if not primary_only:" guard around the secondary-nodes extension
    # appear to be missing from this excerpt.
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    # NOTE(review): the "else:" introducing the raise below is missing from
    # this excerpt.
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # NOTE(review): the class attribute overrides normally present here
  # (presumably HPATH/HTYPE) are missing from this excerpt -- confirm
  # against the original file.

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
438 """Tasklet base class.
440 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
441 they can mix legacy code with tasklets. Locking needs to be done in the LU,
442 tasklets know nothing about locks.
444 Subclasses must follow these rules:
445 - Implement CheckPrereq
449 def __init__(self, lu):
456 def CheckPrereq(self):
457 """Check prerequisites for this tasklets.
459 This method should check whether the prerequisites for the execution of
460 this tasklet are fulfilled. It can do internode communication, but it
461 should be idempotent - no cluster or system changes are allowed.
463 The method should raise errors.OpPrereqError in case something is not
464 fulfilled. Its return value is ignored.
466 This method should also update all parameters to their canonical form if it
467 hasn't been done before.
472 def Exec(self, feedback_fn):
473 """Execute the tasklet.
475 This method should implement the actual work. It should raise
476 errors.OpExecError for failures that are somewhat dealt with in code, or
480 raise NotImplementedError
484 """Base for query utility classes.
487 #: Attribute holding field definitions
490 def __init__(self, qfilter, fields, use_locking):
491 """Initializes this class.
494 self.use_locking = use_locking
496 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
498 self.requested_data = self.query.RequestedData()
499 self.names = self.query.RequestedNames()
501 # Sort only if no names were requested
502 self.sort_by_name = not self.names
504 self.do_locking = None
507 def _GetNames(self, lu, all_names, lock_level):
508 """Helper function to determine names asked for in the query.
512 names = lu.owned_locks(lock_level)
516 if self.wanted == locking.ALL_SET:
517 assert not self.names
518 # caller didn't specify names, so ordering is not important
519 return utils.NiceSort(names)
521 # caller specified names and we must keep the same order
523 assert not self.do_locking or lu.glm.is_owned(lock_level)
525 missing = set(self.wanted).difference(names)
527 raise errors.OpExecError("Some items were removed before retrieving"
528 " their data: %s" % missing)
530 # Return expanded names
533 def ExpandNames(self, lu):
534 """Expand names for this query.
536 See L{LogicalUnit.ExpandNames}.
539 raise NotImplementedError()
541 def DeclareLocks(self, lu, level):
542 """Declare locks for this query.
544 See L{LogicalUnit.DeclareLocks}.
547 raise NotImplementedError()
549 def _GetQueryData(self, lu):
550 """Collects all data for this query.
552 @return: Query data object
555 raise NotImplementedError()
557 def NewStyleQuery(self, lu):
558 """Collect data and execute query.
561 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
562 sort_by_name=self.sort_by_name)
564 def OldStyleQuery(self, lu):
565 """Collect data and execute query.
568 return self.query.OldStyleQuery(self._GetQueryData(lu),
569 sort_by_name=self.sort_by_name)
573 """Returns a dict declaring all lock levels shared.
576 return dict.fromkeys(locking.LEVELS, 1)
def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  # NOTE(review): the tail of the dict literal below (its entries and the
  # closing brackets) is missing from this excerpt.
  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    # NOTE(review): the "% (...)" argument line, the error code and the
    # final return of this function are missing from this excerpt. Also
    # note the apparent duplicated " are" across the message fragments.
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " are '%s', owning groups '%s'; retry the"
                               utils.CommaJoin(inst_groups),
                               utils.CommaJoin(owned_groups)),
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    # NOTE(review): the first "% (...)" argument line and the error code of
    # this raise statement are missing from this excerpt.
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               utils.CommaJoin(wanted_instances),
                               utils.CommaJoin(owned_instances)),

  return wanted_instances
def _SupportsOob(cfg, node):
  """Checks whether out-of-band management is configured for a node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node to look up
  @return: The OOB script if supported or an empty string otherwise

  """
  node_params = cfg.GetNdParams(node)
  return node_params[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  # NOTE(review): an "if nodes:" guard appears to be missing before the
  # first return below (otherwise the second return is unreachable).
  return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  # NOTE(review): the "if instances:" / "else:" branch structure around the
  # two assignments below and the final "return wanted" appear to be missing
  # from this excerpt.
  wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  wanted = utils.NiceSort(lu.cfg.GetInstanceList())
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      # NOTE(review): the branch body deleting params_copy[key], the "else:"
      # introducing the assignment below, and the final
      # "return params_copy" are missing from this excerpt.
      params_copy[key] = val
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of a instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  # NOTE(review): several control-flow lines of this function are missing
  # from this excerpt (error-code arguments to the raises, "try:"/"else:"
  # introducers, the branch deleting entries for group policies and the
  # final "return ipolicy"); the statements below are reproduced as found.
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_default=use_default)
      if not value or value == [constants.VALUE_DEFAULT]:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster'" % key,
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
          # FIXME: we assume all others are lists; this should be redone
            ipolicy[key] = list(value)
    objects.InstancePolicy.CheckParameterSyntax(ipolicy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  # NOTE(review): the "def fn(old, value):" header for the inner helper used
  # below, its "return new", and the final "return ret" are missing from
  # this excerpt.
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  # NOTE(review): several lines are missing from this excerpt (a guard on
  # op_input, an "if invalid_hvs:" check, the error-code argument of the
  # raise, a default for obj_input and a fallback return); the statements
  # below are reproduced as found.
  invalid_hvs = set(op_input) - constants.HYPER_TYPES
  raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                             " %s" % utils.CommaJoin(invalid_hvs),
  if obj_input is None:
  type_check = constants.HVSTS_PARAMETER_TYPES
  return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  # NOTE(review): several lines are missing from this excerpt (an
  # "if invalid_dst:" guard, the error-code argument of the raise, a default
  # for obj_input and the closing argument of the call in the return); the
  # statements below are reproduced as found.
  invalid_dst = set(op_input) - constants.DS_VALID_TYPES
  raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                             utils.CommaJoin(invalid_dst),
  type_check = constants.DSS_PARAMETER_TYPES
  if obj_input is None:
  return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
              for key, value in op_input.items())
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  # NOTE(review): several branch lines of this function (the "elif"/"else:"
  # introducers for choosing should_release, the guards around the
  # retain/release bookkeeping and the release-everything branch) are
  # missing from this excerpt; the statements below are reproduced as found.
  if names is not None:
    should_release = names.__contains__
    should_release = lambda name: name not in keep
    should_release = None

  owned = lu.owned_locks(level)
    # Not owning any lock at this level, do nothing

    # Determine which locks to release
      if should_release(name):

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)

    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  # NOTE(review): the trailing "for vol in vols)" line of this expression
  # appears to be missing from this excerpt.
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  # NOTE(review): the "try:" / "except:" wrapper around the call below
  # appears to be missing from this excerpt (the pylint W0702 marker
  # suggests a bare except was used).
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  # NOTE(review): the construction of the combined field set "f" (from
  # static and dynamic) is missing from this excerpt.
  delta = f.NonMatching(selected)
  # NOTE(review): an "if delta:" guard appears to be missing here.
  raise errors.OpPrereqError("Unknown output fields selected: %s"
                             % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  # NOTE(review): an "if used_globals:" guard appears to be missing before
  # the message construction and raise below.
  msg = ("The following hypervisor parameters are global and cannot"
         " be customized at instance level, please modify them at"
         " cluster level: %s" % utils.CommaJoin(used_globals))
  raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  # NOTE(review): an "if msg is None:" guard appears to be missing before
  # the default-message assignment below (otherwise a caller-supplied msg
  # would always be overwritten).
  msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    # NOTE(review): the error-code continuation line of this raise is
    # missing from this excerpt.
    raise errors.OpPrereqError("Can't use drained node %s" % node,
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    # NOTE(review): the error-code continuation line of this raise is
    # missing from this excerpt.
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  # NOTE(review): the "% (...)" argument line of this Raise call is missing
  # from this excerpt.
  result.Raise("OS '%s' not in supported OS list for node %s" %
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    # NOTE(review): the "if prereq:" / "else:" lines selecting between the
    # two raises below are missing from this excerpt.
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  # NOTE(review): the closing argument line of this call is missing from
  # this excerpt -- confirm against the original file.
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  # NOTE(review): an "if msg is None:" guard appears to be missing before
  # the default-message assignment below.
  msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    # NOTE(review): the error-code continuation line of this raise is
    # missing from this excerpt.
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)
def _ComputeMinMaxSpec(name, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    # NOTE(review): the body of this branch (presumably "return None") and
    # the function's trailing "return None" are missing from this excerpt.
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  # NOTE(review): the "test_settings = [" opening line and the surrounding
  # return wrapper that feeds _compute_fn's results through a filter are
  # missing from this excerpt; the lines below are reproduced as found.
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

    (_compute_fn(name, ipolicy, value)
     for (name, value) in test_settings))
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # Missing beparams entries are treated as unset (pass the check)
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    # intra-group moves cannot introduce new policy violations
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the logical unit on whose behalf we execute
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      # caller asked for best-effort: report but do not abort
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but did not before

  """
  # Violators under the new policy, minus those already violating the old one:
  # the original code had the operands swapped, which reported instances that
  # were FIXED by the new policy instead of newly broken ones.
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
1224 def _ExpandItemName(fn, name, kind):
1225 """Expand an item name.
1227 @param fn: the function to use for expansion
1228 @param name: requested item name
1229 @param kind: text description ('Node' or 'Instance')
1230 @return: the resolved (full) name
1231 @raise errors.OpPrereqError: if the item is not found
1234 full_name = fn(name)
1235 if full_name is None:
1236 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
1251 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1252 minmem, maxmem, vcpus, nics, disk_template, disks,
1253 bep, hvp, hypervisor_name, tags):
1254 """Builds instance related env variables for hooks
1256 This builds the hook environment from individual variables.
1259 @param name: the name of the instance
1260 @type primary_node: string
1261 @param primary_node: the name of the instance's primary node
1262 @type secondary_nodes: list
1263 @param secondary_nodes: list of secondary nodes as strings
1264 @type os_type: string
1265 @param os_type: the name of the instance's OS
1266 @type status: string
1267 @param status: the desired status of the instance
1268 @type minmem: string
1269 @param minmem: the minimum memory size of the instance
1270 @type maxmem: string
1271 @param maxmem: the maximum memory size of the instance
1273 @param vcpus: the count of VCPUs the instance has
1275 @param nics: list of tuples (ip, mac, mode, link) representing
1276 the NICs the instance has
1277 @type disk_template: string
1278 @param disk_template: the disk template of the instance
1280 @param disks: the list of (size, mode) pairs
1282 @param bep: the backend parameters for the instance
1284 @param hvp: the hypervisor parameters for the instance
1285 @type hypervisor_name: string
1286 @param hypervisor_name: the hypervisor for the instance
1288 @param tags: list of instance tags as strings
1290 @return: the hook environment for this instance
1295 "INSTANCE_NAME": name,
1296 "INSTANCE_PRIMARY": primary_node,
1297 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1298 "INSTANCE_OS_TYPE": os_type,
1299 "INSTANCE_STATUS": status,
1300 "INSTANCE_MINMEM": minmem,
1301 "INSTANCE_MAXMEM": maxmem,
1302 # TODO(2.7) remove deprecated "memory" value
1303 "INSTANCE_MEMORY": maxmem,
1304 "INSTANCE_VCPUS": vcpus,
1305 "INSTANCE_DISK_TEMPLATE": disk_template,
1306 "INSTANCE_HYPERVISOR": hypervisor_name,
1309 nic_count = len(nics)
1310 for idx, (ip, mac, mode, link) in enumerate(nics):
1313 env["INSTANCE_NIC%d_IP" % idx] = ip
1314 env["INSTANCE_NIC%d_MAC" % idx] = mac
1315 env["INSTANCE_NIC%d_MODE" % idx] = mode
1316 env["INSTANCE_NIC%d_LINK" % idx] = link
1317 if mode == constants.NIC_MODE_BRIDGED:
1318 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1322 env["INSTANCE_NIC_COUNT"] = nic_count
1325 disk_count = len(disks)
1326 for idx, (size, mode) in enumerate(disks):
1327 env["INSTANCE_DISK%d_SIZE" % idx] = size
1328 env["INSTANCE_DISK%d_MODE" % idx] = mode
1332 env["INSTANCE_DISK_COUNT"] = disk_count
1337 env["INSTANCE_TAGS"] = " ".join(tags)
1339 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1340 for key, value in source.items():
1341 env["INSTANCE_%s_%s" % (kind, key)] = value
1346 def _NICListToTuple(lu, nics):
1347 """Build a list of nic information tuples.
1349 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1350 value in LUInstanceQueryData.
1352 @type lu: L{LogicalUnit}
1353 @param lu: the logical unit on whose behalf we execute
1354 @type nics: list of L{objects.NIC}
1355 @param nics: list of nics to convert to hooks tuples
1359 cluster = lu.cfg.GetClusterInfo()
1363 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1364 mode = filled_params[constants.NIC_MODE]
1365 link = filled_params[constants.NIC_LINK]
1366 hooks_nics.append((ip, mac, mode, link))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: list of node names to be excluded from the
      candidate pool maintenance

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    # promoted nodes need their config (and ssconf) redistributed
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
1426 def _DecideSelfPromotion(lu, exceptions=None):
1427 """Decide whether I should promote myself as a master candidate.
1430 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1431 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1432 # the new node will increase mc_max with one, so:
1433 mc_should = min(mc_should + 1, cp_size)
1434 return mc_now < mc_should
1437 def _CalculateGroupIPolicy(cluster, group):
1438 """Calculate instance policy for group.
1441 return cluster.SimpleFillIPolicy(group.ipolicy)
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: object.Instance
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  violating = set()
  for inst in instances:
    if _ComputeIPolicyInstanceViolation(ipolicy, inst):
      violating.add(inst.name)
  return frozenset(violating)
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    # only contact the node if there is at least one bridged NIC to check
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      # a variant was given but the OS does not support any
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1504 def _GetNodeInstancesInner(cfg, fn):
1505 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  def _uses_node(inst):
    return node_name in inst.all_nodes
  return _GetNodeInstancesInner(cfg, _uses_node)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  def _is_primary(inst):
    return inst.primary_node == node_name
  return _GetNodeInstancesInner(cfg, _is_primary)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  def _is_secondary(inst):
    return node_name in inst.secondary_nodes
  return _GetNodeInstancesInner(cfg, _is_secondary)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  # all other storage types take no extra arguments
  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Return the indices of the instance's disks that are faulty on a node.

  @param cfg: the cluster configuration
  @param rpc_runner: the RPC runner to use for the status query
  @param instance: the instance whose disks are checked
  @param node_name: the node on which to check the mirror status
  @param prereq: whether errors should be reported as prerequisite failures
  @return: list of disk indices whose local disk status is faulty

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
1561 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1562 """Check the sanity of iallocator and node arguments and use the
1563 cluster-wide iallocator if appropriate.
1565 Check that at most one of (iallocator, node) is specified. If none is
1566 specified, then the LU's opcode's iallocator slot is filled with the
1567 cluster-wide default iallocator.
1569 @type iallocator_slot: string
1570 @param iallocator_slot: the name of the opcode iallocator slot
1571 @type node_slot: string
1572 @param node_slot: the name of the opcode target node slot
1575 node = getattr(lu.op, node_slot, None)
1576 iallocator = getattr(lu.op, iallocator_slot, None)
1578 if node is not None and iallocator is not None:
1579 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1581 elif node is None and iallocator is None:
1582 default_iallocator = lu.cfg.GetDefaultIAllocator()
1583 if default_iallocator:
1584 setattr(lu.op, iallocator_slot, default_iallocator)
1586 raise errors.OpPrereqError("No iallocator or node given and no"
1587 " cluster-wide default iallocator found;"
1588 " please specify either an iallocator or a"
1589 " node, or set a cluster-wide default"
1593 def _GetDefaultIAllocator(cfg, iallocator):
1594 """Decides on which iallocator to use.
1596 @type cfg: L{config.ConfigWriter}
1597 @param cfg: Cluster configuration object
1598 @type iallocator: string or None
1599 @param iallocator: Iallocator specified in opcode
1601 @return: Iallocator name
1605 # Use default iallocator
1606 iallocator = cfg.GetDefaultIAllocator()
1609 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1610 " opcode nor as a cluster-wide default",
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # run the post-init hooks on the master node only
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      # failure to release the IP is not fatal for cluster destruction
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
1705 def _VerifyCertificate(filename):
1706 """Verifies a certificate for L{LUClusterVerifyConfig}.
1708 @type filename: string
1709 @param filename: Path to PEM file
1713 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1714 utils.ReadFile(filename))
1715 except Exception, err: # pylint: disable=W0703
1716 return (LUClusterVerifyConfig.ETYPE_ERROR,
1717 "Failed to load X509 certificate %s: %s" % (filename, err))
1720 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1721 constants.SSL_CERT_EXPIRATION_ERROR)
1724 fnamemsg = "While verifying %s: %s" % (filename, msg)
1729 return (None, fnamemsg)
1730 elif errcode == utils.CERT_WARNING:
1731 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1732 elif errcode == utils.CERT_ERROR:
1733 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1735 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1738 def _GetAllHypervisorParameters(cluster, instances):
1739 """Compute the set of all hypervisor parameters.
1741 @type cluster: L{objects.Cluster}
1742 @param cluster: the cluster object
1743 @param instances: list of L{objects.Instance}
1744 @param instances: additional instances from which to obtain parameters
1745 @rtype: list of (origin, hypervisor, parameters)
1746 @return: a list with all parameters found, indicating the hypervisor they
1747 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1752 for hv_name in cluster.enabled_hypervisors:
1753 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1755 for os_name, os_hvp in cluster.os_hvp.items():
1756 for hv_name, hv_params in os_hvp.items():
1758 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1759 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1761 # TODO: collapse identical parameter values in a single one
1762 for instance in instances:
1763 if instance.hvparams:
1764 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1765 cluster.FillHV(instance)))
1770 class _VerifyErrors(object):
1771 """Mix-in for cluster/group verify LUs.
1773 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1774 self.op and self._feedback_fn to be available.)
1778 ETYPE_FIELD = "code"
1779 ETYPE_ERROR = "ERROR"
1780 ETYPE_WARNING = "WARNING"
1782 def _Error(self, ecode, item, msg, *args, **kwargs):
1783 """Format an error message.
1785 Based on the opcode's error_codes parameter, either format a
1786 parseable error code, or a simpler error string.
1788 This must be called only from Exec and functions called from Exec.
1791 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1792 itype, etxt, _ = ecode
1793 # first complete the msg
1796 # then format the whole message
1797 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1798 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1804 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1805 # and finally report it via the feedback_fn
1806 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1808 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1809 """Log an error message if the passed condition is True.
1813 or self.op.debug_simulate_errors) # pylint: disable=E1101
1815 # If the error code is in the list of ignored errors, demote the error to a
1817 (_, etxt, _) = ecode
1818 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1819 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1822 self._Error(ecode, *args, **kwargs)
1824 # do not mark the operation as failed for WARN cases only
1825 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1826 self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Submit one job per node group (plus one for the global config).

    """
    jobs = []

    if self.op.group_name:
      # only the requested group is verified; no config-verification job
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # only the config-verify opcode lacks the skip_checks slot
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1873 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1874 """Verifies the cluster config.
1879 def _VerifyHVP(self, hvp_data):
1880 """Verifies locally the syntax of the hypervisor parameters.
1883 for item, hv_name, hv_params in hvp_data:
1884 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1887 hv_class = hypervisor.GetHypervisor(hv_name)
1888 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1889 hv_class.CheckParameterSyntax(hv_params)
1890 except errors.GenericError, err:
1891 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1893 def ExpandNames(self):
1894 # Information can be safely retrieved as the BGL is acquired in exclusive
1896 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1897 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1898 self.all_node_info = self.cfg.GetAllNodesInfo()
1899 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1900 self.needed_locks = {}
1902 def Exec(self, feedback_fn):
1903 """Verify integrity of cluster, performing various test on nodes.
1907 self._feedback_fn = feedback_fn
1909 feedback_fn("* Verifying cluster config")
1911 for msg in self.cfg.VerifyConfig():
1912 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1914 feedback_fn("* Verifying cluster certificate files")
1916 for cert_filename in constants.ALL_CERT_FILES:
1917 (errcode, msg) = _VerifyCertificate(cert_filename)
1918 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1920 feedback_fn("* Verifying hypervisor parameters")
1922 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1923 self.all_inst_info.values()))
1925 feedback_fn("* Verifying all nodes belong to an existing group")
1927 # We do this verification here because, should this bogus circumstance
1928 # occur, it would never be caught by VerifyGroup, which only acts on
1929 # nodes/instances reachable from existing node groups.
1931 dangling_nodes = set(node.name for node in self.all_node_info.values()
1932 if node.group not in self.all_group_info)
1934 dangling_instances = {}
1935 no_node_instances = []
1937 for inst in self.all_inst_info.values():
1938 if inst.primary_node in dangling_nodes:
1939 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1940 elif inst.primary_node not in self.all_node_info:
1941 no_node_instances.append(inst.name)
1946 utils.CommaJoin(dangling_instances.get(node.name,
1948 for node in dangling_nodes]
1950 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1952 "the following nodes (and their instances) belong to a non"
1953 " existing group: %s", utils.CommaJoin(pretty_dangling))
1955 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1957 "the following instances have a non-existing primary-node:"
1958 " %s", utils.CommaJoin(no_node_instances))
1963 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1964 """Verifies the status of a node group.
1967 HPATH = "cluster-verify"
1968 HTYPE = constants.HTYPE_CLUSTER
1971 _HOOKS_INDENT_RE = re.compile("^", re.M)
1973 class NodeImage(object):
1974 """A class representing the logical and physical status of a node.
1977 @ivar name: the node name to which this object refers
1978 @ivar volumes: a structure as returned from
1979 L{ganeti.backend.GetVolumeList} (runtime)
1980 @ivar instances: a list of running instances (runtime)
1981 @ivar pinst: list of configured primary instances (config)
1982 @ivar sinst: list of configured secondary instances (config)
1983 @ivar sbp: dictionary of {primary-node: list of instances} for all
1984 instances for which this node is secondary (config)
1985 @ivar mfree: free memory, as reported by hypervisor (runtime)
1986 @ivar dfree: free disk, as reported by the node (runtime)
1987 @ivar offline: the offline status (config)
1988 @type rpc_fail: boolean
1989 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1990 not whether the individual keys were correct) (runtime)
1991 @type lvm_fail: boolean
1992 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1993 @type hyp_fail: boolean
1994 @ivar hyp_fail: whether the RPC call didn't return the instance list
1995 @type ghost: boolean
1996 @ivar ghost: whether this is a known node or not (config)
1997 @type os_fail: boolean
1998 @ivar os_fail: whether the RPC call didn't return valid OS data
2000 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2001 @type vm_capable: boolean
2002 @ivar vm_capable: whether the node can host instances
2005 def __init__(self, offline=False, name=None, vm_capable=True):
2014 self.offline = offline
2015 self.vm_capable = vm_capable
2016 self.rpc_fail = False
2017 self.lvm_fail = False
2018 self.hyp_fail = False
2020 self.os_fail = False
2023 def ExpandNames(self):
2024 # This raises errors.OpPrereqError on its own:
2025 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2027 # Get instances in node group; this is unsafe and needs verification later
2028 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2030 self.needed_locks = {
2031 locking.LEVEL_INSTANCE: inst_names,
2032 locking.LEVEL_NODEGROUP: [self.group_uuid],
2033 locking.LEVEL_NODE: [],
2036 self.share_locks = _ShareAll()
2038 def DeclareLocks(self, level):
2039 if level == locking.LEVEL_NODE:
2040 # Get members of node group; this is unsafe and needs verification later
2041 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2043 all_inst_info = self.cfg.GetAllInstancesInfo()
2045 # In Exec(), we warn about mirrored instances that have primary and
2046 # secondary living in separate node groups. To fully verify that
2047 # volumes for these instances are healthy, we will need to do an
2048 # extra call to their secondaries. We ensure here those nodes will
2050 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2051 # Important: access only the instances whose lock is owned
2052 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2053 nodes.update(all_inst_info[inst].secondary_nodes)
2055 self.needed_locks[locking.LEVEL_NODE] = nodes
2057 def CheckPrereq(self):
2058 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2059 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2061 group_nodes = set(self.group_info.members)
2062 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2065 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2067 unlocked_instances = \
2068 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2071 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2072 utils.CommaJoin(unlocked_nodes))
2074 if unlocked_instances:
2075 raise errors.OpPrereqError("Missing lock for instances: %s" %
2076 utils.CommaJoin(unlocked_instances))
2078 self.all_node_info = self.cfg.GetAllNodesInfo()
2079 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2081 self.my_node_names = utils.NiceSort(group_nodes)
2082 self.my_inst_names = utils.NiceSort(group_instances)
2084 self.my_node_info = dict((name, self.all_node_info[name])
2085 for name in self.my_node_names)
2087 self.my_inst_info = dict((name, self.all_inst_info[name])
2088 for name in self.my_inst_names)
2090 # We detect here the nodes that will need the extra RPC calls for verifying
2091 # split LV volumes; they should be locked.
2092 extra_lv_nodes = set()
2094 for inst in self.my_inst_info.values():
2095 if inst.disk_template in constants.DTS_INT_MIRROR:
2096 group = self.my_node_info[inst.primary_node].group
2097 for nname in inst.secondary_nodes:
2098 if self.all_node_info[nname].group != group:
2099 extra_lv_nodes.add(nname)
2101 unlocked_lv_nodes = \
2102 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2104 if unlocked_lv_nodes:
2105 raise errors.OpPrereqError("these nodes could be locked: %s" %
2106 utils.CommaJoin(unlocked_lv_nodes))
2107 self.extra_lv_nodes = list(extra_lv_nodes)
2109 def _VerifyNode(self, ninfo, nresult):
2110 """Perform some basic validation on data returned from a node.
2112 - check the result data structure is well formed and has all the
2114 - check ganeti version
2116 @type ninfo: L{objects.Node}
2117 @param ninfo: the node to check
2118 @param nresult: the results from the node
2120 @return: whether overall this call was successful (and we can expect
2121 reasonable values in the respose)
2125 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2127 # main result, nresult should be a non-empty dict
2128 test = not nresult or not isinstance(nresult, dict)
2129 _ErrorIf(test, constants.CV_ENODERPC, node,
2130 "unable to verify node: no data returned")
2134 # compares ganeti version
2135 local_version = constants.PROTOCOL_VERSION
2136 remote_version = nresult.get("version", None)
2137 test = not (remote_version and
2138 isinstance(remote_version, (list, tuple)) and
2139 len(remote_version) == 2)
2140 _ErrorIf(test, constants.CV_ENODERPC, node,
2141 "connection to node returned invalid data")
2145 test = local_version != remote_version[0]
2146 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2147 "incompatible protocol versions: master %s,"
2148 " node %s", local_version, remote_version[0])
2152 # node seems compatible, we can actually try to look into its results
2154 # full package version
2155 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2156 constants.CV_ENODEVERSION, node,
2157 "software version mismatch: master %s, node %s",
2158 constants.RELEASE_VERSION, remote_version[1],
2159 code=self.ETYPE_WARNING)
2161 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2162 if ninfo.vm_capable and isinstance(hyp_result, dict):
2163 for hv_name, hv_result in hyp_result.iteritems():
2164 test = hv_result is not None
2165 _ErrorIf(test, constants.CV_ENODEHV, node,
2166 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2168 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2169 if ninfo.vm_capable and isinstance(hvp_result, list):
2170 for item, hv_name, hv_result in hvp_result:
2171 _ErrorIf(True, constants.CV_ENODEHV, node,
2172 "hypervisor %s parameter verify failure (source %s): %s",
2173 hv_name, item, hv_result)
2175 test = nresult.get(constants.NV_NODESETUP,
2176 ["Missing NODESETUP results"])
2177 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    Verifies that the time reported by the node lies within the
    master-side RPC call window, allowing at most
    C{constants.NODE_MAX_CLOCK_SKEW} seconds of drift in either
    direction.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    ntime = nresult.get(constants.NV_TIME, None)
      # merge the node-reported (seconds, microseconds) pair into a float
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
    # compute a human-readable divergence only when outside the skew window
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    # CheckVolumeGroupSize returns an error string, or None when OK
    vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                          constants.MIN_VG_SIZE)
    _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    # check that ':' is not present in PV names, since it's a
    # special character for lvcreate (denotes the range of PEs to
    for _, pvname, owner_vg in pvlist:
      test = ":" in pvname
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "Invalid character ':' in PV '%s' of VG '%s'",
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # the node reports back the list of bridges it is *missing*
    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    _ErrorIf(bool(missing), constants.CV_ENODENET, node,
             "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")
    # the node reports the list of scripts that are missing or not executable
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                  "user scripts not present or not executable: %s" %
                  utils.CommaJoin(sorted(broken_scripts)))
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    Verifies the node-to-node SSH and TCP connectivity results as well
    as the reachability of the master IP from this node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    # a non-empty NV_NODELIST result maps peer node name -> failure message
    if nresult[constants.NV_NODELIST]:
      for a_node, a_msg in nresult[constants.NV_NODELIST].items():
        _ErrorIf(True, constants.CV_ENODESSH, node,
                 "ssh communication with node '%s': %s", a_node, a_msg)
    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    # likewise, NV_NODENETTEST lists only the peers that failed
    if nresult[constants.NV_NODENETTEST]:
      nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        _ErrorIf(True, constants.CV_ENODENET, node,
                 "tcp communication with node '%s': %s",
                 anode, nresult[constants.NV_NODENETTEST][anode])
    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not nresult[constants.NV_MASTERIP]:
      # failing on the master itself usually means the IP isn't configured
      if node == self.master_node:
        msg = "the master node cannot reach the master IP (not configured?)"
        msg = "cannot reach the master IP"
      _ErrorIf(True, constants.CV_ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node
    # map of node name -> list of LVs the instance should have there
    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)
    # check the instance against the node group's instance policy
    ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
    err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
    _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)
    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
    # flatten per-node disk status into (node, success, status, index) tuples
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]
    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
      for volume in n_img.volumes:
        # a volume is orphaned when no instance should have it on this
        # node and its name does not match a reserved pattern
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      # we're skipping offline nodes from the N+1 warning, since
      # most likely we don't have good memory information from them;
      # we already list instances living on such nodes, and that's
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          # only auto-balanced instances count towards N+1 requirements
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MINMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accomodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    Cross-checks that every ancillary file exists on exactly the nodes
    that should carry it, and that all copies share one checksum.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
    # Build mapping from filename to list of nodes which should have the file
    for (files, fn) in files2nodefn:
        filenodes = nodeinfo
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)
    assert set(nodefiles) == (files_all | files_mc | files_vm)
    # per-filename map of checksum -> set of node names reporting it
    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()
    for node in nodeinfo:
        ignore_nodes.add(node.name)
      nresult = all_nvinfo[node.name]
      if nresult.fail_msg or not nresult.payload:
      node_files = nresult.payload.get(constants.NV_FILELIST, None)
      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
        ignore_nodes.add(node.name)
      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes
      expected_nodes = nodefiles[filename] - ignore_nodes
      # Nodes missing file
      missing_file = expected_nodes - with_file
      if filename in files_opt:
        # optional files must be present on all expected nodes or on none
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))
      # Warn if a node has a file it shouldn't
      unexpected = with_file - expected_nodes
              constants.CV_ECLUSTERFILECHECK, None,
              "File %s should not exist on node(s) %s",
              filename, utils.CommaJoin(utils.NiceSort(unexpected)))
      # See if there are multiple versions of the file
      test = len(checksums) > 1
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
    """Verifies and the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    helper_result = nresult.get(constants.NV_DRBDHELPER, None)
    # NOTE(review): "== None" should idiomatically be "is None"
    test = (helper_result == None)
    _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
             "no drbd usermode helper returned")
    status, payload = helper_result
    _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
             "drbd usermode helper check unsuccessful: %s", payload)
    test = status and (payload != drbd_helper)
    _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
             "wrong drbd usermode helper: %s", payload)
    # compute the DRBD minors
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
        node_drbd[minor] = (instance, False)
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)
    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
      # we cannot check drbd status
    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    # the converse check: minors in use that are not in the config
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    remote_os = nresult.get(constants.NV_OSLIST, None)
    # each OS entry must be a 7-element list (unpacked below)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))
    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")
    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
      if name not in os_dict:
      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))
    nimg.oslist = os_dict
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      # only the first entry counts; duplicates are flagged below
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
        # base OS is invalid, skipping
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))
  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        # a non-empty path_result is the error message for that path
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # assume failure until the payload is validated below
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    elif isinstance(lvdata, basestring):
      # a string payload is an error message from the node
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
      nimg.volumes = lvdata
      nimg.lvm_fail = False
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
      nimg.hyp_fail = True
      nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    Fills in the node image's free-memory and free-disk figures from
    the hypervisor and volume group data returned by the node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
      nimg.mfree = int(hv_info["memory_free"])
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODERPC, node,
               "node returned invalid nodeinfo, check hypervisor")
    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
        nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid LVM info, check LVM status")
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(succes, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS
    for nname in nodelist:
      # consider both primary and secondary instances of this node
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]
        # No need to collect data
      node_disks[nname] = disks
      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]
        self.cfg.SetDiskID(dev, nname)
      node_disks_devonly[nname] = devonly
    assert len(node_disks) == len(node_disks_devonly)
    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
    assert len(result) == len(node_disks)
    for (nname, nres) in result.items():
      disks = node_disks[nname]
        # No data from this node
        data = len(disks) * [(False, "node offline")]
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
          # No data from this node
          data = len(disks) * [(False, msg)]
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              logging.warning("Invalid result from node %s, entry %d: %s",
              data.append((False, "Invalid result from the remote node"))
      # zip disks with their status entries, one status per disk
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
    # consistency checks on the assembled structure
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    # candidate hosts are the nodes outside the group being verified
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
    keyfunc = operator.attrgetter("group")
    # one cycling iterator per foreign node group, names sorted within it
    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
    # every online node gets one peer from each foreign group's iterator
    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
    # expose per-node tags to the hooks as NODE_TAGS_<name> variables
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())
  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on all nodes of the verified group; none run on the master
    specifically (empty first element).

    """
    return ([], self.my_node_names)
2969 def Exec(self, feedback_fn):
2970 """Verify integrity of the node group, performing various test on nodes.
2973 # This method has too many local variables. pylint: disable=R0914
2974 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2976 if not self.my_node_names:
2978 feedback_fn("* Empty node group, skipping verification")
2982 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2983 verbose = self.op.verbose
2984 self._feedback_fn = feedback_fn
2986 vg_name = self.cfg.GetVGName()
2987 drbd_helper = self.cfg.GetDRBDHelper()
2988 cluster = self.cfg.GetClusterInfo()
2989 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2990 hypervisors = cluster.enabled_hypervisors
2991 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2993 i_non_redundant = [] # Non redundant instances
2994 i_non_a_balanced = [] # Non auto-balanced instances
2995 i_offline = 0 # Count of offline instances
2996 n_offline = 0 # Count of offline nodes
2997 n_drained = 0 # Count of nodes being drained
2998 node_vol_should = {}
3000 # FIXME: verify OS list
3003 filemap = _ComputeAncillaryFiles(cluster, False)
3005 # do local checksums
3006 master_node = self.master_node = self.cfg.GetMasterNode()
3007 master_ip = self.cfg.GetMasterIP()
3009 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3012 if self.cfg.GetUseExternalMipScript():
3013 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3015 node_verify_param = {
3016 constants.NV_FILELIST:
3017 utils.UniqueSequence(filename
3018 for files in filemap
3019 for filename in files),
3020 constants.NV_NODELIST:
3021 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3022 self.all_node_info.values()),
3023 constants.NV_HYPERVISOR: hypervisors,
3024 constants.NV_HVPARAMS:
3025 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3026 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3027 for node in node_data_list
3028 if not node.offline],
3029 constants.NV_INSTANCELIST: hypervisors,
3030 constants.NV_VERSION: None,
3031 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3032 constants.NV_NODESETUP: None,
3033 constants.NV_TIME: None,
3034 constants.NV_MASTERIP: (master_node, master_ip),
3035 constants.NV_OSLIST: None,
3036 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3037 constants.NV_USERSCRIPTS: user_scripts,
3040 if vg_name is not None:
3041 node_verify_param[constants.NV_VGLIST] = None
3042 node_verify_param[constants.NV_LVLIST] = vg_name
3043 node_verify_param[constants.NV_PVLIST] = [vg_name]
3044 node_verify_param[constants.NV_DRBDLIST] = None
3047 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3050 # FIXME: this needs to be changed per node-group, not cluster-wide
3052 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3053 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3054 bridges.add(default_nicpp[constants.NIC_LINK])
3055 for instance in self.my_inst_info.values():
3056 for nic in instance.nics:
3057 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3058 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3059 bridges.add(full_nic[constants.NIC_LINK])
3062 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3064 # Build our expected cluster state
3065 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3067 vm_capable=node.vm_capable))
3068 for node in node_data_list)
3072 for node in self.all_node_info.values():
3073 path = _SupportsOob(self.cfg, node)
3074 if path and path not in oob_paths:
3075 oob_paths.append(path)
3078 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3080 for instance in self.my_inst_names:
3081 inst_config = self.my_inst_info[instance]
3083 for nname in inst_config.all_nodes:
3084 if nname not in node_image:
3085 gnode = self.NodeImage(name=nname)
3086 gnode.ghost = (nname not in self.all_node_info)
3087 node_image[nname] = gnode
3089 inst_config.MapLVsByNode(node_vol_should)
3091 pnode = inst_config.primary_node
3092 node_image[pnode].pinst.append(instance)
3094 for snode in inst_config.secondary_nodes:
3095 nimg = node_image[snode]
3096 nimg.sinst.append(instance)
3097 if pnode not in nimg.sbp:
3098 nimg.sbp[pnode] = []
3099 nimg.sbp[pnode].append(instance)
3101 # At this point, we have the in-memory data structures complete,
3102 # except for the runtime information, which we'll gather next
3104 # Due to the way our RPC system works, exact response times cannot be
3105 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3106 # time before and after executing the request, we can at least have a time
3108 nvinfo_starttime = time.time()
3109 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3111 self.cfg.GetClusterName())
3112 nvinfo_endtime = time.time()
3114 if self.extra_lv_nodes and vg_name is not None:
3116 self.rpc.call_node_verify(self.extra_lv_nodes,
3117 {constants.NV_LVLIST: vg_name},
3118 self.cfg.GetClusterName())
3120 extra_lv_nvinfo = {}
3122 all_drbd_map = self.cfg.ComputeDRBDMap()
3124 feedback_fn("* Gathering disk information (%s nodes)" %
3125 len(self.my_node_names))
3126 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3129 feedback_fn("* Verifying configuration file consistency")
3131 # If not all nodes are being checked, we need to make sure the master node
3132 # and a non-checked vm_capable node are in the list.
3133 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3135 vf_nvinfo = all_nvinfo.copy()
3136 vf_node_info = list(self.my_node_info.values())
3137 additional_nodes = []
3138 if master_node not in self.my_node_info:
3139 additional_nodes.append(master_node)
3140 vf_node_info.append(self.all_node_info[master_node])
3141 # Add the first vm_capable node we find which is not included
3142 for node in absent_nodes:
3143 nodeinfo = self.all_node_info[node]
3144 if nodeinfo.vm_capable and not nodeinfo.offline:
3145 additional_nodes.append(node)
3146 vf_node_info.append(self.all_node_info[node])
3148 key = constants.NV_FILELIST
3149 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3150 {key: node_verify_param[key]},
3151 self.cfg.GetClusterName()))
3153 vf_nvinfo = all_nvinfo
3154 vf_node_info = self.my_node_info.values()
3156 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3158 feedback_fn("* Verifying node status")
3162 for node_i in node_data_list:
3164 nimg = node_image[node]
3168 feedback_fn("* Skipping offline node %s" % (node,))
3172 if node == master_node:
3174 elif node_i.master_candidate:
3175 ntype = "master candidate"
3176 elif node_i.drained:
3182 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3184 msg = all_nvinfo[node].fail_msg
3185 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3188 nimg.rpc_fail = True
3191 nresult = all_nvinfo[node].payload
3193 nimg.call_ok = self._VerifyNode(node_i, nresult)
3194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3195 self._VerifyNodeNetwork(node_i, nresult)
3196 self._VerifyNodeUserScripts(node_i, nresult)
3197 self._VerifyOob(node_i, nresult)
3200 self._VerifyNodeLVM(node_i, nresult, vg_name)
3201 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3204 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3205 self._UpdateNodeInstances(node_i, nresult, nimg)
3206 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3207 self._UpdateNodeOS(node_i, nresult, nimg)
3209 if not nimg.os_fail:
3210 if refos_img is None:
3212 self._VerifyNodeOS(node_i, nimg, refos_img)
3213 self._VerifyNodeBridges(node_i, nresult, bridges)
      # Check whether all running instances are primary for the node. (This
3216 # can no longer be done from _VerifyInstance below, since some of the
3217 # wrong instances could be from other node groups.)
3218 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3220 for inst in non_primary_inst:
3221 # FIXME: investigate best way to handle offline insts
3222 if inst.admin_state == constants.ADMINST_OFFLINE:
3224 feedback_fn("* Skipping offline instance %s" % inst.name)
3227 test = inst in self.all_inst_info
3228 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3229 "instance should not run on node %s", node_i.name)
3230 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3231 "node is running unknown instance %s", inst)
3233 for node, result in extra_lv_nvinfo.items():
3234 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3235 node_image[node], vg_name)
3237 feedback_fn("* Verifying instance status")
3238 for instance in self.my_inst_names:
3240 feedback_fn("* Verifying instance %s" % instance)
3241 inst_config = self.my_inst_info[instance]
3242 self._VerifyInstance(instance, inst_config, node_image,
3244 inst_nodes_offline = []
3246 pnode = inst_config.primary_node
3247 pnode_img = node_image[pnode]
3248 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3249 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3250 " primary node failed", instance)
3252 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3254 constants.CV_EINSTANCEBADNODE, instance,
3255 "instance is marked as running and lives on offline node %s",
3256 inst_config.primary_node)
3258 # If the instance is non-redundant we cannot survive losing its primary
3259 # node, so we are not N+1 compliant. On the other hand we have no disk
3260 # templates with more than one secondary so that situation is not well
3262 # FIXME: does not support file-backed instances
3263 if not inst_config.secondary_nodes:
3264 i_non_redundant.append(instance)
3266 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3267 constants.CV_EINSTANCELAYOUT,
3268 instance, "instance has multiple secondary nodes: %s",
3269 utils.CommaJoin(inst_config.secondary_nodes),
3270 code=self.ETYPE_WARNING)
3272 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3273 pnode = inst_config.primary_node
3274 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3275 instance_groups = {}
3277 for node in instance_nodes:
3278 instance_groups.setdefault(self.all_node_info[node].group,
3282 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3283 # Sort so that we always list the primary node first.
3284 for group, nodes in sorted(instance_groups.items(),
3285 key=lambda (_, nodes): pnode in nodes,
3288 self._ErrorIf(len(instance_groups) > 1,
3289 constants.CV_EINSTANCESPLITGROUPS,
3290 instance, "instance has primary and secondary nodes in"
3291 " different groups: %s", utils.CommaJoin(pretty_list),
3292 code=self.ETYPE_WARNING)
3294 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3295 i_non_a_balanced.append(instance)
3297 for snode in inst_config.secondary_nodes:
3298 s_img = node_image[snode]
3299 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3300 snode, "instance %s, connection to secondary node failed",
3304 inst_nodes_offline.append(snode)
3306 # warn that the instance lives on offline nodes
3307 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3308 "instance has offline secondary node(s) %s",
3309 utils.CommaJoin(inst_nodes_offline))
3310 # ... or ghost/non-vm_capable nodes
3311 for node in inst_config.all_nodes:
3312 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3313 instance, "instance lives on ghost node %s", node)
3314 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3315 instance, "instance lives on non-vm_capable node %s", node)
3317 feedback_fn("* Verifying orphan volumes")
3318 reserved = utils.FieldSet(*cluster.reserved_lvs)
3320 # We will get spurious "unknown volume" warnings if any node of this group
3321 # is secondary for an instance whose primary is in another group. To avoid
3322 # them, we find these instances and add their volumes to node_vol_should.
3323 for inst in self.all_inst_info.values():
3324 for secondary in inst.secondary_nodes:
3325 if (secondary in self.my_node_info
3326 and inst.name not in self.my_inst_info):
3327 inst.MapLVsByNode(node_vol_should)
3330 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3332 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3333 feedback_fn("* Verifying N+1 Memory redundancy")
3334 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3336 feedback_fn("* Other Notes")
3338 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3339 % len(i_non_redundant))
3341 if i_non_a_balanced:
3342 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3343 % len(i_non_a_balanced))
3346 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3349 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3352 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3356 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3357 """Analyze the post-hooks' result
3359 This method analyses the hook result, handles it, and sends some
3360 nicely-formatted feedback back to the user.
3362 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3363 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3364 @param hooks_results: the results of the multi-node hooks rpc call
3365 @param feedback_fn: function used send feedback back to the caller
3366 @param lu_result: previous Exec result
3367 @return: the new Exec result, based on the previous result
3371 # We only really run POST phase hooks, only for non-empty groups,
3372 # and are only interested in their results
3373 if not self.my_node_names:
3376 elif phase == constants.HOOKS_PHASE_POST:
3377 # Used to change hooks' output to proper indentation
3378 feedback_fn("* Hooks Results")
3379 assert hooks_results, "invalid result from hooks"
3381 for node_name in hooks_results:
3382 res = hooks_results[node_name]
3384 test = msg and not res.offline
3385 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3386 "Communication failure in hooks execution: %s", msg)
3387 if res.offline or msg:
3388 # No need to investigate payload if node is offline or gave
3391 for script, hkr, output in res.payload:
3392 test = hkr == constants.HKR_FAIL
3393 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3394 "Script %s failed, output:", script)
3396 output = self._HOOKS_INDENT_RE.sub(" ", output)
3397 feedback_fn("%s" % output)
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Shared locks are sufficient: we only read the group list and then
    # delegate the actual work to per-group jobs
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    # only instances expected to be running need their LVs online
    nv_dict = _MapInstanceDisksToNodes([inst
            for inst in self.instances.values()
            if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

    # any leftover items in nv_dict are missing LVs, let's arrange the data
    # better
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      # only the primary nodes hold the authoritative disk information
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # the RPC returns bytes, the configuration stores mebibytes
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      # the new cluster IP must not already be in use
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # always try to restore the master IP, even if the rename failed
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask,), errors.ECODE_INVAL)
3770 class LUClusterSetParams(LogicalUnit):
3771 """Change the parameters of the cluster.
3774 HPATH = "cluster-modify"
3775 HTYPE = constants.HTYPE_CLUSTER
3778 def CheckArguments(self):
3782 if self.op.uid_pool:
3783 uidpool.CheckUidPool(self.op.uid_pool)
3785 if self.op.add_uids:
3786 uidpool.CheckUidPool(self.op.add_uids)
3788 if self.op.remove_uids:
3789 uidpool.CheckUidPool(self.op.remove_uids)
3791 if self.op.master_netmask is not None:
3792 _ValidateNetmask(self.cfg, self.op.master_netmask)
3794 if self.op.diskparams:
3795 for dt_params in self.op.diskparams.values():
3796 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3798 def ExpandNames(self):
3799 # FIXME: in the future maybe other cluster params won't require checking on
3800 # all nodes to be modified.
3801 self.needed_locks = {
3802 locking.LEVEL_NODE: locking.ALL_SET,
3803 locking.LEVEL_INSTANCE: locking.ALL_SET,
3804 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3806 self.share_locks = {
3807 locking.LEVEL_NODE: 1,
3808 locking.LEVEL_INSTANCE: 1,
3809 locking.LEVEL_NODEGROUP: 1,
3812 def BuildHooksEnv(self):
3817 "OP_TARGET": self.cfg.GetClusterName(),
3818 "NEW_VG_NAME": self.op.vg_name,
3821 def BuildHooksNodes(self):
3822 """Build hooks nodes.
3825 mn = self.cfg.GetMasterNode()
3828 def CheckPrereq(self):
3829 """Check prerequisites.
3831 This checks whether the given params don't conflict and
3832 if the given volume group is valid.
3835 if self.op.vg_name is not None and not self.op.vg_name:
3836 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3837 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3838 " instances exist", errors.ECODE_INVAL)
3840 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3841 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3842 raise errors.OpPrereqError("Cannot disable drbd helper while"
3843 " drbd-based instances exist",
3846 node_list = self.owned_locks(locking.LEVEL_NODE)
3848 # if vg_name not None, checks given volume group on all nodes
3850 vglist = self.rpc.call_vg_list(node_list)
3851 for node in node_list:
3852 msg = vglist[node].fail_msg
3854 # ignoring down node
3855 self.LogWarning("Error while gathering data on node %s"
3856 " (ignoring node): %s", node, msg)
3858 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3860 constants.MIN_VG_SIZE)
3862 raise errors.OpPrereqError("Error on node '%s': %s" %
3863 (node, vgstatus), errors.ECODE_ENVIRON)
3865 if self.op.drbd_helper:
3866 # checks given drbd helper on all nodes
3867 helpers = self.rpc.call_drbd_helper(node_list)
3868 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3870 self.LogInfo("Not checking drbd helper on offline node %s", node)
3872 msg = helpers[node].fail_msg
3874 raise errors.OpPrereqError("Error checking drbd helper on node"
3875 " '%s': %s" % (node, msg),
3876 errors.ECODE_ENVIRON)
3877 node_helper = helpers[node].payload
3878 if node_helper != self.op.drbd_helper:
3879 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3880 (node, node_helper), errors.ECODE_ENVIRON)
3882 self.cluster = cluster = self.cfg.GetClusterInfo()
3883 # validate params changes
3884 if self.op.beparams:
3885 objects.UpgradeBeParams(self.op.beparams)
3886 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3887 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3889 if self.op.ndparams:
3890 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3891 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3893 # TODO: we need a more general way to handle resetting
3894 # cluster-level parameters to default values
3895 if self.new_ndparams["oob_program"] == "":
3896 self.new_ndparams["oob_program"] = \
3897 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3899 if self.op.hv_state:
3900 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3901 self.cluster.hv_state_static)
3902 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3903 for hv, values in new_hv_state.items())
3905 if self.op.disk_state:
3906 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3907 self.cluster.disk_state_static)
3908 self.new_disk_state = \
3909 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3910 for name, values in svalues.items()))
3911 for storage, svalues in new_disk_state.items())
3914 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3917 all_instances = self.cfg.GetAllInstancesInfo().values()
3919 for group in self.cfg.GetAllNodeGroupsInfo().values():
3920 instances = frozenset([inst for inst in all_instances
3921 if compat.any(node in group.members
3922 for node in inst.all_nodes)])
3923 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3924 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3926 new_ipolicy, instances)
3928 violations.update(new)
3931 self.LogWarning("After the ipolicy change the following instances"
3932 " violate them: %s",
3933 utils.CommaJoin(violations))
3935 if self.op.nicparams:
3936 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3937 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3938 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3941 # check all instances for consistency
3942 for instance in self.cfg.GetAllInstancesInfo().values():
3943 for nic_idx, nic in enumerate(instance.nics):
3944 params_copy = copy.deepcopy(nic.nicparams)
3945 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3947 # check parameter syntax
3949 objects.NIC.CheckParameterSyntax(params_filled)
3950 except errors.ConfigurationError, err:
3951 nic_errors.append("Instance %s, nic/%d: %s" %
3952 (instance.name, nic_idx, err))
3954 # if we're moving instances to routed, check that they have an ip
3955 target_mode = params_filled[constants.NIC_MODE]
3956 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3957 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3958 " address" % (instance.name, nic_idx))
3960 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3961 "\n".join(nic_errors))
3963 # hypervisor list/parameters
3964 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3965 if self.op.hvparams:
3966 for hv_name, hv_dict in self.op.hvparams.items():
3967 if hv_name not in self.new_hvparams:
3968 self.new_hvparams[hv_name] = hv_dict
3970 self.new_hvparams[hv_name].update(hv_dict)
3972 # disk template parameters
3973 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3974 if self.op.diskparams:
3975 for dt_name, dt_params in self.op.diskparams.items():
3976 if dt_name not in self.op.diskparams:
3977 self.new_diskparams[dt_name] = dt_params
3979 self.new_diskparams[dt_name].update(dt_params)
3981 # os hypervisor parameters
3982 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3984 for os_name, hvs in self.op.os_hvp.items():
3985 if os_name not in self.new_os_hvp:
3986 self.new_os_hvp[os_name] = hvs
3988 for hv_name, hv_dict in hvs.items():
3989 if hv_name not in self.new_os_hvp[os_name]:
3990 self.new_os_hvp[os_name][hv_name] = hv_dict
3992 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3995 self.new_osp = objects.FillDict(cluster.osparams, {})
3996 if self.op.osparams:
3997 for os_name, osp in self.op.osparams.items():
3998 if os_name not in self.new_osp:
3999 self.new_osp[os_name] = {}
4001 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4004 if not self.new_osp[os_name]:
4005 # we removed all parameters
4006 del self.new_osp[os_name]
4008 # check the parameter validity (remote check)
4009 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4010 os_name, self.new_osp[os_name])
4012 # changes to the hypervisor list
4013 if self.op.enabled_hypervisors is not None:
4014 self.hv_list = self.op.enabled_hypervisors
4015 for hv in self.hv_list:
4016 # if the hypervisor doesn't already exist in the cluster
4017 # hvparams, we initialize it to empty, and then (in both
4018 # cases) we make sure to fill the defaults, as we might not
4019 # have a complete defaults list if the hypervisor wasn't
4021 if hv not in new_hvp:
4023 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4024 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4026 self.hv_list = cluster.enabled_hypervisors
4028 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4029 # either the enabled list has changed, or the parameters have, validate
4030 for hv_name, hv_params in self.new_hvparams.items():
4031 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4032 (self.op.enabled_hypervisors and
4033 hv_name in self.op.enabled_hypervisors)):
4034 # either this is a new hypervisor, or its parameters have changed
4035 hv_class = hypervisor.GetHypervisor(hv_name)
4036 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4037 hv_class.CheckParameterSyntax(hv_params)
4038 _CheckHVParams(self, node_list, hv_name, hv_params)
4041 # no need to check any newly-enabled hypervisors, since the
4042 # defaults have already been checked in the above code-block
4043 for os_name, os_hvp in self.new_os_hvp.items():
4044 for hv_name, hv_params in os_hvp.items():
4045 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4046 # we need to fill in the new os_hvp on top of the actual hv_p
4047 cluster_defaults = self.new_hvparams.get(hv_name, {})
4048 new_osp = objects.FillDict(cluster_defaults, hv_params)
4049 hv_class = hypervisor.GetHypervisor(hv_name)
4050 hv_class.CheckParameterSyntax(new_osp)
4051 _CheckHVParams(self, node_list, hv_name, new_osp)
4053 if self.op.default_iallocator:
4054 alloc_script = utils.FindFile(self.op.default_iallocator,
4055 constants.IALLOCATOR_SEARCH_PATH,
4057 if alloc_script is None:
4058 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4059 " specified" % self.op.default_iallocator,
4062 def Exec(self, feedback_fn):
4063 """Change the parameters of the cluster.
4066 if self.op.vg_name is not None:
4067 new_volume = self.op.vg_name
4070 if new_volume != self.cfg.GetVGName():
4071 self.cfg.SetVGName(new_volume)
4073 feedback_fn("Cluster LVM configuration already in desired"
4074 " state, not changing")
4075 if self.op.drbd_helper is not None:
4076 new_helper = self.op.drbd_helper
4079 if new_helper != self.cfg.GetDRBDHelper():
4080 self.cfg.SetDRBDHelper(new_helper)
4082 feedback_fn("Cluster DRBD helper already in desired state,"
4084 if self.op.hvparams:
4085 self.cluster.hvparams = self.new_hvparams
4087 self.cluster.os_hvp = self.new_os_hvp
4088 if self.op.enabled_hypervisors is not None:
4089 self.cluster.hvparams = self.new_hvparams
4090 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4091 if self.op.beparams:
4092 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4093 if self.op.nicparams:
4094 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4096 self.cluster.ipolicy = self.new_ipolicy
4097 if self.op.osparams:
4098 self.cluster.osparams = self.new_osp
4099 if self.op.ndparams:
4100 self.cluster.ndparams = self.new_ndparams
4101 if self.op.diskparams:
4102 self.cluster.diskparams = self.new_diskparams
4103 if self.op.hv_state:
4104 self.cluster.hv_state_static = self.new_hv_state
4105 if self.op.disk_state:
4106 self.cluster.disk_state_static = self.new_disk_state
4108 if self.op.candidate_pool_size is not None:
4109 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4110 # we need to update the pool size here, otherwise the save will fail
4111 _AdjustCandidatePool(self, [])
4113 if self.op.maintain_node_health is not None:
4114 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4115 feedback_fn("Note: CONFD was disabled at build time, node health"
4116 " maintenance is not useful (still enabling it)")
4117 self.cluster.maintain_node_health = self.op.maintain_node_health
4119 if self.op.prealloc_wipe_disks is not None:
4120 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4122 if self.op.add_uids is not None:
4123 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4125 if self.op.remove_uids is not None:
4126 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4128 if self.op.uid_pool is not None:
4129 self.cluster.uid_pool = self.op.uid_pool
4131 if self.op.default_iallocator is not None:
4132 self.cluster.default_iallocator = self.op.default_iallocator
4134 if self.op.reserved_lvs is not None:
4135 self.cluster.reserved_lvs = self.op.reserved_lvs
4137 if self.op.use_external_mip_script is not None:
4138 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4140 def helper_os(aname, mods, desc):
4142 lst = getattr(self.cluster, aname)
4143 for key, val in mods:
4144 if key == constants.DDM_ADD:
4146 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4149 elif key == constants.DDM_REMOVE:
4153 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4155 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4157 if self.op.hidden_os:
4158 helper_os("hidden_os", self.op.hidden_os, "hidden")
4160 if self.op.blacklisted_os:
4161 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4163 if self.op.master_netdev:
4164 master_params = self.cfg.GetMasterNetworkParameters()
4165 ems = self.cfg.GetUseExternalMipScript()
4166 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4167 self.cluster.master_netdev)
4168 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4170 result.Raise("Could not disable the master ip")
4171 feedback_fn("Changing master_netdev from %s to %s" %
4172 (master_params.netdev, self.op.master_netdev))
4173 self.cluster.master_netdev = self.op.master_netdev
4175 if self.op.master_netmask:
4176 master_params = self.cfg.GetMasterNetworkParameters()
4177 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4178 result = self.rpc.call_node_change_master_netmask(master_params.name,
4179 master_params.netmask,
4180 self.op.master_netmask,
4182 master_params.netdev)
4184 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4187 self.cluster.master_netmask = self.op.master_netmask
4189 self.cfg.Update(self.cluster, feedback_fn)
4191 if self.op.master_netdev:
4192 master_params = self.cfg.GetMasterNetworkParameters()
4193 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4194 self.op.master_netdev)
4195 ems = self.cfg.GetUseExternalMipScript()
4196 result = self.rpc.call_node_activate_master_ip(master_params.name,
4199 self.LogWarning("Could not re-enable the master ip on"
4200 " the master, please restart manually: %s",
4204 def _UploadHelper(lu, nodes, fname):
4205 """Helper for uploading a file and showing warnings.
4208 if os.path.exists(fname):
4209 result = lu.rpc.call_upload_file(nodes, fname)
4210 for to_node, to_result in result.items():
4211 msg = to_result.fail_msg
4213 msg = ("Copy of file %s to node %s failed: %s" %
4214 (fname, to_node, msg))
4215 lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster configuration object
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  @return: tuple of (files_all, files_opt, files_mc, files_vm) filename sets

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Shared lock on all nodes: redistribution only reads the config and
    # pushes files out, it never modifies node state
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # Updating the (unchanged) cluster object triggers config/ssconf
    # distribution; ancillary files are pushed explicitly afterwards
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  Polls the primary node's mirror status for the given disks until they
  are in sync (or, with C{oneshot}, checks once), logging progress.

  @param lu: the calling LogicalUnit
  @param instance: the instance whose disks to wait for
  @param disks: optional subset of the instance's disks to check
  @type oneshot: boolean
  @param oneshot: whether to perform only a single status check
  @return: True if the disks ended up not degraded, False otherwise
  @raise errors.RemoteError: if the primary node stays unreachable

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)

  return not cumul_degraded
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @param lu: the calling LogicalUnit
  @param dev: the disk object to check (children are checked recursively)
  @param node: the node to check the disk on
  @type on_primary: boolean
  @param on_primary: whether the disk is assembled on its primary node
  @return: True if the disk (and all its children) is consistent

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
4495 class LUOobCommand(NoHooksLU):
4496 """Logical unit for OOB handling.
4500 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4502 def ExpandNames(self):
4503 """Gather locks we need.
4506 if self.op.node_names:
4507 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4508 lock_names = self.op.node_names
4510 lock_names = locking.ALL_SET
4512 self.needed_locks = {
4513 locking.LEVEL_NODE: lock_names,
4516 def CheckPrereq(self):
4517 """Check prerequisites.
4520 - the node exists in the configuration
4523 Any errors are signaled by raising errors.OpPrereqError.
4527 self.master_node = self.cfg.GetMasterNode()
4529 assert self.op.power_delay >= 0.0
4531 if self.op.node_names:
4532 if (self.op.command in self._SKIP_MASTER and
4533 self.master_node in self.op.node_names):
4534 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4535 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4537 if master_oob_handler:
4538 additional_text = ("run '%s %s %s' if you want to operate on the"
4539 " master regardless") % (master_oob_handler,
4543 additional_text = "it does not support out-of-band operations"
4545 raise errors.OpPrereqError(("Operating on the master node %s is not"
4546 " allowed for %s; %s") %
4547 (self.master_node, self.op.command,
4548 additional_text), errors.ECODE_INVAL)
4550 self.op.node_names = self.cfg.GetNodeList()
4551 if self.op.command in self._SKIP_MASTER:
4552 self.op.node_names.remove(self.master_node)
4554 if self.op.command in self._SKIP_MASTER:
4555 assert self.master_node not in self.op.node_names
4557 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4559 raise errors.OpPrereqError("Node %s not found" % node_name,
4562 self.nodes.append(node)
4564 if (not self.op.ignore_status and
4565 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4566 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4567 " not marked offline") % node_name,
4570 def Exec(self, feedback_fn):
4571 """Execute OOB and return result if we expect any.
4574 master_node = self.master_node
4577 for idx, node in enumerate(utils.NiceSort(self.nodes,
4578 key=lambda node: node.name)):
4579 node_entry = [(constants.RS_NORMAL, node.name)]
4580 ret.append(node_entry)
4582 oob_program = _SupportsOob(self.cfg, node)
4585 node_entry.append((constants.RS_UNAVAIL, None))
4588 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4589 self.op.command, oob_program, node.name)
4590 result = self.rpc.call_run_oob(master_node, oob_program,
4591 self.op.command, node.name,
4595 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4596 node.name, result.fail_msg)
4597 node_entry.append((constants.RS_NODATA, None))
4600 self._CheckPayload(result)
4601 except errors.OpExecError, err:
4602 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4604 node_entry.append((constants.RS_NODATA, None))
4606 if self.op.command == constants.OOB_HEALTH:
4607 # For health we should log important events
4608 for item, status in result.payload:
4609 if status in [constants.OOB_STATUS_WARNING,
4610 constants.OOB_STATUS_CRITICAL]:
4611 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4612 item, node.name, status)
4614 if self.op.command == constants.OOB_POWER_ON:
4616 elif self.op.command == constants.OOB_POWER_OFF:
4617 node.powered = False
4618 elif self.op.command == constants.OOB_POWER_STATUS:
4619 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4620 if powered != node.powered:
4621 logging.warning(("Recorded power state (%s) of node '%s' does not"
4622 " match actual power state (%s)"), node.powered,
4625 # For configuration changing commands we should update the node
4626 if self.op.command in (constants.OOB_POWER_ON,
4627 constants.OOB_POWER_OFF):
4628 self.cfg.Update(node, feedback_fn)
4630 node_entry.append((constants.RS_NORMAL, result.payload))
4632 if (self.op.command == constants.OOB_POWER_ON and
4633 idx < len(self.nodes) - 1):
4634 time.sleep(self.op.power_delay)
4638 def _CheckPayload(self, result):
4639 """Checks if the payload is valid.
4641 @param result: RPC result
4642 @raises errors.OpExecError: If payload is not valid
4646 if self.op.command == constants.OOB_HEALTH:
4647 if not isinstance(result.payload, list):
4648 errs.append("command 'health' is expected to return a list but got %s" %
4649 type(result.payload))
4651 for item, status in result.payload:
4652 if status not in constants.OOB_STATUSES:
4653 errs.append("health item '%s' has invalid status '%s'" %
4656 if self.op.command == constants.OOB_POWER_STATUS:
4657 if not isinstance(result.payload, dict):
4658 errs.append("power-status is expected to return a dict but got %s" %
4659 type(result.payload))
4661 if self.op.command in [
4662 constants.OOB_POWER_ON,
4663 constants.OOB_POWER_OFF,
4664 constants.OOB_POWER_CYCLE,
4666 if result.payload is not None:
4667 errs.append("%s is expected to not return payload but got '%s'" %
4668 (self.op.command, result.payload))
4671 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4672 utils.CommaJoin(errs))
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Query the available fields for resources/items of a certain kind.

  Unlike L{LUQuery} this never touches cluster state, so no locks are
  needed; it only reports the field definitions supported by the query
  implementation for C{op.what}.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # only the implementation class is needed; it is not instantiated
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    # An unknown storage type has no entry in MODIFIABLE_STORAGE_FIELDS
    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    # Reject any requested change outside the modifiable field set
    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
# NOTE(review): this block is a line-numbered listing of the LUNodeAdd
# logical unit. Gaps in the embedded numbering (e.g. 5396-5398, 5546-5551)
# show that some original lines -- blank lines, docstring terminators,
# if/else guard lines -- were dropped and the indentation was flattened.
# Comments below describe only the logic that is visible; confirm any
# elided structure against the original file before editing.
5394 class LUNodeAdd(LogicalUnit):
5395 """Logical unit for adding node to the cluster.
5399 HTYPE = constants.HTYPE_NODE
5400 _NFLAGS = ["master_capable", "vm_capable"]
# CheckArguments: normalize the node name via DNS lookup and reject
# invalid re-add combinations (re-adding the master, or passing a group).
5402 def CheckArguments(self):
5403 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5404 # validate/normalize the node name
5405 self.hostname = netutils.GetHostname(name=self.op.node_name,
5406 family=self.primary_ip_family)
5407 self.op.node_name = self.hostname.name
5409 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5410 raise errors.OpPrereqError("Cannot readd the master node",
5413 if self.op.readd and self.op.group:
5414 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5415 " being readded", errors.ECODE_INVAL)
# Hook environment: the new node's name, IPs and capability flags.
5417 def BuildHooksEnv(self):
5420 This will run on all nodes before, and on all nodes + the new node after.
5424 "OP_TARGET": self.op.node_name,
5425 "NODE_NAME": self.op.node_name,
5426 "NODE_PIP": self.op.primary_ip,
5427 "NODE_SIP": self.op.secondary_ip,
5428 "MASTER_CAPABLE": str(self.op.master_capable),
5429 "VM_CAPABLE": str(self.op.vm_capable),
5432 def BuildHooksNodes(self):
5433 """Build hooks nodes.
5436 # Exclude added node
5437 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5438 post_nodes = pre_nodes + [self.op.node_name, ]
5440 return (pre_nodes, post_nodes)
# CheckPrereq: validates the IP configuration (secondary-IP rules and
# uniqueness against existing nodes), checks single/dual-homed parity
# with the master, verifies reachability, decides master-candidate
# self-promotion and finally builds self.new_node.
5442 def CheckPrereq(self):
5443 """Check prerequisites.
5446 - the new node is not already in the config
5448 - its parameters (single/dual homed) matches the cluster
5450 Any errors are signaled by raising errors.OpPrereqError.
5454 hostname = self.hostname
5455 node = hostname.name
5456 primary_ip = self.op.primary_ip = hostname.ip
5457 if self.op.secondary_ip is None:
5458 if self.primary_ip_family == netutils.IP6Address.family:
5459 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5460 " IPv4 address must be given as secondary",
5462 self.op.secondary_ip = primary_ip
5464 secondary_ip = self.op.secondary_ip
5465 if not netutils.IP4Address.IsValid(secondary_ip):
5466 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5467 " address" % secondary_ip, errors.ECODE_INVAL)
# NOTE(review): `cfg` is used below but its binding (presumably
# `cfg = self.cfg`, original line 5453) is elided from this listing.
5469 node_list = cfg.GetNodeList()
5470 if not self.op.readd and node in node_list:
5471 raise errors.OpPrereqError("Node %s is already in the configuration" %
5472 node, errors.ECODE_EXISTS)
5473 elif self.op.readd and node not in node_list:
5474 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5477 self.changed_primary_ip = False
5479 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5480 if self.op.readd and node == existing_node_name:
5481 if existing_node.secondary_ip != secondary_ip:
5482 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5483 " address configuration as before",
5485 if existing_node.primary_ip != primary_ip:
5486 self.changed_primary_ip = True
5490 if (existing_node.primary_ip == primary_ip or
5491 existing_node.secondary_ip == primary_ip or
5492 existing_node.primary_ip == secondary_ip or
5493 existing_node.secondary_ip == secondary_ip):
5494 raise errors.OpPrereqError("New node ip address(es) conflict with"
5495 " existing node %s" % existing_node.name,
5496 errors.ECODE_NOTUNIQUE)
5498 # After this 'if' block, None is no longer a valid value for the
5499 # _capable op attributes
5501 old_node = self.cfg.GetNodeInfo(node)
5502 assert old_node is not None, "Can't retrieve locked node %s" % node
5503 for attr in self._NFLAGS:
5504 if getattr(self.op, attr) is None:
5505 setattr(self.op, attr, getattr(old_node, attr))
# For a fresh add (non-readd branch), unspecified capability flags
# default to True:
5507 for attr in self._NFLAGS:
5508 if getattr(self.op, attr) is None:
5509 setattr(self.op, attr, True)
5511 if self.op.readd and not self.op.vm_capable:
5512 pri, sec = cfg.GetNodeInstances(node)
5514 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5515 " flag set to false, but it already holds"
5516 " instances" % node,
5519 # check that the type of the node (single versus dual homed) is the
5520 # same as for the master
5521 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5522 master_singlehomed = myself.secondary_ip == myself.primary_ip
5523 newbie_singlehomed = secondary_ip == primary_ip
5524 if master_singlehomed != newbie_singlehomed:
5525 if master_singlehomed:
5526 raise errors.OpPrereqError("The master has no secondary ip but the"
5527 " new node has one",
5530 raise errors.OpPrereqError("The master has a secondary ip but the"
5531 " new node doesn't have one",
5534 # checks reachability
5535 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5536 raise errors.OpPrereqError("Node not reachable by ping",
5537 errors.ECODE_ENVIRON)
5539 if not newbie_singlehomed:
5540 # check reachability from my secondary ip to newbie's secondary ip
5541 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5542 source=myself.secondary_ip):
5543 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5544 " based ping to node daemon port",
5545 errors.ECODE_ENVIRON)
# NOTE(review): numbering jumps 5545 -> 5552 here; the lines computing
# the `exceptions` list used just below were dropped from this listing.
5552 if self.op.master_capable:
5553 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5555 self.master_candidate = False
# Re-add reuses the existing node object; a fresh add builds a new
# objects.Node in the requested group (branch guards partly elided).
5558 self.new_node = old_node
5560 node_group = cfg.LookupNodeGroup(self.op.group)
5561 self.new_node = objects.Node(name=node,
5562 primary_ip=primary_ip,
5563 secondary_ip=secondary_ip,
5564 master_candidate=self.master_candidate,
5565 offline=False, drained=False,
5568 if self.op.ndparams:
5569 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5571 if self.op.hv_state:
5572 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5574 if self.op.disk_state:
5575 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
# Exec: performs the actual add -- protocol version handshake,
# /etc/hosts update, secondary-IP check, node-verify run from the
# master, then ReaddNode or AddNode on the context.
5577 def Exec(self, feedback_fn):
5578 """Adds the new node to the cluster.
5581 new_node = self.new_node
5582 node = new_node.name
5584 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5587 # We adding a new node so we assume it's powered
5588 new_node.powered = True
5590 # for re-adds, reset the offline/drained/master-candidate flags;
5591 # we need to reset here, otherwise offline would prevent RPC calls
5592 # later in the procedure; this also means that if the re-add
5593 # fails, we are left with a non-offlined, broken node
5595 new_node.drained = new_node.offline = False # pylint: disable=W0201
5596 self.LogInfo("Readding a node, the offline/drained flags were reset")
5597 # if we demote the node, we do cleanup later in the procedure
5598 new_node.master_candidate = self.master_candidate
5599 if self.changed_primary_ip:
5600 new_node.primary_ip = self.op.primary_ip
5602 # copy the master/vm_capable flags
5603 for attr in self._NFLAGS:
5604 setattr(new_node, attr, getattr(self.op, attr))
5606 # notify the user about any possible mc promotion
5607 if new_node.master_candidate:
5608 self.LogInfo("Node will be a master candidate")
5610 if self.op.ndparams:
5611 new_node.ndparams = self.op.ndparams
5613 new_node.ndparams = {}
5615 if self.op.hv_state:
5616 new_node.hv_state_static = self.new_hv_state
5618 if self.op.disk_state:
5619 new_node.disk_state_static = self.new_disk_state
5621 # check connectivity
5622 result = self.rpc.call_version([node])[node]
5623 result.Raise("Can't get version information from node %s" % node)
5624 if constants.PROTOCOL_VERSION == result.payload:
5625 logging.info("Communication to node %s fine, sw version %s match",
5626 node, result.payload)
5628 raise errors.OpExecError("Version mismatch master version %s,"
5629 " node version %s" %
5630 (constants.PROTOCOL_VERSION, result.payload))
5632 # Add node to our /etc/hosts, and add key to known_hosts
5633 if self.cfg.GetClusterInfo().modify_etc_hosts:
5634 master_node = self.cfg.GetMasterNode()
5635 result = self.rpc.call_etc_hosts_modify(master_node,
5636 constants.ETC_HOSTS_ADD,
5639 result.Raise("Can't update hosts file with new host data")
5641 if new_node.secondary_ip != new_node.primary_ip:
5642 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5645 node_verify_list = [self.cfg.GetMasterNode()]
5646 node_verify_param = {
5647 constants.NV_NODELIST: ([node], {}),
5648 # TODO: do a node-net-test as well?
5651 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5652 self.cfg.GetClusterName())
5653 for verifier in node_verify_list:
5654 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5655 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5657 for failed in nl_payload:
5658 feedback_fn("ssh/hostname verification failed"
5659 " (checking from %s): %s" %
5660 (verifier, nl_payload[failed]))
5661 raise errors.OpExecError("ssh/hostname verification failed")
# Re-add path: redistribute files and update the existing node entry;
# fresh-add path: redistribute to the new node and register it.
5664 _RedistributeAncillaryFiles(self)
5665 self.context.ReaddNode(new_node)
5666 # make sure we redistribute the config
5667 self.cfg.Update(new_node, feedback_fn)
5668 # and make sure the new node will not have old files around
5669 if not new_node.master_candidate:
5670 result = self.rpc.call_node_demote_from_mc(new_node.name)
5671 msg = result.fail_msg
5673 self.LogWarning("Node failed to demote itself from master"
5674 " candidate status: %s" % msg)
5676 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5677 additional_vm=self.op.vm_capable)
5678 self.context.AddNode(new_node, self.proc.GetECId())
# NOTE(review): this block is a line-numbered listing of the LUNodeSetParams
# logical unit; gaps in the embedded numbering show that some original
# lines (blank lines, docstring terminators, dict/if/else opener lines,
# error-code arguments) were dropped and indentation was flattened.
# Comments below describe only what is visible.
5681 class LUNodeSetParams(LogicalUnit):
5682 """Modifies the parameters of a node.
5684 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5685 to the node role (as _ROLE_*)
5686 @cvar _R2F: a dictionary from node role to tuples of flags
5687 @cvar _FLAGS: a list of attribute names corresponding to the flags
5690 HPATH = "node-modify"
5691 HTYPE = constants.HTYPE_NODE
5693 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
# NOTE(review): the `_F2R = {` opener (original line 5694) is elided;
# the entries below map (mc, drained, offline) flag tuples to roles.
5695 (True, False, False): _ROLE_CANDIDATE,
5696 (False, True, False): _ROLE_DRAINED,
5697 (False, False, True): _ROLE_OFFLINE,
5698 (False, False, False): _ROLE_REGULAR,
5700 _R2F = dict((v, k) for k, v in _F2R.items())
5701 _FLAGS = ["master_candidate", "drained", "offline"]
# CheckArguments: require at least one modification, at most one of the
# exclusive role flags, and validate the secondary IP; compute the
# lock_all / lock_instances strategy flags used by ExpandNames.
5703 def CheckArguments(self):
5704 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5705 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5706 self.op.master_capable, self.op.vm_capable,
5707 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5709 if all_mods.count(None) == len(all_mods):
5710 raise errors.OpPrereqError("Please pass at least one modification",
5712 if all_mods.count(True) > 1:
5713 raise errors.OpPrereqError("Can't set the node into more than one"
5714 " state at the same time",
5717 # Boolean value that tells us whether we might be demoting from MC
5718 self.might_demote = (self.op.master_candidate == False or
5719 self.op.offline == True or
5720 self.op.drained == True or
5721 self.op.master_capable == False)
5723 if self.op.secondary_ip:
5724 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5725 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5726 " address" % self.op.secondary_ip,
5729 self.lock_all = self.op.auto_promote and self.might_demote
5730 self.lock_instances = self.op.secondary_ip is not None
5732 def _InstanceFilter(self, instance):
5733 """Filter for getting affected instances.
5736 return (instance.disk_template in constants.DTS_INT_MIRROR and
5737 self.op.node_name in instance.all_nodes)
# ExpandNames: lock either all nodes (possible candidate-pool changes)
# or just the target node; node-resource and instance locks are shared.
5739 def ExpandNames(self):
5741 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5743 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5745 # Since modifying a node can have severe effects on currently running
5746 # operations the resource lock is at least acquired in shared mode
5747 self.needed_locks[locking.LEVEL_NODE_RES] = \
5748 self.needed_locks[locking.LEVEL_NODE]
5750 # Get node resource and instance locks in shared mode; they are not used
5751 # for anything but read-only access
5752 self.share_locks[locking.LEVEL_NODE_RES] = 1
5753 self.share_locks[locking.LEVEL_INSTANCE] = 1
5755 if self.lock_instances:
5756 self.needed_locks[locking.LEVEL_INSTANCE] = \
5757 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5759 def BuildHooksEnv(self):
5762 This runs on the master node.
5766 "OP_TARGET": self.op.node_name,
5767 "MASTER_CANDIDATE": str(self.op.master_candidate),
5768 "OFFLINE": str(self.op.offline),
5769 "DRAINED": str(self.op.drained),
5770 "MASTER_CAPABLE": str(self.op.master_capable),
5771 "VM_CAPABLE": str(self.op.vm_capable),
5774 def BuildHooksNodes(self):
5775 """Build hooks nodes.
5778 nl = [self.cfg.GetMasterNode(), self.op.node_name]
# CheckPrereq: validates every requested change against current node
# state and computes self.old_role/self.new_role for Exec.
5781 def CheckPrereq(self):
5782 """Check prerequisites.
5784 This only checks the instance list against the existing names.
5787 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5789 if self.lock_instances:
5790 affected_instances = \
5791 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5793 # Verify instance locks
5794 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5795 wanted_instances = frozenset(affected_instances.keys())
5796 if wanted_instances - owned_instances:
5797 raise errors.OpPrereqError("Instances affected by changing node %s's"
5798 " secondary IP address have changed since"
5799 " locks were acquired, wanted '%s', have"
5800 " '%s'; retry the operation" %
5802 utils.CommaJoin(wanted_instances),
5803 utils.CommaJoin(owned_instances)),
5806 affected_instances = None
5808 if (self.op.master_candidate is not None or
5809 self.op.drained is not None or
5810 self.op.offline is not None):
5811 # we can't change the master's node flags
5812 if self.op.node_name == self.cfg.GetMasterNode():
5813 raise errors.OpPrereqError("The master role can be changed"
5814 " only via master-failover",
5817 if self.op.master_candidate and not node.master_capable:
5818 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5819 " it a master candidate" % node.name,
5822 if self.op.vm_capable == False:
5823 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5825 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5826 " the vm_capable flag" % node.name,
5829 if node.master_candidate and self.might_demote and not self.lock_all:
5830 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5831 # check if after removing the current node, we're missing master
5833 (mc_remaining, mc_should, _) = \
5834 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5835 if mc_remaining < mc_should:
5836 raise errors.OpPrereqError("Not enough master candidates, please"
5837 " pass auto promote option to allow"
5838 " promotion", errors.ECODE_STATE)
5840 self.old_flags = old_flags = (node.master_candidate,
5841 node.drained, node.offline)
5842 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5843 self.old_role = old_role = self._F2R[old_flags]
5845 # Check for ineffective changes
5846 for attr in self._FLAGS:
5847 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5848 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5849 setattr(self.op, attr, None)
5851 # Past this point, any flag change to False means a transition
5852 # away from the respective state, as only real changes are kept
5854 # TODO: We might query the real power state if it supports OOB
5855 if _SupportsOob(self.cfg, node):
5856 if self.op.offline is False and not (node.powered or
5857 self.op.powered == True):
5858 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5859 " offline status can be reset") %
5861 elif self.op.powered is not None:
5862 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5863 " as it does not support out-of-band"
5864 " handling") % self.op.node_name)
5866 # If we're being deofflined/drained, we'll MC ourself if needed
5867 if (self.op.drained == False or self.op.offline == False or
5868 (self.op.master_capable and not node.master_capable)):
5869 if _DecideSelfPromotion(self):
5870 self.op.master_candidate = True
5871 self.LogInfo("Auto-promoting node to master candidate")
5873 # If we're no longer master capable, we'll demote ourselves from MC
5874 if self.op.master_capable == False and node.master_candidate:
5875 self.LogInfo("Demoting from master candidate")
5876 self.op.master_candidate = False
# Determine the new role from the (at most one) requested role flag.
5879 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5880 if self.op.master_candidate:
5881 new_role = self._ROLE_CANDIDATE
5882 elif self.op.drained:
5883 new_role = self._ROLE_DRAINED
5884 elif self.op.offline:
5885 new_role = self._ROLE_OFFLINE
5886 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5887 # False is still in new flags, which means we're un-setting (the
5889 new_role = self._ROLE_REGULAR
5890 else: # no new flags, nothing, keep old role
5893 self.new_role = new_role
5895 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5896 # Trying to transition out of offline status
5897 # TODO: Use standard RPC runner, but make sure it works when the node is
5898 # still marked offline
5899 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5901 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5902 " to report its version: %s" %
5903 (node.name, result.fail_msg),
5906 self.LogWarning("Transitioning node from offline to online state"
5907 " without using re-add. Please make sure the node"
5910 if self.op.secondary_ip:
5911 # Ok even without locking, because this can't be changed by any LU
5912 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5913 master_singlehomed = master.secondary_ip == master.primary_ip
5914 if master_singlehomed and self.op.secondary_ip:
5915 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5916 " homed cluster", errors.ECODE_INVAL)
5918 assert not (frozenset(affected_instances) -
5919 self.owned_locks(locking.LEVEL_INSTANCE))
# NOTE(review): the guard selecting the offline-node branch below
# (around original lines 5920-5921) is elided from this listing.
5922 if affected_instances:
5923 raise errors.OpPrereqError("Cannot change secondary IP address:"
5924 " offline node has instances (%s)"
5925 " configured to use it" %
5926 utils.CommaJoin(affected_instances.keys()))
5928 # On online nodes, check that no instances are running, and that
5929 # the node has the new ip and we can reach it.
5930 for instance in affected_instances.values():
5931 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5932 msg="cannot change secondary ip")
5934 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5935 if master.name != node.name:
5936 # check reachability from master secondary ip to new secondary ip
5937 if not netutils.TcpPing(self.op.secondary_ip,
5938 constants.DEFAULT_NODED_PORT,
5939 source=master.secondary_ip):
5940 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5941 " based ping to node daemon port",
5942 errors.ECODE_ENVIRON)
5944 if self.op.ndparams:
5945 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5946 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5947 self.new_ndparams = new_ndparams
5949 if self.op.hv_state:
5950 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5951 self.node.hv_state_static)
5953 if self.op.disk_state:
5954 self.new_disk_state = \
5955 _MergeAndVerifyDiskState(self.op.disk_state,
5956 self.node.disk_state_static)
# Exec: applies the validated changes to the node object, adjusts the
# candidate pool if needed, saves the config and notifies the context
# when the master-candidate status flipped.
5958 def Exec(self, feedback_fn):
5963 old_role = self.old_role
5964 new_role = self.new_role
5968 if self.op.ndparams:
5969 node.ndparams = self.new_ndparams
5971 if self.op.powered is not None:
5972 node.powered = self.op.powered
5974 if self.op.hv_state:
5975 node.hv_state_static = self.new_hv_state
5977 if self.op.disk_state:
5978 node.disk_state_static = self.new_disk_state
5980 for attr in ["master_capable", "vm_capable"]:
5981 val = getattr(self.op, attr)
5983 setattr(node, attr, val)
5984 result.append((attr, str(val)))
5986 if new_role != old_role:
5987 # Tell the node to demote itself, if no longer MC and not offline
5988 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5989 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5991 self.LogWarning("Node failed to demote itself: %s", msg)
5993 new_flags = self._R2F[new_role]
5994 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5996 result.append((desc, str(nf)))
5997 (node.master_candidate, node.drained, node.offline) = new_flags
5999 # we locked all nodes, we adjust the CP before updating this node
6001 _AdjustCandidatePool(self, [node.name])
6003 if self.op.secondary_ip:
6004 node.secondary_ip = self.op.secondary_ip
6005 result.append(("secondary_ip", self.op.secondary_ip))
6007 # this will trigger configuration file update, if needed
6008 self.cfg.Update(node, feedback_fn)
6010 # this will trigger job queue propagation or cleanup if the mc
6012 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6013 self.context.ReaddNode(node)
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # Powercycling the master would take the whole cluster down; only
    # allow it when the caller explicitly forces the operation
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # read-only snapshot of the config; no locks required
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      # only expose hvparams for hypervisors that are actually enabled
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  # no runtime-computed fields; everything comes from the config/files
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        # unknown fields should have been rejected in CheckArguments
        raise errors.ParameterError(field)
      values.append(entry)

    return values
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # node locks are computed once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy so the config object's size is not clobbered
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Assembles all of the instance's disks; on failure the already-started
  disks are shut down again and an OpExecError is raised.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # roll back: don't leave half-assembled disks around
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # node locks are computed once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      # forced: shut down the disks regardless of instance state
      _ShutdownInstanceDisks(self, instance)
    else:
      # safe path: refuse when the instance is still running
      _SafeShutdownInstanceDisks(self, instance)
# Safe variant of _ShutdownInstanceDisks: refuses to act unless the
# instance is administratively down, so running instances keep their disks.
6339 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6340   """Shutdown block devices of an instance.
6342   This function checks if an instance is running, before calling
6343   _ShutdownInstanceDisks.
# Raises via _CheckInstanceState if the instance is not in INSTANCE_DOWN.
6346   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6347   _ShutdownInstanceDisks(lu, instance, disks=disks)
# Normalize a disk selection: None means "all instance disks"; an explicit
# list is validated to be a subset of the instance's own disks.
6350 def _ExpandCheckDisks(instance, disks):
6351   """Return the instance disks selected by the disks list
6353   @type disks: list of L{objects.Disk} or None
6354   @param disks: selected disks
6355   @rtype: list of L{objects.Disk}
6356   @return: selected instance disks to act on
# NOTE(review): the ``if disks is None:`` guard for the default-return is
# not visible in this listing; the return below presumably sits under it.
6360     return instance.disks
6362     if not set(disks).issubset(instance.disks):
# ProgrammerError (not OpPrereqError): a foreign disk here is an internal
# API misuse, not a user input error.  The message continuation and the
# final ``return disks`` are below the visible range.
6363       raise errors.ProgrammerError("Can only act on disks belonging to the"
# Shut down the block devices of an instance on every node in each disk's
# node tree.  Errors are collected as warnings; only primary-node errors
# (unless ignore_primary) or online-secondary errors count as failures.
6368 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6369   """Shutdown block devices of an instance.
6371   This does the shutdown on all nodes of the instance.
6373   If the ignore_primary is false, errors on the primary node are
6378   disks = _ExpandCheckDisks(instance, disks)
# NOTE(review): the outer ``for disk in disks:`` loop (binding ``disk``
# used below) and the success-flag initialization are not visible here.
6381   for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6382     lu.cfg.SetDiskID(top_disk, node)
6383     result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6384     msg = result.fail_msg
# NOTE(review): an ``if msg:`` guard is presumably elided -- the warning
# and failure-classification below should only run on RPC failure.
6386       lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6387                     disk.iv_name, node, msg)
# Primary errors matter unless explicitly ignored; secondary errors matter
# only when the node is not offline.  The flag assignment and final return
# are below the visible range.
6388       if ((node == instance.primary_node and not ignore_primary) or
6389           (node != instance.primary_node and not result.offline)):
6394 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6395   """Checks if a node has enough free memory.
6397   This function check if a given node has the needed amount of free
6398   memory. In case the node has less memory or we cannot get the
6399   information from the node, this function raise an OpPrereqError
6402   @type lu: C{LogicalUnit}
6403   @param lu: a logical unit from which we get configuration data
6405   @param node: the node to check
6406   @type reason: C{str}
6407   @param reason: string to use in the error message
6408   @type requested: C{int}
6409   @param requested: the amount of memory in MiB to check for
6410   @type hypervisor_name: C{str}
6411   @param hypervisor_name: the hypervisor to ask for memory stats
6413   @return: node current free memory
6414   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6415       we cannot check the node
# Single-node RPC; Raise() converts an RPC failure into an OpPrereqError
# (prereq=True) with an environment error code.
6418   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6419   nodeinfo[node].Raise("Can't get data from node %s" % node,
6420                        prereq=True, ecode=errors.ECODE_ENVIRON)
# Payload layout per call_node_info: (bootid, vg_info-list, hv_info-list);
# only the single hypervisor's info dict is needed here.
6421   (_, _, (hv_info, )) = nodeinfo[node].payload
6423   free_mem = hv_info.get("memory_free", None)
# A missing or non-integer "memory_free" means the hypervisor data is
# unusable -- treat as an environment error rather than guessing.
6424   if not isinstance(free_mem, int):
6425     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6426                                " was '%s'" % (node, free_mem),
6427                                errors.ECODE_ENVIRON)
6428   if requested > free_mem:
6429     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6430                                " needed %s MiB, available %s MiB" %
6431                                (node, reason, requested, free_mem),
# NOTE(review): the error-code argument of this raise and the final
# ``return free_mem`` promised by @return are below the visible range.
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in the all VGs.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  # Delegate the actual verification to the per-VG helper, once per
  # requested volume group.
  for vg_name in req_sizes:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, req_sizes[vg_name])
6459 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6460   """Checks if nodes have enough free disk space in the specified VG.
6462   This function check if all given nodes have the needed amount of
6463   free disk. In case any node has less disk or we cannot get the
6464   information from the node, this function raise an OpPrereqError
6467   @type lu: C{LogicalUnit}
6468   @param lu: a logical unit from which we get configuration data
6469   @type nodenames: C{list}
6470   @param nodenames: the list of node names to check
6472   @param vg: the volume group to check
6473   @type requested: C{int}
6474   @param requested: the amount of disk in MiB to check for
6475   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6476       or we cannot check the node
# One RPC covering all nodes, asking only about the given VG.
6479   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6480   for node in nodenames:
6481     info = nodeinfo[node]
6482     info.Raise("Cannot get current information from node %s" % node,
6483                prereq=True, ecode=errors.ECODE_ENVIRON)
# Payload layout per call_node_info: (bootid, vg_info-list, hv_info-list).
6484     (_, (vg_info, ), _) = info.payload
6485     vg_free = vg_info.get("vg_free", None)
6486     if not isinstance(vg_free, int):
6487       raise errors.OpPrereqError("Can't compute free disk space on node"
6488                                  " %s for vg %s, result was '%s'" %
6489                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6490     if requested > vg_free:
6491       raise errors.OpPrereqError("Not enough disk space on target node %s"
6492                                  " vg %s: required %d MiB, available %d MiB" %
6493                                  (node, vg, requested, vg_free),
# NOTE(review): the error-code argument of this raise is below the
# visible range.
6497 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6498   """Checks if nodes have enough physical CPUs
6500   This function checks if all given nodes have the needed number of
6501   physical CPUs. In case any node has less CPUs or we cannot get the
6502   information from the node, this function raises an OpPrereqError
6505   @type lu: C{LogicalUnit}
6506   @param lu: a logical unit from which we get configuration data
6507   @type nodenames: C{list}
6508   @param nodenames: the list of node names to check
6509   @type requested: C{int}
6510   @param requested: the minimum acceptable number of physical CPUs
6511   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6512       or we cannot check the node
# One RPC covering all nodes, querying only the given hypervisor.
6515   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6516   for node in nodenames:
6517     info = nodeinfo[node]
6518     info.Raise("Cannot get current information from node %s" % node,
6519                prereq=True, ecode=errors.ECODE_ENVIRON)
# Payload layout per call_node_info: (bootid, vg_info-list, hv_info-list).
6520     (_, _, (hv_info, )) = info.payload
6521     num_cpus = hv_info.get("cpu_total", None)
6522     if not isinstance(num_cpus, int):
6523       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6524                                  " on node %s, result was '%s'" %
6525                                  (node, num_cpus), errors.ECODE_ENVIRON)
6526     if requested > num_cpus:
6527       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6528                                  "required" % (node, num_cpus, requested),
# NOTE(review): the error-code argument of this raise is below the
# visible range.
6532 class LUInstanceStartup(LogicalUnit):
6533   """Starts an instance.
# Hooks run with the instance-start path/type on master + all instance nodes.
6536   HPATH = "instance-start"
6537   HTYPE = constants.HTYPE_INSTANCE
6540   def CheckArguments(self):
# Normalize legacy backend-parameter names before type-checking them.
6542     if self.op.beparams:
6543       # fill the beparams dict
6544       objects.UpgradeBeParams(self.op.beparams)
6545       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6547   def ExpandNames(self):
6548     self._ExpandAndLockInstance()
6549     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6551   def DeclareLocks(self, level):
6552     if level == locking.LEVEL_NODE_RES:
# Only the primary node's resources are needed to start an instance.
6553       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6555   def BuildHooksEnv(self):
6558     This runs on master, primary and secondary nodes of the instance.
# NOTE(review): the ``env = {...}`` opening and the trailing ``return env``
# around these lines are not visible in this listing.
6562       "FORCE": self.op.force,
6565     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6569   def BuildHooksNodes(self):
6570     """Build hooks nodes.
# Master plus all of the instance's nodes; the return is below this line.
6573     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6576   def CheckPrereq(self):
6577     """Check prerequisites.
6579     This checks that the instance is in the cluster.
6582     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6583     assert self.instance is not None, \
6584       "Cannot retrieve locked instance %s" % self.op.instance_name
# Validate any hvparams override locally (syntax) and on all nodes.
6587     if self.op.hvparams:
6588       # check hypervisor parameter syntax (locally)
6589       cluster = self.cfg.GetClusterInfo()
6590       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6591       filled_hvp = cluster.FillHV(instance)
6592       filled_hvp.update(self.op.hvparams)
6593       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6594       hv_type.CheckParameterSyntax(filled_hvp)
6595       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6597     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6599     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
# NOTE(review): the two branches below appear to be an if/else -- with an
# offline primary (and ignore_offline_nodes) only a warning is emitted;
# otherwise the online checks run.  The ``else:`` line is not visible here.
6601     if self.primary_offline and self.op.ignore_offline_nodes:
6602       self.proc.LogWarning("Ignoring offline primary node")
6604       if self.op.hvparams or self.op.beparams:
6605         self.proc.LogWarning("Overridden parameters are ignored")
6607       _CheckNodeOnline(self, instance.primary_node)
6609       bep = self.cfg.GetClusterInfo().FillBE(instance)
6610       bep.update(self.op.beparams)
6612       # check bridges existence
6613       _CheckInstanceBridgesExist(self, instance)
6615       remote_info = self.rpc.call_instance_info(instance.primary_node,
6617                                                 instance.hypervisor)
6618       remote_info.Raise("Error checking node %s" % instance.primary_node,
6619                         prereq=True, ecode=errors.ECODE_ENVIRON)
# Only verify free memory when the instance is not already running.
6620       if not remote_info.payload: # not running already
6621         _CheckNodeFreeMemory(self, instance.primary_node,
6622                              "starting instance %s" % instance.name,
6623                              bep[constants.BE_MINMEM], instance.hypervisor)
6625   def Exec(self, feedback_fn):
6626     """Start the instance.
6629     instance = self.instance
6630     force = self.op.force
# no_remember: start without recording the new state in the config.
6632     if not self.op.no_remember:
6633       self.cfg.MarkInstanceUp(instance.name)
# NOTE(review): the else-branch structure below (offline primary -> log
# only; otherwise assemble disks and start) has elided lines, including
# the ``result =`` assignment for the start RPC and the ``if msg:`` guard.
6635     if self.primary_offline:
6636       assert self.op.ignore_offline_nodes
6637       self.proc.LogInfo("Primary node offline, marked instance as started")
6639       node_current = instance.primary_node
6641       _StartInstanceDisks(self, instance, force)
6644       self.rpc.call_instance_start(node_current,
6645                                    (instance, self.op.hvparams,
6647                                    self.op.startup_paused)
6648     msg = result.fail_msg
# On start failure the assembled disks are torn down before aborting.
6650       _ShutdownInstanceDisks(self, instance)
6651       raise errors.OpExecError("Could not start instance: %s" % msg)
6654 class LUInstanceReboot(LogicalUnit):
6655   """Reboot an instance.
6658   HPATH = "instance-reboot"
6659   HTYPE = constants.HTYPE_INSTANCE
6662   def ExpandNames(self):
6663     self._ExpandAndLockInstance()
6665   def BuildHooksEnv(self):
6668     This runs on master, primary and secondary nodes of the instance.
# NOTE(review): the ``env = {...}`` opening and closing lines around this
# dict body, and the trailing ``return env``, are not visible here.
6672       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6673       "REBOOT_TYPE": self.op.reboot_type,
6674       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6677     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6681   def BuildHooksNodes(self):
6682     """Build hooks nodes.
# Master plus all of the instance's nodes; the return is below this line.
6685     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6688   def CheckPrereq(self):
6689     """Check prerequisites.
6691     This checks that the instance is in the cluster.
6694     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6695     assert self.instance is not None, \
6696       "Cannot retrieve locked instance %s" % self.op.instance_name
6697     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6698     _CheckNodeOnline(self, instance.primary_node)
6700     # check bridges existence
6701     _CheckInstanceBridgesExist(self, instance)
6703   def Exec(self, feedback_fn):
6704     """Reboot the instance.
6707     instance = self.instance
6708     ignore_secondaries = self.op.ignore_secondaries
6709     reboot_type = self.op.reboot_type
# Ask the primary node whether the instance is actually running, to pick
# between an in-place reboot and a full stop/start cycle.
6711     remote_info = self.rpc.call_instance_info(instance.primary_node,
6713                                               instance.hypervisor)
6714     remote_info.Raise("Error checking node %s" % instance.primary_node)
6715     instance_running = bool(remote_info.payload)
6717     node_current = instance.primary_node
# Soft/hard reboot of a running instance: handled by the node directly.
6719     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6720                                             constants.INSTANCE_REBOOT_HARD]:
6721       for disk in instance.disks:
6722         self.cfg.SetDiskID(disk, node_current)
6723       result = self.rpc.call_instance_reboot(node_current, instance,
6725                                              self.op.shutdown_timeout)
6726       result.Raise("Could not reboot instance")
# NOTE(review): this branch is the full-reboot path (an ``else:`` line is
# presumably elided above): shut down if running, then restart.
6728       if instance_running:
6729         result = self.rpc.call_instance_shutdown(node_current, instance,
6730                                                  self.op.shutdown_timeout)
6731         result.Raise("Could not shutdown instance for full reboot")
6732         _ShutdownInstanceDisks(self, instance)
6734         self.LogInfo("Instance %s was already stopped, starting now",
6736       _StartInstanceDisks(self, instance, ignore_secondaries)
6737       result = self.rpc.call_instance_start(node_current,
6738                                             (instance, None, None), False)
6739       msg = result.fail_msg
# On restart failure the disks are torn back down before aborting.
6741         _ShutdownInstanceDisks(self, instance)
6742         raise errors.OpExecError("Could not start instance for"
6743                                  " full reboot: %s" % msg)
6745     self.cfg.MarkInstanceUp(instance.name)
6748 class LUInstanceShutdown(LogicalUnit):
6749   """Shutdown an instance.
6752   HPATH = "instance-stop"
6753   HTYPE = constants.HTYPE_INSTANCE
6756   def ExpandNames(self):
6757     self._ExpandAndLockInstance()
6759   def BuildHooksEnv(self):
6762     This runs on master, primary and secondary nodes of the instance.
6765     env = _BuildInstanceHookEnvByObject(self, self.instance)
6766     env["TIMEOUT"] = self.op.timeout
# The ``return env`` line is below the visible range.
6769   def BuildHooksNodes(self):
6770     """Build hooks nodes.
# Master plus all of the instance's nodes; the return is below this line.
6773     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6776   def CheckPrereq(self):
6777     """Check prerequisites.
6779     This checks that the instance is in the cluster.
6782     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6783     assert self.instance is not None, \
6784       "Cannot retrieve locked instance %s" % self.op.instance_name
6786     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6788     self.primary_offline = \
6789       self.cfg.GetNodeInfo(self.instance.primary_node).offline
# NOTE(review): an ``else:`` between the warning and the online check is
# presumably elided -- _CheckNodeOnline should only run when the offline
# primary is not being ignored.
6791     if self.primary_offline and self.op.ignore_offline_nodes:
6792       self.proc.LogWarning("Ignoring offline primary node")
6794       _CheckNodeOnline(self, self.instance.primary_node)
6796   def Exec(self, feedback_fn):
6797     """Shutdown the instance.
6800     instance = self.instance
6801     node_current = instance.primary_node
6802     timeout = self.op.timeout
# no_remember: stop without recording the new state in the config.
6804     if not self.op.no_remember:
6805       self.cfg.MarkInstanceDown(instance.name)
# NOTE(review): the offline-primary branch below logs only; the normal
# path issues the shutdown RPC.  An ``else:`` and an ``if msg:`` guard
# appear to be elided around these lines.
6807     if self.primary_offline:
6808       assert self.op.ignore_offline_nodes
6809       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6811       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6812       msg = result.fail_msg
# Shutdown failure is only a warning; disk teardown is still attempted.
6814         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6816       _ShutdownInstanceDisks(self, instance)
6819 class LUInstanceReinstall(LogicalUnit):
6820   """Reinstall an instance.
6823   HPATH = "instance-reinstall"
6824   HTYPE = constants.HTYPE_INSTANCE
6827   def ExpandNames(self):
6828     self._ExpandAndLockInstance()
6830   def BuildHooksEnv(self):
6833     This runs on master, primary and secondary nodes of the instance.
6836     return _BuildInstanceHookEnvByObject(self, self.instance)
6838   def BuildHooksNodes(self):
6839     """Build hooks nodes.
# Master plus all of the instance's nodes; the return is below this line.
6842     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6845   def CheckPrereq(self):
6846     """Check prerequisites.
6848     This checks that the instance is in the cluster and is not running.
6851     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6852     assert instance is not None, \
6853       "Cannot retrieve locked instance %s" % self.op.instance_name
# All nodes must be reachable: the OS create scripts may need to touch
# every disk replica.
6854     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6855                      " offline, cannot reinstall")
6856     for node in instance.secondary_nodes:
6857       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6858                        " cannot reinstall")
6860     if instance.disk_template == constants.DT_DISKLESS:
6861       raise errors.OpPrereqError("Instance '%s' has no disks" %
6862                                  self.op.instance_name,
# NOTE(review): the error-code argument of the raise above is elided.
6864     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
# An OS change request is validated against the primary node; otherwise
# the instance's current OS is reused (an ``else:`` is presumably elided).
6866     if self.op.os_type is not None:
6868       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6869       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6870       instance_os = self.op.os_type
6872       instance_os = instance.os
6874     nodelist = list(instance.all_nodes)
# Merge and validate OS parameter overrides; os_inst holds only the
# user-supplied (non-default) values.  The else-branch initializing
# os_inst when no osparams are given is below the visible range.
6876     if self.op.osparams:
6877       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6878       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6879       self.os_inst = i_osdict # the new dict (without defaults)
6883     self.instance = instance
6885   def Exec(self, feedback_fn):
6886     """Reinstall the instance.
6889     inst = self.instance
6891     if self.op.os_type is not None:
6892       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6893       inst.os = self.op.os_type
6894       # Write to configuration
6895       self.cfg.Update(inst, feedback_fn)
# Disks must be active for the OS scripts; force=None (no retry hint).
6897     _StartInstanceDisks(self, inst, None)
# NOTE(review): a try/finally around the OS-add RPC is presumably elided;
# the disk shutdown below should run regardless of the RPC outcome.
6899       feedback_fn("Running the instance OS create scripts...")
6900       # FIXME: pass debug option from opcode to backend
6901       result = self.rpc.call_instance_os_add(inst.primary_node,
6902                                              (inst, self.os_inst), True,
6903                                              self.op.debug_level)
6904       result.Raise("Could not install OS for instance %s on node %s" %
6905                    (inst.name, inst.primary_node))
6907       _ShutdownInstanceDisks(self, inst)
6910 class LUInstanceRecreateDisks(LogicalUnit):
6911   """Recreate an instance's missing disks.
6914   HPATH = "instance-recreate-disks"
6915   HTYPE = constants.HTYPE_INSTANCE
# Only size and mode may be changed while recreating a disk.
6918   _MODIFYABLE = frozenset([
6919     constants.IDISK_SIZE,
6920     constants.IDISK_MODE,
6923   # New or changed disk parameters may have different semantics
6924   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6925     constants.IDISK_ADOPT,
6927     # TODO: Implement support changing VG while recreating
6929     constants.IDISK_METAVG,
6932   def CheckArguments(self):
# Accept the deprecated plain-index list form and normalize it to the
# (index, params-dict) form used below.
6933     if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6934       # Normalize and convert deprecated list of disk indices
6935       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
6937     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
# NOTE(review): an ``if duplicates:`` guard is presumably elided here.
6939       raise errors.OpPrereqError("Some disks have been specified more than"
6940                                  " once: %s" % utils.CommaJoin(duplicates),
6943     for (idx, params) in self.op.disks:
6944       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6945       unsupported = frozenset(params.keys()) - self._MODIFYABLE
# NOTE(review): an ``if unsupported:`` guard is presumably elided here.
6947         raise errors.OpPrereqError("Parameters for disk %s try to change"
6948                                    " unmodifyable parameter(s): %s" %
6949                                    (idx, utils.CommaJoin(unsupported)),
6952   def ExpandNames(self):
6953     self._ExpandAndLockInstance()
6954     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
# NOTE(review): an ``if self.op.nodes:``/``else:`` pair around the two
# alternatives below (explicit target nodes vs. recalculated locks) is
# presumably elided.
6956       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6957       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6959       self.needed_locks[locking.LEVEL_NODE] = []
6960     self.needed_locks[locking.LEVEL_NODE_RES] = []
6962   def DeclareLocks(self, level):
6963     if level == locking.LEVEL_NODE:
6964       # if we replace the nodes, we only need to lock the old primary,
6965       # otherwise we need to lock all nodes for disk re-creation
6966       primary_only = bool(self.op.nodes)
6967       self._LockInstancesNodes(primary_only=primary_only)
6968     elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks computed above.
6970       self.needed_locks[locking.LEVEL_NODE_RES] = \
6971         self.needed_locks[locking.LEVEL_NODE][:]
6973   def BuildHooksEnv(self):
6976     This runs on master, primary and secondary nodes of the instance.
6979     return _BuildInstanceHookEnvByObject(self, self.instance)
6981   def BuildHooksNodes(self):
6982     """Build hooks nodes.
# Master plus all of the instance's nodes; the return is below this line.
6985     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6988   def CheckPrereq(self):
6989     """Check prerequisites.
6991     This checks that the instance is in the cluster and is not running.
6994     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6995     assert instance is not None, \
6996       "Cannot retrieve locked instance %s" % self.op.instance_name
# NOTE(review): an ``if self.op.nodes:`` guard is presumably elided; the
# node-count check only makes sense when replacement nodes were given.
6998       if len(self.op.nodes) != len(instance.all_nodes):
6999         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7000                                    " %d replacement nodes were specified" %
7001                                    (instance.name, len(instance.all_nodes),
7002                                     len(self.op.nodes)),
# Sanity: drbd instances have exactly 2 nodes, plain exactly 1.
7004       assert instance.disk_template != constants.DT_DRBD8 or \
7005           len(self.op.nodes) == 2
7006       assert instance.disk_template != constants.DT_PLAIN or \
7007           len(self.op.nodes) == 1
7008       primary_node = self.op.nodes[0]
7010       primary_node = instance.primary_node
7011     _CheckNodeOnline(self, primary_node)
7013     if instance.disk_template == constants.DT_DISKLESS:
7014       raise errors.OpPrereqError("Instance '%s' has no disks" %
7015                                  self.op.instance_name, errors.ECODE_INVAL)
7017     # if we replace nodes *and* the old primary is offline, we don't
# (comment continues on an elided line)
7019     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7020     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7021     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7022     if not (self.op.nodes and old_pnode.offline):
7023       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7024                           msg="cannot recreate disks")
# NOTE(review): an ``if self.op.disks:``/``else:`` pair around the two
# dict constructions below (explicit selection vs. all disks) is
# presumably elided.
7027       self.disks = dict(self.op.disks)
7029       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7031     maxidx = max(self.disks.keys())
7032     if maxidx >= len(instance.disks):
7033       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
# Partial recreation and node replacement are mutually exclusive.
7036     if (self.op.nodes and
7037         sorted(self.disks.keys()) != range(len(instance.disks))):
7038       raise errors.OpPrereqError("Can't recreate disks partially and"
7039                                  " change the nodes at the same time",
7042     self.instance = instance
7044   def Exec(self, feedback_fn):
7045     """Recreate the disks.
7048     instance = self.instance
7050     assert (self.owned_locks(locking.LEVEL_NODE) ==
7051             self.owned_locks(locking.LEVEL_NODE_RES))
7054     mods = [] # keeps track of needed changes
7056     for idx, disk in enumerate(instance.disks):
# NOTE(review): a try/except KeyError (skipping disks not selected, via
# the elided ``to_skip`` list used at the bottom) appears around this
# lookup in the full source; the guard lines are not visible here.
7058       changes = self.disks[idx]
7060         # Disk should not be recreated
# DRBD disks need fresh minors and a new logical_id when moving nodes.
7064       # update secondaries for disks, if needed
7065       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7066         # need to update the nodes and minors
7067         assert len(self.op.nodes) == 2
7068         assert len(disk.logical_id) == 6 # otherwise disk internals
# (comment continues on an elided line)
7070         (_, _, old_port, _, _, old_secret) = disk.logical_id
7071         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7072         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7073                   new_minors[0], new_minors[1], old_secret)
7074         assert len(disk.logical_id) == len(new_id)
# NOTE(review): the else-branch setting ``new_id = None`` for non-DRBD
# disks is presumably elided.
7078       mods.append((idx, new_id, changes))
7080     # now that we have passed all asserts above, we can apply the mods
7081     # in a single run (to avoid partial changes)
7082     for idx, new_id, changes in mods:
7083       disk = instance.disks[idx]
7084       if new_id is not None:
7085         assert disk.dev_type == constants.LD_DRBD8
7086         disk.logical_id = new_id
# NOTE(review): an ``if changes:`` guard is presumably elided here.
7088         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7089                     mode=changes.get(constants.IDISK_MODE, None))
7091     # change primary node, if needed
# NOTE(review): an ``if self.op.nodes:`` guard is presumably elided here.
7093       instance.primary_node = self.op.nodes[0]
7094       self.LogWarning("Changing the instance's nodes, you will have to"
7095                       " remove any disks left on the older nodes manually")
# Persist all accumulated changes in one config update, then recreate.
7098     self.cfg.Update(instance, feedback_fn)
7100     _CreateDisks(self, instance, to_skip=to_skip)
7103 class LUInstanceRename(LogicalUnit):
7104   """Rename an instance.
7107   HPATH = "instance-rename"
7108   HTYPE = constants.HTYPE_INSTANCE
7110   def CheckArguments(self):
# The IP-in-use check resolves the new name, so it needs name checking on.
7114     if self.op.ip_check and not self.op.name_check:
7115       # TODO: make the ip check more flexible and not depend on the name check
7116       raise errors.OpPrereqError("IP address check requires a name check",
7119   def BuildHooksEnv(self):
7122     This runs on master, primary and secondary nodes of the instance.
7125     env = _BuildInstanceHookEnvByObject(self, self.instance)
7126     env["INSTANCE_NEW_NAME"] = self.op.new_name
# The ``return env`` line is below the visible range.
7129   def BuildHooksNodes(self):
7130     """Build hooks nodes.
# Master plus all of the instance's nodes; the return is below this line.
7133     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7136   def CheckPrereq(self):
7137     """Check prerequisites.
7139     This checks that the instance is in the cluster and is not running.
7142     self.op.instance_name = _ExpandInstanceName(self.cfg,
7143                                                 self.op.instance_name)
7144     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7145     assert instance is not None
7146     _CheckNodeOnline(self, instance.primary_node)
7147     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7148                         msg="cannot rename")
7149     self.instance = instance
7151     new_name = self.op.new_name
# With name checking, resolve the new name and verify the resolution
# still matches what the user asked for.
7152     if self.op.name_check:
7153       hostname = netutils.GetHostname(name=new_name)
7154       if hostname.name != new_name:
7155         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7157       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7158         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7159                                     " same as given hostname '%s'") %
7160                                    (hostname.name, self.op.new_name),
7162       new_name = self.op.new_name = hostname.name
# Optional liveness probe of the new name's IP: it must be free.
7163       if (self.op.ip_check and
7164           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7165         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7166                                    (hostname.ip, new_name),
7167                                    errors.ECODE_NOTUNIQUE)
7169     instance_list = self.cfg.GetInstanceList()
7170     if new_name in instance_list and new_name != instance.name:
7171       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7172                                  new_name, errors.ECODE_EXISTS)
7174   def Exec(self, feedback_fn):
7175     """Rename the instance.
7178     inst = self.instance
7179     old_name = inst.name
# File-based instances also need their storage directory renamed.
7181     rename_file_storage = False
7182     if (inst.disk_template in constants.DTS_FILEBASED and
7183         self.op.new_name != inst.name):
7184       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7185       rename_file_storage = True
7187     self.cfg.RenameInstance(inst.name, self.op.new_name)
7188     # Change the instance lock. This is definitely safe while we hold the BGL.
7189     # Otherwise the new lock would have to be added in acquired mode.
7191     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7192     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7194     # re-read the instance from the configuration after rename
7195     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7197     if rename_file_storage:
7198       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7199       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7200                                                      old_file_storage_dir,
7201                                                      new_file_storage_dir)
7202       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7203                    " (but the instance has been renamed in Ganeti)" %
7204                    (inst.primary_node, old_file_storage_dir,
7205                     new_file_storage_dir))
7207     _StartInstanceDisks(self, inst, None)
# NOTE(review): a try/finally around the rename script RPC is presumably
# elided; the disk shutdown below should run regardless of the outcome,
# and the final ``return inst.name`` is below the visible range.
7209       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7210                                                  old_name, self.op.debug_level)
7211       msg = result.fail_msg
# Script failure is downgraded to a warning: the rename already happened
# in the Ganeti configuration.
7213         msg = ("Could not run OS rename script for instance %s on node %s"
7214                " (but the instance has been renamed in Ganeti): %s" %
7215                (inst.name, inst.primary_node, msg))
7216         self.proc.LogWarning(msg)
7218       _ShutdownInstanceDisks(self, inst)
7223 class LUInstanceRemove(LogicalUnit):
7224   """Remove an instance.
7227   HPATH = "instance-remove"
7228   HTYPE = constants.HTYPE_INSTANCE
7231   def ExpandNames(self):
7232     self._ExpandAndLockInstance()
7233     self.needed_locks[locking.LEVEL_NODE] = []
7234     self.needed_locks[locking.LEVEL_NODE_RES] = []
7235     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7237   def DeclareLocks(self, level):
7238     if level == locking.LEVEL_NODE:
7239       self._LockInstancesNodes()
7240     elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks computed above.
7242       self.needed_locks[locking.LEVEL_NODE_RES] = \
7243         self.needed_locks[locking.LEVEL_NODE][:]
7245   def BuildHooksEnv(self):
7248     This runs on master, primary and secondary nodes of the instance.
7251     env = _BuildInstanceHookEnvByObject(self, self.instance)
7252     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
# The ``return env`` line is below the visible range.
7255   def BuildHooksNodes(self):
7256     """Build hooks nodes.
# Pre-hooks run on the master only; post-hooks also on the (by then
# removed) instance's nodes.
7259     nl = [self.cfg.GetMasterNode()]
7260     nl_post = list(self.instance.all_nodes) + nl
7261     return (nl, nl_post)
7263   def CheckPrereq(self):
7264     """Check prerequisites.
7266     This checks that the instance is in the cluster.
7269     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7270     assert self.instance is not None, \
7271       "Cannot retrieve locked instance %s" % self.op.instance_name
7273   def Exec(self, feedback_fn):
7274     """Remove the instance.
7277     instance = self.instance
7278     logging.info("Shutting down instance %s on node %s",
7279                  instance.name, instance.primary_node)
7281     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7282                                              self.op.shutdown_timeout)
7283     msg = result.fail_msg
# NOTE(review): an ``if msg:`` guard is presumably elided -- the
# ignore_failures handling below applies only when shutdown failed.
7285       if self.op.ignore_failures:
7286         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7288         raise errors.OpExecError("Could not shutdown instance %s on"
7290                                  (instance.name, instance.primary_node, msg))
# Sanity: the removal must hold locks on every node of the instance.
7292     assert (self.owned_locks(locking.LEVEL_NODE) ==
7293             self.owned_locks(locking.LEVEL_NODE_RES))
7294     assert not (set(instance.all_nodes) -
7295                 self.owned_locks(locking.LEVEL_NODE)), \
7296       "Not owning correct locks"
7298     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
# Remove an instance's disks and configuration entry, and schedule its
# lock for removal.  With ignore_failures, disk-removal errors become
# warnings instead of aborting.
7301 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7302   """Utility function to remove an instance.
7305   logging.info("Removing block devices for instance %s", instance.name)
7307   if not _RemoveDisks(lu, instance):
7308     if not ignore_failures:
7309       raise errors.OpExecError("Can't remove instance's disks")
7310     feedback_fn("Warning: can't remove instance's disks")
7312   logging.info("Removing instance %s out of cluster config", instance.name)
7314   lu.cfg.RemoveInstance(instance.name)
7316   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7317     "Instance lock removal conflict"
7319   # Remove lock for the instance
7320   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7323 class LUInstanceQuery(NoHooksLU):
7324   """Logical unit for querying instances.
# Thin wrapper: all query logic is delegated to an _InstanceQuery helper.
7327   # pylint: disable=W0142
7330   def CheckArguments(self):
7331     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7332                              self.op.output_fields, self.op.use_locking)
7334   def ExpandNames(self):
7335     self.iq.ExpandNames(self)
7337   def DeclareLocks(self, level):
7338     self.iq.DeclareLocks(self, level)
7340   def Exec(self, feedback_fn):
# Old-style (list-of-rows) query result, for legacy clients.
7341     return self.iq.OldStyleQuery(self)
7344 class LUInstanceFailover(LogicalUnit):
7345   """Failover an instance.
7348   HPATH = "instance-failover"
7349   HTYPE = constants.HTYPE_INSTANCE
7352   def CheckArguments(self):
7353     """Check the arguments.
# getattr with a default: these opcode fields may be absent.
7356     self.iallocator = getattr(self.op, "iallocator", None)
7357     self.target_node = getattr(self.op, "target_node", None)
7359   def ExpandNames(self):
7360     self._ExpandAndLockInstance()
7362     if self.op.target_node is not None:
7363       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7365     self.needed_locks[locking.LEVEL_NODE] = []
7366     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7368     self.needed_locks[locking.LEVEL_NODE_RES] = []
7369     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
# The actual work is performed by a TLMigrateInstance tasklet configured
# for failover (elided keyword lines presumably include cleanup=False
# and failover=True).
7371     ignore_consistency = self.op.ignore_consistency
7372     shutdown_timeout = self.op.shutdown_timeout
7373     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7376                                        ignore_consistency=ignore_consistency,
7377                                        shutdown_timeout=shutdown_timeout,
7378                                        ignore_ipolicy=self.op.ignore_ipolicy)
7379     self.tasklets = [self._migrater]
7381   def DeclareLocks(self, level):
7382     if level == locking.LEVEL_NODE:
7383       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Externally-mirrored templates can fail over to any node: lock all nodes
# unless a target was given.  An ``else:`` (lock only primary + target)
# and the internal-mirror fallthrough are partially elided below.
7384       if instance.disk_template in constants.DTS_EXT_MIRROR:
7385         if self.op.target_node is None:
7386           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7388           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7389                                                    self.op.target_node]
7390         del self.recalculate_locks[locking.LEVEL_NODE]
7392         self._LockInstancesNodes()
7393     elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks computed above.
7395       self.needed_locks[locking.LEVEL_NODE_RES] = \
7396         self.needed_locks[locking.LEVEL_NODE][:]
7398   def BuildHooksEnv(self):
7401     This runs on master, primary and secondary nodes of the instance.
# NOTE(review): the ``env = {...}`` opening line is not visible here.
7404     instance = self._migrater.instance
7405     source_node = instance.primary_node
7406     target_node = self.op.target_node
7408       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7409       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7410       "OLD_PRIMARY": source_node,
7411       "NEW_PRIMARY": target_node,
# Internally-mirrored (DRBD) instances swap primary/secondary roles.
7414     if instance.disk_template in constants.DTS_INT_MIRROR:
7415       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7416       env["NEW_SECONDARY"] = source_node
7418       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7420     env.update(_BuildInstanceHookEnvByObject(self, instance))
7424   def BuildHooksNodes(self):
7425     """Build hooks nodes.
7428     instance = self._migrater.instance
7429     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7430     return (nl, nl + [instance.primary_node])
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and declare the node locks needed for migration.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # FIX: this second pair used to re-assign LEVEL_NODE, leaving the
    # LEVEL_NODE_RES locks undeclared even though DeclareLocks copies them;
    # it must initialize LEVEL_NODE_RES (cf. LUInstanceFailover.ExpandNames)
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Refine the node locks once the instance lock is held.

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          # externally-mirrored disks can migrate anywhere
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # internal mirror: primary and secondary swap roles
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and declare the locks needed for the move.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the lock set.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # moving works by raw data copy, so only plain/file disks are supported
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                     self.cfg.GetNodeGroup(node.group))
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    """Expand the node name and declare a shared lock on it.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    """Submit one migration job per primary instance on the node.

    """
    # Prepare jobs for migration instances
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
7779 class TLMigrateInstance(Tasklet):
7780 """Tasklet class for instance migration.
7783 @ivar live: whether the migration will be done live or non-live;
7784 this variable is initialized only after CheckPrereq has run
7785 @type cleanup: boolean
7786 @ivar cleanup: Whether we cleanup from a failed migration
7787 @type iallocator: string
7788 @ivar iallocator: The iallocator used to determine target_node
7789 @type target_node: string
7790 @ivar target_node: If given, the target_node to reallocate the instance to
7791 @type failover: boolean
7792 @ivar failover: Whether operation results in failover or migration
7793 @type fallback: boolean
7794 @ivar fallback: Whether fallback to failover is allowed if migration not
7796 @type ignore_consistency: boolean
7797 @ivar ignore_consistency: Whether we should ignore consistency between source
7799 @type shutdown_timeout: int
7800 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7801 @type ignore_ipolicy: bool
7802 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7807 _MIGRATION_POLL_INTERVAL = 1 # seconds
7808 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               allow_runtime_changes=True,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
               ignore_ipolicy=False):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters (see the class docstring for their meaning)
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes
7832 def CheckPrereq(self):
7833 """Check prerequisites.
7835 This checks that the instance is in the cluster.
7838 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7839 instance = self.cfg.GetInstanceInfo(instance_name)
7840 assert instance is not None
7841 self.instance = instance
7842 cluster = self.cfg.GetClusterInfo()
7844 if (not self.cleanup and
7845 not instance.admin_state == constants.ADMINST_UP and
7846 not self.failover and self.fallback):
7847 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7848 " switching to failover")
7849 self.failover = True
7851 if instance.disk_template not in constants.DTS_MIRRORED:
7856 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7857 " %s" % (instance.disk_template, text),
7860 if instance.disk_template in constants.DTS_EXT_MIRROR:
7861 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7863 if self.lu.op.iallocator:
7864 self._RunAllocator()
7866 # We set set self.target_node as it is required by
7868 self.target_node = self.lu.op.target_node
7870 # Check that the target node is correct in terms of instance policy
7871 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7872 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7873 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7874 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7875 ignore=self.ignore_ipolicy)
7877 # self.target_node is already populated, either directly or by the
7879 target_node = self.target_node
7880 if self.target_node == instance.primary_node:
7881 raise errors.OpPrereqError("Cannot migrate instance %s"
7882 " to its primary (%s)" %
7883 (instance.name, instance.primary_node))
7885 if len(self.lu.tasklets) == 1:
7886 # It is safe to release locks only when we're the only tasklet
7888 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7889 keep=[instance.primary_node, self.target_node])
7892 secondary_nodes = instance.secondary_nodes
7893 if not secondary_nodes:
7894 raise errors.ConfigurationError("No secondary node but using"
7895 " %s disk template" %
7896 instance.disk_template)
7897 target_node = secondary_nodes[0]
7898 if self.lu.op.iallocator or (self.lu.op.target_node and
7899 self.lu.op.target_node != target_node):
7901 text = "failed over"
7904 raise errors.OpPrereqError("Instances with disk template %s cannot"
7905 " be %s to arbitrary nodes"
7906 " (neither an iallocator nor a target"
7907 " node can be passed)" %
7908 (instance.disk_template, text),
7910 nodeinfo = self.cfg.GetNodeInfo(target_node)
7911 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7912 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7913 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7914 ignore=self.ignore_ipolicy)
7916 i_be = cluster.FillBE(instance)
7918 # check memory requirements on the secondary node
7919 if (not self.cleanup and
7920 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7921 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7922 "migrating instance %s" %
7924 i_be[constants.BE_MINMEM],
7925 instance.hypervisor)
7927 self.lu.LogInfo("Not checking memory on the secondary node as"
7928 " instance will not be started")
7930 # check if failover must be forced instead of migration
7931 if (not self.cleanup and not self.failover and
7932 i_be[constants.BE_ALWAYS_FAILOVER]):
7934 self.lu.LogInfo("Instance configured to always failover; fallback"
7936 self.failover = True
7938 raise errors.OpPrereqError("This instance has been configured to"
7939 " always failover, please allow failover",
7942 # check bridge existance
7943 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7945 if not self.cleanup:
7946 _CheckNodeNotDrained(self.lu, target_node)
7947 if not self.failover:
7948 result = self.rpc.call_instance_migratable(instance.primary_node,
7950 if result.fail_msg and self.fallback:
7951 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7953 self.failover = True
7955 result.Raise("Can't migrate, please use failover",
7956 prereq=True, ecode=errors.ECODE_STATE)
7958 assert not (self.failover and self.cleanup)
7960 if not self.failover:
7961 if self.lu.op.live is not None and self.lu.op.mode is not None:
7962 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7963 " parameters are accepted",
7965 if self.lu.op.live is not None:
7967 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7969 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7970 # reset the 'live' parameter to None so that repeated
7971 # invocations of CheckPrereq do not raise an exception
7972 self.lu.op.live = None
7973 elif self.lu.op.mode is None:
7974 # read the default value from the hypervisor
7975 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7976 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7978 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7980 # Failover is never live
7983 if not (self.failover or self.cleanup):
7984 remote_info = self.rpc.call_instance_info(instance.primary_node,
7986 instance.hypervisor)
7987 remote_info.Raise("Error checking instance on node %s" %
7988 instance.primary_node)
7989 instance_running = bool(remote_info.payload)
7990 if instance_running:
7991 self.current_mem = int(remote_info.payload["memory"])
7993 def _RunAllocator(self):
7994 """Run the allocator based on input opcode.
7997 # FIXME: add a self.ignore_ipolicy option
7998 ial = IAllocator(self.cfg, self.rpc,
7999 mode=constants.IALLOCATOR_MODE_RELOC,
8000 name=self.instance_name,
8001 # TODO See why hail breaks with a single node below
8002 relocate_from=[self.instance.primary_node,
8003 self.instance.primary_node],
8006 ial.Run(self.lu.op.iallocator)
8009 raise errors.OpPrereqError("Can't compute nodes using"
8010 " iallocator '%s': %s" %
8011 (self.lu.op.iallocator, ial.info),
8013 if len(ial.result) != ial.required_nodes:
8014 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8015 " of nodes (%s), required %s" %
8016 (self.lu.op.iallocator, len(ial.result),
8017 ial.required_nodes), errors.ECODE_FAULT)
8018 self.target_node = ial.result[0]
8019 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8020 self.instance_name, self.lu.op.iallocator,
8021 utils.CommaJoin(ial.result))
8023 def _WaitUntilSync(self):
8024 """Poll with custom rpc for disk sync.
8026 This uses our own step-based rpc call.
8029 self.feedback_fn("* wait until resync is done")
8033 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8035 self.instance.disks)
8037 for node, nres in result.items():
8038 nres.Raise("Cannot resync disks on node %s" % node)
8039 node_done, node_percent = nres.payload
8040 all_done = all_done and node_done
8041 if node_percent is not None:
8042 min_percent = min(min_percent, node_percent)
8044 if min_percent < 100:
8045 self.feedback_fn(" - progress: %.1f%%" % min_percent)
  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      # make sure the config knows which physical ID to use on this node
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)
  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)
8071 def _GoReconnect(self, multimaster):
8072 """Reconnect to the network.
8078 msg = "single-master"
8079 self.feedback_fn("* changing disks into %s mode" % msg)
8080 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8081 self.instance.disks,
8082 self.instance.name, multimaster)
8083 for node, nres in result.items():
8084 nres.Raise("Cannot change disks config on node %s" % node)
8086 def _ExecCleanup(self):
8087 """Try to cleanup after a failed migration.
8089 The cleanup is done by:
8090 - check that the instance is running only on one node
8091 (and update the config if needed)
8092 - change disks on its secondary node to secondary
8093 - wait until disks are fully synchronized
8094 - disconnect from the network
8095 - change disks into single-master mode
8096 - wait again until disks are fully synchronized
8099 instance = self.instance
8100 target_node = self.target_node
8101 source_node = self.source_node
8103 # check running on only one node
8104 self.feedback_fn("* checking where the instance actually runs"
8105 " (if this hangs, the hypervisor might be in"
8107 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8108 for node, result in ins_l.items():
8109 result.Raise("Can't contact node %s" % node)
8111 runningon_source = instance.name in ins_l[source_node].payload
8112 runningon_target = instance.name in ins_l[target_node].payload
8114 if runningon_source and runningon_target:
8115 raise errors.OpExecError("Instance seems to be running on two nodes,"
8116 " or the hypervisor is confused; you will have"
8117 " to ensure manually that it runs only on one"
8118 " and restart this operation")
8120 if not (runningon_source or runningon_target):
8121 raise errors.OpExecError("Instance does not seem to be running at all;"
8122 " in this case it's safer to repair by"
8123 " running 'gnt-instance stop' to ensure disk"
8124 " shutdown, and then restarting it")
8126 if runningon_target:
8127 # the migration has actually succeeded, we need to update the config
8128 self.feedback_fn("* instance running on secondary node (%s),"
8129 " updating config" % target_node)
8130 instance.primary_node = target_node
8131 self.cfg.Update(instance, self.feedback_fn)
8132 demoted_node = source_node
8134 self.feedback_fn("* instance confirmed to be running on its"
8135 " primary node (%s)" % source_node)
8136 demoted_node = target_node
8138 if instance.disk_template in constants.DTS_INT_MIRROR:
8139 self._EnsureSecondary(demoted_node)
8141 self._WaitUntilSync()
8142 except errors.OpExecError:
8143 # we ignore here errors, since if the device is standalone, it
8144 # won't be able to sync
8146 self._GoStandalone()
8147 self._GoReconnect(False)
8148 self._WaitUntilSync()
8150 self.feedback_fn("* done")
8152 def _RevertDiskStatus(self):
8153 """Try to revert the disk status after a failed migration.
8156 target_node = self.target_node
8157 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8161 self._EnsureSecondary(target_node)
8162 self._GoStandalone()
8163 self._GoReconnect(False)
8164 self._WaitUntilSync()
8165 except errors.OpExecError, err:
8166 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8167 " please try to recover the instance manually;"
8168 " error '%s'" % str(err))
8170 def _AbortMigration(self):
8171 """Call the hypervisor code to abort a started migration.
8174 instance = self.instance
8175 target_node = self.target_node
8176 source_node = self.source_node
8177 migration_info = self.migration_info
8179 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8183 abort_msg = abort_result.fail_msg
8185 logging.error("Aborting migration failed on target node %s: %s",
8186 target_node, abort_msg)
8187 # Don't raise an exception here, as we stil have to try to revert the
8188 # disk status, even if this step failed.
8190 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8191 instance, False, self.live)
8192 abort_msg = abort_result.fail_msg
8194 logging.error("Aborting migration failed on source node %s: %s",
8195 source_node, abort_msg)
8197 def _ExecMigration(self):
8198 """Migrate an instance.
8200 The migrate is done by:
8201 - change the disks into dual-master mode
8202 - wait until disks are fully synchronized again
8203 - migrate the instance
8204 - change disks on the new secondary node (the old primary) to secondary
8205 - wait until disks are fully synchronized
8206 - change disks into single-master mode
8209 instance = self.instance
8210 target_node = self.target_node
8211 source_node = self.source_node
8213 # Check for hypervisor version mismatch and warn the user.
8214 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8215 None, [self.instance.hypervisor])
8216 for ninfo in nodeinfo.values():
8217 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8219 (_, _, (src_info, )) = nodeinfo[source_node].payload
8220 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8222 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8223 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8224 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8225 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8226 if src_version != dst_version:
8227 self.feedback_fn("* warning: hypervisor version mismatch between"
8228 " source (%s) and target (%s) node" %
8229 (src_version, dst_version))
8231 self.feedback_fn("* checking disk consistency between source and target")
8232 for dev in instance.disks:
8233 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8234 raise errors.OpExecError("Disk %s is degraded or not fully"
8235 " synchronized on target node,"
8236 " aborting migration" % dev.iv_name)
8238 if self.current_mem > self.tgt_free_mem:
8239 if not self.allow_runtime_changes:
8240 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8241 " free memory to fit instance %s on target"
8242 " node %s (have %dMB, need %dMB)" %
8243 (instance.name, target_node,
8244 self.tgt_free_mem, self.current_mem))
8245 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8246 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8249 rpcres.Raise("Cannot modify instance runtime memory")
8251 # First get the migration information from the remote node
8252 result = self.rpc.call_migration_info(source_node, instance)
8253 msg = result.fail_msg
8255 log_err = ("Failed fetching source migration information from %s: %s" %
8257 logging.error(log_err)
8258 raise errors.OpExecError(log_err)
8260 self.migration_info = migration_info = result.payload
8262 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8263 # Then switch the disks to master/master mode
8264 self._EnsureSecondary(target_node)
8265 self._GoStandalone()
8266 self._GoReconnect(True)
8267 self._WaitUntilSync()
8269 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8270 result = self.rpc.call_accept_instance(target_node,
8273 self.nodes_ip[target_node])
8275 msg = result.fail_msg
8277 logging.error("Instance pre-migration failed, trying to revert"
8278 " disk status: %s", msg)
8279 self.feedback_fn("Pre-migration failed, aborting")
8280 self._AbortMigration()
8281 self._RevertDiskStatus()
8282 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8283 (instance.name, msg))
8285 self.feedback_fn("* migrating instance to %s" % target_node)
8286 result = self.rpc.call_instance_migrate(source_node, instance,
8287 self.nodes_ip[target_node],
8289 msg = result.fail_msg
8291 logging.error("Instance migration failed, trying to revert"
8292 " disk status: %s", msg)
8293 self.feedback_fn("Migration failed, aborting")
8294 self._AbortMigration()
8295 self._RevertDiskStatus()
8296 raise errors.OpExecError("Could not migrate instance %s: %s" %
8297 (instance.name, msg))
8299 self.feedback_fn("* starting memory transfer")
8300 last_feedback = time.time()
8302 result = self.rpc.call_instance_get_migration_status(source_node,
8304 msg = result.fail_msg
8305 ms = result.payload # MigrationStatus instance
8306 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8307 logging.error("Instance migration failed, trying to revert"
8308 " disk status: %s", msg)
8309 self.feedback_fn("Migration failed, aborting")
8310 self._AbortMigration()
8311 self._RevertDiskStatus()
8312 raise errors.OpExecError("Could not migrate instance %s: %s" %
8313 (instance.name, msg))
8315 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8316 self.feedback_fn("* memory transfer complete")
8319 if (utils.TimeoutExpired(last_feedback,
8320 self._MIGRATION_FEEDBACK_INTERVAL) and
8321 ms.transferred_ram is not None):
8322 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8323 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8324 last_feedback = time.time()
8326 time.sleep(self._MIGRATION_POLL_INTERVAL)
8328 result = self.rpc.call_instance_finalize_migration_src(source_node,
8332 msg = result.fail_msg
8334 logging.error("Instance migration succeeded, but finalization failed"
8335 " on the source node: %s", msg)
8336 raise errors.OpExecError("Could not finalize instance migration: %s" %
8339 instance.primary_node = target_node
8341 # distribute new instance config to the other nodes
8342 self.cfg.Update(instance, self.feedback_fn)
8344 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8348 msg = result.fail_msg
8350 logging.error("Instance migration succeeded, but finalization failed"
8351 " on the target node: %s", msg)
8352 raise errors.OpExecError("Could not finalize instance migration: %s" %
8355 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8356 self._EnsureSecondary(source_node)
8357 self._WaitUntilSync()
8358 self._GoStandalone()
8359 self._GoReconnect(False)
8360 self._WaitUntilSync()
8362 # If the instance's disk template is `rbd' and there was a successful
8363 # migration, unmap the device from the source node.
8364 if self.instance.disk_template == constants.DT_RBD:
8365 disks = _ExpandCheckDisks(instance, instance.disks)
8366 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8368 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8369 msg = result.fail_msg
8371 logging.error("Migration was successful, but couldn't unmap the"
8372 " block device %s on source node %s: %s",
8373 disk.iv_name, source_node, msg)
8374 logging.error("You need to unmap the device %s manually on %s",
8375 disk.iv_name, source_node)
8377 self.feedback_fn("* done")
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    @raise errors.OpExecError: if the shutdown, disk deactivation,
      disk activation or startup on the target node fails

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
    source_node = instance.primary_node
    target_node = self.target_node

    # Disk consistency is only checked for instances that should be
    # running; a degraded disk on an offline primary is only warned about
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
      self.feedback_fn("* not checking disk consistency as instance is not"

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
      # a failed shutdown is tolerated only when consistency checks are
      # disabled or the primary node is already offline
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
        # roll back the disk activation before aborting
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
      msg = result.fail_msg
        # startup failed: deactivate disks again before reporting the error
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
  def Exec(self, feedback_fn):
    """Perform the migration.

    Dispatches to failover, cleanup or live migration, depending on how
    this task was configured.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    # collect the secondary IPs of source and target node
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()

      feedback_fn("Migrating instance %s" % self.instance.name)

        return self._ExecCleanup()

        return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  # a device that must also exist on secondaries forces creation for the
  # whole subtree below it
  if device.CreateOnSecondary():

    # children are created before the device itself
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,

  if not force_create:

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution
  @raise errors.OpExecError: (via result.Raise) if the backend call fails

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  # remember the physical id returned by the backend, if none was set yet
  if device.physical_id is None:
    device.physical_id = result.payload
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
    # each generated name is a cluster-wide unique ID (allocated through
    # the config, scoped by the execution context id) plus a suffix
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
def _ComputeLDParams(disk_template, disk_params):
  """Computes Logical Disk parameters from Disk Template parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters; dict(template_name -> parameters)
  @rtype: list(dict)
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order.
  @raise errors.ProgrammerError: if the disk template is unknown

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)

  dt_params = disk_params[disk_template]
  if disk_template == constants.DT_DRBD8:
      # DRBD device parameters (first node of the hierarchy), derived from
      # the template-level DRBD_* settings
      constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
      constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
      constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
      constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
      constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
      constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
      constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
      constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
      constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
      constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
      constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
      constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],

    objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
    result.append(drbd_params)

    # parameters for the data LV child
      constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],

    objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
    result.append(data_params)

    # parameters for the metadata LV child
      constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],

    objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
    result.append(meta_params)

  elif (disk_template == constants.DT_FILE or
        disk_template == constants.DT_SHARED_FILE):
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])

  elif disk_template == constants.DT_PLAIN:
      constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],

    objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
    result.append(params)

  elif disk_template == constants.DT_BLOCK:
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])

  elif disk_template == constants.DT_RBD:
      constants.LDP_POOL: dt_params[constants.RBD_POOL]

    objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
    result.append(params)
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
  """Generate a drbd8 device complete with its children.

  Allocates a TCP port and a shared secret through the config, then
  builds the two LV children (data and metadata) and the DRBD8 device
  on top of them.

  """
  # exactly one VG/name pair for the data LV and one for the metadata LV
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  # the data child holds the instance payload (full size) ...
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
  # ... while the metadata child has a fixed size of DRBD_META_SIZE
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params=drbd_params)
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn, disk_params):
  """Generate the entire disk layout for a given template type.

  Builds one L{objects.Disk} per entry of C{disk_info}, numbered from
  C{base_index} on, according to C{template_name}.

  @raise errors.ProgrammerError: if the template/secondary-node
    combination is inconsistent or the template is unknown

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  ld_params = _ComputeLDParams(template_name, disk_params)
  if template_name == constants.DT_DISKLESS:
  elif template_name == constants.DT_PLAIN:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      # each disk may override the cluster default volume group
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    drbd_params, data_params, meta_params = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # one minor per disk is needed on each of the two nodes
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # two LV names per disk: one for data, one for metadata
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1],
                                      drbd_params, data_params, meta_params)
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      # block devices are adopted, never created: the logical id points
      # at the manually-managed device path given by IDISK_ADOPT
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_RBD:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".rbd.disk%d" % (base_index + i)
                                      for i in range(disk_count)])

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_RBD,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=("rbd", names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)

    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8811 def _GetInstanceInfoText(instance):
8812 """Compute that text that should be added to the disk's metadata.
8815 return "originstname+%s" % instance.name
8818 def _CalcEta(time_taken, written, total_size):
8819 """Calculates the ETA based on size written and total size.
8821 @param time_taken: The time taken so far
8822 @param written: amount written so far
8823 @param total_size: The total size of data to be written
8824 @return: The remaining time in seconds
8827 avg_time = time_taken / float(written)
8828 return (total_size - written) * avg_time
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  # sync is paused for the duration of the wipe to avoid competing I/O
  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
      logging.warn("pause-sync of instance %s for disks %d failed",

    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      start_time = time.time()
      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        # report progress at most once a minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))

  logging.info("Resume sync of instance %s disks", instance.name)

  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

  for idx, success in enumerate(result.payload):
      # a failed resume is only warned about; the wipe itself succeeded
      lu.LogWarning("Resume sync of disk %d failed, please have a"
                    " look at the status and troubleshoot the issue", idx)
      logging.warn("resume-sync of instance %s for disks %d failed",
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)

  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  # file-based disks need their containing directory created first
  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      # the device is only force-created (and force-opened) on the
      # primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks

  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  for device in instance.disks:
      edata = [(target_node, device)]
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
        # removal failures are warned about but do not abort the loop
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  # file-based instances also need their storage directory removed
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template, used to pick the size formula
  @param disks: list of disk dicts (IDISK_* keys)
  @raise errors.ProgrammerError: if the disk template is unknown

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
      # NOTE(review): the default lookup below uses the literal constant
      # constants.IDISK_VG as the key instead of disk[constants.IDISK_VG],
      # so per-VG sizes would never accumulate across disks -- this looks
      # like a bug; TODO confirm and fix
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload

  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template, used to pick the size formula
  @param disks: list of disk dicts (IDISK_* keys)
  @raise errors.ProgrammerError: if the disk template is unknown

  """
  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    constants.DT_RBD: 0,

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9055 def _FilterVmNodes(lu, nodenames):
9056 """Filters out non-vm_capable nodes from a list.
9058 @type lu: L{LogicalUnit}
9059 @param lu: the logical unit for which we check
9060 @type nodenames: list
9061 @param nodenames: the list of nodes on which we should check
9063 @return: the list of vm-capable nodes
9066 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9067 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  # non-vm_capable nodes cannot validate hypervisor parameters
  nodenames = _FilterVmNodes(lu, nodenames)

  # validate the full parameter set: cluster-level defaults for this
  # hypervisor, overridden by the given hvparams
  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
9130 class LUInstanceCreate(LogicalUnit):
9131 """Create an instance.
9134 HPATH = "instance-add"
9135 HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    Validates and normalizes the opcode parameters for instance
    creation, per creation mode (create, import, remote import).

    @raise errors.OpPrereqError: if any argument combination is invalid

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",

      # adoption is incompatible with iallocator and import, and only
      # some disk templates support it
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # was exactly the same
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",

        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      # without a source node, the export must be searched for on all
      # nodes, hence the ALL_SET lock
      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          # relative paths are anchored in the cluster export directory
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    Fills in C{self.op.pnode} (and C{self.op.snode}, if two nodes are
    required) from the iallocator's result.

    @raise errors.OpPrereqError: if the iallocator fails or returns an
      unexpected number of nodes

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     hypervisor=self.op.hypervisor,

    ial.Run(self.op.iallocator)

      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    # mirrored templates need a second (secondary) node
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
      "ADD_MODE": self.op.mode,
    # import mode additionally exposes the source of the instance
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      hypervisor_name=self.op.hypervisor,
# Purpose: hooks run on the master plus the instance's primary and
# secondary nodes.  NOTE(review): the trailing "return (nl, nl)" line was
# elided from this listing (gap after embedded line 9426).
9422 def BuildHooksNodes(self):
9423 """Build hooks nodes.
9426 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
# NOTE(review): elided listing — several control-flow lines are missing
# (the "continue"/"break" inside the export-search loop, the final
# "return export_info", blank/docstring lines).  Annotated as-is.
# Purpose: locate and parse the export information for an import.  If no
# source node was given, every locked node's export list is searched for
# the (relative) source path; self.op.src_node/src_path are then fixed up
# to the concrete values.
9429 def _ReadExportInfo(self):
9430 """Reads the export information from disk.
9432 It will override the opcode source node and path with the actual
9433 information, if these two were not specified before.
9435 @return: the export information
9438 assert self.op.mode == constants.INSTANCE_IMPORT
9440 src_node = self.op.src_node
9441 src_path = self.op.src_path
# No explicit source node: search all locked nodes for the export.
9443 if src_node is None:
9444 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9445 exp_list = self.rpc.call_export_list(locked_nodes)
9447 for node in exp_list:
# Skip nodes whose export listing RPC failed (a "continue" presumably
# followed here in the original).
9448 if exp_list[node].fail_msg:
9450 if src_path in exp_list[node].payload:
# Found: pin the node and turn the relative path into an absolute one
# under EXPORT_DIR.
9452 self.op.src_node = src_node = node
9453 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
# Raised when the loop above found no matching export.
9457 raise errors.OpPrereqError("No export found for relative path %s" %
9458 src_path, errors.ECODE_INVAL)
9460 _CheckNodeOnline(self, src_node)
9461 result = self.rpc.call_export_info(src_node, src_path)
9462 result.Raise("No export or invalid export found in dir %s" % src_path)
# The export metadata is an INI-style file; parse and validate it.
9464 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9465 if not export_info.has_section(constants.INISECT_EXP):
9466 raise errors.ProgrammerError("Corrupted export config",
9467 errors.ECODE_ENVIRON)
9469 ei_version = export_info.get(constants.INISECT_EXP, "version")
9470 if (int(ei_version) != constants.EXPORT_VERSION):
9471 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9472 (ei_version, constants.EXPORT_VERSION),
9473 errors.ECODE_ENVIRON)
# NOTE(review): elided listing (missing guard lines such as
# "if self.op.os_type is None:", "disks = []", "nics = []", several
# error-code arguments).  Annotated as-is.
# Purpose: fill opcode fields that the user did NOT specify from the
# export metadata (einfo): os type, disk template, disks, nics, tags,
# hypervisor, and the hv/be/os parameter dicts — never overriding
# explicit user-supplied values.
9476 def _ReadExportParams(self, einfo):
9477 """Use export parameters as defaults.
9479 In case the opcode doesn't specify (as in override) some instance
9480 parameters, then try to use them from the export information, if
9484 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9486 if self.op.disk_template is None:
9487 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9488 self.op.disk_template = einfo.get(constants.INISECT_INS,
9490 if self.op.disk_template not in constants.DISK_TEMPLATES:
9491 raise errors.OpPrereqError("Disk template specified in configuration"
9492 " file is not one of the allowed values:"
9493 " %s" % " ".join(constants.DISK_TEMPLATES))
# else-branch: neither opcode nor export knows the disk template.
9495 raise errors.OpPrereqError("No disk template specified and the export"
9496 " is missing the disk_template information",
# Rebuild the disk list from diskN_size options in the export.
9499 if not self.op.disks:
9501 # TODO: import the disk iv_name too
9502 for idx in range(constants.MAX_DISKS):
9503 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9504 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9505 disks.append({constants.IDISK_SIZE: disk_sz})
9506 self.op.disks = disks
9507 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9508 raise errors.OpPrereqError("No disk info specified and the export"
9509 " is missing the disk information",
# Rebuild NICs from nicN_mac / nicN_<param> options.
9512 if not self.op.nics:
9514 for idx in range(constants.MAX_NICS):
9515 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9517 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9518 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9525 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9526 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9528 if (self.op.hypervisor is None and
9529 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9530 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9532 if einfo.has_section(constants.INISECT_HYP):
9533 # use the export parameters but do not override the ones
9534 # specified by the user
9535 for name, value in einfo.items(constants.INISECT_HYP):
9536 if name not in self.op.hvparams:
9537 self.op.hvparams[name] = value
9539 if einfo.has_section(constants.INISECT_BEP):
9540 # use the parameters, without overriding
9541 for name, value in einfo.items(constants.INISECT_BEP):
9542 if name not in self.op.beparams:
9543 self.op.beparams[name] = value
9544 # Compatibility for the old "memory" be param
9545 if name == constants.BE_MEMORY:
9546 if constants.BE_MAXMEM not in self.op.beparams:
9547 self.op.beparams[constants.BE_MAXMEM] = value
9548 if constants.BE_MINMEM not in self.op.beparams:
9549 self.op.beparams[constants.BE_MINMEM] = value
# else-branch of the INISECT_BEP section check: old exports kept the
# backend parameters in the main instance section.
9551 # try to read the parameters old style, from the main section
9552 for name in constants.BES_PARAMETERS:
9553 if (name not in self.op.beparams and
9554 einfo.has_option(constants.INISECT_INS, name)):
9555 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9557 if einfo.has_section(constants.INISECT_OSP):
9558 # use the parameters, without overriding
9559 for name, value in einfo.items(constants.INISECT_OSP):
9560 if name not in self.op.osparams:
9561 self.op.osparams[name] = value
# Purpose: strip from the opcode every hv/be/nic/os parameter whose value
# equals the cluster default, so only genuine overrides are stored with
# the instance (used with --identify-defaults on import).
# NOTE(review): the "del nic[name]" line of the NIC loop was elided from
# this listing (gap after embedded line 9581).
9563 def _RevertToDefaults(self, cluster):
9564 """Revert the instance parameters to the default values.
# Hypervisor parameters equal to the cluster/os default are dropped.
9568 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9569 for name in self.op.hvparams.keys():
9570 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9571 del self.op.hvparams[name]
# Backend parameters.
9573 be_defs = cluster.SimpleFillBE({})
9574 for name in self.op.beparams.keys():
9575 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9576 del self.op.beparams[name]
# NIC parameters, per NIC.
9578 nic_defs = cluster.SimpleFillNIC({})
9579 for nic in self.op.nics:
9580 for name in constants.NICS_PARAMETERS:
9581 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
# OS parameters.
9584 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9585 for name in self.op.osparams.keys():
9586 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9587 del self.op.osparams[name]
# Purpose: compute self.instance_file_storage_dir for file-based disk
# templates as <cluster storage dir>/<optional user dir>/<instance name>;
# stays None for non-file templates.
# NOTE(review): elided listing — the "joinargs = []" initialization and
# the "else:" before the GetFileStorageDir assignment are missing (gaps
# after embedded lines 9596 and 9600).
9589 def _CalculateFileStorageDir(self):
9590 """Calculate final instance file storage dir.
9593 # file storage dir calculation/check
9594 self.instance_file_storage_dir = None
9595 if self.op.disk_template in constants.DTS_FILEBASED:
9596 # build the full file storage dir path
# Shared-file templates use the cluster's shared storage dir, plain
# file templates the regular one.
9599 if self.op.disk_template == constants.DT_SHARED_FILE:
9600 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9602 get_fsd_fn = self.cfg.GetFileStorageDir
9604 cfg_storagedir = get_fsd_fn()
9605 if not cfg_storagedir:
9606 raise errors.OpPrereqError("Cluster file storage dir not defined")
9607 joinargs.append(cfg_storagedir)
9609 if self.op.file_storage_dir is not None:
9610 joinargs.append(self.op.file_storage_dir)
# Final path component is always the instance name.
9612 joinargs.append(self.op.instance_name)
9614 # pylint: disable=W0142
9615 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
# NOTE(review): heavily elided listing — many lines are missing (the
# "self.nics = []"/"self.disks = []" initializations, several "else:"
# branches, "try:" headers, error-code arguments, "if delta:" guards,
# dict openings such as "nicparams = {"/"new_disk = {"/"ispec = {", the
# "if pnode.offline:"/"if pnode.drained:" guards, etc.).  The code is
# annotated in place, not reconstructed.
# Purpose: full prerequisite validation for instance creation — import
# metadata, hypervisor/backend/os parameters, NICs, disks, node checks,
# instance-policy checks, free-space/memory checks; also runs the
# iallocator and releases node locks that are no longer needed.
9617 def CheckPrereq(self): # pylint: disable=R0914
9618 """Check prerequisites.
9621 self._CalculateFileStorageDir()
# Import mode: read and apply export metadata first.
9623 if self.op.mode == constants.INSTANCE_IMPORT:
9624 export_info = self._ReadExportInfo()
9625 self._ReadExportParams(export_info)
9627 if (not self.cfg.GetVGName() and
9628 self.op.disk_template not in constants.DTS_NOT_LVM):
9629 raise errors.OpPrereqError("Cluster does not support lvm-based"
9630 " instances", errors.ECODE_STATE)
9632 if (self.op.hypervisor is None or
9633 self.op.hypervisor == constants.VALUE_AUTO):
9634 self.op.hypervisor = self.cfg.GetHypervisorType()
9636 cluster = self.cfg.GetClusterInfo()
9637 enabled_hvs = cluster.enabled_hypervisors
9638 if self.op.hypervisor not in enabled_hvs:
9639 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9640 " cluster (%s)" % (self.op.hypervisor,
9641 ",".join(enabled_hvs)),
9644 # Check tag validity
9645 for tag in self.op.tags:
9646 objects.TaggableObject.ValidateTag(tag)
9648 # check hypervisor parameter syntax (locally)
9649 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9650 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9652 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9653 hv_type.CheckParameterSyntax(filled_hvp)
9654 self.hv_full = filled_hvp
9655 # check that we don't specify global parameters on an instance
9656 _CheckGlobalHvParams(self.op.hvparams)
9658 # fill and remember the beparams dict
9659 default_beparams = cluster.beparams[constants.PP_DEFAULT]
# Resolve "auto" backend-parameter values from the cluster defaults.
# (Python 2: dict.iteritems.)
9660 for param, value in self.op.beparams.iteritems():
9661 if value == constants.VALUE_AUTO:
9662 self.op.beparams[param] = default_beparams[param]
9663 objects.UpgradeBeParams(self.op.beparams)
9664 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9665 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9667 # build os parameters
9668 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9670 # now that hvp/bep are in final format, let's reset to defaults,
9672 if self.op.identify_defaults:
9673 self._RevertToDefaults(cluster)
# --- NIC validation and construction ---
9677 for idx, nic in enumerate(self.op.nics):
9678 nic_mode_req = nic.get(constants.INIC_MODE, None)
9679 nic_mode = nic_mode_req
9680 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9681 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9683 # in routed mode, for the first nic, the default ip is 'auto'
9684 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9685 default_ip_mode = constants.VALUE_AUTO
9687 default_ip_mode = constants.VALUE_NONE
9689 # ip validity checks
9690 ip = nic.get(constants.INIC_IP, default_ip_mode)
9691 if ip is None or ip.lower() == constants.VALUE_NONE:
9693 elif ip.lower() == constants.VALUE_AUTO:
9694 if not self.op.name_check:
9695 raise errors.OpPrereqError("IP address set to auto but name checks"
9696 " have been skipped",
9698 nic_ip = self.hostname1.ip
9700 if not netutils.IPAddress.IsValid(ip):
9701 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9705 # TODO: check the ip address for uniqueness
9706 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9707 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9710 # MAC address verification
9711 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9712 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9713 mac = utils.NormalizeAndValidateMac(mac)
# Reserve the explicit MAC so concurrent jobs cannot grab it (a "try:"
# header preceding this call was elided from the listing).
9716 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9717 except errors.ReservationError:
9718 raise errors.OpPrereqError("MAC address %s already in use"
9719 " in cluster" % mac,
9720 errors.ECODE_NOTUNIQUE)
9722 # Build nic parameters
9723 link = nic.get(constants.INIC_LINK, None)
9724 if link == constants.VALUE_AUTO:
9725 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9728 nicparams[constants.NIC_MODE] = nic_mode
9730 nicparams[constants.NIC_LINK] = link
9732 check_params = cluster.SimpleFillNIC(nicparams)
9733 objects.NIC.CheckParameterSyntax(check_params)
9734 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9736 # disk checks/pre-build
9737 default_vg = self.cfg.GetVGName()
9739 for disk in self.op.disks:
9740 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9741 if mode not in constants.DISK_ACCESS_SET:
9742 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9743 mode, errors.ECODE_INVAL)
9744 size = disk.get(constants.IDISK_SIZE, None)
9746 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9749 except (TypeError, ValueError):
9750 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9753 data_vg = disk.get(constants.IDISK_VG, default_vg)
9755 constants.IDISK_SIZE: size,
9756 constants.IDISK_MODE: mode,
9757 constants.IDISK_VG: data_vg,
9759 if constants.IDISK_METAVG in disk:
9760 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9761 if constants.IDISK_ADOPT in disk:
9762 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9763 self.disks.append(new_disk)
# Import mode: resolve the per-disk dump images from the export info.
9765 if self.op.mode == constants.INSTANCE_IMPORT:
9767 for idx in range(len(self.disks)):
9768 option = "disk%d_dump" % idx
9769 if export_info.has_option(constants.INISECT_INS, option):
9770 # FIXME: are the old os-es, disk sizes, etc. useful?
9771 export_name = export_info.get(constants.INISECT_INS, option)
9772 image = utils.PathJoin(self.op.src_path, export_name)
9773 disk_images.append(image)
9775 disk_images.append(False)
9777 self.src_images = disk_images
# Keep the exported MACs when re-importing under the same name.
9779 old_name = export_info.get(constants.INISECT_INS, "name")
9780 if self.op.instance_name == old_name:
9781 for idx, nic in enumerate(self.nics):
9782 if nic.mac == constants.VALUE_AUTO:
9783 nic_mac_ini = "nic%d_mac" % idx
9784 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9786 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9788 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9789 if self.op.ip_check:
9790 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9791 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9792 (self.check_ip, self.op.instance_name),
9793 errors.ECODE_NOTUNIQUE)
9795 #### mac address generation
9796 # By generating here the mac address both the allocator and the hooks get
9797 # the real final mac address rather than the 'auto' or 'generate' value.
9798 # There is a race condition between the generation and the instance object
9799 # creation, which means that we know the mac is valid now, but we're not
9800 # sure it will be when we actually add the instance. If things go bad
9801 # adding the instance will abort because of a duplicate mac, and the
9802 # creation job will fail.
9803 for nic in self.nics:
9804 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9805 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
# Let the iallocator pick the nodes if requested.
9809 if self.op.iallocator is not None:
9810 self._RunAllocator()
9812 # Release all unneeded node locks
9813 _ReleaseLocks(self, locking.LEVEL_NODE,
9814 keep=filter(None, [self.op.pnode, self.op.snode,
9816 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9817 keep=filter(None, [self.op.pnode, self.op.snode,
9820 #### node related checks
9822 # check primary node
9823 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9824 assert self.pnode is not None, \
9825 "Cannot retrieve locked node %s" % self.op.pnode
# The "if pnode.offline:" / "if pnode.drained:" guard lines preceding
# these raises were elided from the listing.
9827 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9828 pnode.name, errors.ECODE_STATE)
9830 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9831 pnode.name, errors.ECODE_STATE)
9832 if not pnode.vm_capable:
9833 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9834 " '%s'" % pnode.name, errors.ECODE_STATE)
9836 self.secondaries = []
9838 # mirror node verification
9839 if self.op.disk_template in constants.DTS_INT_MIRROR:
9840 if self.op.snode == pnode.name:
9841 raise errors.OpPrereqError("The secondary node cannot be the"
9842 " primary node", errors.ECODE_INVAL)
9843 _CheckNodeOnline(self, self.op.snode)
9844 _CheckNodeNotDrained(self, self.op.snode)
9845 _CheckNodeVmCapable(self, self.op.snode)
9846 self.secondaries.append(self.op.snode)
9848 snode = self.cfg.GetNodeInfo(self.op.snode)
9849 if pnode.group != snode.group:
9850 self.LogWarning("The primary and secondary nodes are in two"
9851 " different node groups; the disk parameters"
9852 " from the first disk's node group will be"
9855 nodenames = [pnode.name] + self.secondaries
9857 # Verify instance specs
9859 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9860 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9861 constants.ISPEC_DISK_COUNT: len(self.disks),
9862 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9863 constants.ISPEC_NIC_COUNT: len(self.nics),
9866 group_info = self.cfg.GetNodeGroup(pnode.group)
9867 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9868 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9869 if not self.op.ignore_ipolicy and res:
9870 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9871 " policy: %s") % (pnode.group,
9872 utils.CommaJoin(res)),
9875 # disk parameters (not customizable at instance or node level)
9876 # just use the primary node parameters, ignoring the secondary.
9877 self.diskparams = group_info.diskparams
# --- disk-space checks / adoption handling ---
9879 if not self.adopt_disks:
9880 if self.op.disk_template == constants.DT_RBD:
9881 # _CheckRADOSFreeSpace() is just a placeholder.
9882 # Any function that checks prerequisites can be placed here.
9883 # Check if there is enough space on the RADOS cluster.
9884 _CheckRADOSFreeSpace()
9886 # Check lv size requirements, if not adopting
9887 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9888 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9890 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9891 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9892 disk[constants.IDISK_ADOPT])
9893 for disk in self.disks])
9894 if len(all_lvs) != len(self.disks):
9895 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9897 for lv_name in all_lvs:
9899 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9900 # to ReserveLV uses the same syntax
9901 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9902 except errors.ReservationError:
9903 raise errors.OpPrereqError("LV named %s used by another instance" %
9904 lv_name, errors.ECODE_NOTUNIQUE)
9906 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9907 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9909 node_lvs = self.rpc.call_lv_list([pnode.name],
9910 vg_names.payload.keys())[pnode.name]
9911 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9912 node_lvs = node_lvs.payload
9914 delta = all_lvs.difference(node_lvs.keys())
9916 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9917 utils.CommaJoin(delta),
# node_lvs[lv][2] presumably flags an LV as online/in-use — only
# offline LVs can be adopted (TODO confirm payload layout).
9919 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9921 raise errors.OpPrereqError("Online logical volumes found, cannot"
9922 " adopt: %s" % utils.CommaJoin(online_lvs),
9924 # update the size of disk based on what is found
9925 for dsk in self.disks:
9926 dsk[constants.IDISK_SIZE] = \
9927 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9928 dsk[constants.IDISK_ADOPT])][0]))
9930 elif self.op.disk_template == constants.DT_BLOCK:
9931 # Normalize and de-duplicate device paths
9932 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9933 for disk in self.disks])
9934 if len(all_disks) != len(self.disks):
9935 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9937 baddisks = [d for d in all_disks
9938 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9940 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9941 " cannot be adopted" %
9942 (", ".join(baddisks),
9943 constants.ADOPTABLE_BLOCKDEV_ROOT),
9946 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9947 list(all_disks))[pnode.name]
9948 node_disks.Raise("Cannot get block device information from node %s" %
9950 node_disks = node_disks.payload
9951 delta = all_disks.difference(node_disks.keys())
9953 raise errors.OpPrereqError("Missing block device(s): %s" %
9954 utils.CommaJoin(delta),
9956 for dsk in self.disks:
9957 dsk[constants.IDISK_SIZE] = \
9958 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
# --- remote checks on the target nodes ---
9960 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9962 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9963 # check OS parameters (remotely)
9964 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9966 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9968 # memory check on primary node
9969 #TODO(dynmem): use MINMEM for checking
9971 _CheckNodeFreeMemory(self, self.pnode.name,
9972 "creating instance %s" % self.op.instance_name,
9973 self.be_full[constants.BE_MAXMEM],
9976 self.dry_run_result = list(nodenames)
# NOTE(review): heavily elided listing — missing lines include the
# "network_port = None" else-branch, _GenerateDiskTemplate arguments,
# "iobj.AddTag(tag)", "try:" headers, "raise" re-raise statements,
# "disk_abort" initialization, and several argument/guard lines.
# Annotated as-is, not reconstructed.
# Purpose: actually create the instance — generate and create (or adopt)
# the disks, add the instance to the configuration, optionally wipe the
# disks and wait for sync, install or import the OS depending on
# self.op.mode, and finally start the instance if requested.
9978 def Exec(self, feedback_fn):
9979 """Create and add the instance to the cluster.
9982 instance = self.op.instance_name
9983 pnode_name = self.pnode.name
9985 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9986 self.owned_locks(locking.LEVEL_NODE)), \
9987 "Node locks differ from node resource locks"
# Hypervisors that need a network console port get one allocated here.
9989 ht_kind = self.op.hypervisor
9990 if ht_kind in constants.HTS_REQ_PORT:
9991 network_port = self.cfg.AllocatePort()
9995 disks = _GenerateDiskTemplate(self,
9996 self.op.disk_template,
9997 instance, pnode_name,
10000 self.instance_file_storage_dir,
10001 self.op.file_driver,
# The instance object is created administratively DOWN; it is started
# (if requested) only at the end of this method.
10006 iobj = objects.Instance(name=instance, os=self.op.os_type,
10007 primary_node=pnode_name,
10008 nics=self.nics, disks=disks,
10009 disk_template=self.op.disk_template,
10010 admin_state=constants.ADMINST_DOWN,
10011 network_port=network_port,
10012 beparams=self.op.beparams,
10013 hvparams=self.op.hvparams,
10014 hypervisor=self.op.hypervisor,
10015 osparams=self.op.osparams,
10019 for tag in self.op.tags:
# Adoption: rename the existing LVs to the newly generated names
# instead of creating fresh devices.
10022 if self.adopt_disks:
10023 if self.op.disk_template == constants.DT_PLAIN:
10024 # rename LVs to the newly-generated names; we need to construct
10025 # 'fake' LV disks with the old data, plus the new unique_id
10026 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10028 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10029 rename_to.append(t_dsk.logical_id)
10030 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10031 self.cfg.SetDiskID(t_dsk, pnode_name)
10032 result = self.rpc.call_blockdev_rename(pnode_name,
10033 zip(tmp_disks, rename_to))
10034 result.Raise("Failed to rename adoped LVs")
# Non-adoption: create the disks, rolling back on failure (the "else:"
# and "try:" lines around this section were elided from the listing).
10036 feedback_fn("* creating instance disks...")
10038 _CreateDisks(self, iobj)
10039 except errors.OpExecError:
10040 self.LogWarning("Device creation failed, reverting...")
10042 _RemoveDisks(self, iobj)
10044 self.cfg.ReleaseDRBDMinors(instance)
10047 feedback_fn("adding instance %s to cluster config" % instance)
10049 self.cfg.AddInstance(iobj, self.proc.GetECId())
10051 # Declare that we don't want to remove the instance lock anymore, as we've
10052 # added the instance to the config
10053 del self.remove_locks[locking.LEVEL_INSTANCE]
10055 if self.op.mode == constants.INSTANCE_IMPORT:
10056 # Release unused nodes
10057 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10059 # Release all nodes
10060 _ReleaseLocks(self, locking.LEVEL_NODE)
# Optional disk wipe, then wait for (or at least check) disk sync.
10063 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10064 feedback_fn("* wiping instance disks...")
10066 _WipeDisks(self, iobj)
10067 except errors.OpExecError, err:
10068 logging.exception("Wiping disks failed")
10069 self.LogWarning("Wiping instance disks failed (%s)", err)
10073 # Something is already wrong with the disks, don't do anything else
10075 elif self.op.wait_for_sync:
10076 disk_abort = not _WaitForSync(self, iobj)
10077 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10078 # make sure the disks are not degraded (still sync-ing is ok)
10079 feedback_fn("* checking mirrors status")
10080 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
# Degraded disks: roll the instance back out of the configuration.
10085 _RemoveDisks(self, iobj)
10086 self.cfg.RemoveInstance(iobj.name)
10087 # Make sure the instance lock gets removed
10088 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10089 raise errors.OpExecError("There are some degraded disks for"
10092 # Release all node resource locks
10093 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
# OS installation / import, skipped for diskless and adopted instances.
10095 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10096 if self.op.mode == constants.INSTANCE_CREATE:
10097 if not self.op.no_install:
10098 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10099 not self.op.wait_for_sync)
10101 feedback_fn("* pausing disk sync to install instance OS")
10102 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10104 for idx, success in enumerate(result.payload):
10106 logging.warn("pause-sync of instance %s for disk %d failed",
10109 feedback_fn("* running the instance OS create scripts...")
10110 # FIXME: pass debug option from opcode to backend
10112 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10113 self.op.debug_level)
10115 feedback_fn("* resuming disk sync")
10116 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10118 for idx, success in enumerate(result.payload):
10120 logging.warn("resume-sync of instance %s for disk %d failed",
10123 os_add_result.Raise("Could not add os for instance %s"
10124 " on node %s" % (instance, pnode_name))
# Local import: stream each export image onto the matching new disk.
10126 elif self.op.mode == constants.INSTANCE_IMPORT:
10127 feedback_fn("* running the instance OS import scripts...")
10131 for idx, image in enumerate(self.src_images):
10135 # FIXME: pass debug option from opcode to backend
10136 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10137 constants.IEIO_FILE, (image, ),
10138 constants.IEIO_SCRIPT,
10139 (iobj.disks[idx], idx),
10141 transfers.append(dt)
10144 masterd.instance.TransferInstanceData(self, feedback_fn,
10145 self.op.src_node, pnode_name,
10146 self.pnode.secondary_ip,
10148 if not compat.all(import_result):
10149 self.LogWarning("Some disks for instance %s on node %s were not"
10150 " imported successfully" % (instance, pnode_name))
# Remote (inter-cluster) import over encrypted connections.
10152 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10153 feedback_fn("* preparing remote import...")
10154 # The source cluster will stop the instance before attempting to make a
10155 # connection. In some cases stopping an instance can take a long time,
10156 # hence the shutdown timeout is added to the connection timeout.
10157 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10158 self.op.source_shutdown_timeout)
10159 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10161 assert iobj.primary_node == self.pnode.name
10163 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10164 self.source_x509_ca,
10165 self._cds, timeouts)
10166 if not compat.all(disk_results):
10167 # TODO: Should the instance still be started, even if some disks
10168 # failed to import (valid for local imports, too)?
10169 self.LogWarning("Some disks for instance %s on node %s were not"
10170 " imported successfully" % (instance, pnode_name))
10172 # Run rename script on newly imported instance
10173 assert iobj.name == instance
10174 feedback_fn("Running rename script for %s" % instance)
10175 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10176 self.source_instance_name,
10177 self.op.debug_level)
10178 if result.fail_msg:
10179 self.LogWarning("Failed to run rename script for %s on node"
10180 " %s: %s" % (instance, pnode_name, result.fail_msg))
# Unknown mode — should have been rejected in CheckPrereq already.
10183 # also checked in the prereq part
10184 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10187 assert not self.owned_locks(locking.LEVEL_NODE_RES)
# Finally mark the instance UP and start it (the "if self.op.start:"
# guard preceding this section was elided from the listing).
10190 iobj.admin_state = constants.ADMINST_UP
10191 self.cfg.Update(iobj, feedback_fn)
10192 logging.info("Starting instance %s on node %s", instance, pnode_name)
10193 feedback_fn("* starting instance...")
10194 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10196 result.Raise("Could not start instance")
10198 return list(iobj.all_nodes)
10201 def _CheckRADOSFreeSpace():
10202 """Compute disk size requirements inside the RADOS cluster.
10205 # For the RADOS cluster we assume there is always enough space.
# NOTE(review): elided listing — missing lines include the class
# docstring closure, the REQ_BGL attribute, the docstring terminators,
# and the "else:" before the ADMINOFFLINE branch.  Annotated as-is.
# Purpose: LU that computes (but does not execute) the command needed to
# connect to an instance's console on its primary node.
10209 class LUInstanceConsole(NoHooksLU):
10210 """Connect to an instance's console.
10212 This is somewhat special in that it returns the command line that
10213 you need to run on the master node in order to connect to the
# Read-only operation: all locks are shared.
10219 def ExpandNames(self):
10220 self.share_locks = _ShareAll()
10221 self._ExpandAndLockInstance()
10223 def CheckPrereq(self):
10224 """Check prerequisites.
10226 This checks that the instance is in the cluster.
10229 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10230 assert self.instance is not None, \
10231 "Cannot retrieve locked instance %s" % self.op.instance_name
10232 _CheckNodeOnline(self, self.instance.primary_node)
10234 def Exec(self, feedback_fn):
10235 """Connect to the console of an instance
10238 instance = self.instance
10239 node = instance.primary_node
# Query the primary node for the list of running instances of this
# hypervisor; the console is only meaningful for a running instance.
10241 node_insts = self.rpc.call_instance_list([node],
10242 [instance.hypervisor])[node]
10243 node_insts.Raise("Can't get node information from %s" % node)
10245 if instance.name not in node_insts.payload:
# Map the admin state to a user-facing "not running" state for the
# error message.
10246 if instance.admin_state == constants.ADMINST_UP:
10247 state = constants.INSTST_ERRORDOWN
10248 elif instance.admin_state == constants.ADMINST_DOWN:
10249 state = constants.INSTST_ADMINDOWN
10251 state = constants.INSTST_ADMINOFFLINE
10252 raise errors.OpExecError("Instance %s is not running (state %s)" %
10253 (instance.name, state))
10255 logging.debug("Connecting to console of %s on %s", instance.name, node)
10257 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster the instance belongs to
  @type instance: L{objects.Instance}
  @param instance: the instance to compute console information for
  @rtype: dict
  @return: serialized console object, as produced by the hypervisor's
      C{GetInstanceConsole} and validated before returning

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
10281 class LUInstanceReplaceDisks(LogicalUnit):
10282 """Replace the disks of an instance.
10285 HPATH = "mirrors-replace"
10286 HTYPE = constants.HTYPE_INSTANCE
10289 def CheckArguments(self):
10290 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10291 self.op.iallocator)
10293 def ExpandNames(self):
10294 self._ExpandAndLockInstance()
10296 assert locking.LEVEL_NODE not in self.needed_locks
10297 assert locking.LEVEL_NODE_RES not in self.needed_locks
10298 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10300 assert self.op.iallocator is None or self.op.remote_node is None, \
10301 "Conflicting options"
10303 if self.op.remote_node is not None:
10304 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10306 # Warning: do not remove the locking of the new secondary here
10307 # unless DRBD8.AddChildren is changed to work in parallel;
10308 # currently it doesn't since parallel invocations of
10309 # FindUnusedMinor will conflict
10310 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10313 self.needed_locks[locking.LEVEL_NODE] = []
10314 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10316 if self.op.iallocator is not None:
10317 # iallocator will select a new node in the same group
10318 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10320 self.needed_locks[locking.LEVEL_NODE_RES] = []
10322 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10323 self.op.iallocator, self.op.remote_node,
10324 self.op.disks, False, self.op.early_release,
10325 self.op.ignore_ipolicy)
10327 self.tasklets = [self.replacer]
# Per-level lock refinement for LUInstanceReplaceDisks.
# NODEGROUP locks are only taken in iallocator mode (optimistically, since
# the instance's nodes are read before being locked); NODE locks are either
# all member nodes of the locked groups (iallocator) or the instance's own
# nodes; NODE_RES mirrors whatever NODE ended up holding.
10329 def DeclareLocks(self, level):
10330 if level == locking.LEVEL_NODEGROUP:
# These invariants were set up in ExpandNames: group locks are only
# requested when an iallocator (and no explicit remote node) is used.
10331 assert self.op.remote_node is None
10332 assert self.op.iallocator is not None
10333 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10335 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10336 # Lock all groups used by instance optimistically; this requires going
10337 # via the node before it's locked, requiring verification later on
10338 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10339 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10341 elif level == locking.LEVEL_NODE:
10342 if self.op.iallocator is not None:
10343 assert self.op.remote_node is None
10344 assert not self.needed_locks[locking.LEVEL_NODE]
10346 # Lock member nodes of all locked groups
10347 self.needed_locks[locking.LEVEL_NODE] = [node_name
10348 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10349 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
# NOTE(review): the listing elides line 10350 here — presumably an
# `else:` so that _LockInstancesNodes() only runs in the non-iallocator
# case; confirm against the full source.
10351 self._LockInstancesNodes()
10352 elif level == locking.LEVEL_NODE_RES:
# Resource locks cover exactly the node locks computed above.
10354 self.needed_locks[locking.LEVEL_NODE_RES] = \
10355 self.needed_locks[locking.LEVEL_NODE]
10357 def BuildHooksEnv(self):
10358 """Build hooks env.
10360 This runs on the master, the primary and all the secondaries.
# NOTE(review): the listing elides the `env = {` opener (line 10364), the
# closing `}` and the trailing `return env`; the visible keys describe the
# replace mode plus old/new secondary node names.
10363 instance = self.replacer.instance
10365 "MODE": self.op.mode,
10366 "NEW_SECONDARY": self.op.remote_node,
10367 "OLD_SECONDARY": instance.secondary_nodes[0],
# Merge in the standard per-instance hook environment.
10369 env.update(_BuildInstanceHookEnvByObject(self, instance))
10372 def BuildHooksNodes(self):
10373 """Build hooks nodes.
# Hooks run on the master, the instance's primary node and — when an
# explicit new secondary was given — that remote node as well.
# NOTE(review): the `nl = [` opener and the `return` are elided in this
# listing.
10376 instance = self.replacer.instance
10378 self.cfg.GetMasterNode(),
10379 instance.primary_node,
10381 if self.op.remote_node is not None:
10382 nl.append(self.op.remote_node)
10385 def CheckPrereq(self):
10386 """Check prerequisites.
# In iallocator mode we must still own the optimistically-acquired group
# locks; re-verify that the instance's groups did not change in between.
10389 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10390 self.op.iallocator is None)
10392 # Verify if node group locks are still correct
10393 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
# NOTE(review): an `if owned_groups:` guard (line 10394) appears to be
# elided in this listing.
10395 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
# Delegate the rest (tasklet CheckPrereq) to the base class.
10397 return LogicalUnit.CheckPrereq(self)
10400 class TLReplaceDisks(Tasklet):
10401 """Replaces disks for an instance.
10403 Note: Locking is not within the scope of this class.
10406 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10407 disks, delay_iallocator, early_release, ignore_ipolicy):
10408 """Initializes this class.
# Store the caller-supplied parameters verbatim; no validation happens
# here (see CheckArguments/CheckPrereq).
10411 Tasklet.__init__(self, lu)
10414 self.instance_name = instance_name
10416 self.iallocator_name = iallocator_name
10417 self.remote_node = remote_node
10419 self.delay_iallocator = delay_iallocator
10420 self.early_release = early_release
10421 self.ignore_ipolicy = ignore_ipolicy
# Runtime attributes, filled in by CheckPrereq/_CheckPrereq2:
10424 self.instance = None
10425 self.new_node = None
10426 self.target_node = None
10427 self.other_node = None
10428 self.remote_node_info = None
10429 self.node_secondary_ip = None
def CheckArguments(mode, remote_node, iallocator):
  """Helper function for users of this class.

  Validates that the disk-replacement mode and the node-selection
  options (explicit remote node vs. iallocator script) form a legal
  combination; raises L{errors.OpPrereqError} otherwise.

  """
  # check for valid parameter combination
  if mode == constants.REPLACE_DISK_CHG:
    # Changing the secondary requires exactly one selection mechanism.
    have_node = remote_node is not None
    have_iallocator = iallocator is not None
    if not (have_node or have_iallocator):
      raise errors.OpPrereqError("When changing the secondary either an"
                                 " iallocator script must be used or the"
                                 " new node given", errors.ECODE_INVAL)
    if have_node and have_iallocator:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)
  elif not (remote_node is None and iallocator is None):
    # Not replacing the secondary
    raise errors.OpPrereqError("The iallocator and new node options can"
                               " only be used when changing the"
                               " secondary node", errors.ECODE_INVAL)
# Static helper: ask the configured iallocator script for a relocation
# target and return the chosen node name.
10454 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10455 """Compute a new secondary node using an IAllocator.
10458 ial = IAllocator(lu.cfg, lu.rpc,
10459 mode=constants.IALLOCATOR_MODE_RELOC,
10460 name=instance_name,
10461 relocate_from=list(relocate_from))
10463 ial.Run(iallocator_name)
# A failed run or a wrong-sized result both abort the opcode.
10465 if not ial.success:
10466 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10467 " %s" % (iallocator_name, ial.info),
10468 errors.ECODE_NORES)
10470 if len(ial.result) != ial.required_nodes:
10471 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10472 " of nodes (%s), required %s" %
# NOTE(review): the continuation line 10473 (`(iallocator_name,`) is
# elided in this listing.
10474 len(ial.result), ial.required_nodes),
10475 errors.ECODE_FAULT)
10477 remote_node_name = ial.result[0]
10479 lu.LogInfo("Selected new secondary for instance '%s': %s",
10480 instance_name, remote_node_name)
10482 return remote_node_name
10484 def _FindFaultyDisks(self, node_name):
10485 """Wrapper for L{_FindFaultyInstanceDisks}.
# Delegates with this tasklet's cfg/rpc/instance; the remaining call
# arguments (line 10489) are elided in this listing.
10488 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10491 def _CheckDisksActivated(self, instance):
10492 """Checks if the instance disks are activated.
10494 @param instance: The instance to check disks
10495 @return: True if they are activated, False otherwise
10498 nodes = instance.all_nodes
# Every disk is probed on every node of the instance via blockdev_find.
10500 for idx, dev in enumerate(instance.disks):
10502 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10503 self.cfg.SetDiskID(dev, node)
10505 result = self.rpc.call_blockdev_find(node, dev)
# NOTE(review): lines 10506-10508 (likely the offline-node handling
# before this elif) and the final return statements are elided.
10509 elif result.fail_msg or not result.payload:
10514 def CheckPrereq(self):
10515 """Check prerequisites.
10517 This checks that the instance is in the cluster.
# The instance lock must already be held, so the config lookup cannot
# race with a removal.
10520 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10521 assert instance is not None, \
10522 "Cannot retrieve locked instance %s" % self.instance_name
# Disk replacement only makes sense for DRBD8 with exactly one secondary.
10524 if instance.disk_template != constants.DT_DRBD8:
10525 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10526 " instances", errors.ECODE_INVAL)
10528 if len(instance.secondary_nodes) != 1:
10529 raise errors.OpPrereqError("The instance has a strange layout,"
10530 " expected one secondary but found %d" %
10531 len(instance.secondary_nodes),
10532 errors.ECODE_FAULT)
# When iallocator evaluation is delayed, the second half of the checks
# runs from Exec() instead (see _CheckPrereq2 docstring).
10534 if not self.delay_iallocator:
10535 self._CheckPrereq2()
10537 def _CheckPrereq2(self):
10538 """Check prerequisites, second part.
10540 This function should always be part of CheckPrereq. It was separated and is
10541 now called from Exec because during node evacuation iallocator was only
10542 called with an unmodified cluster model, not taking planned changes into
10546 instance = self.instance
10547 secondary_node = instance.secondary_nodes[0]
# Resolve the replacement node: either the explicit remote node or the
# iallocator's choice (the `else:` line 10551 is elided in this listing).
10549 if self.iallocator_name is None:
10550 remote_node = self.remote_node
10552 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10553 instance.name, instance.secondary_nodes)
10555 if remote_node is None:
10556 self.remote_node_info = None
# A chosen remote node must already be locked and known to the config.
10558 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10559 "Remote node '%s' is not locked" % remote_node
10561 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10562 assert self.remote_node_info is not None, \
10563 "Cannot retrieve locked node %s" % remote_node
10565 if remote_node == self.instance.primary_node:
10566 raise errors.OpPrereqError("The specified node is the primary node of"
10567 " the instance", errors.ECODE_INVAL)
10569 if remote_node == secondary_node:
10570 raise errors.OpPrereqError("The specified node is already the"
10571 " secondary node of the instance",
10572 errors.ECODE_INVAL)
# Explicit disk lists are only valid for same-node replacement modes.
10574 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10575 constants.REPLACE_DISK_CHG):
10576 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10577 errors.ECODE_INVAL)
# AUTO mode: detect which side has faulty disks and replace only those.
10579 if self.mode == constants.REPLACE_DISK_AUTO:
10580 if not self._CheckDisksActivated(instance):
10581 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10582 " first" % self.instance_name,
10583 errors.ECODE_STATE)
10584 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10585 faulty_secondary = self._FindFaultyDisks(secondary_node)
10587 if faulty_primary and faulty_secondary:
10588 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10589 " one node and can not be repaired"
10590 " automatically" % self.instance_name,
10591 errors.ECODE_STATE)
# NOTE(review): the `if faulty_primary:` line (10593) is elided here.
10594 self.disks = faulty_primary
10595 self.target_node = instance.primary_node
10596 self.other_node = secondary_node
10597 check_nodes = [self.target_node, self.other_node]
10598 elif faulty_secondary:
10599 self.disks = faulty_secondary
10600 self.target_node = secondary_node
10601 self.other_node = instance.primary_node
10602 check_nodes = [self.target_node, self.other_node]
# NOTE(review): lines 10603-10607 (the no-faulty-disks branch) are
# elided in this listing.
10608 # Non-automatic modes
10609 if self.mode == constants.REPLACE_DISK_PRI:
10610 self.target_node = instance.primary_node
10611 self.other_node = secondary_node
10612 check_nodes = [self.target_node, self.other_node]
10614 elif self.mode == constants.REPLACE_DISK_SEC:
10615 self.target_node = secondary_node
10616 self.other_node = instance.primary_node
10617 check_nodes = [self.target_node, self.other_node]
10619 elif self.mode == constants.REPLACE_DISK_CHG:
# Changing the secondary: the old secondary is the target (its disks
# get torn down), the remote node becomes the new secondary.
10620 self.new_node = remote_node
10621 self.other_node = instance.primary_node
10622 self.target_node = secondary_node
10623 check_nodes = [self.new_node, self.other_node]
10625 _CheckNodeNotDrained(self.lu, remote_node)
10626 _CheckNodeVmCapable(self.lu, remote_node)
10628 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10629 assert old_node_info is not None
10630 if old_node_info.offline and not self.early_release:
10631 # doesn't make sense to delay the release
10632 self.early_release = True
10633 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10634 " early-release mode", secondary_node)
# NOTE(review): the `else:` (10636) before this catch-all is elided.
10637 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10640 # If not specified all disks should be replaced
10642 self.disks = range(len(self.instance.disks))
10644 # TODO: This is ugly, but right now we can't distinguish between internal
10645 # submitted opcode and external one. We should fix that.
10646 if self.remote_node_info:
10647 # We change the node, lets verify it still meets instance policy
10648 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10649 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10651 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10652 ignore=self.ignore_ipolicy)
10654 # TODO: compute disk parameters
10655 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10656 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10657 if primary_node_info.group != secondary_node_info.group:
10658 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10659 " different node groups; the disk parameters of the"
10660 " primary node's group will be applied.")
10662 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10664 for node in check_nodes:
10665 _CheckNodeOnline(self.lu, node)
# Compute the final node set involved in the operation and release all
# other node/node-resource/group locks acquired optimistically.
10667 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10670 if node_name is not None)
10672 # Release unneeded node and node resource locks
10673 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10674 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10676 # Release any owned node group
10677 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10678 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10680 # Check whether disks are valid
10681 for disk_idx in self.disks:
10682 instance.FindDisk(disk_idx)
10684 # Get secondary node IP addresses
10685 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10686 in self.cfg.GetMultiNodeInfo(touched_nodes))
10688 def Exec(self, feedback_fn):
10689 """Execute disk replacement.
10691 This dispatches the disk replacement to the appropriate handler.
# When iallocator evaluation was delayed, the second half of the
# prerequisite checks runs now, against the updated cluster model.
10694 if self.delay_iallocator:
10695 self._CheckPrereq2()
10698 # Verify owned locks before starting operation
10699 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10700 assert set(owned_nodes) == set(self.node_secondary_ip), \
10701 ("Incorrect node locks, owning %s, expected %s" %
10702 (owned_nodes, self.node_secondary_ip.keys()))
10703 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10704 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10706 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10707 assert list(owned_instances) == [self.instance_name], \
10708 "Instance '%s' not locked" % self.instance_name
10710 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10711 "Should not own any node group lock at this point"
# NOTE(review): the `if not self.disks:` guard/return around this
# message (lines 10712-10715) is elided in the listing.
10714 feedback_fn("No disks need replacement")
10717 feedback_fn("Replacing disk(s) %s for %s" %
10718 (utils.CommaJoin(self.disks), self.instance.name))
10720 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10722 # Activate the instance disks if we're replacing them on a down instance
10724 _StartInstanceDisks(self.lu, self.instance, True)
# NOTE(review): the surrounding try/finally that guarantees the disk
# shutdown below is elided from this listing.
10727 # Should we replace the secondary node?
10728 if self.new_node is not None:
10729 fn = self._ExecDrbd8Secondary
10731 fn = self._ExecDrbd8DiskOnly
10733 result = fn(feedback_fn)
10735 # Deactivate the instance disks if we're replacing them on a
10738 _SafeShutdownInstanceDisks(self.lu, self.instance)
10740 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10743 # Verify owned locks
10744 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10745 nodes = frozenset(self.node_secondary_ip)
# With early release no resource locks may remain; otherwise only locks
# on the touched nodes may still be held.
10746 assert ((self.early_release and not owned_nodes) or
10747 (not self.early_release and not (set(owned_nodes) - nodes))), \
10748 ("Not owning the correct locks, early_release=%s, owned=%r,"
10749 " nodes=%r" % (self.early_release, owned_nodes, nodes))
# Verify the cluster's volume group exists on every involved node.
10753 def _CheckVolumeGroup(self, nodes):
10754 self.lu.LogInfo("Checking volume groups")
10756 vgname = self.cfg.GetVGName()
10758 # Make sure volume group exists on all involved nodes
10759 results = self.rpc.call_vg_list(nodes)
# NOTE(review): the `if not results:` guard (line 10760) is elided.
10761 raise errors.OpExecError("Can't list volume groups on the nodes")
10764 res = results[node]
10765 res.Raise("Error checking node %s" % node)
10766 if vgname not in res.payload:
10767 raise errors.OpExecError("Volume group '%s' not found on node %s" %
# Verify that every disk selected for replacement can be found via
# blockdev_find on each of the given nodes.
10770 def _CheckDisksExistence(self, nodes):
10771 # Check disk existence
10772 for idx, dev in enumerate(self.instance.disks):
10773 if idx not in self.disks:
# (elided `continue` — disks not selected for replacement are skipped)
10777 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10778 self.cfg.SetDiskID(dev, node)
10780 result = self.rpc.call_blockdev_find(node, dev)
10782 msg = result.fail_msg
10783 if msg or not result.payload:
# Normalize "no payload" into a readable error message.
10785 msg = "disk not found"
10786 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
# Abort if any selected disk on node_name is degraded; replacing disks
# while the peer is inconsistent would risk data loss.
10789 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10790 for idx, dev in enumerate(self.instance.disks):
10791 if idx not in self.disks:
# (elided `continue` — only disks selected for replacement are checked)
10794 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10797 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10799 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10800 " replace disks for instance %s" %
10801 (node_name, self.instance.name))
10803 def _CreateNewStorage(self, node_name):
10804 """Create new storage on the primary or secondary node.
10806 This is only used for same-node replaces, not for changing the
10807 secondary node, hence we don't want to modify the existing disk.
# NOTE(review): the `iv_names = {}` initializer and the final
# `return iv_names` are elided from this listing.
10812 for idx, dev in enumerate(self.instance.disks):
10813 if idx not in self.disks:
# (elided `continue`)
10816 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10818 self.cfg.SetDiskID(dev, node_name)
# Fresh, unique LV names for the replacement data/meta volumes.
10820 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10821 names = _GenerateUniqueNames(self.lu, lv_names)
10823 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
# New LVs mirror the old children's volume groups; data keeps the disk
# size, meta uses the fixed DRBD meta size.
10825 vg_data = dev.children[0].logical_id[0]
10826 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10827 logical_id=(vg_data, names[0]), params=data_p)
10828 vg_meta = dev.children[1].logical_id[0]
10829 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10830 logical_id=(vg_meta, names[1]), params=meta_p)
10832 new_lvs = [lv_data, lv_meta]
10833 old_lvs = [child.Copy() for child in dev.children]
10834 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10836 # we pass force_create=True to force the LVM creation
10837 for new_lv in new_lvs:
10838 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10839 _GetInstanceInfoText(self.instance), False)
# Verify every DRBD device in iv_names is present and not degraded on
# node_name; used after the replacement to sanity-check the result.
10843 def _CheckDevices(self, node_name, iv_names):
10844 for name, (dev, _, _) in iv_names.iteritems():
10845 self.cfg.SetDiskID(dev, node_name)
10847 result = self.rpc.call_blockdev_find(node_name, dev)
10849 msg = result.fail_msg
10850 if msg or not result.payload:
10852 msg = "disk not found"
10853 raise errors.OpExecError("Can't find DRBD device %s: %s" %
# (elided continuation `(name, msg))` — line 10854)
10856 if result.payload.is_degraded:
10857 raise errors.OpExecError("DRBD device %s is degraded!" % name)
# Best-effort removal of the replaced LVs: failures only produce a
# warning (with a manual-cleanup hint), never abort the operation.
10859 def _RemoveOldStorage(self, node_name, iv_names):
10860 for name, (_, old_lvs, _) in iv_names.iteritems():
10861 self.lu.LogInfo("Remove logical volumes for %s" % name)
# (elided inner `for lv in old_lvs:` loop header — lines 10862-10863)
10864 self.cfg.SetDiskID(lv, node_name)
10866 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
# (elided `if msg:` — line 10867)
10868 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10869 hint="remove unused LVs manually")
10871 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10872 """Replace a disk on the primary or secondary for DRBD 8.
10874 The algorithm for replace is quite complicated:
10876 1. for each disk to be replaced:
10878 1. create new LVs on the target node with unique names
10879 1. detach old LVs from the drbd device
10880 1. rename old LVs to name_replaced.<time_t>
10881 1. rename new LVs to old LVs
10882 1. attach the new LVs (with the old names now) to the drbd device
10884 1. wait for sync across all devices
10886 1. for each modified disk:
10888 1. remove old LVs (which have the name name_replaces.<time_t>)
10890 Failures are not very well handled.
# NOTE(review): `steps_total` initialization (around line 10893) is
# elided from this listing.
10895 # Step: check device activation
10896 self.lu.LogStep(1, steps_total, "Check device existence")
10897 self._CheckDisksExistence([self.other_node, self.target_node])
10898 self._CheckVolumeGroup([self.target_node, self.other_node])
10900 # Step: check other node consistency
10901 self.lu.LogStep(2, steps_total, "Check peer consistency")
10902 self._CheckDisksConsistency(self.other_node,
10903 self.other_node == self.instance.primary_node,
10906 # Step: create new storage
10907 self.lu.LogStep(3, steps_total, "Allocate new storage")
10908 iv_names = self._CreateNewStorage(self.target_node)
10910 # Step: for each lv, detach+rename*2+attach
10911 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10912 for dev, old_lvs, new_lvs in iv_names.itervalues():
10913 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10915 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10917 result.Raise("Can't detach drbd from local storage on node"
10918 " %s for device %s" % (self.target_node, dev.iv_name))
10920 #cfg.Update(instance)
10922 # ok, we created the new LVs, so now we know we have the needed
10923 # storage; as such, we proceed on the target node to rename
10924 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10925 # using the assumption that logical_id == physical_id (which in
10926 # turn is the unique_id on that node)
10928 # FIXME(iustin): use a better name for the replaced LVs
10929 temp_suffix = int(time.time())
10930 ren_fn = lambda d, suff: (d.physical_id[0],
10931 d.physical_id[1] + "_replaced-%s" % suff)
10933 # Build the rename list based on what LVs exist on the node
10934 rename_old_to_new = []
10935 for to_ren in old_lvs:
10936 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
# Only LVs actually present on the node are queued for renaming.
10937 if not result.fail_msg and result.payload:
10939 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10941 self.lu.LogInfo("Renaming the old LVs on the target node")
10942 result = self.rpc.call_blockdev_rename(self.target_node,
10944 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10946 # Now we rename the new LVs to the old LVs
10947 self.lu.LogInfo("Renaming the new LVs on the target node")
10948 rename_new_to_old = [(new, old.physical_id)
10949 for old, new in zip(old_lvs, new_lvs)]
10950 result = self.rpc.call_blockdev_rename(self.target_node,
10952 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10954 # Intermediate steps of in memory modifications
10955 for old, new in zip(old_lvs, new_lvs):
10956 new.logical_id = old.logical_id
10957 self.cfg.SetDiskID(new, self.target_node)
10959 # We need to modify old_lvs so that removal later removes the
10960 # right LVs, not the newly added ones; note that old_lvs is a
10962 for disk in old_lvs:
10963 disk.logical_id = ren_fn(disk, temp_suffix)
10964 self.cfg.SetDiskID(disk, self.target_node)
10966 # Now that the new lvs have the old name, we can add them to the device
10967 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10968 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10970 msg = result.fail_msg
# On failure, roll back by removing the freshly-created LVs; rollback
# errors are only warnings (manual cleanup hinted).
10972 for new_lv in new_lvs:
10973 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10976 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10977 hint=("cleanup manually the unused logical"
10979 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
# Remaining steps continue the numbering started above (5, 6, ...).
10981 cstep = itertools.count(5)
10983 if self.early_release:
10984 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10985 self._RemoveOldStorage(self.target_node, iv_names)
10986 # TODO: Check if releasing locks early still makes sense
10987 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
# (elided `else:` — line 10988)
10989 # Release all resource locks except those used by the instance
10990 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10991 keep=self.node_secondary_ip.keys())
10993 # Release all node locks while waiting for sync
10994 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10996 # TODO: Can the instance lock be downgraded here? Take the optional disk
10997 # shutdown in the caller into consideration.
11000 # This can fail as the old devices are degraded and _WaitForSync
11001 # does a combined result over all disks, so we don't check its return value
11002 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11003 _WaitForSync(self.lu, self.instance)
11005 # Check all devices manually
11006 self._CheckDevices(self.instance.primary_node, iv_names)
11008 # Step: remove old storage
11009 if not self.early_release:
11010 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11011 self._RemoveOldStorage(self.target_node, iv_names)
11013 def _ExecDrbd8Secondary(self, feedback_fn):
11014 """Replace the secondary node for DRBD 8.
11016 The algorithm for replace is quite complicated:
11017 - for all disks of the instance:
11018 - create new LVs on the new node with same names
11019 - shutdown the drbd device on the old secondary
11020 - disconnect the drbd network on the primary
11021 - create the drbd device on the new secondary
11022 - network attach the drbd on the primary, using an artifice:
11023 the drbd code for Attach() will connect to the network if it
11024 finds a device which is connected to the good local disks but
11025 not network enabled
11026 - wait for sync across all devices
11027 - remove all disks from the old secondary
11029 Failures are not very well handled.
# NOTE(review): `steps_total` initialization (around line 11032) is
# elided from this listing.
11034 pnode = self.instance.primary_node
11036 # Step: check device activation
11037 self.lu.LogStep(1, steps_total, "Check device existence")
11038 self._CheckDisksExistence([self.instance.primary_node])
11039 self._CheckVolumeGroup([self.instance.primary_node])
11041 # Step: check other node consistency
11042 self.lu.LogStep(2, steps_total, "Check peer consistency")
11043 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11045 # Step: create new storage
11046 self.lu.LogStep(3, steps_total, "Allocate new storage")
11047 for idx, dev in enumerate(self.instance.disks):
11048 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11049 (self.new_node, idx))
11050 # we pass force_create=True to force LVM creation
11051 for new_lv in dev.children:
11052 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11053 _GetInstanceInfoText(self.instance), False)
11055 # Step 4: dbrd minors and drbd setups changes
11056 # after this, we must manually remove the drbd minors on both the
11057 # error and the success paths
11058 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11059 minors = self.cfg.AllocateDRBDMinor([self.new_node
11060 for dev in self.instance.disks],
11061 self.instance.name)
11062 logging.debug("Allocated minors %r", minors)
# NOTE(review): `iv_names = {}` (around line 11064) is elided.
11065 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11066 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11067 (self.new_node, idx))
11068 # create new devices on new_node; note that we create two IDs:
11069 # one without port, so the drbd will be activated without
11070 # networking information on the new node at this stage, and one
11071 # with network, for the latter activation in step 4
11072 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
# Pick the primary's minor out of the old logical id; the branch
# assigning p_minor (lines 11074-11077) is partly elided here.
11073 if self.instance.primary_node == o_node1:
11076 assert self.instance.primary_node == o_node2, "Three-node instance?"
11079 new_alone_id = (self.instance.primary_node, self.new_node, None,
11080 p_minor, new_minor, o_secret)
11081 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11082 p_minor, new_minor, o_secret)
11084 iv_names[idx] = (dev, dev.children, new_net_id)
11085 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11087 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11088 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11089 logical_id=new_alone_id,
11090 children=dev.children,
11092 params=drbd_params)
# On creation failure, release the allocated minors before re-raising
# (the `try:`/`raise` scaffolding is partly elided in this listing).
11094 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11095 _GetInstanceInfoText(self.instance), False)
11096 except errors.GenericError:
11097 self.cfg.ReleaseDRBDMinors(self.instance.name)
11100 # We have new devices, shutdown the drbd on the old secondary
11101 for idx, dev in enumerate(self.instance.disks):
11102 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11103 self.cfg.SetDiskID(dev, self.target_node)
11104 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
# Shutdown failures on the old secondary are non-fatal warnings.
11106 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11107 "node: %s" % (idx, msg),
11108 hint=("Please cleanup this device manually as"
11109 " soon as possible"))
11111 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11112 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11113 self.instance.disks)[pnode]
11115 msg = result.fail_msg
# A failed disconnect aborts the operation (minors released first).
11117 # detaches didn't succeed (unlikely)
11118 self.cfg.ReleaseDRBDMinors(self.instance.name)
11119 raise errors.OpExecError("Can't detach the disks from the network on"
11120 " old node: %s" % (msg,))
11122 # if we managed to detach at least one, we update all the disks of
11123 # the instance to point to the new secondary
11124 self.lu.LogInfo("Updating instance configuration")
11125 for dev, _, new_logical_id in iv_names.itervalues():
11126 dev.logical_id = new_logical_id
11127 self.cfg.SetDiskID(dev, self.instance.primary_node)
11129 self.cfg.Update(self.instance, feedback_fn)
11131 # Release all node locks (the configuration has been updated)
11132 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11134 # and now perform the drbd attach
11135 self.lu.LogInfo("Attaching primary drbds to new secondary"
11136 " (standalone => connected)")
11137 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11139 self.node_secondary_ip,
11140 self.instance.disks,
11141 self.instance.name,
11143 for to_node, to_result in result.items():
11144 msg = to_result.fail_msg
# Attach failures are warnings only; the admin can inspect disk status.
11146 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11148 hint=("please do a gnt-instance info to see the"
11149 " status of disks"))
11151 cstep = itertools.count(5)
11153 if self.early_release:
11154 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11155 self._RemoveOldStorage(self.target_node, iv_names)
11156 # TODO: Check if releasing locks early still makes sense
11157 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
# (elided `else:` — line 11158)
11159 # Release all resource locks except those used by the instance
11160 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11161 keep=self.node_secondary_ip.keys())
11163 # TODO: Can the instance lock be downgraded here? Take the optional disk
11164 # shutdown in the caller into consideration.
11167 # This can fail as the old devices are degraded and _WaitForSync
11168 # does a combined result over all disks, so we don't check its return value
11169 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11170 _WaitForSync(self.lu, self.instance)
11172 # Check all devices manually
11173 self._CheckDevices(self.instance.primary_node, iv_names)
11175 # Step: remove old storage
11176 if not self.early_release:
11177 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11178 self._RemoveOldStorage(self.target_node, iv_names)
11181 class LURepairNodeStorage(NoHooksLU):
11182 """Repairs the volume group on a node.
# Runs the SO_FIX_CONSISTENCY storage operation on a node after
# verifying no online instance with disks elsewhere would be at risk.
11187 def CheckArguments(self):
11188 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11190 storage_type = self.op.storage_type
# Only storage types that support the fix-consistency operation can be
# repaired.
11192 if (constants.SO_FIX_CONSISTENCY not in
11193 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11194 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11195 " repaired" % storage_type,
11196 errors.ECODE_INVAL)
11198 def ExpandNames(self):
11199 self.needed_locks = {
11200 locking.LEVEL_NODE: [self.op.node_name],
11203 def _CheckFaultyDisks(self, instance, node_name):
11204 """Ensure faulty disks abort the opcode or at least warn."""
# NOTE(review): the `try:` opening this block (line 11205) is elided;
# with ignore_consistency set, the prereq error is downgraded to a
# warning instead of aborting.
11206 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11208 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11209 " node '%s'" % (instance.name, node_name),
11210 errors.ECODE_STATE)
11211 except errors.OpPrereqError, err:
11212 if self.op.ignore_consistency:
11213 self.proc.LogWarning(str(err.args[0]))
11217 def CheckPrereq(self):
11218 """Check prerequisites.
11221 # Check whether any instance on this node has faulty disks
11222 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11223 if inst.admin_state != constants.ADMINST_UP:
# (elided `continue` — only running instances are checked)
11225 check_nodes = set(inst.all_nodes)
11226 check_nodes.discard(self.op.node_name)
11227 for inst_node_name in check_nodes:
11228 self._CheckFaultyDisks(inst, inst_node_name)
11230 def Exec(self, feedback_fn):
11231 feedback_fn("Repairing storage unit '%s' on %s ..." %
11232 (self.op.name, self.op.node_name))
11234 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11235 result = self.rpc.call_storage_execute(self.op.node_name,
11236 self.op.storage_type, st_args,
11238 constants.SO_FIX_CONSISTENCY)
11239 result.Raise("Failed to repair storage unit '%s' on %s" %
11240 (self.op.name, self.op.node_name))
11243 class LUNodeEvacuate(NoHooksLU):
11244 """Evacuates instances off a list of nodes.
# Class-level map from the opcode's evacuation mode to the matching
# iallocator node-evacuate mode; the asserts keep it in sync with the
# constants module.
11249 _MODE2IALLOCATOR = {
11250 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11251 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11252 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11254 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11255 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11256 constants.IALLOCATOR_NEVAC_MODES)
11258 def CheckArguments(self):
# Exactly one of iallocator/remote_node must be given (shared helper).
11259 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11261 def ExpandNames(self):
11262 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11264 if self.op.remote_node is not None:
11265 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11266 assert self.op.remote_node
# The evacuated node can obviously not be its own evacuation target.
11268 if self.op.remote_node == self.op.node_name:
11269 raise errors.OpPrereqError("Can not use evacuated node as a new"
11270 " secondary node", errors.ECODE_INVAL)
# An explicit target node only works for secondary-only evacuation.
11272 if self.op.mode != constants.NODE_EVAC_SEC:
11273 raise errors.OpPrereqError("Without the use of an iallocator only"
11274 " secondary instances can be evacuated",
11275 errors.ECODE_INVAL)
# All locks are shared; actual lists are filled in DeclareLocks.
11278 self.share_locks = _ShareAll()
11279 self.needed_locks = {
11280 locking.LEVEL_INSTANCE: [],
11281 locking.LEVEL_NODEGROUP: [],
11282 locking.LEVEL_NODE: [],
11285 # Determine nodes (via group) optimistically, needs verification once locks
11286 # have been acquired
11287 self.lock_nodes = self._DetermineNodes()
11289 def _DetermineNodes(self):
11290 """Gets the list of nodes to operate on.
# Returns the evacuated node plus either its group's members (iallocator
# mode) or the explicit remote node. The `else:` before line 11297 is
# elided in this listing.
11293 if self.op.remote_node is None:
11294 # Iallocator will choose any node(s) in the same group
11295 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11297 group_nodes = frozenset([self.op.remote_node])
11299 # Determine nodes to be locked
11300 return set([self.op.node_name]) | group_nodes
11302 def _DetermineInstances(self):
11303 """Builds list of instances to operate on.
11306 assert self.op.mode in constants.NODE_EVAC_MODES
11308 if self.op.mode == constants.NODE_EVAC_PRI:
11309 # Primary instances only
11310 inst_fn = _GetNodePrimaryInstances
11311 assert self.op.remote_node is None, \
11312 "Evacuating primary instances requires iallocator"
11313 elif self.op.mode == constants.NODE_EVAC_SEC:
11314 # Secondary instances only
11315 inst_fn = _GetNodeSecondaryInstances
11318 assert self.op.mode == constants.NODE_EVAC_ALL
11319 inst_fn = _GetNodeInstances
11320 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11322 raise errors.OpPrereqError("Due to an issue with the iallocator"
11323 " interface it is not possible to evacuate"
11324 " all instances at once; specify explicitly"
11325 " whether to evacuate primary or secondary"
11327 errors.ECODE_INVAL)
11329 return inst_fn(self.cfg, self.op.node_name)
11331 def DeclareLocks(self, level):
11332 if level == locking.LEVEL_INSTANCE:
11333 # Lock instances optimistically, needs verification once node and group
11334 # locks have been acquired
11335 self.needed_locks[locking.LEVEL_INSTANCE] = \
11336 set(i.name for i in self._DetermineInstances())
11338 elif level == locking.LEVEL_NODEGROUP:
11339 # Lock node groups for all potential target nodes optimistically, needs
11340 # verification once nodes have been acquired
11341 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11342 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11344 elif level == locking.LEVEL_NODE:
11345 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11347 def CheckPrereq(self):
11349 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11350 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11351 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11353 need_nodes = self._DetermineNodes()
11355 if not owned_nodes.issuperset(need_nodes):
11356 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11357 " locks were acquired, current nodes are"
11358 " are '%s', used to be '%s'; retry the"
11360 (self.op.node_name,
11361 utils.CommaJoin(need_nodes),
11362 utils.CommaJoin(owned_nodes)),
11363 errors.ECODE_STATE)
11365 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11366 if owned_groups != wanted_groups:
11367 raise errors.OpExecError("Node groups changed since locks were acquired,"
11368 " current groups are '%s', used to be '%s';"
11369 " retry the operation" %
11370 (utils.CommaJoin(wanted_groups),
11371 utils.CommaJoin(owned_groups)))
11373 # Determine affected instances
11374 self.instances = self._DetermineInstances()
11375 self.instance_names = [i.name for i in self.instances]
11377 if set(self.instance_names) != owned_instances:
11378 raise errors.OpExecError("Instances on node '%s' changed since locks"
11379 " were acquired, current instances are '%s',"
11380 " used to be '%s'; retry the operation" %
11381 (self.op.node_name,
11382 utils.CommaJoin(self.instance_names),
11383 utils.CommaJoin(owned_instances)))
11385 if self.instance_names:
11386 self.LogInfo("Evacuating instances from node '%s': %s",
11388 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11390 self.LogInfo("No instances to evacuate from node '%s'",
11393 if self.op.remote_node is not None:
11394 for i in self.instances:
11395 if i.primary_node == self.op.remote_node:
11396 raise errors.OpPrereqError("Node %s is the primary node of"
11397 " instance %s, cannot use it as"
11399 (self.op.remote_node, i.name),
11400 errors.ECODE_INVAL)
11402 def Exec(self, feedback_fn):
11403 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11405 if not self.instance_names:
11406 # No instances to evacuate
11409 elif self.op.iallocator is not None:
11410 # TODO: Implement relocation to other group
11411 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11412 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11413 instances=list(self.instance_names))
11415 ial.Run(self.op.iallocator)
11417 if not ial.success:
11418 raise errors.OpPrereqError("Can't compute node evacuation using"
11419 " iallocator '%s': %s" %
11420 (self.op.iallocator, ial.info),
11421 errors.ECODE_NORES)
11423 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11425 elif self.op.remote_node is not None:
11426 assert self.op.mode == constants.NODE_EVAC_SEC
11428 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11429 remote_node=self.op.remote_node,
11431 mode=constants.REPLACE_DISK_CHG,
11432 early_release=self.op.early_release)]
11433 for instance_name in self.instance_names
11437 raise errors.ProgrammerError("No iallocator or remote node")
11439 return ResultWithJobs(jobs)
11442 def _SetOpEarlyRelease(early_release, op):
11443 """Sets C{early_release} flag on opcodes if available.
11447 op.early_release = early_release
11448 except AttributeError:
11449 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11454 def _NodeEvacDest(use_nodes, group, nodes):
11455 """Returns group or nodes depending on caller's choice.
11459 return utils.CommaJoin(nodes)
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups
  @return: List of jobs (each a list of opcodes) to be submitted by the caller

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    # Any instance without a new home is a hard error for the whole request,
    # not only a warning
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  # Deserialize every opcode and propagate the early-release flag to those
  # opcodes which support it
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  Grows one disk on all of the instance's nodes, first in dry-run mode and
  then for real, records the new size in the configuration and optionally
  waits for the disk to resync.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node and node-resource locks are computed from the instance's nodes
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    # NOTE(review): the "env = {" opening and "return env" lines appear to
    # have been lost in this extract
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Master plus all of the instance's nodes
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    # Raises OpPrereqError for an invalid disk index
    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE,
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    # NOTE(review): a "disk = self.disk" line appears to have been lost here

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

    # TODO: Rewrite code to work properly
    # DRBD goes into sync mode for a short amount of time after executing the
    # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
    # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  Returns, per instance, a dictionary of configuration data plus
  (unless static-only information was requested) live state queried
  from the primary node.

  """
  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      # All locks are shared; this LU only reads state
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    # Static-only queries and diskless devices have no live status
    if self.op.static or not node:
    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    # Recurse into child devices (e.g. the components of a mirror)
    dev_children = map(compat.partial(self._ComputeDiskStatus,
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        # No live state can (or should) be queried
        remote_state = None
        self.LogWarning("Primary node %s is marked offline, returning static"
                        " information only for instance %s" %
                        (pnode.name, instance.name))
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
            remote_state = instance.admin_state

      # Per-disk status, starting with no explicit secondary node
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; if C{None}, every modification's private field is C{None}
  @rtype: list of tuples; (operation, index, parameters, private data)
  @return: Modifications with an added private data field

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks: either C{None} or a list of (name, value) two-tuples whose
#: first element is a non-empty string
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  # NOTE(review): several control-flow lines (index-validation branches,
  # "else:" lines and a "changes = None" initialization) appear to have been
  # lost in this extract; compare with the authoritative source before
  # relying on the exact flow below
  for (op, idx, params, private) in mods:
      # Index -1 means "append at the end of the container"
      absidx = len(container) - 1
      # Other negative indices are rejected
      raise IndexError("Not accepting negative indices")

    if op == constants.DDM_ADD:
      if create_fn is None:
        (item, changes) = create_fn(absidx + 1, params, private)

        container.append(item)
        # list.insert does so before the specified index
        container.insert(idx, item)
      # Retrieve existing item
        item = container[absidx]
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        # Removal callback must not have modified the container
        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

      # Callbacks must return changes in the expected format
      assert _TApplyContModsCbChanges(changes)

      if not (chgdesc is None or changes is None):
        chgdesc.extend(changes)
11908 class _InstNicModPrivate:
11909 """Data structure for network interface modifications.
11911 Used by L{LUInstanceSetParams}.
11914 def __init__(self):
11919 class LUInstanceSetParams(LogicalUnit):
11920 """Modifies an instances's parameters.
11923 HPATH = "instance-modify"
11924 HTYPE = constants.HTYPE_INSTANCE
11928 def _UpgradeDiskNicMods(kind, mods, verify_fn):
11929 assert ht.TList(mods)
11930 assert not mods or len(mods[0]) in (2, 3)
11932 if mods and len(mods[0]) == 2:
11936 for op, params in mods:
11937 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11938 result.append((op, -1, params))
11942 raise errors.OpPrereqError("Only one %s add or remove operation is"
11943 " supported at a time" % kind,
11944 errors.ECODE_INVAL)
11946 result.append((constants.DDM_MODIFY, op, params))
11948 assert verify_fn(result)
11955 def _CheckMods(kind, mods, key_types, item_fn):
11956 """Ensures requested disk/NIC modifications are valid.
11959 for (op, _, params) in mods:
11960 assert ht.TDict(params)
11962 utils.ForceDictType(params, key_types)
11964 if op == constants.DDM_REMOVE:
11966 raise errors.OpPrereqError("No settings should be passed when"
11967 " removing a %s" % kind,
11968 errors.ECODE_INVAL)
11969 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11970 item_fn(op, params)
11972 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11975 def _VerifyDiskModification(op, params):
11976 """Verifies a disk modification.
11979 if op == constants.DDM_ADD:
11980 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11981 if mode not in constants.DISK_ACCESS_SET:
11982 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11983 errors.ECODE_INVAL)
11985 size = params.get(constants.IDISK_SIZE, None)
11987 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
11988 constants.IDISK_SIZE, errors.ECODE_INVAL)
11992 except (TypeError, ValueError), err:
11993 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
11994 errors.ECODE_INVAL)
11996 params[constants.IDISK_SIZE] = size
11998 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
11999 raise errors.OpPrereqError("Disk size change not possible, use"
12000 " grow-disk", errors.ECODE_INVAL)
12003 def _VerifyNicModification(op, params):
12004 """Verifies a network interface modification.
12007 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12008 ip = params.get(constants.INIC_IP, None)
12011 elif ip.lower() == constants.VALUE_NONE:
12012 params[constants.INIC_IP] = None
12013 elif not netutils.IPAddress.IsValid(ip):
12014 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12015 errors.ECODE_INVAL)
12017 bridge = params.get("bridge", None)
12018 link = params.get(constants.INIC_LINK, None)
12019 if bridge and link:
12020 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12021 " at the same time", errors.ECODE_INVAL)
12022 elif bridge and bridge.lower() == constants.VALUE_NONE:
12023 params["bridge"] = None
12024 elif link and link.lower() == constants.VALUE_NONE:
12025 params[constants.INIC_LINK] = None
12027 if op == constants.DDM_ADD:
12028 macaddr = params.get(constants.INIC_MAC, None)
12029 if macaddr is None:
12030 params[constants.INIC_MAC] = constants.VALUE_AUTO
12032 if constants.INIC_MAC in params:
12033 macaddr = params[constants.INIC_MAC]
12034 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12035 macaddr = utils.NormalizeAndValidateMac(macaddr)
12037 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12038 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12039 " modifying an existing NIC",
12040 errors.ECODE_INVAL)
12042 def CheckArguments(self):
12043 if not (self.op.nics or self.op.disks or self.op.disk_template or
12044 self.op.hvparams or self.op.beparams or self.op.os_name or
12045 self.op.offline is not None or self.op.runtime_mem):
12046 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12048 if self.op.hvparams:
12049 _CheckGlobalHvParams(self.op.hvparams)
12052 self._UpgradeDiskNicMods("disk", self.op.disks,
12053 opcodes.OpInstanceSetParams.TestDiskModifications)
12055 self._UpgradeDiskNicMods("NIC", self.op.nics,
12056 opcodes.OpInstanceSetParams.TestNicModifications)
12058 # Check disk modifications
12059 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12060 self._VerifyDiskModification)
12062 if self.op.disks and self.op.disk_template is not None:
12063 raise errors.OpPrereqError("Disk template conversion and other disk"
12064 " changes not supported at the same time",
12065 errors.ECODE_INVAL)
12067 if (self.op.disk_template and
12068 self.op.disk_template in constants.DTS_INT_MIRROR and
12069 self.op.remote_node is None):
12070 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12071 " one requires specifying a secondary node",
12072 errors.ECODE_INVAL)
12074 # Check NIC modifications
12075 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12076 self._VerifyNicModification)
12078 def ExpandNames(self):
12079 self._ExpandAndLockInstance()
12080 # Can't even acquire node locks in shared mode as upcoming changes in
12081 # Ganeti 2.6 will start to modify the node object on disk conversion
12082 self.needed_locks[locking.LEVEL_NODE] = []
12083 self.needed_locks[locking.LEVEL_NODE_RES] = []
12084 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12086 def DeclareLocks(self, level):
12087 # TODO: Acquire group lock in shared mode (disk parameters)
12088 if level == locking.LEVEL_NODE:
12089 self._LockInstancesNodes()
12090 if self.op.disk_template and self.op.remote_node:
12091 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12092 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12093 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12095 self.needed_locks[locking.LEVEL_NODE_RES] = \
12096 self.needed_locks[locking.LEVEL_NODE][:]
12098 def BuildHooksEnv(self):
12099 """Build hooks env.
12101 This runs on the master, primary and secondaries.
12105 if constants.BE_MINMEM in self.be_new:
12106 args["minmem"] = self.be_new[constants.BE_MINMEM]
12107 if constants.BE_MAXMEM in self.be_new:
12108 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12109 if constants.BE_VCPUS in self.be_new:
12110 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12111 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12112 # information at all.
12114 if self._new_nics is not None:
12117 for nic in self._new_nics:
12118 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12119 mode = nicparams[constants.NIC_MODE]
12120 link = nicparams[constants.NIC_LINK]
12121 nics.append((nic.ip, nic.mac, mode, link))
12123 args["nics"] = nics
12125 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12126 if self.op.disk_template:
12127 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12128 if self.op.runtime_mem:
12129 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12133 def BuildHooksNodes(self):
12134 """Build hooks nodes.
12137 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12140 def _PrepareNicModification(self, params, private, old_ip, old_params,
12142 update_params_dict = dict([(key, params[key])
12143 for key in constants.NICS_PARAMETERS
12146 if "bridge" in params:
12147 update_params_dict[constants.NIC_LINK] = params["bridge"]
12149 new_params = _GetUpdatedParams(old_params, update_params_dict)
12150 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12152 new_filled_params = cluster.SimpleFillNIC(new_params)
12153 objects.NIC.CheckParameterSyntax(new_filled_params)
12155 new_mode = new_filled_params[constants.NIC_MODE]
12156 if new_mode == constants.NIC_MODE_BRIDGED:
12157 bridge = new_filled_params[constants.NIC_LINK]
12158 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12160 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12162 self.warn.append(msg)
12164 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12166 elif new_mode == constants.NIC_MODE_ROUTED:
12167 ip = params.get(constants.INIC_IP, old_ip)
12169 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12170 " on a routed NIC", errors.ECODE_INVAL)
12172 if constants.INIC_MAC in params:
12173 mac = params[constants.INIC_MAC]
12175 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12176 errors.ECODE_INVAL)
12177 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12178 # otherwise generate the MAC address
12179 params[constants.INIC_MAC] = \
12180 self.cfg.GenerateMAC(self.proc.GetECId())
12182 # or validate/reserve the current one
12184 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12185 except errors.ReservationError:
12186 raise errors.OpPrereqError("MAC address '%s' already in use"
12187 " in cluster" % mac,
12188 errors.ECODE_NOTUNIQUE)
12190 private.params = new_params
12191 private.filled = new_filled_params
12193 return (None, None)
12195 def CheckPrereq(self):
12196 """Check prerequisites.
12198 This only checks the instance list against the existing names.
12201 # checking the new params on the primary/secondary nodes
12203 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12204 cluster = self.cluster = self.cfg.GetClusterInfo()
12205 assert self.instance is not None, \
12206 "Cannot retrieve locked instance %s" % self.op.instance_name
12207 pnode = instance.primary_node
12208 nodelist = list(instance.all_nodes)
12209 pnode_info = self.cfg.GetNodeInfo(pnode)
12210 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12212 # Prepare disk/NIC modifications
12213 self.diskmod = PrepareContainerMods(self.op.disks, None)
12214 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12217 if self.op.os_name and not self.op.force:
12218 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12219 self.op.force_variant)
12220 instance_os = self.op.os_name
12222 instance_os = instance.os
12224 assert not (self.op.disk_template and self.op.disks), \
12225 "Can't modify disk template and apply disk changes at the same time"
12227 if self.op.disk_template:
12228 if instance.disk_template == self.op.disk_template:
12229 raise errors.OpPrereqError("Instance already has disk template %s" %
12230 instance.disk_template, errors.ECODE_INVAL)
12232 if (instance.disk_template,
12233 self.op.disk_template) not in self._DISK_CONVERSIONS:
12234 raise errors.OpPrereqError("Unsupported disk template conversion from"
12235 " %s to %s" % (instance.disk_template,
12236 self.op.disk_template),
12237 errors.ECODE_INVAL)
12238 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12239 msg="cannot change disk template")
12240 if self.op.disk_template in constants.DTS_INT_MIRROR:
12241 if self.op.remote_node == pnode:
12242 raise errors.OpPrereqError("Given new secondary node %s is the same"
12243 " as the primary node of the instance" %
12244 self.op.remote_node, errors.ECODE_STATE)
12245 _CheckNodeOnline(self, self.op.remote_node)
12246 _CheckNodeNotDrained(self, self.op.remote_node)
12247 # FIXME: here we assume that the old instance type is DT_PLAIN
12248 assert instance.disk_template == constants.DT_PLAIN
12249 disks = [{constants.IDISK_SIZE: d.size,
12250 constants.IDISK_VG: d.logical_id[0]}
12251 for d in instance.disks]
12252 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12253 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12255 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12256 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12257 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12258 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12259 ignore=self.op.ignore_ipolicy)
12260 if pnode_info.group != snode_info.group:
12261 self.LogWarning("The primary and secondary nodes are in two"
12262 " different node groups; the disk parameters"
12263 " from the first disk's node group will be"
12266 # hvparams processing
12267 if self.op.hvparams:
12268 hv_type = instance.hypervisor
12269 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12270 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12271 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12274 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12275 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12276 self.hv_proposed = self.hv_new = hv_new # the new actual values
12277 self.hv_inst = i_hvdict # the new dict (without defaults)
12279 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12281 self.hv_new = self.hv_inst = {}
12283 # beparams processing
12284 if self.op.beparams:
12285 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12287 objects.UpgradeBeParams(i_bedict)
12288 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12289 be_new = cluster.SimpleFillBE(i_bedict)
12290 self.be_proposed = self.be_new = be_new # the new actual values
12291 self.be_inst = i_bedict # the new dict (without defaults)
12293 self.be_new = self.be_inst = {}
12294 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12295 be_old = cluster.FillBE(instance)
12297 # CPU param validation -- checking every time a paramtere is
12298 # changed to cover all cases where either CPU mask or vcpus have
12300 if (constants.BE_VCPUS in self.be_proposed and
12301 constants.HV_CPU_MASK in self.hv_proposed):
12303 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12304 # Verify mask is consistent with number of vCPUs. Can skip this
12305 # test if only 1 entry in the CPU mask, which means same mask
12306 # is applied to all vCPUs.
12307 if (len(cpu_list) > 1 and
12308 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12309 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12311 (self.be_proposed[constants.BE_VCPUS],
12312 self.hv_proposed[constants.HV_CPU_MASK]),
12313 errors.ECODE_INVAL)
12315 # Only perform this test if a new CPU mask is given
12316 if constants.HV_CPU_MASK in self.hv_new:
12317 # Calculate the largest CPU number requested
12318 max_requested_cpu = max(map(max, cpu_list))
12319 # Check that all of the instance's nodes have enough physical CPUs to
12320 # satisfy the requested CPU mask
12321 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12322 max_requested_cpu + 1, instance.hypervisor)
12324 # osparams processing
12325 if self.op.osparams:
12326 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12327 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12328 self.os_inst = i_osdict # the new dict (without defaults)
12334 #TODO(dynmem): do the appropriate check involving MINMEM
12335 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12336 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12337 mem_check_list = [pnode]
12338 if be_new[constants.BE_AUTO_BALANCE]:
12339 # either we changed auto_balance to yes or it was from before
12340 mem_check_list.extend(instance.secondary_nodes)
12341 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12342 instance.hypervisor)
12343 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12344 [instance.hypervisor])
12345 pninfo = nodeinfo[pnode]
12346 msg = pninfo.fail_msg
12348 # Assume the primary node is unreachable and go ahead
12349 self.warn.append("Can't get info from primary node %s: %s" %
12352 (_, _, (pnhvinfo, )) = pninfo.payload
12353 if not isinstance(pnhvinfo.get("memory_free", None), int):
12354 self.warn.append("Node data from primary node %s doesn't contain"
12355 " free memory information" % pnode)
12356 elif instance_info.fail_msg:
12357 self.warn.append("Can't get instance runtime information: %s" %
12358 instance_info.fail_msg)
12360 if instance_info.payload:
12361 current_mem = int(instance_info.payload["memory"])
12363 # Assume instance not running
12364 # (there is a slight race condition here, but it's not very
12365 # probable, and we have no other way to check)
12366 # TODO: Describe race condition
12368 #TODO(dynmem): do the appropriate check involving MINMEM
12369 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12370 pnhvinfo["memory_free"])
12372 raise errors.OpPrereqError("This change will prevent the instance"
12373 " from starting, due to %d MB of memory"
12374 " missing on its primary node" %
12376 errors.ECODE_NORES)
12378 if be_new[constants.BE_AUTO_BALANCE]:
12379 for node, nres in nodeinfo.items():
12380 if node not in instance.secondary_nodes:
12382 nres.Raise("Can't get info from secondary node %s" % node,
12383 prereq=True, ecode=errors.ECODE_STATE)
12384 (_, _, (nhvinfo, )) = nres.payload
12385 if not isinstance(nhvinfo.get("memory_free", None), int):
12386 raise errors.OpPrereqError("Secondary node %s didn't return free"
12387 " memory information" % node,
12388 errors.ECODE_STATE)
12389 #TODO(dynmem): do the appropriate check involving MINMEM
12390 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12391 raise errors.OpPrereqError("This change will prevent the instance"
12392 " from failover to its secondary node"
12393 " %s, due to not enough memory" % node,
12394 errors.ECODE_STATE)
12396 if self.op.runtime_mem:
12397 remote_info = self.rpc.call_instance_info(instance.primary_node,
12399 instance.hypervisor)
12400 remote_info.Raise("Error checking node %s" % instance.primary_node)
12401 if not remote_info.payload: # not running already
12402 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12403 errors.ECODE_STATE)
12405 current_memory = remote_info.payload["memory"]
12406 if (not self.op.force and
12407 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12408 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12409 raise errors.OpPrereqError("Instance %s must have memory between %d"
12410 " and %d MB of memory unless --force is"
12411 " given" % (instance.name,
12412 self.be_proposed[constants.BE_MINMEM],
12413 self.be_proposed[constants.BE_MAXMEM]),
12414 errors.ECODE_INVAL)
12416 if self.op.runtime_mem > current_memory:
12417 _CheckNodeFreeMemory(self, instance.primary_node,
12418 "ballooning memory for instance %s" %
12420 self.op.memory - current_memory,
12421 instance.hypervisor)
12423 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12424 raise errors.OpPrereqError("Disk operations not supported for"
12425 " diskless instances",
12426 errors.ECODE_INVAL)
12428 def _PrepareNicCreate(_, params, private):
12429 return self._PrepareNicModification(params, private, None, {},
12432 def _PrepareNicMod(_, nic, params, private):
12433 return self._PrepareNicModification(params, private, nic.ip,
12434 nic.nicparams, cluster, pnode)
12436 # Verify NIC changes (operating on copy)
12437 nics = instance.nics[:]
12438 ApplyContainerMods("NIC", nics, None, self.nicmod,
12439 _PrepareNicCreate, _PrepareNicMod, None)
12440 if len(nics) > constants.MAX_NICS:
12441 raise errors.OpPrereqError("Instance has too many network interfaces"
12442 " (%d), cannot add more" % constants.MAX_NICS,
12443 errors.ECODE_STATE)
12445 # Verify disk changes (operating on a copy)
12446 disks = instance.disks[:]
12447 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12448 if len(disks) > constants.MAX_DISKS:
12449 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12450 " more" % constants.MAX_DISKS,
12451 errors.ECODE_STATE)
12453 if self.op.offline is not None:
12454 if self.op.offline:
12455 msg = "can't change to offline"
12457 msg = "can't change to online"
12458 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12460 # Pre-compute NIC changes (necessary to use result in hooks)
12461 self._nic_chgdesc = []
12463 # Operate on copies as this is still in prereq
12464 nics = [nic.Copy() for nic in instance.nics]
12465 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12466 self._CreateNewNic, self._ApplyNicMods, None)
12467 self._new_nics = nics
12469 self._new_nics = None
12471 def _ConvertPlainToDrbd(self, feedback_fn):
12472 """Converts an instance from plain to drbd.
# Strategy: generate a DRBD8 disk tree mirroring the current plain disks,
# create the new meta/secondary volumes, rename the original LVs into the
# data children of the new disks, then assemble the DRBD devices on top.
12475 feedback_fn("Converting template to drbd")
12476 instance = self.instance
12477 pnode = instance.primary_node
# The new DRBD secondary node comes from the opcode (validated earlier).
12478 snode = self.op.remote_node
12480 assert instance.disk_template == constants.DT_PLAIN
12482 # create a fake disk info for _GenerateDiskTemplate
# Each entry carries size, mode and the VG of the existing LV so the new
# DRBD data child presumably lands in the same volume group — the VG is
# taken from logical_id[0] of each current disk.
12483 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12484 constants.IDISK_VG: d.logical_id[0]}
12485 for d in instance.disks]
12486 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12487 instance.name, pnode, [snode],
12488 disk_info, None, None, 0, feedback_fn,
12490 info = _GetInstanceInfoText(instance)
# NOTE(review): "aditional" is a typo in a user-visible message; left
# unchanged here because altering it changes runtime output.
12491 feedback_fn("Creating aditional volumes...")
12492 # first, create the missing data and meta devices
12493 for disk in new_disks:
12494 # unfortunately this is... not too nice
# On the primary only children[1] is created here; on the secondary every
# child device is created from scratch.
12495 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12497 for child in disk.children:
12498 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12499 # at this stage, all new LVs have been created, we can rename the
12501 feedback_fn("Renaming original volumes...")
# Map each original disk onto the logical id of its new data child.
12502 rename_list = [(o, n.children[0].logical_id)
12503 for (o, n) in zip(instance.disks, new_disks)]
12504 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12505 result.Raise("Failed to rename original LVs")
12507 feedback_fn("Initializing DRBD devices...")
12508 # all child devices are in place, we can now create the DRBD devices
12509 for disk in new_disks:
12510 for node in [pnode, snode]:
# f_create is True only for the primary node.
12511 f_create = node == pnode
12512 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12514 # at this point, the instance has been modified
12515 instance.disk_template = constants.DT_DRBD8
12516 instance.disks = new_disks
12517 self.cfg.Update(instance, feedback_fn)
12519 # Release node locks while waiting for sync
12520 _ReleaseLocks(self, locking.LEVEL_NODE)
12522 # disks are created, waiting for sync
# Unless wait_for_sync was requested, only a one-shot sync check is done.
12523 disk_abort = not _WaitForSync(self, instance,
12524 oneshot=not self.op.wait_for_sync)
12526 raise errors.OpExecError("There are some degraded disks for"
12527 " this instance, please cleanup manually")
12529 # Node resource locks will be released by caller
12531 def _ConvertDrbdToPlain(self, feedback_fn):
12532 """Converts an instance from drbd to plain.
12535 instance = self.instance
# Conversion is only supported for the single-secondary DRBD8 layout.
12537 assert len(instance.secondary_nodes) == 1
12538 assert instance.disk_template == constants.DT_DRBD8
12540 pnode = instance.primary_node
12541 snode = instance.secondary_nodes[0]
12542 feedback_fn("Converting template to plain")
# The plain disks are simply the data children (children[0]) of the
# existing DRBD devices; the meta children become unneeded.
12544 old_disks = instance.disks
12545 new_disks = [d.children[0] for d in old_disks]
12547 # copy over size and mode
12548 for parent, child in zip(old_disks, new_disks):
12549 child.size = parent.size
12550 child.mode = parent.mode
12552 # update instance structure
12553 instance.disks = new_disks
12554 instance.disk_template = constants.DT_PLAIN
12555 self.cfg.Update(instance, feedback_fn)
12557 # Release locks in case removing disks takes a while
12558 _ReleaseLocks(self, locking.LEVEL_NODE)
12560 feedback_fn("Removing volumes on the secondary node...")
# Best-effort cleanup: removal failures are warned about, not raised.
12561 for disk in old_disks:
12562 self.cfg.SetDiskID(disk, snode)
12563 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12565 self.LogWarning("Could not remove block device %s on node %s,"
12566 " continuing anyway: %s", disk.iv_name, snode, msg)
12568 feedback_fn("Removing unneeded volumes on the primary node...")
12569 for idx, disk in enumerate(old_disks):
# children[1] is the DRBD metadata volume, no longer needed.
12570 meta = disk.children[1]
12571 self.cfg.SetDiskID(meta, pnode)
12572 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12574 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12575 " continuing anyway: %s", idx, pnode, msg)
12577 # this is a DRBD disk, return its port to the pool
12578 for disk in old_disks:
# logical_id[2] holds the TCP port of the DRBD device.
12579 tcp_port = disk.logical_id[2]
12580 self.cfg.AddTcpUdpPort(tcp_port)
12582 # Node resource locks will be released by caller
12584 def _CreateNewDisk(self, idx, params, _):
12585 """Creates a new disk.
12588 instance = self.instance
# For file-based templates reuse the driver and directory of the first
# existing disk so the new disk ends up alongside the others.
12591 if instance.disk_template in constants.DTS_FILEBASED:
12592 (file_driver, file_path) = instance.disks[0].logical_id
12593 file_path = os.path.dirname(file_path)
12595 file_driver = file_path = None
12598 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12599 instance.primary_node, instance.secondary_nodes,
12600 [params], file_path, file_driver, idx,
12601 self.Log, self.diskparams)[0]
12603 info = _GetInstanceInfoText(instance)
12605 logging.info("Creating volume %s for instance %s",
12606 disk.iv_name, instance.name)
12607 # Note: this needs to be kept in sync with _CreateDisks
12609 for node in instance.all_nodes:
# f_create is True only on the primary node.
12610 f_create = (node == instance.primary_node)
12612 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
# Creation failures are reported as warnings, not raised, at this stage.
12613 except errors.OpExecError, err:
12614 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12615 disk.iv_name, disk, node, err)
# Change-description entry for the operation result.
12618 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12622 def _ModifyDisk(idx, disk, params, _):
12623 """Modifies a disk.
# Only the access mode of an existing disk is changed here; the change
# description returned mirrors the new mode.
12626 disk.mode = params[constants.IDISK_MODE]
12629 ("disk.mode/%d" % idx, disk.mode),
12632 def _RemoveDisk(self, idx, root, _):
# Removes disk "root" (at index idx) from every node that holds one of
# its component devices, walking the node/device tree from the primary.
12636 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12637 self.cfg.SetDiskID(disk, node)
# Best-effort removal: failures are logged as warnings, not raised.
12638 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12640 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12641 " continuing anyway", idx, node, msg)
12643 # if this is a DRBD disk, return its port to the pool
12644 if root.dev_type in constants.LDS_DRBD:
12645 self.cfg.AddTcpUdpPort(root.logical_id[2])
12648 def _CreateNewNic(idx, params, private):
12649 """Creates data structure for a new network interface.
12652 mac = params[constants.INIC_MAC]
12653 ip = params.get(constants.INIC_IP, None)
# private.params / private.filled presumably hold the NIC parameters
# pre-computed in _PrepareNicModification — confirm against that helper.
12654 nicparams = private.params
# Returns the new NIC object plus a change-description entry.
12656 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12658 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12659 (mac, ip, private.filled[constants.NIC_MODE],
12660 private.filled[constants.NIC_LINK])),
12664 def _ApplyNicMods(idx, nic, params, private):
12665 """Modifies a network interface.
# Direct attribute changes (MAC/IP) are applied to the NIC object and
# each one is recorded in the change list.
12670 for key in [constants.INIC_MAC, constants.INIC_IP]:
12672 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12673 setattr(nic, key, params[key])
# Pre-computed (filled) parameters replace the NIC's nicparams wholesale.
12676 nic.nicparams = private.params
# All remaining requested parameters are recorded as changes as well.
12678 for (key, val) in params.items():
12679 changes.append(("nic.%s/%d" % (key, idx), val))
12683 def Exec(self, feedback_fn):
12684 """Modifies an instance.
12686 All parameters take effect only at the next restart of the instance.
12689 # Process here the warnings from CheckPrereq, as we don't have a
12690 # feedback_fn there.
12691 # TODO: Replace with self.LogWarning
12692 for warn in self.warn:
12693 feedback_fn("WARNING: %s" % warn)
# Node resource locks must be held iff a disk template change is pending.
12695 assert ((self.op.disk_template is None) ^
12696 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12697 "Not owning any node resource locks"
12700 instance = self.instance
# Runtime memory ballooning is applied immediately via RPC to the
# primary node.
12703 if self.op.runtime_mem:
12704 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12706 self.op.runtime_mem)
12707 rpcres.Raise("Cannot modify instance runtime memory")
12708 result.append(("runtime_memory", self.op.runtime_mem))
12710 # Apply disk changes
12711 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12712 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12714 if self.op.disk_template:
# Sanity-check lock ownership before the (destructive) conversion.
12716 check_nodes = set(instance.all_nodes)
12717 if self.op.remote_node:
12718 check_nodes.add(self.op.remote_node)
12719 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12720 owned = self.owned_locks(level)
12721 assert not (check_nodes - owned), \
12722 ("Not owning the correct locks, owning %r, expected at least %r" %
12723 (owned, check_nodes))
12725 r_shut = _ShutdownInstanceDisks(self, instance)
12727 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12728 " proceed with disk template conversion")
# Dispatch to the registered conversion handler for this template pair.
12729 mode = (instance.disk_template, self.op.disk_template)
12731 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12733 self.cfg.ReleaseDRBDMinors(instance.name)
12735 result.append(("disk_template", self.op.disk_template))
12737 assert instance.disk_template == self.op.disk_template, \
12738 ("Expected disk template '%s', found '%s'" %
12739 (self.op.disk_template, instance.disk_template))
12741 # Release node and resource locks if there are any (they might already have
12742 # been released during disk conversion)
12743 _ReleaseLocks(self, locking.LEVEL_NODE)
12744 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12746 # Apply NIC changes
# The new NIC list was already computed/verified in CheckPrereq.
12747 if self._new_nics is not None:
12748 instance.nics = self._new_nics
12749 result.extend(self._nic_chgdesc)
12752 if self.op.hvparams:
12753 instance.hvparams = self.hv_inst
12754 for key, val in self.op.hvparams.iteritems():
12755 result.append(("hv/%s" % key, val))
12758 if self.op.beparams:
12759 instance.beparams = self.be_inst
12760 for key, val in self.op.beparams.iteritems():
12761 result.append(("be/%s" % key, val))
12764 if self.op.os_name:
12765 instance.os = self.op.os_name
12768 if self.op.osparams:
12769 instance.osparams = self.os_inst
12770 for key, val in self.op.osparams.iteritems():
12771 result.append(("os/%s" % key, val))
# Three-valued offline flag: None means "no change".
12773 if self.op.offline is None:
12776 elif self.op.offline:
12777 # Mark instance as offline
12778 self.cfg.MarkInstanceOffline(instance.name)
12779 result.append(("admin_state", constants.ADMINST_OFFLINE))
12781 # Mark instance as online, but stopped
12782 self.cfg.MarkInstanceDown(instance.name)
12783 result.append(("admin_state", constants.ADMINST_DOWN))
# Persist all accumulated changes to the cluster configuration.
12785 self.cfg.Update(instance, feedback_fn)
12787 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12788 self.owned_locks(locking.LEVEL_NODE)), \
12789 "All node locks should have been released by now"
# Dispatch table for supported disk template conversions, keyed by
# (current_template, requested_template); used by Exec above.
12793 _DISK_CONVERSIONS = {
12794 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12795 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12799 class LUInstanceChangeGroup(LogicalUnit):
# Moves an instance to (nodes of) a different node group; the actual
# moves are computed by the instance allocator and returned as jobs.
12800 HPATH = "instance-change-group"
12801 HTYPE = constants.HTYPE_INSTANCE
12804 def ExpandNames(self):
12805 self.share_locks = _ShareAll()
12806 self.needed_locks = {
12807 locking.LEVEL_NODEGROUP: [],
12808 locking.LEVEL_NODE: [],
12811 self._ExpandAndLockInstance()
# Resolve requested target group names to UUIDs, if any were given.
12813 if self.op.target_groups:
12814 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12815 self.op.target_groups)
12817 self.req_target_uuids = None
12819 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12821 def DeclareLocks(self, level):
12822 if level == locking.LEVEL_NODEGROUP:
12823 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12825 if self.req_target_uuids:
# Lock requested target groups plus the groups the instance uses.
12826 lock_groups = set(self.req_target_uuids)
12828 # Lock all groups used by instance optimistically; this requires going
12829 # via the node before it's locked, requiring verification later on
12830 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12831 lock_groups.update(instance_groups)
12833 # No target groups, need to lock all of them
12834 lock_groups = locking.ALL_SET
12836 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12838 elif level == locking.LEVEL_NODE:
12839 if self.req_target_uuids:
12840 # Lock all nodes used by instances
12841 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12842 self._LockInstancesNodes()
12844 # Lock all nodes in all potential target groups
12845 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12846 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12847 member_nodes = [node_name
12848 for group in lock_groups
12849 for node_name in self.cfg.GetNodeGroup(group).members]
12850 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12852 # Lock all nodes as all groups are potential targets
12853 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12855 def CheckPrereq(self):
12856 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12857 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12858 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12860 assert (self.req_target_uuids is None or
12861 owned_groups.issuperset(self.req_target_uuids))
12862 assert owned_instances == set([self.op.instance_name])
12864 # Get instance information
12865 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12867 # Check if node groups for locked instance are still correct
12868 assert owned_nodes.issuperset(self.instance.all_nodes), \
12869 ("Instance %s's nodes changed while we kept the lock" %
12870 self.op.instance_name)
12872 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12875 if self.req_target_uuids:
12876 # User requested specific target groups
12877 self.target_uuids = self.req_target_uuids
12879 # All groups except those used by the instance are potential targets
12880 self.target_uuids = owned_groups - inst_groups
# A requested target group that the instance already uses is an error.
12882 conflicting_groups = self.target_uuids & inst_groups
12883 if conflicting_groups:
12884 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12885 " used by the instance '%s'" %
12886 (utils.CommaJoin(conflicting_groups),
12887 self.op.instance_name),
12888 errors.ECODE_INVAL)
12890 if not self.target_uuids:
12891 raise errors.OpPrereqError("There are no possible target groups",
12892 errors.ECODE_INVAL)
12894 def BuildHooksEnv(self):
12895 """Build hooks env.
12898 assert self.target_uuids
12901 "TARGET_GROUPS": " ".join(self.target_uuids),
12904 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12908 def BuildHooksNodes(self):
12909 """Build hooks nodes.
12912 mn = self.cfg.GetMasterNode()
12913 return ([mn], [mn])
12915 def Exec(self, feedback_fn):
12916 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12918 assert instances == [self.op.instance_name], "Instance not locked"
# Ask the instance allocator to compute the group-change solution.
12920 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12921 instances=instances, target_groups=list(self.target_uuids))
12923 ial.Run(self.op.iallocator)
12925 if not ial.success:
12926 raise errors.OpPrereqError("Can't compute solution for changing group of"
12927 " instance '%s' using iallocator '%s': %s" %
12928 (self.op.instance_name, self.op.iallocator,
12930 errors.ECODE_NORES)
# Convert the allocator result into jobs to be submitted by the caller.
12932 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12934 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12935 " instance '%s'", len(jobs), self.op.instance_name)
12937 return ResultWithJobs(jobs)
12940 class LUBackupQuery(NoHooksLU):
12941 """Query the exports list
12946 def ExpandNames(self):
12947 self.needed_locks = {}
# Node locks are shared: querying exports is read-only.
12948 self.share_locks[locking.LEVEL_NODE] = 1
# With no explicit node list, query every node in the cluster.
12949 if not self.op.nodes:
12950 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12952 self.needed_locks[locking.LEVEL_NODE] = \
12953 _GetWantedNodes(self, self.op.nodes)
12955 def Exec(self, feedback_fn):
12956 """Compute the list of all the exported system images.
12959 @return: a dictionary with the structure node->(export-list)
12960 where export-list is a list of the instances exported on
12964 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12965 rpcresult = self.rpc.call_export_list(self.nodes)
# A node that failed to answer maps to False instead of an export list.
12967 for node in rpcresult:
12968 if rpcresult[node].fail_msg:
12969 result[node] = False
12971 result[node] = rpcresult[node].payload
12976 class LUBackupPrepare(NoHooksLU):
12977 """Prepares an instance for an export and returns useful information.
12982 def ExpandNames(self):
12983 self._ExpandAndLockInstance()
12985 def CheckPrereq(self):
12986 """Check prerequisites.
12989 instance_name = self.op.instance_name
12991 self.instance = self.cfg.GetInstanceInfo(instance_name)
12992 assert self.instance is not None, \
12993 "Cannot retrieve locked instance %s" % self.op.instance_name
12994 _CheckNodeOnline(self, self.instance.primary_node)
# The cluster domain secret is used below to sign the remote-export
# handshake, key name and CA.
12996 self._cds = _GetClusterDomainSecret()
12998 def Exec(self, feedback_fn):
12999 """Prepares an instance for an export.
13002 instance = self.instance
# Only remote exports need preparation: an X509 key/certificate pair is
# created on the primary node, then signed with the cluster secret.
13004 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13005 salt = utils.GenerateSecret(8)
13007 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13008 result = self.rpc.call_x509_cert_create(instance.primary_node,
13009 constants.RIE_CERT_VALIDITY)
13010 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13012 (name, cert_pem) = result.payload
13014 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
# Returned structure: handshake message, HMAC-authenticated key name,
# and the signed CA certificate.
13018 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13019 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13021 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13027 class LUBackupExport(LogicalUnit):
13028 """Export an instance to an image in the cluster.
13031 HPATH = "instance-export"
13032 HTYPE = constants.HTYPE_INSTANCE
13035 def CheckArguments(self):
13036 """Check the arguments.
# Remote exports require both the X509 key name and the destination CA.
13039 self.x509_key_name = self.op.x509_key_name
13040 self.dest_x509_ca_pem = self.op.destination_x509_ca
13042 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13043 if not self.x509_key_name:
13044 raise errors.OpPrereqError("Missing X509 key name for encryption",
13045 errors.ECODE_INVAL)
13047 if not self.dest_x509_ca_pem:
13048 raise errors.OpPrereqError("Missing destination X509 CA",
13049 errors.ECODE_INVAL)
13051 def ExpandNames(self):
13052 self._ExpandAndLockInstance()
13054 # Lock all nodes for local exports
13055 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13056 # FIXME: lock only instance primary and destination node
13058 # Sad but true, for now we have do lock all nodes, as we don't know where
13059 # the previous export might be, and in this LU we search for it and
13060 # remove it from its current node. In the future we could fix this by:
13061 # - making a tasklet to search (share-lock all), then create the
13062 # new one, then one to remove, after
13063 # - removing the removal operation altogether
13064 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13066 def DeclareLocks(self, level):
13067 """Last minute lock declaration."""
13068 # All nodes are locked anyway, so nothing to do here.
13070 def BuildHooksEnv(self):
13071 """Build hooks env.
13073 This will run on the master, primary node and target node.
13077 "EXPORT_MODE": self.op.mode,
13078 "EXPORT_NODE": self.op.target_node,
13079 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13080 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13081 # TODO: Generic function for boolean env variables
13082 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13085 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13089 def BuildHooksNodes(self):
13090 """Build hooks nodes.
13093 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
# For local exports the destination node runs the hooks as well.
13095 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13096 nl.append(self.op.target_node)
13100 def CheckPrereq(self):
13101 """Check prerequisites.
13103 This checks that the instance and node names are valid.
13106 instance_name = self.op.instance_name
13108 self.instance = self.cfg.GetInstanceInfo(instance_name)
13109 assert self.instance is not None, \
13110 "Cannot retrieve locked instance %s" % self.op.instance_name
13111 _CheckNodeOnline(self, self.instance.primary_node)
# Refuse to remove a running instance without shutting it down first.
13113 if (self.op.remove_instance and
13114 self.instance.admin_state == constants.ADMINST_UP and
13115 not self.op.shutdown):
13116 raise errors.OpPrereqError("Can not remove instance without shutting it"
13119 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13120 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13121 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13122 assert self.dst_node is not None
13124 _CheckNodeOnline(self, self.dst_node.name)
13125 _CheckNodeNotDrained(self, self.dst_node.name)
13128 self.dest_disk_info = None
13129 self.dest_x509_ca = None
13131 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13132 self.dst_node = None
# For remote exports, target_node carries per-disk destination info and
# must therefore match the number of instance disks.
13134 if len(self.op.target_node) != len(self.instance.disks):
13135 raise errors.OpPrereqError(("Received destination information for %s"
13136 " disks, but instance %s has %s disks") %
13137 (len(self.op.target_node), instance_name,
13138 len(self.instance.disks)),
13139 errors.ECODE_INVAL)
13141 cds = _GetClusterDomainSecret()
13143 # Check X509 key name
# The key name is authenticated with an HMAC using the cluster secret.
13145 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13146 except (TypeError, ValueError), err:
13147 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13149 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13150 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13151 errors.ECODE_INVAL)
13153 # Load and verify CA
13155 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13156 except OpenSSL.crypto.Error, err:
13157 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13158 (err, ), errors.ECODE_INVAL)
13160 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13161 if errcode is not None:
13162 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13163 (msg, ), errors.ECODE_INVAL)
13165 self.dest_x509_ca = cert
13167 # Verify target information
# Each disk's destination tuple is validated against the cluster secret.
13169 for idx, disk_data in enumerate(self.op.target_node):
13171 (host, port, magic) = \
13172 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13173 except errors.GenericError, err:
13174 raise errors.OpPrereqError("Target info for disk %s: %s" %
13175 (idx, err), errors.ECODE_INVAL)
13177 disk_info.append((host, port, magic))
13179 assert len(disk_info) == len(self.op.target_node)
13180 self.dest_disk_info = disk_info
13183 raise errors.ProgrammerError("Unhandled export mode %r" %
13186 # instance disk type verification
13187 # TODO: Implement export support for file-based disks
13188 for disk in self.instance.disks:
13189 if disk.dev_type == constants.LD_FILE:
13190 raise errors.OpPrereqError("Export not supported for instances with"
13191 " file-based disks", errors.ECODE_INVAL)
13193 def _CleanupExports(self, feedback_fn):
13194 """Removes exports of current instance from all other nodes.
13196 If an instance in a cluster with nodes A..D was exported to node C, its
13197 exports will be removed from the nodes A, B and D.
13200 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13202 nodelist = self.cfg.GetNodeList()
13203 nodelist.remove(self.dst_node.name)
13205 # on one-node clusters nodelist will be empty after the removal
13206 # if we proceed the backup would be removed because OpBackupQuery
13207 # substitutes an empty list with the full cluster node list.
13208 iname = self.instance.name
13210 feedback_fn("Removing old exports for instance %s" % iname)
13211 exportlist = self.rpc.call_export_list(nodelist)
# Unreachable nodes are skipped; removal failures only produce warnings.
13212 for node in exportlist:
13213 if exportlist[node].fail_msg:
13215 if iname in exportlist[node].payload:
13216 msg = self.rpc.call_export_remove(node, iname).fail_msg
13218 self.LogWarning("Could not remove older export for instance %s"
13219 " on node %s: %s", iname, node, msg)
13221 def Exec(self, feedback_fn):
13222 """Export an instance to an image in the cluster.
13225 assert self.op.mode in constants.EXPORT_MODES
13227 instance = self.instance
13228 src_node = instance.primary_node
13230 if self.op.shutdown:
13231 # shutdown the instance, but not the disks
13232 feedback_fn("Shutting down instance %s" % instance.name)
13233 result = self.rpc.call_instance_shutdown(src_node, instance,
13234 self.op.shutdown_timeout)
13235 # TODO: Maybe ignore failures if ignore_remove_failures is set
13236 result.Raise("Could not shutdown instance %s on"
13237 " node %s" % (instance.name, src_node))
13239 # set the disks ID correctly since call_instance_start needs the
13240 # correct drbd minor to create the symlinks
13241 for disk in instance.disks:
13242 self.cfg.SetDiskID(disk, src_node)
# Disks of an administratively-down instance must be activated before
# snapshotting (and deactivated again afterwards).
13244 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13247 # Activate the instance disks if we'exporting a stopped instance
13248 feedback_fn("Activating disks for %s" % instance.name)
13249 _StartInstanceDisks(self, instance, None)
13252 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13255 helper.CreateSnapshots()
# Restart the instance if it was running and is supposed to keep running
# (i.e. it was shut down only for the snapshot and won't be removed).
13257 if (self.op.shutdown and
13258 instance.admin_state == constants.ADMINST_UP and
13259 not self.op.remove_instance):
13260 assert not activate_disks
13261 feedback_fn("Starting instance %s" % instance.name)
13262 result = self.rpc.call_instance_start(src_node,
13263 (instance, None, None), False)
13264 msg = result.fail_msg
13266 feedback_fn("Failed to start instance: %s" % msg)
13267 _ShutdownInstanceDisks(self, instance)
13268 raise errors.OpExecError("Could not start instance: %s" % msg)
13270 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13271 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13272 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13273 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13274 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13276 (key_name, _, _) = self.x509_key_name
13279 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13282 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13283 key_name, dest_ca_pem,
13288 # Check for backwards compatibility
13289 assert len(dresults) == len(instance.disks)
13290 assert compat.all(isinstance(i, bool) for i in dresults), \
13291 "Not all results are boolean: %r" % dresults
13295 feedback_fn("Deactivating disks for %s" % instance.name)
13296 _ShutdownInstanceDisks(self, instance)
# Collect all failure categories into one error message.
13298 if not (compat.all(dresults) and fin_resu):
13301 failures.append("export finalization")
13302 if not compat.all(dresults):
13303 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13305 failures.append("disk export: disk(s) %s" % fdsk)
13307 raise errors.OpExecError("Export failed, errors in %s" %
13308 utils.CommaJoin(failures))
13310 # At this point, the export was successful, we can cleanup/finish
13312 # Remove instance if requested
13313 if self.op.remove_instance:
13314 feedback_fn("Removing instance %s" % instance.name)
13315 _RemoveInstance(self, feedback_fn, instance,
13316 self.op.ignore_remove_failures)
# Old exports on other nodes are only cleaned up for local exports.
13318 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13319 self._CleanupExports(feedback_fn)
13321 return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  Queries all locked nodes for exports matching the instance name and
  removes any that are found.

  """
  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    # NOTE(review): listing gap — the original also sets an 'fqdn_warn'
    # flag here (it is read at the bottom of this method).
    if not instance_name:
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    for node in exportlist:
      msg = exportlist[node].fail_msg
      # NOTE(review): listing gap — an 'if msg:' guard (and likely a
      # 'continue') is missing around this warning.
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
      if instance_name in exportlist[node].payload:
        # NOTE(review): listing gap — 'found' is presumably set True here.
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        # NOTE(review): listing gap — an 'if msg:' guard is missing here.
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    # Warn the user when the lookup failed and nothing was removed: the
    # export of a deleted instance can only be matched by FQDN.
    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    # NOTE(review): listing gap — the original wraps this lookup in a
    # 'try:'/'except'/'else' so the raise below fires only when the
    # lookup succeeded (i.e. the name already exists).
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
    raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                               " node group (UUID: %s)" %
                               (self.op.group_name, existing_uuid),
                               errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
    # NOTE(review): listing gap — 'else:' line missing before this default.
      self.new_hv_state = None

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
    # NOTE(review): listing gap — 'else:' line missing before this default.
      self.new_disk_state = None

    if self.op.diskparams:
      # Make sure every disk template has an (at least empty) entry so
      # later lookups cannot fail with KeyError.
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
    # NOTE(review): listing gap — 'else:' line missing; cluster defaults
    # are used when no diskparams were given.
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams

    if self.op.ipolicy:
      cluster = self.cfg.GetClusterInfo()
      full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
      # NOTE(review): listing gap — 'try:' line missing here.
      objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
      except errors.ConfigurationError, err:
        raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    # NOTE(review): listing gap — 'return {' opener and '}' closer missing.
      "GROUP_NAME": self.op.group_name,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams,
                                  diskparams=self.op.diskparams,
                                  ipolicy=self.op.ipolicy,
                                  hv_state_static=self.new_hv_state,
                                  disk_state_static=self.new_disk_state)

    # check_uuid=False: the UUID was generated by us in ExpandNames
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      # NOTE(review): listing gap — closing brace of this dict is missing.

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    # Re-derive the expected group locks now that node locks are held;
    # a mismatch means nodes moved between groups in the meantime.
    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    # NOTE(review): listing gap — an 'if new_splits:' guard is missing here.
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
      # NOTE(review): listing gap — 'else:' branch header missing here.
        self.LogWarning("This operation will split the following instances: %s",

      if previous_splits:
        self.LogWarning("In addition, these already-split instances continue"
                        " to be split across groups: %s",
                        utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  # NOTE(review): listing gap — the '@staticmethod' decorator is missing
  # from this listing (the method takes no 'self').
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only keep assignments that actually move a node to a new group
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      # All nodes an instance's disks live on (primary + secondaries)
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        # NOTE(review): listing gap — a 'continue' is missing here.

      instance_nodes = InstanceNodes(inst)

      # Split *before* the change: nodes already span multiple groups
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      # Split *after* the change: evaluate groups with assignments applied
      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  # Query implementation for node groups; fields come from the generic
  # query infrastructure.
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    # NOTE(review): listing gap — the 'if not self.names:' guard is
    # missing; this branch selects all groups, name-sorted.
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    # NOTE(review): listing gap — 'else:' branch header and the
    # initialization of 'missing'/'self.wanted' are missing here.
      # Accept names to be either names or UUIDs.
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        # NOTE(review): listing gap — 'else:' line missing here.
          missing.append(name)

      # NOTE(review): listing gap — 'if missing:' guard missing here.
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    # NOTE(review): listing gap — original body (presumably 'pass') missing.

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      # NOTE(review): listing gap — 'node_to_group = {}' presumably
      # initialized here; it is filled below.

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
        node_to_group[node.name] = node.group

      # NOTE(review): listing gap — an 'if do_instances:' guard is
      # missing around this instance-mapping section.
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        # NOTE(review): listing gap — an 'if not do_nodes:' guard is
        # missing around this reset.
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData(self._cluster,
                                [self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  Thin wrapper delegating all work to a _GroupQuery instance.

  """
  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP

  def CheckArguments(self):
    # NOTE(review): listing gap — the 'all_changes = [' opener and its
    # first/last entries (and closing bracket) are missing here.
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.disk_state,

    # Reject a no-op modification request
    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      # NOTE(review): listing gap — dict closer missing here.

    # Instance locks are shared: we only read instances for ipolicy checks
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    cluster = self.cfg.GetClusterInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      # NOTE(review): listing gap — the remaining arguments/closer of this
      # call are missing from the listing.
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,

      new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
      inst_filter = lambda inst: inst.name in owned_instances
      instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
      # NOTE(review): listing gap — the assignment target (presumably
      # 'violations = \') and call closer are missing here.
        _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                      new_ipolicy, instances)

      # NOTE(review): listing gap — an 'if violations:' guard is missing.
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(violations))

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    # NOTE(review): listing gap — 'return {' opener and '}' closer missing.
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    # NOTE(review): listing gap — 'result = []' initialization missing;
    # each applied change is appended as a (name, value) pair below.
    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    # NOTE(review): listing gap — 'return result' missing at the end.
class LUGroupRemove(LogicalUnit):
  # Removes an (empty, non-last) node group from the cluster.
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP

  def ExpandNames(self):
    # This will raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      # NOTE(review): listing gap — dict closer missing here.

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    # NOTE(review): listing gap — an 'if group_nodes:' guard and part of
    # the message string are missing around this raise.
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    # NOTE(review): listing gap — 'return {' opener and '}' closer missing.
      "GROUP_NAME": self.op.group_name,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    # NOTE(review): listing gap — 'try:' line missing before this call.
    self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  # Renames an existing node group.
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      # NOTE(review): listing gap — dict closer missing here.

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    # NOTE(review): listing gap — the original wraps this lookup in a
    # 'try:'/'except'/'else' so the raise fires only on a name clash.
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, new_name_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    # NOTE(review): listing gap — 'return {' opener and '}' closer missing.
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    # NOTE(review): listing gap — 'run_nodes = [mn]' presumably
    # initialized here; the group's members are appended below.
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    # NOTE(review): listing gap — an 'if group is None:' guard is missing.
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  # Evacuates all instances off a node group, using the iallocator to
  # compute the per-instance change-group jobs.
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    # NOTE(review): listing gap — 'else:' line missing before this default.
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      # NOTE(review): listing gap — dict closer missing here.

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        # NOTE(review): listing gap — a 'for group_uuid in' generator
        # clause is missing inside this update call.
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                             self.cfg.GetInstanceNodeGroups(instance_name))
      # NOTE(review): listing gap — 'else:' line missing here.
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      # NOTE(review): listing gap — continuation arguments of this call
      # are missing from the listing.
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    # NOTE(review): listing gap — 'else:' line missing here.
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    # NOTE(review): listing gap — 'return {' opener and '}' closer missing.
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      # NOTE(review): listing gap — the node-group lock assignment is
      # missing here.

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Resolve the tag target object depending on the requested kind
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    # NOTE(review): listing gap — 'else:' line missing before this raise.
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  Returns (path, tag) pairs for every tag on any cluster object that
  matches the regular expression.

  """
  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    # NOTE(review): listing gap — 'try:' line missing before this compile.
    self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    # NOTE(review): listing gap — 'cfg = self.cfg' and 'results = []'
    # initializations are missing from this listing.
    tgts = [("/cluster", cfg.GetClusterInfo())]

    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())

    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    # NOTE(review): listing gap — 'return results' missing at the end.
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag(s) on the target object.

    """
    # NOTE(review): listing gap — a 'try:' line is missing before this loop.
    for tag in self.op.tags:
      self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    # NOTE(review): listing gap — an 'if diff_tags:' guard is missing
    # around this error path (raise only when some tags are absent).
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      # NOTE(review): listing gap — the single-run branch body and the
      # 'else:' line are missing here.
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        # NOTE(review): listing gap — the per-iteration '_TestDelay()'
        # call is missing from this listing.
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)

        # Send details to client

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)

      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
      # prereq-phase failures must surface as OpPrereqError
      errcls = errors.OpPrereqError
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,

  def CheckArguments(self):
    # Count invocations so Exec/ExpandNames can verify the LU life cycle
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
      # Report how many test messages have been sent
      self._Notify(False, constants.JQT_LOGMSG, idx + 1)

      raise errors.OpExecError("Opcode failure was requested")
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
      # _MODE_DATA maps each mode to (request-builder, key spec, result check)
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    # keydata is a list of (name, validation_fn) pairs for this mode
    keyset = [n for (n, _) in keydata]
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cluster_info = cfg.GetClusterInfo()
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      "ipolicy": cluster_info.ipolicy,
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data: only vm_capable nodes are interesting for the allocator
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
      "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    cluster = cfg.GetClusterInfo()
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      for ninfo in node_cfg.values())

    return node_results

  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      # dynamic data is only collected from reachable (not offline/drained)
      # nodes
      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            # ballooned-down memory is counted as free for the allocator
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
        # dynamic values take precedence; static (config) values are merged in
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    for iinfo, beinfo in i_list:
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
        # legacy field for bridged NICs, kept for older allocator scripts
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    # internally mirrored templates (e.g. DRBD) need two nodes
    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
      self.required_nodes = 1

      "disk_template": self.disk_template,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    # relocation only ever replaces the (single) secondary node
    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
      "instances": self.instances,
      "evac_mode": self.evac_mode,

  def _AddChangeGroup(self):
    """Get data for node-evacuate requests.

    """
      "instances": self.instances,
      "target_groups": self.target_groups,

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    @param fn: Mode-specific request builder (one of the _Add* methods)
    @param keydata: List of (name, validation_fn) pairs for the request

    """
    self._ComputeClusterData()

    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])

    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

    constants.IALLOCATOR_MODE_ALLOC:
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    @type name: string
    @param name: Name of the allocator script to run
    @param call_fn: RPC call to use; defaults to the iallocator runner

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatiblity in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      # the allocator may not move the instance to another node group
      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
        group_uuid = node2group[node]
        # Ignore unknown node
          group = groups[group_uuid]
          # Can't find group, let's use UUID
          group_name = group_uuid
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the director and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # the instance to be allocated must not exist yet
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # relocation targets are computed from the current secondaries
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # "out" direction actually runs the external script, so a name is needed
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       relocate_from=list(self.relocate_from),
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      # "in" direction only returns the generated input text
      result = ial.in_text
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
#: Query type implementations
#: (maps a query resource constant to the class implementing it; used by
#: _GetQueryImplementation)
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,

# Sanity check: every resource reachable via opcodes must have an
# implementation, and nothing more
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @raise errors.OpPrereqError: if the query type is unknown

  """
    return _QUERY_IMPL[name]
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)