4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
#: Size of DRBD meta block device
DRBD_META_SIZE = 128

# Admin-state groupings used by checks below (values come from constants)
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    assert "jobs" not in kwargs
    self.jobs = jobs
    self.other = kwargs
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # NOTE(review): the class-level attributes (e.g. HPATH, HTYPE, REQ_BGL)
  # appear to be elided from this excerpt -- confirm against the full source.

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    # NOTE(review): "self.op" is used below but never assigned in this
    # excerpt (a "self.op = op" line is presumably elided) -- confirm.
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias for listing locks owned by this LU
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging shortcuts, forwarded to the processor
    self.Log = processor.Log  # pylint: disable=C0103
    self.LogWarning = processor.LogWarning  # pylint: disable=C0103
    self.LogInfo = processor.LogInfo  # pylint: disable=C0103
    self.LogStep = processor.LogStep  # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    #
    #   self.needed_locks = {} # Exclusive LUs don't need locks.
    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        # NOTE(review): the per-tasklet "tl.CheckPrereq()" call and the
        # "else: raise NotImplementedError" fallback appear to be elided
        # from this excerpt -- confirm against the full source.

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        # NOTE(review): the "tl.Exec(feedback_fn)" call and the "else:"
        # guarding the raise below appear to be elided from this excerpt;
        # as written the raise is unconditional -- confirm.
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    # NOTE(review): the "return lu_result" statement appears to be elided
    # from this excerpt -- confirm against the full source.

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    # NOTE(review): an "else:" guarding the assert below appears to be
    # elided from this excerpt -- as written the assert always runs.
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    # NOTE(review): the "wanted_nodes = []" initialisation and the
    # "if not primary_only:" guard around the extend() below appear to be
    # elided from this excerpt -- confirm against the full source.
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    # NOTE(review): an "else:" appears to be elided before the raise below;
    # as written the raise is unconditional -- confirm.
    raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # NOTE(review): the HPATH/HTYPE overrides appear to be elided from this
  # excerpt -- confirm against the full source.

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Constructor for Tasklet.

    @type lu: L{LogicalUnit}
    @param lu: the logical unit owning this tasklet

    """
    self.lu = lu

    # Shortcuts to the owning LU's configuration and RPC runner
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklets.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
496 def __init__(self, qfilter, fields, use_locking):
497 """Initializes this class.
500 self.use_locking = use_locking
502 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
504 self.requested_data = self.query.RequestedData()
505 self.names = self.query.RequestedNames()
507 # Sort only if no names were requested
508 self.sort_by_name = not self.names
510 self.do_locking = None
513 def _GetNames(self, lu, all_names, lock_level):
514 """Helper function to determine names asked for in the query.
518 names = lu.owned_locks(lock_level)
522 if self.wanted == locking.ALL_SET:
523 assert not self.names
524 # caller didn't specify names, so ordering is not important
525 return utils.NiceSort(names)
527 # caller specified names and we must keep the same order
529 assert not self.do_locking or lu.glm.is_owned(lock_level)
531 missing = set(self.wanted).difference(names)
533 raise errors.OpExecError("Some items were removed before retrieving"
534 " their data: %s" % missing)
536 # Return expanded names
  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()
  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()
  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()
  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)
  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)
def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    # Fixed duplicated "are are" in the original message
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  ndparams = cfg.GetNdParams(node)
  return ndparams[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
700 def _GetUpdatedParams(old_params, update_dict,
701 use_default=True, use_none=False):
702 """Return the new version of a parameter dictionary.
704 @type old_params: dict
705 @param old_params: old parameters
706 @type update_dict: dict
707 @param update_dict: dict containing new parameter values, or
708 constants.VALUE_DEFAULT to reset the parameter to its default
710 @param use_default: boolean
711 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
712 values as 'to be deleted' values
713 @param use_none: boolean
714 @type use_none: whether to recognise C{None} values as 'to be
717 @return: the new parameter dictionary
720 params_copy = copy.deepcopy(old_params)
721 for key, val in update_dict.iteritems():
722 if ((use_default and val == constants.VALUE_DEFAULT) or
723 (use_none and val is None)):
729 params_copy[key] = val
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of a instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  # Group policies may remove entries; cluster policies may not
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    elif not value or value == [constants.VALUE_DEFAULT]:
      if group_policy:
        del ipolicy[key]
      else:
        # Fixed stray trailing apostrophe in the original message
        raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                   " on the cluster" % key,
                                   errors.ECODE_INVAL)
    elif key in constants.IPOLICY_PARAMETERS:
      # FIXME: we assume all such values are float
      try:
        ipolicy[key] = float(value)
      except (TypeError, ValueError) as err:
        raise errors.OpPrereqError("Invalid value for attribute"
                                   " '%s': '%s', error: %s" %
                                   (key, value, err), errors.ECODE_INVAL)
    else:
      # FIXME: we assume all others are lists; this should be redone
      ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy)
  except errors.ConfigurationError as err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    # Merge one sub-dict and verify its parameter types
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None
845 def _ReleaseLocks(lu, level, names=None, keep=None):
846 """Releases locks owned by an LU.
848 @type lu: L{LogicalUnit}
849 @param level: Lock level
850 @type names: list or None
851 @param names: Names of locks to release
852 @type keep: list or None
853 @param keep: Names of locks to retain
856 assert not (keep is not None and names is not None), \
857 "Only one of the 'names' and the 'keep' parameters can be given"
859 if names is not None:
860 should_release = names.__contains__
862 should_release = lambda name: name not in keep
864 should_release = None
866 owned = lu.owned_locks(level)
868 # Not owning any lock at this level, do nothing
875 # Determine which locks to release
877 if should_release(name):
882 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
884 # Release just some locks
885 lu.glm.release(level, names=release)
887 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
890 lu.glm.release(level)
892 assert not lu.glm.is_owned(level), "No locks should be owned"
895 def _MapInstanceDisksToNodes(instances):
896 """Creates a map from (node, volume) to instance name.
898 @type instances: list of L{objects.Instance}
899 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
902 return dict(((node, vol), inst.name)
903 for inst in instances
904 for (node, vols) in inst.MapLVsByNode().items()
908 def _RunPostHook(lu, node_name):
909 """Runs the post-hook for an opcode on a single node.
912 hm = lu.proc.BuildHooksManager(lu)
914 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
916 # pylint: disable=W0702
917 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @raise errors.OpPrereqError: if any selected field is unknown

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  @raise errors.OpPrereqError: if any global hypervisor parameter is used

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
954 def _CheckNodeOnline(lu, node, msg=None):
955 """Ensure that a given node is online.
957 @param lu: the LU on behalf of which we make the check
958 @param node: the node to check
959 @param msg: if passed, should be a message to replace the default one
960 @raise errors.OpPrereqError: if the node is offline
964 msg = "Can't use offline node"
965 if lu.cfg.GetNodeInfo(node).offline:
966 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
969 def _CheckNodeNotDrained(lu, node):
970 """Ensure that a given node is not drained.
972 @param lu: the LU on behalf of which we make the check
973 @param node: the node to check
974 @raise errors.OpPrereqError: if the node is drained
977 if lu.cfg.GetNodeInfo(node).drained:
978 raise errors.OpPrereqError("Can't use drained node %s" % node,
982 def _CheckNodeVmCapable(lu, node):
983 """Ensure that a given node is vm capable.
985 @param lu: the LU on behalf of which we make the check
986 @param node: the node to check
987 @raise errors.OpPrereqError: if the node is not vm capable
990 if not lu.cfg.GetNodeInfo(node).vm_capable:
991 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  # strict=True: refuse multi-line or empty secret files
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    # The instance must not be running; verify with its primary node
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)
def _ComputeMinMaxSpec(name, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  # "auto"/unset values are never a violation
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  # Keep only actual violation messages (drop the None results)
  return filter(None,
                (_compute_fn(name, ipolicy, value)
                 for (name, value) in test_settings))
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # Extract the figures the spec check needs from the instance object
  beparams = instance.beparams
  disks = instance.disks
  return _compute_fn(ipolicy,
                     beparams.get(constants.BE_MAXMEM, None),
                     beparams.get(constants.BE_VCPUS, None),
                     len(disks),
                     len(instance.nics),
                     [disk.size for disk in disks])
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # Counts default to 0 / [] so a partial spec can still be checked
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # Staying in the same group can never introduce new violations
  if current_group == target_group:
    return []
  return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if not res:
    return
  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(res))
  if ignore:
    # Caller asked to only warn about violations
    lu.LogWarning(msg)
  else:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but did not before

  """
  # NEW violators minus OLD violators: the previous operand order returned
  # the inverse set (instances that violated the old policy but not the new)
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
1230 def _ExpandItemName(fn, name, kind):
1231 """Expand an item name.
1233 @param fn: the function to use for expansion
1234 @param name: requested item name
1235 @param kind: text description ('Node' or 'Instance')
1236 @return: the resolved (full) name
1237 @raise errors.OpPrereqError: if the item is not found
1240 full_name = fn(name)
1241 if full_name is None:
1242 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Resolve a (possibly short) node name via L{_ExpandItemName}."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Resolve a (possibly short) instance name via L{_ExpandItemName}."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1257 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1258 minmem, maxmem, vcpus, nics, disk_template, disks,
1259 bep, hvp, hypervisor_name, tags):
1260 """Builds instance related env variables for hooks
1262 This builds the hook environment from individual variables.
1265 @param name: the name of the instance
1266 @type primary_node: string
1267 @param primary_node: the name of the instance's primary node
1268 @type secondary_nodes: list
1269 @param secondary_nodes: list of secondary nodes as strings
1270 @type os_type: string
1271 @param os_type: the name of the instance's OS
1272 @type status: string
1273 @param status: the desired status of the instance
1274 @type minmem: string
1275 @param minmem: the minimum memory size of the instance
1276 @type maxmem: string
1277 @param maxmem: the maximum memory size of the instance
1279 @param vcpus: the count of VCPUs the instance has
1281 @param nics: list of tuples (ip, mac, mode, link) representing
1282 the NICs the instance has
1283 @type disk_template: string
1284 @param disk_template: the disk template of the instance
1286 @param disks: the list of (size, mode) pairs
1288 @param bep: the backend parameters for the instance
1290 @param hvp: the hypervisor parameters for the instance
1291 @type hypervisor_name: string
1292 @param hypervisor_name: the hypervisor for the instance
1294 @param tags: list of instance tags as strings
1296 @return: the hook environment for this instance
1301 "INSTANCE_NAME": name,
1302 "INSTANCE_PRIMARY": primary_node,
1303 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1304 "INSTANCE_OS_TYPE": os_type,
1305 "INSTANCE_STATUS": status,
1306 "INSTANCE_MINMEM": minmem,
1307 "INSTANCE_MAXMEM": maxmem,
1308 # TODO(2.7) remove deprecated "memory" value
1309 "INSTANCE_MEMORY": maxmem,
1310 "INSTANCE_VCPUS": vcpus,
1311 "INSTANCE_DISK_TEMPLATE": disk_template,
1312 "INSTANCE_HYPERVISOR": hypervisor_name,
1315 nic_count = len(nics)
1316 for idx, (ip, mac, mode, link) in enumerate(nics):
1319 env["INSTANCE_NIC%d_IP" % idx] = ip
1320 env["INSTANCE_NIC%d_MAC" % idx] = mac
1321 env["INSTANCE_NIC%d_MODE" % idx] = mode
1322 env["INSTANCE_NIC%d_LINK" % idx] = link
1323 if mode == constants.NIC_MODE_BRIDGED:
1324 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1328 env["INSTANCE_NIC_COUNT"] = nic_count
1331 disk_count = len(disks)
1332 for idx, (size, mode) in enumerate(disks):
1333 env["INSTANCE_DISK%d_SIZE" % idx] = size
1334 env["INSTANCE_DISK%d_MODE" % idx] = mode
1338 env["INSTANCE_DISK_COUNT"] = disk_count
1343 env["INSTANCE_TAGS"] = " ".join(tags)
1345 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1346 for key, value in source.items():
1347 env["INSTANCE_%s_%s" % (kind, key)] = value
1352 def _NICListToTuple(lu, nics):
1353 """Build a list of nic information tuples.
1355 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1356 value in LUInstanceQueryData.
1358 @type lu: L{LogicalUnit}
1359 @param lu: the logical unit on whose behalf we execute
1360 @type nics: list of L{objects.NIC}
1361 @param nics: list of nics to convert to hooks tuples
1365 cluster = lu.cfg.GetClusterInfo()
1369 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1370 mode = filled_params[constants.NIC_MODE]
1371 link = filled_params[constants.NIC_LINK]
1372 hooks_nics.append((ip, mac, mode, link))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)

  # Collect the keyword arguments for _BuildInstanceHookEnv
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(d.size, d.mode) for d in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1416 def _AdjustCandidatePool(lu, exceptions):
1417 """Adjust the candidate pool after node operations.
1420 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1422 lu.LogInfo("Promoted nodes to master candidate role: %s",
1423 utils.CommaJoin(node.name for node in mod_list))
1424 for name in mod_list:
1425 lu.context.ReaddNode(name)
1426 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1428 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1432 def _DecideSelfPromotion(lu, exceptions=None):
1433 """Decide whether I should promote myself as a master candidate.
1436 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1437 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1438 # the new node will increase mc_max with one, so:
1439 mc_should = min(mc_should + 1, cp_size)
1440 return mc_now < mc_should
1443 def _CalculateGroupIPolicy(cluster, group):
1444 """Calculate instance policy for group.
1447 return cluster.SimpleFillIPolicy(group.ipolicy)
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: iterable of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset(inst.name for inst in instances
                   if _ComputeIPolicyInstanceViolation(ipolicy, inst))
1463 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1464 """Check that the brigdes needed by a list of nics exist.
1467 cluster = lu.cfg.GetClusterInfo()
1468 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1469 brlist = [params[constants.NIC_LINK] for params in paramslist
1470 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1472 result = lu.rpc.call_bridges_exist(target_node, brlist)
1473 result.Raise("Error checking bridges on destination node '%s'" %
1474 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the brigdes needed by an instance exist.

  Defaults to the instance's primary node when no node is given.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)

  if not os_obj.supported_variants:
    # OS without variants: passing one is an error
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return

  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1510 def _GetNodeInstancesInner(cfg, fn):
1511 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  def _uses_node(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _uses_node)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  def _is_primary(inst):
    return inst.primary_node == node_name

  return _GetNodeInstancesInner(cfg, _is_primary)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  def _is_secondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _is_secondary)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  if storage_type == constants.ST_FILE:
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Return the indices of an instance's disks reported faulty on a node.

  """
  faulty = []

  # Attach the node-specific disk IDs before querying the node
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
1567 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1568 """Check the sanity of iallocator and node arguments and use the
1569 cluster-wide iallocator if appropriate.
1571 Check that at most one of (iallocator, node) is specified. If none is
1572 specified, then the LU's opcode's iallocator slot is filled with the
1573 cluster-wide default iallocator.
1575 @type iallocator_slot: string
1576 @param iallocator_slot: the name of the opcode iallocator slot
1577 @type node_slot: string
1578 @param node_slot: the name of the opcode target node slot
1581 node = getattr(lu.op, node_slot, None)
1582 iallocator = getattr(lu.op, iallocator_slot, None)
1584 if node is not None and iallocator is not None:
1585 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1587 elif node is None and iallocator is None:
1588 default_iallocator = lu.cfg.GetDefaultIAllocator()
1589 if default_iallocator:
1590 setattr(lu.op, iallocator_slot, default_iallocator)
1592 raise errors.OpPrereqError("No iallocator or node given and no"
1593 " cluster-wide default iallocator found;"
1594 " please specify either an iallocator or a"
1595 " node, or set a cluster-wide default"
1599 def _GetDefaultIAllocator(cfg, iallocator):
1600 """Decides on which iallocator to use.
1602 @type cfg: L{config.ConfigWriter}
1603 @param cfg: Cluster configuration object
1604 @type iallocator: string or None
1605 @param iallocator: Iallocator specified in opcode
1607 @return: Iallocator name
1611 # Use default iallocator
1612 iallocator = cfg.GetDefaultIAllocator()
1615 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1616 " opcode nor as a cluster-wide default",
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Hooks run only on the master node
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    # Only the master node may remain
    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      # Best effort: only warn, the cluster is going away anyway
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception as err: # pylint: disable=W0703
    # Unreadable/unparseable certificate is reported, not raised
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1744 def _GetAllHypervisorParameters(cluster, instances):
1745 """Compute the set of all hypervisor parameters.
1747 @type cluster: L{objects.Cluster}
1748 @param cluster: the cluster object
1749 @param instances: list of L{objects.Instance}
1750 @param instances: additional instances from which to obtain parameters
1751 @rtype: list of (origin, hypervisor, parameters)
1752 @return: a list with all parameters found, indicating the hypervisor they
1753 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1758 for hv_name in cluster.enabled_hypervisors:
1759 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1761 for os_name, os_hvp in cluster.os_hvp.items():
1762 for hv_name, hv_params in os_hvp.items():
1764 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1765 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1767 # TODO: collapse identical parameter values in a single one
1768 for instance in instances:
1769 if instance.hvparams:
1770 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1771 cluster.FillHV(instance)))
1776 class _VerifyErrors(object):
1777 """Mix-in for cluster/group verify LUs.
1779 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1780 self.op and self._feedback_fn to be available.)
1784 ETYPE_FIELD = "code"
1785 ETYPE_ERROR = "ERROR"
1786 ETYPE_WARNING = "WARNING"
1788 def _Error(self, ecode, item, msg, *args, **kwargs):
1789 """Format an error message.
1791 Based on the opcode's error_codes parameter, either format a
1792 parseable error code, or a simpler error string.
1794 This must be called only from Exec and functions called from Exec.
1797 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1798 itype, etxt, _ = ecode
1799 # first complete the msg
1802 # then format the whole message
1803 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1804 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1810 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1811 # and finally report it via the feedback_fn
1812 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1814 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1815 """Log an error message if the passed condition is True.
1819 or self.op.debug_simulate_errors) # pylint: disable=E1101
1821 # If the error code is in the list of ignored errors, demote the error to a
1823 (_, etxt, _) = ecode
1824 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1825 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1828 self._Error(ecode, *args, **kwargs)
1830 # do not mark the operation as failed for WARN cases only
1831 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1832 self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      # Verify only the requested group
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # Only the group-verify opcode is expected to lack this slot
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError as err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
1969 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1970 """Verifies the status of a node group.
1973 HPATH = "cluster-verify"
1974 HTYPE = constants.HTYPE_CLUSTER
1977 _HOOKS_INDENT_RE = re.compile("^", re.M)
1979 class NodeImage(object):
1980 """A class representing the logical and physical status of a node.
1983 @ivar name: the node name to which this object refers
1984 @ivar volumes: a structure as returned from
1985 L{ganeti.backend.GetVolumeList} (runtime)
1986 @ivar instances: a list of running instances (runtime)
1987 @ivar pinst: list of configured primary instances (config)
1988 @ivar sinst: list of configured secondary instances (config)
1989 @ivar sbp: dictionary of {primary-node: list of instances} for all
1990 instances for which this node is secondary (config)
1991 @ivar mfree: free memory, as reported by hypervisor (runtime)
1992 @ivar dfree: free disk, as reported by the node (runtime)
1993 @ivar offline: the offline status (config)
1994 @type rpc_fail: boolean
1995 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1996 not whether the individual keys were correct) (runtime)
1997 @type lvm_fail: boolean
1998 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1999 @type hyp_fail: boolean
2000 @ivar hyp_fail: whether the RPC call didn't return the instance list
2001 @type ghost: boolean
2002 @ivar ghost: whether this is a known node or not (config)
2003 @type os_fail: boolean
2004 @ivar os_fail: whether the RPC call didn't return valid OS data
2006 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2007 @type vm_capable: boolean
2008 @ivar vm_capable: whether the node can host instances
2011 def __init__(self, offline=False, name=None, vm_capable=True):
2020 self.offline = offline
2021 self.vm_capable = vm_capable
2022 self.rpc_fail = False
2023 self.lvm_fail = False
2024 self.hyp_fail = False
2026 self.os_fail = False
2029 def ExpandNames(self):
2030 # This raises errors.OpPrereqError on its own:
2031 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2033 # Get instances in node group; this is unsafe and needs verification later
2035 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2037 self.needed_locks = {
2038 locking.LEVEL_INSTANCE: inst_names,
2039 locking.LEVEL_NODEGROUP: [self.group_uuid],
2040 locking.LEVEL_NODE: [],
2043 self.share_locks = _ShareAll()
2045 def DeclareLocks(self, level):
2046 if level == locking.LEVEL_NODE:
2047 # Get members of node group; this is unsafe and needs verification later
2048 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2050 all_inst_info = self.cfg.GetAllInstancesInfo()
2052 # In Exec(), we warn about mirrored instances that have primary and
2053 # secondary living in separate node groups. To fully verify that
2054 # volumes for these instances are healthy, we will need to do an
2055 # extra call to their secondaries. We ensure here those nodes will
2057 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2058 # Important: access only the instances whose lock is owned
2059 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2060 nodes.update(all_inst_info[inst].secondary_nodes)
2062 self.needed_locks[locking.LEVEL_NODE] = nodes
2064 def CheckPrereq(self):
2065 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2066 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2068 group_nodes = set(self.group_info.members)
2070 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2073 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2075 unlocked_instances = \
2076 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2079 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2080 utils.CommaJoin(unlocked_nodes),
2083 if unlocked_instances:
2084 raise errors.OpPrereqError("Missing lock for instances: %s" %
2085 utils.CommaJoin(unlocked_instances),
2088 self.all_node_info = self.cfg.GetAllNodesInfo()
2089 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2091 self.my_node_names = utils.NiceSort(group_nodes)
2092 self.my_inst_names = utils.NiceSort(group_instances)
2094 self.my_node_info = dict((name, self.all_node_info[name])
2095 for name in self.my_node_names)
2097 self.my_inst_info = dict((name, self.all_inst_info[name])
2098 for name in self.my_inst_names)
2100 # We detect here the nodes that will need the extra RPC calls for verifying
2101 # split LV volumes; they should be locked.
2102 extra_lv_nodes = set()
2104 for inst in self.my_inst_info.values():
2105 if inst.disk_template in constants.DTS_INT_MIRROR:
2106 for nname in inst.all_nodes:
2107 if self.all_node_info[nname].group != self.group_uuid:
2108 extra_lv_nodes.add(nname)
2110 unlocked_lv_nodes = \
2111 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2113 if unlocked_lv_nodes:
2114 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2115 utils.CommaJoin(unlocked_lv_nodes),
2117 self.extra_lv_nodes = list(extra_lv_nodes)
2119 def _VerifyNode(self, ninfo, nresult):
2120 """Perform some basic validation on data returned from a node.
2122 - check the result data structure is well formed and has all the
2124 - check ganeti version
2126 @type ninfo: L{objects.Node}
2127 @param ninfo: the node to check
2128 @param nresult: the results from the node
2130 @return: whether overall this call was successful (and we can expect
2131 reasonable values in the respose)
# NOTE(review): 'node' used below is bound on a line elided from this view
# (presumably node = ninfo.name) — confirm against the full source.
2135 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2137 # main result, nresult should be a non-empty dict
2138 test = not nresult or not isinstance(nresult, dict)
2139 _ErrorIf(test, constants.CV_ENODERPC, node,
2140 "unable to verify node: no data returned")
2144 # compares ganeti version
2145 local_version = constants.PROTOCOL_VERSION
2146 remote_version = nresult.get("version", None)
# The node must return a (protocol_version, release_version) pair.
2147 test = not (remote_version and
2148 isinstance(remote_version, (list, tuple)) and
2149 len(remote_version) == 2)
2150 _ErrorIf(test, constants.CV_ENODERPC, node,
2151 "connection to node returned invalid data")
# Protocol version mismatch is a hard error; release mismatch below is
# only a warning (code=self.ETYPE_WARNING).
2155 test = local_version != remote_version[0]
2156 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2157 "incompatible protocol versions: master %s,"
2158 " node %s", local_version, remote_version[0])
2162 # node seems compatible, we can actually try to look into its results
2164 # full package version
2165 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2166 constants.CV_ENODEVERSION, node,
2167 "software version mismatch: master %s, node %s",
2168 constants.RELEASE_VERSION, remote_version[1],
2169 code=self.ETYPE_WARNING)
# Per-hypervisor verify: a non-None entry carries the failure message.
2171 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2172 if ninfo.vm_capable and isinstance(hyp_result, dict):
2173 for hv_name, hv_result in hyp_result.iteritems():
2174 test = hv_result is not None
2175 _ErrorIf(test, constants.CV_ENODEHV, node,
2176 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
# Hypervisor-parameter verification: every entry in the list is a failure.
2178 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2179 if ninfo.vm_capable and isinstance(hvp_result, list):
2180 for item, hv_name, hv_result in hvp_result:
2181 _ErrorIf(True, constants.CV_ENODEHV, node,
2182 "hypervisor %s parameter verify failure (source %s): %s",
2183 hv_name, item, hv_result)
2185 test = nresult.get(constants.NV_NODESETUP,
2186 ["Missing NODESETUP results"])
2187 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2192 def _VerifyNodeTime(self, ninfo, nresult,
2193 nvinfo_starttime, nvinfo_endtime):
2194 """Check the node time.
2196 @type ninfo: L{objects.Node}
2197 @param ninfo: the node to check
2198 @param nresult: the remote results for the node
2199 @param nvinfo_starttime: the start time of the RPC call
2200 @param nvinfo_endtime: the end time of the RPC call
2204 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2206 ntime = nresult.get(constants.NV_TIME, None)
# NOTE(review): the 'try:' opening this block is elided from this view;
# MergeTime failures are reported as an invalid-time error below.
2208 ntime_merged = utils.MergeTime(ntime)
2209 except (ValueError, TypeError):
2210 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
# A node clock outside [start - skew, end + skew] counts as divergent;
# the diff reported is a lower bound (RPC duration is unknown slack).
2213 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2214 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2215 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2216 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2220 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2221 "Node time diverges by at least %s from master node time",
2224 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2225 """Check the node LVM results.
2227 @type ninfo: L{objects.Node}
2228 @param ninfo: the node to check
2229 @param nresult: the remote results for the node
2230 @param vg_name: the configured VG name
2237 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2239 # checks vg existence and size > 20G
2240 vglist = nresult.get(constants.NV_VGLIST, None)
# NOTE(review): the computation of 'test' for vglist is on an elided line
# (presumably test = not vglist) — confirm against the full source.
2242 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
# CheckVolumeGroupSize returns an error string on failure, None/empty when OK.
2244 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2245 constants.MIN_VG_SIZE)
2246 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2249 pvlist = nresult.get(constants.NV_PVLIST, None)
2250 test = pvlist is None
2251 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2253 # check that ':' is not present in PV names, since it's a
2254 # special character for lvcreate (denotes the range of PEs to
# Each pvlist entry is (size, pv_name, owning_vg); only the name is checked.
2256 for _, pvname, owner_vg in pvlist:
2257 test = ":" in pvname
2258 _ErrorIf(test, constants.CV_ENODELVM, node,
2259 "Invalid character ':' in PV '%s' of VG '%s'",
2262 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2263 """Check the node bridges.
2265 @type ninfo: L{objects.Node}
2266 @param ninfo: the node to check
2267 @param nresult: the remote results for the node
2268 @param bridges: the expected list of bridges
2275 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# 'missing' is the list of expected bridges NOT present on the node.
2277 missing = nresult.get(constants.NV_BRIDGES, None)
2278 test = not isinstance(missing, list)
2279 _ErrorIf(test, constants.CV_ENODENET, node,
2280 "did not return valid bridge information")
# Any non-empty 'missing' list is an error; names are sorted for stable output.
2282 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2283 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2285 def _VerifyNodeUserScripts(self, ninfo, nresult):
2286 """Check the results of user scripts presence and executability on the node
2288 @type ninfo: L{objects.Node}
2289 @param ninfo: the node to check
2290 @param nresult: the remote results for the node
# First make sure the node answered the user-scripts query at all ...
2295 test = not constants.NV_USERSCRIPTS in nresult
2296 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2297 "did not return user scripts information")
# ... then report any scripts the node flagged as missing/non-executable.
2299 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2301 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2302 "user scripts not present or not executable: %s" %
2303 utils.CommaJoin(sorted(broken_scripts)))
2305 def _VerifyNodeNetwork(self, ninfo, nresult):
2306 """Check the node network connectivity results.
2308 @type ninfo: L{objects.Node}
2309 @param ninfo: the node to check
2310 @param nresult: the remote results for the node
2314 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# SSH connectivity: NV_NODELIST maps peer node name -> failure message,
# so every entry present is an error.
2316 test = constants.NV_NODELIST not in nresult
2317 _ErrorIf(test, constants.CV_ENODESSH, node,
2318 "node hasn't returned node ssh connectivity data")
2320 if nresult[constants.NV_NODELIST]:
2321 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2322 _ErrorIf(True, constants.CV_ENODESSH, node,
2323 "ssh communication with node '%s': %s", a_node, a_msg)
# TCP connectivity: same failure-map convention as above.
2325 test = constants.NV_NODENETTEST not in nresult
2326 _ErrorIf(test, constants.CV_ENODENET, node,
2327 "node hasn't returned node tcp connectivity data")
2329 if nresult[constants.NV_NODENETTEST]:
2330 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2332 _ErrorIf(True, constants.CV_ENODENET, node,
2333 "tcp communication with node '%s': %s",
2334 anode, nresult[constants.NV_NODENETTEST][anode])
# Master IP reachability: falsy result means the node could not reach it;
# the wording differs for the master node itself (IP likely unconfigured).
2336 test = constants.NV_MASTERIP not in nresult
2337 _ErrorIf(test, constants.CV_ENODENET, node,
2338 "node hasn't returned node master IP reachability data")
2340 if not nresult[constants.NV_MASTERIP]:
2341 if node == self.master_node:
2342 msg = "the master node cannot reach the master IP (not configured?)"
2344 msg = "cannot reach the master IP"
2345 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2347 def _VerifyInstance(self, instance, instanceconfig, node_image,
2349 """Verify an instance.
2351 This function checks to see if the required block devices are
2352 available on the instance's node.
2355 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2356 node_current = instanceconfig.primary_node
# Expected LVs per node, as derived from the instance's disk config.
2358 node_vol_should = {}
2359 instanceconfig.MapLVsByNode(node_vol_should)
# Instance-policy violations (ipolicy of this node group) are reported too.
2361 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2362 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2363 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2365 for node in node_vol_should:
2366 n_img = node_image[node]
2367 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2368 # ignore missing volumes on offline or broken nodes
2370 for volume in node_vol_should[node]:
2371 test = volume not in n_img.volumes
2372 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2373 "volume %s missing on node %s", volume, node)
# An admin-up instance must actually be running on its primary node
# (unless that node is offline, in which case we cannot tell).
2375 if instanceconfig.admin_state == constants.ADMINST_UP:
2376 pri_img = node_image[node_current]
2377 test = instance not in pri_img.instances and not pri_img.offline
2378 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2379 "instance not running on its primary node %s",
# Flatten diskstatus {node: [(success, status), ...]} into per-disk tuples.
2382 diskdata = [(nname, success, status, idx)
2383 for (nname, disks) in diskstatus.items()
2384 for idx, (success, status) in enumerate(disks)]
2386 for nname, success, bdev_status, idx in diskdata:
2387 # the 'ghost node' construction in Exec() ensures that we have a
# Ghost/offline nodes cannot report disk status, so skip them for the
# "couldn't retrieve status" error.
2389 snode = node_image[nname]
2390 bad_snode = snode.ghost or snode.offline
2391 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2392 not success and not bad_snode,
2393 constants.CV_EINSTANCEFAULTYDISK, instance,
2394 "couldn't retrieve status for disk/%s on %s: %s",
2395 idx, nname, bdev_status)
2396 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2397 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2398 constants.CV_EINSTANCEFAULTYDISK, instance,
2399 "disk/%s on %s is faulty", idx, nname)
2401 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2402 """Verify if there are any unknown volumes in the cluster.
2404 The .os, .swap and backup volumes are ignored. All other volumes are
2405 reported as unknown.
2407 @type reserved: L{ganeti.utils.FieldSet}
2408 @param reserved: a FieldSet of reserved volume names
2411 for node, n_img in node_image.items():
2412 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2413 self.all_node_info[node].group != self.group_uuid):
2414 # skip non-healthy nodes
# A volume is "orphan" when it is neither expected on this node
# (node_vol_should) nor matched by the reserved-names FieldSet.
2416 for volume in n_img.volumes:
2417 test = ((node not in node_vol_should or
2418 volume not in node_vol_should[node]) and
2419 not reserved.Matches(volume))
2420 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2421 "volume %s is unknown", volume)
2423 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2424 """Verify N+1 Memory Resilience.
2426 Check that if one single node dies we can still start all the
2427 instances it was primary for.
2430 cluster_info = self.cfg.GetClusterInfo()
2431 for node, n_img in node_image.items():
2432 # This code checks that every node which is now listed as
2433 # secondary has enough memory to host all instances it is
2434 # supposed to should a single other node in the cluster fail.
2435 # FIXME: not ready for failover to an arbitrary node
2436 # FIXME: does not support file-backed instances
2437 # WARNING: we currently take into account down instances as well
2438 # as up ones, considering that even if they're down someone
2439 # might want to start them even in the event of a node failure.
2440 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2441 # we're skipping nodes marked offline and nodes in other groups from
2442 # the N+1 warning, since most likely we don't have good memory
2443 # infromation from them; we already list instances living on such
2444 # nodes, and that's enough warning
2446 #TODO(dynmem): also consider ballooning out other instances
# n_img.sbp maps primary node -> instances this node is secondary for;
# sum the minimum memory of auto-balanced instances per failing primary.
# NOTE(review): the per-prinode reset of needed_mem is on an elided line.
2447 for prinode, instances in n_img.sbp.items():
2449 for instance in instances:
2450 bep = cluster_info.FillBE(instance_cfg[instance])
2451 if bep[constants.BE_AUTO_BALANCE]:
2452 needed_mem += bep[constants.BE_MINMEM]
2453 test = n_img.mfree < needed_mem
2454 self._ErrorIf(test, constants.CV_ENODEN1, node,
2455 "not enough memory to accomodate instance failovers"
2456 " should node %s fail (%dMiB needed, %dMiB available)",
2457 prinode, needed_mem, n_img.mfree)
# NOTE(review): the @classmethod decorator for this method is on an
# elided line in this view.
2460 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2461 (files_all, files_opt, files_mc, files_vm)):
2462 """Verifies file checksums collected from all nodes.
2464 @param errorif: Callback for reporting errors
2465 @param nodeinfo: List of L{objects.Node} objects
2466 @param master_node: Name of master node
2467 @param all_nvinfo: RPC results
2470 # Define functions determining which nodes to consider for a file
2473 (files_mc, lambda node: (node.master_candidate or
2474 node.name == master_node)),
2475 (files_vm, lambda node: node.vm_capable),
2478 # Build mapping from filename to list of nodes which should have the file
2480 for (files, fn) in files2nodefn:
2482 filenodes = nodeinfo
2484 filenodes = filter(fn, nodeinfo)
2485 nodefiles.update((filename,
2486 frozenset(map(operator.attrgetter("name"), filenodes)))
2487 for filename in files)
2489 assert set(nodefiles) == (files_all | files_mc | files_vm)
2491 fileinfo = dict((filename, {}) for filename in nodefiles)
# Nodes we could not query are excluded from missing/unexpected checks.
2492 ignore_nodes = set()
2494 for node in nodeinfo:
2496 ignore_nodes.add(node.name)
2499 nresult = all_nvinfo[node.name]
2501 if nresult.fail_msg or not nresult.payload:
2504 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2506 test = not (node_files and isinstance(node_files, dict))
2507 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2508 "Node did not return file checksum data")
2510 ignore_nodes.add(node.name)
2513 # Build per-checksum mapping from filename to nodes having it
2514 for (filename, checksum) in node_files.items():
2515 assert filename in nodefiles
2516 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2518 for (filename, checksums) in fileinfo.items():
2519 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2521 # Nodes having the file
2522 with_file = frozenset(node_name
2523 for nodes in fileinfo[filename].values()
2524 for node_name in nodes) - ignore_nodes
2526 expected_nodes = nodefiles[filename] - ignore_nodes
2528 # Nodes missing file
2529 missing_file = expected_nodes - with_file
# Optional files must be either everywhere or nowhere; mandatory files
# must be on every expected node.
2531 if filename in files_opt:
2533 errorif(missing_file and missing_file != expected_nodes,
2534 constants.CV_ECLUSTERFILECHECK, None,
2535 "File %s is optional, but it must exist on all or no"
2536 " nodes (not found on %s)",
2537 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2539 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2540 "File %s is missing from node(s) %s", filename,
2541 utils.CommaJoin(utils.NiceSort(missing_file)))
2543 # Warn if a node has a file it shouldn't
2544 unexpected = with_file - expected_nodes
2546 constants.CV_ECLUSTERFILECHECK, None,
2547 "File %s should not exist on node(s) %s",
2548 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2550 # See if there are multiple versions of the file
2551 test = len(checksums) > 1
2553 variants = ["variant %s on %s" %
2554 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2555 for (idx, (checksum, nodes)) in
2556 enumerate(sorted(checksums.items()))]
2560 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2561 "File %s found with %s different checksums (%s)",
2562 filename, len(checksums), "; ".join(variants))
2564 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2566 """Verifies and the node DRBD status.
2568 @type ninfo: L{objects.Node}
2569 @param ninfo: the node to check
2570 @param nresult: the remote results for the node
2571 @param instanceinfo: the dict of instances
2572 @param drbd_helper: the configured DRBD usermode helper
2573 @param drbd_map: the DRBD map as returned by
2574 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2578 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# helper_result is a (status, payload) pair; payload is the helper path on
# success, or an error message on failure.
2581 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2582 test = (helper_result == None)
2583 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2584 "no drbd usermode helper returned")
2586 status, payload = helper_result
2588 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2589 "drbd usermode helper check unsuccessful: %s", payload)
2590 test = status and (payload != drbd_helper)
2591 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2592 "wrong drbd usermode helper: %s", payload)
2594 # compute the DRBD minors
# node_drbd maps minor -> (instance name, should-be-active flag).
2596 for minor, instance in drbd_map[node].items():
2597 test = instance not in instanceinfo
2598 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2599 "ghost instance '%s' in temporary DRBD map", instance)
2600 # ghost instance should not be running, but otherwise we
2601 # don't give double warnings (both ghost instance and
2602 # unallocated minor in use)
2604 node_drbd[minor] = (instance, False)
2606 instance = instanceinfo[instance]
2607 node_drbd[minor] = (instance.name,
2608 instance.admin_state == constants.ADMINST_UP)
2610 # and now check them
2611 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2612 test = not isinstance(used_minors, (tuple, list))
2613 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2614 "cannot parse drbd status file: %s", str(used_minors))
2616 # we cannot check drbd status
# Cross-check: every must-exist minor is in use, and every in-use minor
# is known to the configuration.
2619 for minor, (iname, must_exist) in node_drbd.items():
2620 test = minor not in used_minors and must_exist
2621 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2622 "drbd minor %d of instance %s is not active", minor, iname)
2623 for minor in used_minors:
2624 test = minor not in node_drbd
2625 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2626 "unallocated drbd minor %d is in use", minor)
2628 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2629 """Builds the node OS structures.
2631 @type ninfo: L{objects.Node}
2632 @param ninfo: the node to check
2633 @param nresult: the remote results for the node
2634 @param nimg: the node image object
2638 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# Each OS entry from the node must be a 7-element list:
# (name, path, status, diagnose, variants, parameters, api_versions).
2640 remote_os = nresult.get(constants.NV_OSLIST, None)
2641 test = (not isinstance(remote_os, list) or
2642 not compat.all(isinstance(v, list) and len(v) == 7
2643 for v in remote_os))
2645 _ErrorIf(test, constants.CV_ENODEOS, node,
2646 "node hasn't returned valid OS data")
# NOTE(review): the initialization of os_dict is on an elided line.
2655 for (name, os_path, status, diagnose,
2656 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2658 if name not in os_dict:
2661 # parameters is a list of lists instead of list of tuples due to
2662 # JSON lacking a real tuple type, fix it:
2663 parameters = [tuple(v) for v in parameters]
2664 os_dict[name].append((os_path, status, diagnose,
2665 set(variants), set(parameters), set(api_ver)))
2667 nimg.oslist = os_dict
2669 def _VerifyNodeOS(self, ninfo, nimg, base):
2670 """Verifies the node OS list.
2672 @type ninfo: L{objects.Node}
2673 @param ninfo: the node to check
2674 @param nimg: the node image object
2675 @param base: the 'template' node we match against (e.g. from the master)
2679 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2681 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2683 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2684 for os_name, os_data in nimg.oslist.items():
2685 assert os_data, "Empty OS status for OS %s?!" % os_name
# Only the first entry per OS name is authoritative; extra entries are
# flagged as shadowed duplicates below.
2686 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2687 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2688 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2689 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2690 "OS '%s' has multiple entries (first one shadows the rest): %s",
2691 os_name, utils.CommaJoin([v[0] for v in os_data]))
2692 # comparisons with the 'base' image
2693 test = os_name not in base.oslist
2694 _ErrorIf(test, constants.CV_ENODEOS, node,
2695 "Extra OS %s not present on reference node (%s)",
2699 assert base.oslist[os_name], "Base node has empty OS status?"
2700 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2702 # base OS is invalid, skipping
# Compare API versions, variants and parameters against the reference node.
2704 for kind, a, b in [("API version", f_api, b_api),
2705 ("variants list", f_var, b_var),
2706 ("parameters", beautify_params(f_param),
2707 beautify_params(b_param))]:
2708 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2709 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2710 kind, os_name, base.name,
2711 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2713 # check any missing OSes
2714 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2715 _ErrorIf(missing, constants.CV_ENODEOS, node,
2716 "OSes present on reference node %s but missing on this node: %s",
2717 base.name, utils.CommaJoin(missing))
2719 def _VerifyOob(self, ninfo, nresult):
2720 """Verifies out of band functionality of a node.
2722 @type ninfo: L{objects.Node}
2723 @param ninfo: the node to check
2724 @param nresult: the remote results for the node
2728 # We just have to verify the paths on master and/or master candidates
2729 # as the oob helper is invoked on the master
2730 if ((ninfo.master_candidate or ninfo.master_capable) and
2731 constants.NV_OOB_PATHS in nresult):
# Each entry in NV_OOB_PATHS is a failure description; a truthy value
# therefore signals an OOB path problem.
2732 for path_result in nresult[constants.NV_OOB_PATHS]:
2733 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2735 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2736 """Verifies and updates the node volume data.
2738 This function will update a L{NodeImage}'s internal structures
2739 with data from the remote call.
2741 @type ninfo: L{objects.Node}
2742 @param ninfo: the node to check
2743 @param nresult: the remote results for the node
2744 @param nimg: the node image object
2745 @param vg_name: the configured VG name
2749 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# Pessimistically mark LVM as failed; cleared below only on valid data.
2751 nimg.lvm_fail = True
2752 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
# A string result carries the node's LVM error message; only a dict is
# a valid LV listing.
2755 elif isinstance(lvdata, basestring):
2756 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2757 utils.SafeEncode(lvdata))
2758 elif not isinstance(lvdata, dict):
2759 _ErrorIf(True, constants.CV_ENODELVM, node,
2760 "rpc call to node failed (lvlist)")
2762 nimg.volumes = lvdata
2763 nimg.lvm_fail = False
2765 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2766 """Verifies and updates the node instance list.
2768 If the listing was successful, then updates this node's instance
2769 list. Otherwise, it marks the RPC call as failed for the instance
2772 @type ninfo: L{objects.Node}
2773 @param ninfo: the node to check
2774 @param nresult: the remote results for the node
2775 @param nimg: the node image object
2778 idata = nresult.get(constants.NV_INSTANCELIST, None)
2779 test = not isinstance(idata, list)
2780 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2781 "rpc call to node failed (instancelist): %s",
2782 utils.SafeEncode(str(idata)))
# On failure the hypervisor data is marked unusable; otherwise the
# node image gets the instance list as reported by the hypervisor.
2784 nimg.hyp_fail = True
2786 nimg.instances = idata
2788 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2789 """Verifies and computes a node information map
2791 @type ninfo: L{objects.Node}
2792 @param ninfo: the node to check
2793 @param nresult: the remote results for the node
2794 @param nimg: the node image object
2795 @param vg_name: the configured VG name
2799 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2801 # try to read free memory (from the hypervisor)
2802 hv_info = nresult.get(constants.NV_HVINFO, None)
2803 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2804 _ErrorIf(test, constants.CV_ENODEHV, node,
2805 "rpc call to node failed (hvinfo)")
# NOTE(review): the 'try:' opening this conversion is on an elided line.
2808 nimg.mfree = int(hv_info["memory_free"])
2809 except (ValueError, TypeError):
2810 _ErrorIf(True, constants.CV_ENODERPC, node,
2811 "node returned invalid nodeinfo, check hypervisor")
2813 # FIXME: devise a free space model for file based instances as well
2814 if vg_name is not None:
2815 test = (constants.NV_VGLIST not in nresult or
2816 vg_name not in nresult[constants.NV_VGLIST])
2817 _ErrorIf(test, constants.CV_ENODELVM, node,
2818 "node didn't return data for the volume group '%s'"
2819 " - it is either missing or broken", vg_name)
# Free disk space in the configured VG, again guarded by an elided 'try:'.
2822 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2823 except (ValueError, TypeError):
2824 _ErrorIf(True, constants.CV_ENODERPC, node,
2825 "node returned invalid LVM info, check LVM status")
2827 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2828 """Gets per-disk status information for all instances.
2830 @type nodelist: list of strings
2831 @param nodelist: Node names
2832 @type node_image: dict of (name, L{objects.Node})
2833 @param node_image: Node objects
2834 @type instanceinfo: dict of (name, L{objects.Instance})
2835 @param instanceinfo: Instance objects
2836 @rtype: {instance: {node: [(succes, payload)]}}
2837 @return: a dictionary of per-instance dictionaries with nodes as
2838 keys and disk information as values; the disk information is a
2839 list of tuples (success, payload)
2842 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2845 node_disks_devonly = {}
2846 diskless_instances = set()
2847 diskless = constants.DT_DISKLESS
# Collect, per node, all disks of instances that live on it (primary or
# secondary); diskless instances are tracked separately and get empty
# entries at the end.
2849 for nname in nodelist:
2850 node_instances = list(itertools.chain(node_image[nname].pinst,
2851 node_image[nname].sinst))
2852 diskless_instances.update(inst for inst in node_instances
2853 if instanceinfo[inst].disk_template == diskless)
2854 disks = [(inst, disk)
2855 for inst in node_instances
2856 for disk in instanceinfo[inst].disks]
2859 # No need to collect data
2862 node_disks[nname] = disks
2864 # Creating copies as SetDiskID below will modify the objects and that can
2865 # lead to incorrect data returned from nodes
2866 devonly = [dev.Copy() for (_, dev) in disks]
2869 self.cfg.SetDiskID(dev, nname)
2871 node_disks_devonly[nname] = devonly
2873 assert len(node_disks) == len(node_disks_devonly)
2875 # Collect data from all nodes with disks
2876 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2879 assert len(result) == len(node_disks)
# Convert per-node RPC results into per-disk (success, payload) tuples;
# offline/failed nodes yield synthetic failure entries for each disk.
2883 for (nname, nres) in result.items():
2884 disks = node_disks[nname]
2887 # No data from this node
2888 data = len(disks) * [(False, "node offline")]
2891 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2892 "while getting disk information: %s", msg)
2894 # No data from this node
2895 data = len(disks) * [(False, msg)]
2898 for idx, i in enumerate(nres.payload):
2899 if isinstance(i, (tuple, list)) and len(i) == 2:
2902 logging.warning("Invalid result from node %s, entry %d: %s",
2904 data.append((False, "Invalid result from the remote node"))
2906 for ((inst, _), status) in zip(disks, data):
2907 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2909 # Add empty entries for diskless instances.
2910 for inst in diskless_instances:
2911 assert inst not in instdisk
# Sanity-check the assembled structure before returning it.
2914 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2915 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2916 compat.all(isinstance(s, (tuple, list)) and
2917 len(s) == 2 for s in statuses)
2918 for inst, nnames in instdisk.items()
2919 for nname, statuses in nnames.items())
2920 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
# NOTE(review): the @staticmethod decorator for this method is on an
# elided line in this view.
2925 def _SshNodeSelector(group_uuid, all_nodes):
2926 """Create endless iterators for all potential SSH check hosts.
# Consider only nodes OUTSIDE the given group; group them by node group
# and return one endless (cycled) iterator of sorted names per group.
2929 nodes = [node for node in all_nodes
2930 if (node.group != group_uuid and
2932 keyfunc = operator.attrgetter("group")
2934 return map(itertools.cycle,
2935 [sorted(map(operator.attrgetter("name"), names))
2936 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2940 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2941 """Choose which nodes should talk to which other nodes.
2943 We will make nodes contact all nodes in their group, and one node from
2946 @warning: This algorithm has a known issue if one node group is much
2947 smaller than others (e.g. just one node). In such a case all other
2948 nodes will talk to the single node.
2951 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2952 sel = cls._SshNodeSelector(group_uuid, all_nodes)
# For each online node, draw one peer name from every other group's
# cycled iterator (Python 2 'i.next()' idiom).
2954 return (online_nodes,
2955 dict((name, sorted([i.next() for i in sel]))
2956 for name in online_nodes))
2958 def BuildHooksEnv(self):
2961 Cluster-Verify hooks just ran in the post phase and their failure makes
2962 the output be logged in the verify output and the verification to fail.
# Hook environment: cluster tags plus one NODE_TAGS_<name> entry per node.
2966 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2969 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2970 for node in self.my_node_info.values())
2974 def BuildHooksNodes(self):
2975 """Build hooks nodes.
# Run hooks on all nodes of this group (post phase only; no pre-phase nodes).
2978 return ([], self.my_node_names)
2980 def Exec(self, feedback_fn):
2981 """Verify integrity of the node group, performing various test on nodes.
2984 # This method has too many local variables. pylint: disable=R0914
2985 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2987 if not self.my_node_names:
2989 feedback_fn("* Empty node group, skipping verification")
2993 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2994 verbose = self.op.verbose
2995 self._feedback_fn = feedback_fn
2997 vg_name = self.cfg.GetVGName()
2998 drbd_helper = self.cfg.GetDRBDHelper()
2999 cluster = self.cfg.GetClusterInfo()
3000 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3001 hypervisors = cluster.enabled_hypervisors
3002 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3004 i_non_redundant = [] # Non redundant instances
3005 i_non_a_balanced = [] # Non auto-balanced instances
3006 i_offline = 0 # Count of offline instances
3007 n_offline = 0 # Count of offline nodes
3008 n_drained = 0 # Count of nodes being drained
3009 node_vol_should = {}
3011 # FIXME: verify OS list
3014 filemap = _ComputeAncillaryFiles(cluster, False)
3016 # do local checksums
3017 master_node = self.master_node = self.cfg.GetMasterNode()
3018 master_ip = self.cfg.GetMasterIP()
3020 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3023 if self.cfg.GetUseExternalMipScript():
3024 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3026 node_verify_param = {
3027 constants.NV_FILELIST:
3028 utils.UniqueSequence(filename
3029 for files in filemap
3030 for filename in files),
3031 constants.NV_NODELIST:
3032 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3033 self.all_node_info.values()),
3034 constants.NV_HYPERVISOR: hypervisors,
3035 constants.NV_HVPARAMS:
3036 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3037 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3038 for node in node_data_list
3039 if not node.offline],
3040 constants.NV_INSTANCELIST: hypervisors,
3041 constants.NV_VERSION: None,
3042 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3043 constants.NV_NODESETUP: None,
3044 constants.NV_TIME: None,
3045 constants.NV_MASTERIP: (master_node, master_ip),
3046 constants.NV_OSLIST: None,
3047 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3048 constants.NV_USERSCRIPTS: user_scripts,
3051 if vg_name is not None:
3052 node_verify_param[constants.NV_VGLIST] = None
3053 node_verify_param[constants.NV_LVLIST] = vg_name
3054 node_verify_param[constants.NV_PVLIST] = [vg_name]
3055 node_verify_param[constants.NV_DRBDLIST] = None
3058 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3061 # FIXME: this needs to be changed per node-group, not cluster-wide
3063 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3064 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3065 bridges.add(default_nicpp[constants.NIC_LINK])
3066 for instance in self.my_inst_info.values():
3067 for nic in instance.nics:
3068 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3069 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3070 bridges.add(full_nic[constants.NIC_LINK])
3073 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3075 # Build our expected cluster state
3076 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3078 vm_capable=node.vm_capable))
3079 for node in node_data_list)
3083 for node in self.all_node_info.values():
3084 path = _SupportsOob(self.cfg, node)
3085 if path and path not in oob_paths:
3086 oob_paths.append(path)
3089 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3091 for instance in self.my_inst_names:
3092 inst_config = self.my_inst_info[instance]
3094 for nname in inst_config.all_nodes:
3095 if nname not in node_image:
3096 gnode = self.NodeImage(name=nname)
3097 gnode.ghost = (nname not in self.all_node_info)
3098 node_image[nname] = gnode
3100 inst_config.MapLVsByNode(node_vol_should)
3102 pnode = inst_config.primary_node
3103 node_image[pnode].pinst.append(instance)
3105 for snode in inst_config.secondary_nodes:
3106 nimg = node_image[snode]
3107 nimg.sinst.append(instance)
3108 if pnode not in nimg.sbp:
3109 nimg.sbp[pnode] = []
3110 nimg.sbp[pnode].append(instance)
3112 # At this point, we have the in-memory data structures complete,
3113 # except for the runtime information, which we'll gather next
3115 # Due to the way our RPC system works, exact response times cannot be
3116 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3117 # time before and after executing the request, we can at least have a time
3119 nvinfo_starttime = time.time()
3120 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3122 self.cfg.GetClusterName())
3123 nvinfo_endtime = time.time()
3125 if self.extra_lv_nodes and vg_name is not None:
3127 self.rpc.call_node_verify(self.extra_lv_nodes,
3128 {constants.NV_LVLIST: vg_name},
3129 self.cfg.GetClusterName())
3131 extra_lv_nvinfo = {}
3133 all_drbd_map = self.cfg.ComputeDRBDMap()
3135 feedback_fn("* Gathering disk information (%s nodes)" %
3136 len(self.my_node_names))
3137 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3140 feedback_fn("* Verifying configuration file consistency")
3142 # If not all nodes are being checked, we need to make sure the master node
3143 # and a non-checked vm_capable node are in the list.
3144 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3146 vf_nvinfo = all_nvinfo.copy()
3147 vf_node_info = list(self.my_node_info.values())
3148 additional_nodes = []
3149 if master_node not in self.my_node_info:
3150 additional_nodes.append(master_node)
3151 vf_node_info.append(self.all_node_info[master_node])
3152 # Add the first vm_capable node we find which is not included
3153 for node in absent_nodes:
3154 nodeinfo = self.all_node_info[node]
3155 if nodeinfo.vm_capable and not nodeinfo.offline:
3156 additional_nodes.append(node)
3157 vf_node_info.append(self.all_node_info[node])
3159 key = constants.NV_FILELIST
3160 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3161 {key: node_verify_param[key]},
3162 self.cfg.GetClusterName()))
3164 vf_nvinfo = all_nvinfo
3165 vf_node_info = self.my_node_info.values()
3167 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3169 feedback_fn("* Verifying node status")
3173 for node_i in node_data_list:
3175 nimg = node_image[node]
3179 feedback_fn("* Skipping offline node %s" % (node,))
3183 if node == master_node:
3185 elif node_i.master_candidate:
3186 ntype = "master candidate"
3187 elif node_i.drained:
3193 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3195 msg = all_nvinfo[node].fail_msg
3196 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3199 nimg.rpc_fail = True
3202 nresult = all_nvinfo[node].payload
3204 nimg.call_ok = self._VerifyNode(node_i, nresult)
3205 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3206 self._VerifyNodeNetwork(node_i, nresult)
3207 self._VerifyNodeUserScripts(node_i, nresult)
3208 self._VerifyOob(node_i, nresult)
3211 self._VerifyNodeLVM(node_i, nresult, vg_name)
3212 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3215 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3216 self._UpdateNodeInstances(node_i, nresult, nimg)
3217 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3218 self._UpdateNodeOS(node_i, nresult, nimg)
3220 if not nimg.os_fail:
3221 if refos_img is None:
3223 self._VerifyNodeOS(node_i, nimg, refos_img)
3224 self._VerifyNodeBridges(node_i, nresult, bridges)
3226 # Check whether all running instancies are primary for the node. (This
3227 # can no longer be done from _VerifyInstance below, since some of the
3228 # wrong instances could be from other node groups.)
3229 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3231 for inst in non_primary_inst:
3232 # FIXME: investigate best way to handle offline insts
3233 if inst.admin_state == constants.ADMINST_OFFLINE:
3235 feedback_fn("* Skipping offline instance %s" % inst.name)
3238 test = inst in self.all_inst_info
3239 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3240 "instance should not run on node %s", node_i.name)
3241 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3242 "node is running unknown instance %s", inst)
3244 for node, result in extra_lv_nvinfo.items():
3245 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3246 node_image[node], vg_name)
3248 feedback_fn("* Verifying instance status")
3249 for instance in self.my_inst_names:
3251 feedback_fn("* Verifying instance %s" % instance)
3252 inst_config = self.my_inst_info[instance]
3253 self._VerifyInstance(instance, inst_config, node_image,
3255 inst_nodes_offline = []
3257 pnode = inst_config.primary_node
3258 pnode_img = node_image[pnode]
3259 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3260 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3261 " primary node failed", instance)
3263 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3265 constants.CV_EINSTANCEBADNODE, instance,
3266 "instance is marked as running and lives on offline node %s",
3267 inst_config.primary_node)
3269 # If the instance is non-redundant we cannot survive losing its primary
3270 # node, so we are not N+1 compliant. On the other hand we have no disk
3271 # templates with more than one secondary so that situation is not well
3273 # FIXME: does not support file-backed instances
3274 if not inst_config.secondary_nodes:
3275 i_non_redundant.append(instance)
3277 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3278 constants.CV_EINSTANCELAYOUT,
3279 instance, "instance has multiple secondary nodes: %s",
3280 utils.CommaJoin(inst_config.secondary_nodes),
3281 code=self.ETYPE_WARNING)
3283 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3284 pnode = inst_config.primary_node
3285 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3286 instance_groups = {}
3288 for node in instance_nodes:
3289 instance_groups.setdefault(self.all_node_info[node].group,
3293 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3294 # Sort so that we always list the primary node first.
3295 for group, nodes in sorted(instance_groups.items(),
3296 key=lambda (_, nodes): pnode in nodes,
3299 self._ErrorIf(len(instance_groups) > 1,
3300 constants.CV_EINSTANCESPLITGROUPS,
3301 instance, "instance has primary and secondary nodes in"
3302 " different groups: %s", utils.CommaJoin(pretty_list),
3303 code=self.ETYPE_WARNING)
3305 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3306 i_non_a_balanced.append(instance)
3308 for snode in inst_config.secondary_nodes:
3309 s_img = node_image[snode]
3310 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3311 snode, "instance %s, connection to secondary node failed",
3315 inst_nodes_offline.append(snode)
3317 # warn that the instance lives on offline nodes
3318 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3319 "instance has offline secondary node(s) %s",
3320 utils.CommaJoin(inst_nodes_offline))
3321 # ... or ghost/non-vm_capable nodes
3322 for node in inst_config.all_nodes:
3323 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3324 instance, "instance lives on ghost node %s", node)
3325 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3326 instance, "instance lives on non-vm_capable node %s", node)
3328 feedback_fn("* Verifying orphan volumes")
3329 reserved = utils.FieldSet(*cluster.reserved_lvs)
3331 # We will get spurious "unknown volume" warnings if any node of this group
3332 # is secondary for an instance whose primary is in another group. To avoid
3333 # them, we find these instances and add their volumes to node_vol_should.
3334 for inst in self.all_inst_info.values():
3335 for secondary in inst.secondary_nodes:
3336 if (secondary in self.my_node_info
3337 and inst.name not in self.my_inst_info):
3338 inst.MapLVsByNode(node_vol_should)
3341 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3343 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3344 feedback_fn("* Verifying N+1 Memory redundancy")
3345 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3347 feedback_fn("* Other Notes")
3349 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3350 % len(i_non_redundant))
3352 if i_non_a_balanced:
3353 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3354 % len(i_non_a_balanced))
3357 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3360 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3363 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        # an RPC failure is only an error when the node is not marked offline
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            # NOTE(review): the replacement string below looks
            # whitespace-collapsed in this copy; upstream indents hook output
            # with several spaces -- verify against the original file
            output = self._HOOKS_INDENT_RE.sub(" ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  Does no verification work itself; it fans out one
  L{opcodes.OpGroupVerifyDisks} job per node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Only shared locks are needed; group membership is read-only here
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Submit one disk-verification job per owned node group.

    """
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      # Each job consists of a single L{opcodes.OpGroupVerifyDisks} opcode
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    # Re-verify the optimistic locking decisions made in DeclareLocks
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    # Map of (node, lv_name) -> instance, for all LVs that should exist on
    # each node for the instances marked as running
    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_state ==
                                        constants.ADMINST_UP])

    if nv_dict:
      # Only query vm_capable nodes we actually hold a lock on
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          # Pop each reported LV out of nv_dict; whatever remains afterwards
          # is missing on its node
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      # Restrict to the requested instances and their primary nodes
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    # Node resource locks are shared; instance locks are exclusive since
    # disk sizes may be corrected in the configuration
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether any size mismatch was found (and fixed)

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      # Query sizes on copies so the originals aren't modified by SetDiskID
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # node reports size in bytes; configuration stores mebibytes
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      # refuse an IP that already answers on the noded port
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # always try to re-activate the master IP, even if the rename failed
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  family = cfg.GetPrimaryIPFamily()
  try:
    addr_cls = netutils.IPAddress.GetClassFromIpFamily(family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               family, errors.ECODE_INVAL)
  if not addr_cls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
3781 class LUClusterSetParams(LogicalUnit):
3782 """Change the parameters of the cluster.
3785 HPATH = "cluster-modify"
3786 HTYPE = constants.HTYPE_CLUSTER
  def CheckArguments(self):
    """Check parameters

    """
    # the three uid pool arguments share the same validation helper
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      # only type-check here; merging happens in CheckPrereq
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
    # all locks are taken in shared mode; only the configuration is written
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 1,
      locking.LEVEL_NODEGROUP: 1,
      }
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # hooks run on the master node only, for both pre and post phase
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
3839 def CheckPrereq(self):
3840 """Check prerequisites.
3842 This checks whether the given params don't conflict and
3843 if the given volume group is valid.
3846 if self.op.vg_name is not None and not self.op.vg_name:
3847 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3848 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3849 " instances exist", errors.ECODE_INVAL)
3851 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3852 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3853 raise errors.OpPrereqError("Cannot disable drbd helper while"
3854 " drbd-based instances exist",
3857 node_list = self.owned_locks(locking.LEVEL_NODE)
3859 # if vg_name not None, checks given volume group on all nodes
3861 vglist = self.rpc.call_vg_list(node_list)
3862 for node in node_list:
3863 msg = vglist[node].fail_msg
3865 # ignoring down node
3866 self.LogWarning("Error while gathering data on node %s"
3867 " (ignoring node): %s", node, msg)
3869 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3871 constants.MIN_VG_SIZE)
3873 raise errors.OpPrereqError("Error on node '%s': %s" %
3874 (node, vgstatus), errors.ECODE_ENVIRON)
3876 if self.op.drbd_helper:
3877 # checks given drbd helper on all nodes
3878 helpers = self.rpc.call_drbd_helper(node_list)
3879 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3881 self.LogInfo("Not checking drbd helper on offline node %s", node)
3883 msg = helpers[node].fail_msg
3885 raise errors.OpPrereqError("Error checking drbd helper on node"
3886 " '%s': %s" % (node, msg),
3887 errors.ECODE_ENVIRON)
3888 node_helper = helpers[node].payload
3889 if node_helper != self.op.drbd_helper:
3890 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3891 (node, node_helper), errors.ECODE_ENVIRON)
3893 self.cluster = cluster = self.cfg.GetClusterInfo()
3894 # validate params changes
3895 if self.op.beparams:
3896 objects.UpgradeBeParams(self.op.beparams)
3897 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3898 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3900 if self.op.ndparams:
3901 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3902 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3904 # TODO: we need a more general way to handle resetting
3905 # cluster-level parameters to default values
3906 if self.new_ndparams["oob_program"] == "":
3907 self.new_ndparams["oob_program"] = \
3908 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3910 if self.op.hv_state:
3911 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3912 self.cluster.hv_state_static)
3913 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3914 for hv, values in new_hv_state.items())
3916 if self.op.disk_state:
3917 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3918 self.cluster.disk_state_static)
3919 self.new_disk_state = \
3920 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3921 for name, values in svalues.items()))
3922 for storage, svalues in new_disk_state.items())
3925 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3928 all_instances = self.cfg.GetAllInstancesInfo().values()
3930 for group in self.cfg.GetAllNodeGroupsInfo().values():
3931 instances = frozenset([inst for inst in all_instances
3932 if compat.any(node in group.members
3933 for node in inst.all_nodes)])
3934 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3935 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3937 new_ipolicy, instances)
3939 violations.update(new)
3942 self.LogWarning("After the ipolicy change the following instances"
3943 " violate them: %s",
3944 utils.CommaJoin(violations))
3946 if self.op.nicparams:
3947 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3948 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3949 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3952 # check all instances for consistency
3953 for instance in self.cfg.GetAllInstancesInfo().values():
3954 for nic_idx, nic in enumerate(instance.nics):
3955 params_copy = copy.deepcopy(nic.nicparams)
3956 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3958 # check parameter syntax
3960 objects.NIC.CheckParameterSyntax(params_filled)
3961 except errors.ConfigurationError, err:
3962 nic_errors.append("Instance %s, nic/%d: %s" %
3963 (instance.name, nic_idx, err))
3965 # if we're moving instances to routed, check that they have an ip
3966 target_mode = params_filled[constants.NIC_MODE]
3967 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3968 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3969 " address" % (instance.name, nic_idx))
3971 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3972 "\n".join(nic_errors))
3974 # hypervisor list/parameters
3975 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3976 if self.op.hvparams:
3977 for hv_name, hv_dict in self.op.hvparams.items():
3978 if hv_name not in self.new_hvparams:
3979 self.new_hvparams[hv_name] = hv_dict
3981 self.new_hvparams[hv_name].update(hv_dict)
3983 # disk template parameters
3984 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3985 if self.op.diskparams:
3986 for dt_name, dt_params in self.op.diskparams.items():
3987 if dt_name not in self.op.diskparams:
3988 self.new_diskparams[dt_name] = dt_params
3990 self.new_diskparams[dt_name].update(dt_params)
3992 # os hypervisor parameters
3993 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3995 for os_name, hvs in self.op.os_hvp.items():
3996 if os_name not in self.new_os_hvp:
3997 self.new_os_hvp[os_name] = hvs
3999 for hv_name, hv_dict in hvs.items():
4000 if hv_name not in self.new_os_hvp[os_name]:
4001 self.new_os_hvp[os_name][hv_name] = hv_dict
4003 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4006 self.new_osp = objects.FillDict(cluster.osparams, {})
4007 if self.op.osparams:
4008 for os_name, osp in self.op.osparams.items():
4009 if os_name not in self.new_osp:
4010 self.new_osp[os_name] = {}
4012 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4015 if not self.new_osp[os_name]:
4016 # we removed all parameters
4017 del self.new_osp[os_name]
4019 # check the parameter validity (remote check)
4020 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4021 os_name, self.new_osp[os_name])
4023 # changes to the hypervisor list
4024 if self.op.enabled_hypervisors is not None:
4025 self.hv_list = self.op.enabled_hypervisors
4026 for hv in self.hv_list:
4027 # if the hypervisor doesn't already exist in the cluster
4028 # hvparams, we initialize it to empty, and then (in both
4029 # cases) we make sure to fill the defaults, as we might not
4030 # have a complete defaults list if the hypervisor wasn't
4032 if hv not in new_hvp:
4034 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4035 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4037 self.hv_list = cluster.enabled_hypervisors
4039 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4040 # either the enabled list has changed, or the parameters have, validate
4041 for hv_name, hv_params in self.new_hvparams.items():
4042 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4043 (self.op.enabled_hypervisors and
4044 hv_name in self.op.enabled_hypervisors)):
4045 # either this is a new hypervisor, or its parameters have changed
4046 hv_class = hypervisor.GetHypervisor(hv_name)
4047 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4048 hv_class.CheckParameterSyntax(hv_params)
4049 _CheckHVParams(self, node_list, hv_name, hv_params)
4052 # no need to check any newly-enabled hypervisors, since the
4053 # defaults have already been checked in the above code-block
4054 for os_name, os_hvp in self.new_os_hvp.items():
4055 for hv_name, hv_params in os_hvp.items():
4056 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4057 # we need to fill in the new os_hvp on top of the actual hv_p
4058 cluster_defaults = self.new_hvparams.get(hv_name, {})
4059 new_osp = objects.FillDict(cluster_defaults, hv_params)
4060 hv_class = hypervisor.GetHypervisor(hv_name)
4061 hv_class.CheckParameterSyntax(new_osp)
4062 _CheckHVParams(self, node_list, hv_name, new_osp)
4064 if self.op.default_iallocator:
4065 alloc_script = utils.FindFile(self.op.default_iallocator,
4066 constants.IALLOCATOR_SEARCH_PATH,
4068 if alloc_script is None:
4069 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4070 " specified" % self.op.default_iallocator,
4073 def Exec(self, feedback_fn):
4074 """Change the parameters of the cluster.
4077 if self.op.vg_name is not None:
4078 new_volume = self.op.vg_name
4081 if new_volume != self.cfg.GetVGName():
4082 self.cfg.SetVGName(new_volume)
4084 feedback_fn("Cluster LVM configuration already in desired"
4085 " state, not changing")
4086 if self.op.drbd_helper is not None:
4087 new_helper = self.op.drbd_helper
4090 if new_helper != self.cfg.GetDRBDHelper():
4091 self.cfg.SetDRBDHelper(new_helper)
4093 feedback_fn("Cluster DRBD helper already in desired state,"
4095 if self.op.hvparams:
4096 self.cluster.hvparams = self.new_hvparams
4098 self.cluster.os_hvp = self.new_os_hvp
4099 if self.op.enabled_hypervisors is not None:
4100 self.cluster.hvparams = self.new_hvparams
4101 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4102 if self.op.beparams:
4103 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4104 if self.op.nicparams:
4105 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4107 self.cluster.ipolicy = self.new_ipolicy
4108 if self.op.osparams:
4109 self.cluster.osparams = self.new_osp
4110 if self.op.ndparams:
4111 self.cluster.ndparams = self.new_ndparams
4112 if self.op.diskparams:
4113 self.cluster.diskparams = self.new_diskparams
4114 if self.op.hv_state:
4115 self.cluster.hv_state_static = self.new_hv_state
4116 if self.op.disk_state:
4117 self.cluster.disk_state_static = self.new_disk_state
4119 if self.op.candidate_pool_size is not None:
4120 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4121 # we need to update the pool size here, otherwise the save will fail
4122 _AdjustCandidatePool(self, [])
4124 if self.op.maintain_node_health is not None:
4125 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4126 feedback_fn("Note: CONFD was disabled at build time, node health"
4127 " maintenance is not useful (still enabling it)")
4128 self.cluster.maintain_node_health = self.op.maintain_node_health
4130 if self.op.prealloc_wipe_disks is not None:
4131 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4133 if self.op.add_uids is not None:
4134 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4136 if self.op.remove_uids is not None:
4137 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4139 if self.op.uid_pool is not None:
4140 self.cluster.uid_pool = self.op.uid_pool
4142 if self.op.default_iallocator is not None:
4143 self.cluster.default_iallocator = self.op.default_iallocator
4145 if self.op.reserved_lvs is not None:
4146 self.cluster.reserved_lvs = self.op.reserved_lvs
4148 if self.op.use_external_mip_script is not None:
4149 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4151 def helper_os(aname, mods, desc):
4153 lst = getattr(self.cluster, aname)
4154 for key, val in mods:
4155 if key == constants.DDM_ADD:
4157 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4160 elif key == constants.DDM_REMOVE:
4164 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4166 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4168 if self.op.hidden_os:
4169 helper_os("hidden_os", self.op.hidden_os, "hidden")
4171 if self.op.blacklisted_os:
4172 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4174 if self.op.master_netdev:
4175 master_params = self.cfg.GetMasterNetworkParameters()
4176 ems = self.cfg.GetUseExternalMipScript()
4177 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4178 self.cluster.master_netdev)
4179 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4181 result.Raise("Could not disable the master ip")
4182 feedback_fn("Changing master_netdev from %s to %s" %
4183 (master_params.netdev, self.op.master_netdev))
4184 self.cluster.master_netdev = self.op.master_netdev
4186 if self.op.master_netmask:
4187 master_params = self.cfg.GetMasterNetworkParameters()
4188 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4189 result = self.rpc.call_node_change_master_netmask(master_params.name,
4190 master_params.netmask,
4191 self.op.master_netmask,
4193 master_params.netdev)
4195 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4198 self.cluster.master_netmask = self.op.master_netmask
4200 self.cfg.Update(self.cluster, feedback_fn)
4202 if self.op.master_netdev:
4203 master_params = self.cfg.GetMasterNetworkParameters()
4204 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4205 self.op.master_netdev)
4206 ems = self.cfg.GetUseExternalMipScript()
4207 result = self.rpc.call_node_activate_master_ip(master_params.name,
4210 self.LogWarning("Could not re-enable the master ip on"
4211 " the master, please restart manually: %s",
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  Uploads the local file C{fname} (if it exists) to all given nodes and
  logs a warning for every per-node failure instead of aborting.

  @param lu: the calling LogicalUnit (used for RPC and logging)
  @param nodes: list of node names to upload the file to
  @type fname: string
  @param fname: path of the file to distribute; silently skipped if it
      does not exist on the master

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      # only warn when this particular node actually failed
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object, used for the enabled hypervisors and
      the C{modify_etc_hosts} flag
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # shared lock on all nodes: we only read and push files out
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # Updating the config triggers distribution of config/ssconf;
    # ancillary files are pushed explicitly afterwards
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  @param lu: the calling LogicalUnit
  @param instance: the instance whose disks are polled
  @param disks: optional subset of the instance's disks to wait for
  @type oneshot: boolean
  @param oneshot: if True, only poll once instead of looping until sync
  @rtype: boolean
  @return: True if the disks are (or ended up) not degraded

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @param lu: the calling LogicalUnit
  @param dev: the disk object to check (children are checked recursively)
  @param node: the node on which to look for the disk
  @type on_primary: boolean
  @param on_primary: whether C{node} is the instance's primary node
  @rtype: boolean
  @return: True if the device (and all its children) is consistent

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
4506 class LUOobCommand(NoHooksLU):
4507 """Logical unit for OOB handling.
4511 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4513 def ExpandNames(self):
4514 """Gather locks we need.
4517 if self.op.node_names:
4518 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4519 lock_names = self.op.node_names
4521 lock_names = locking.ALL_SET
4523 self.needed_locks = {
4524 locking.LEVEL_NODE: lock_names,
4527 def CheckPrereq(self):
4528 """Check prerequisites.
4531 - the node exists in the configuration
4534 Any errors are signaled by raising errors.OpPrereqError.
4538 self.master_node = self.cfg.GetMasterNode()
4540 assert self.op.power_delay >= 0.0
4542 if self.op.node_names:
4543 if (self.op.command in self._SKIP_MASTER and
4544 self.master_node in self.op.node_names):
4545 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4546 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4548 if master_oob_handler:
4549 additional_text = ("run '%s %s %s' if you want to operate on the"
4550 " master regardless") % (master_oob_handler,
4554 additional_text = "it does not support out-of-band operations"
4556 raise errors.OpPrereqError(("Operating on the master node %s is not"
4557 " allowed for %s; %s") %
4558 (self.master_node, self.op.command,
4559 additional_text), errors.ECODE_INVAL)
4561 self.op.node_names = self.cfg.GetNodeList()
4562 if self.op.command in self._SKIP_MASTER:
4563 self.op.node_names.remove(self.master_node)
4565 if self.op.command in self._SKIP_MASTER:
4566 assert self.master_node not in self.op.node_names
4568 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4570 raise errors.OpPrereqError("Node %s not found" % node_name,
4573 self.nodes.append(node)
4575 if (not self.op.ignore_status and
4576 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4577 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4578 " not marked offline") % node_name,
4581 def Exec(self, feedback_fn):
4582 """Execute OOB and return result if we expect any.
4585 master_node = self.master_node
4588 for idx, node in enumerate(utils.NiceSort(self.nodes,
4589 key=lambda node: node.name)):
4590 node_entry = [(constants.RS_NORMAL, node.name)]
4591 ret.append(node_entry)
4593 oob_program = _SupportsOob(self.cfg, node)
4596 node_entry.append((constants.RS_UNAVAIL, None))
4599 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4600 self.op.command, oob_program, node.name)
4601 result = self.rpc.call_run_oob(master_node, oob_program,
4602 self.op.command, node.name,
4606 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4607 node.name, result.fail_msg)
4608 node_entry.append((constants.RS_NODATA, None))
4611 self._CheckPayload(result)
4612 except errors.OpExecError, err:
4613 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4615 node_entry.append((constants.RS_NODATA, None))
4617 if self.op.command == constants.OOB_HEALTH:
4618 # For health we should log important events
4619 for item, status in result.payload:
4620 if status in [constants.OOB_STATUS_WARNING,
4621 constants.OOB_STATUS_CRITICAL]:
4622 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4623 item, node.name, status)
4625 if self.op.command == constants.OOB_POWER_ON:
4627 elif self.op.command == constants.OOB_POWER_OFF:
4628 node.powered = False
4629 elif self.op.command == constants.OOB_POWER_STATUS:
4630 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4631 if powered != node.powered:
4632 logging.warning(("Recorded power state (%s) of node '%s' does not"
4633 " match actual power state (%s)"), node.powered,
4636 # For configuration changing commands we should update the node
4637 if self.op.command in (constants.OOB_POWER_ON,
4638 constants.OOB_POWER_OFF):
4639 self.cfg.Update(node, feedback_fn)
4641 node_entry.append((constants.RS_NORMAL, result.payload))
4643 if (self.op.command == constants.OOB_POWER_ON and
4644 idx < len(self.nodes) - 1):
4645 time.sleep(self.op.power_delay)
4649 def _CheckPayload(self, result):
4650 """Checks if the payload is valid.
4652 @param result: RPC result
4653 @raises errors.OpExecError: If payload is not valid
4657 if self.op.command == constants.OOB_HEALTH:
4658 if not isinstance(result.payload, list):
4659 errs.append("command 'health' is expected to return a list but got %s" %
4660 type(result.payload))
4662 for item, status in result.payload:
4663 if status not in constants.OOB_STATUSES:
4664 errs.append("health item '%s' has invalid status '%s'" %
4667 if self.op.command == constants.OOB_POWER_STATUS:
4668 if not isinstance(result.payload, dict):
4669 errs.append("power-status is expected to return a dict but got %s" %
4670 type(result.payload))
4672 if self.op.command in [
4673 constants.OOB_POWER_ON,
4674 constants.OOB_POWER_OFF,
4675 constants.OOB_POWER_CYCLE,
4677 if result.payload is not None:
4678 errs.append("%s is expected to not return payload but got '%s'" %
4679 (self.op.command, result.payload))
4682 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4683 utils.CommaJoin(errs))
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_STATE)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5323 class LUQuery(NoHooksLU):
5324 """Query for resources/items of a certain kind.
# Thin dispatcher LU: all real work is delegated to the query
# implementation selected from self.op.what via _GetQueryImplementation.
5327 # pylint: disable=W0142
5330 def CheckArguments(self):
# Resolve and instantiate the implementation with the requested
# filter, field list and locking mode.
5331 qcls = _GetQueryImplementation(self.op.what)
5333 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5335 def ExpandNames(self):
5336 self.impl.ExpandNames(self)
5338 def DeclareLocks(self, level):
5339 self.impl.DeclareLocks(self, level)
5341 def Exec(self, feedback_fn):
# Returns whatever the implementation's NewStyleQuery produces.
5342 return self.impl.NewStyleQuery(self)
5345 class LUQueryFields(NoHooksLU):
5346 """Query for resources/items of a certain kind.
# Returns the field definitions for a resource kind rather than data.
5349 # pylint: disable=W0142
5352 def CheckArguments(self):
5353 self.qcls = _GetQueryImplementation(self.op.what)
5355 def ExpandNames(self):
# Field queries read only static metadata, so no locks are required.
5356 self.needed_locks = {}
5358 def Exec(self, feedback_fn):
5359 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5362 class LUNodeModifyStorage(NoHooksLU):
5363 """Logical unit for modifying a storage volume on a node.
5368 def CheckArguments(self):
5369 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5371 storage_type = self.op.storage_type
# Which fields may be changed for this storage type.
5374 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
# NOTE(review): the guard for this raise is on an elided line;
# presumably it fires when 'modifiable' is empty — confirm upstream.
5376 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5377 " modified" % storage_type,
# Reject any requested change outside the modifiable field set.
5380 diff = set(self.op.changes.keys()) - modifiable
5382 raise errors.OpPrereqError("The following fields can not be modified for"
5383 " storage units of type '%s': %r" %
5384 (storage_type, list(diff)),
5387 def ExpandNames(self):
# Only the target node needs to be locked.
5388 self.needed_locks = {
5389 locking.LEVEL_NODE: self.op.node_name,
5392 def Exec(self, feedback_fn):
5393 """Computes the list of nodes and their attributes.
# Runs the storage_modify RPC on the node and raises on failure.
5396 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5397 result = self.rpc.call_storage_modify(self.op.node_name,
5398 self.op.storage_type, st_args,
5399 self.op.name, self.op.changes)
5400 result.Raise("Failed to modify storage unit '%s' on %s" %
5401 (self.op.name, self.op.node_name))
5404 class LUNodeAdd(LogicalUnit):
5405 """Logical unit for adding node to the cluster.
# Handles both first-time node addition and re-add (self.op.readd).
5409 HTYPE = constants.HTYPE_NODE
# Capability flags copied between opcode and node object.
5410 _NFLAGS = ["master_capable", "vm_capable"]
5412 def CheckArguments(self):
5413 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5414 # validate/normalize the node name
5415 self.hostname = netutils.GetHostname(name=self.op.node_name,
5416 family=self.primary_ip_family)
5417 self.op.node_name = self.hostname.name
5419 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5420 raise errors.OpPrereqError("Cannot readd the master node",
# A node group may only be given for a fresh add, not a re-add.
5423 if self.op.readd and self.op.group:
5424 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5425 " being readded", errors.ECODE_INVAL)
5427 def BuildHooksEnv(self):
5430 This will run on all nodes before, and on all nodes + the new node after.
5434 "OP_TARGET": self.op.node_name,
5435 "NODE_NAME": self.op.node_name,
5436 "NODE_PIP": self.op.primary_ip,
5437 "NODE_SIP": self.op.secondary_ip,
5438 "MASTER_CAPABLE": str(self.op.master_capable),
5439 "VM_CAPABLE": str(self.op.vm_capable),
5442 def BuildHooksNodes(self):
5443 """Build hooks nodes.
5446 # Exclude added node
5447 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5448 post_nodes = pre_nodes + [self.op.node_name, ]
5450 return (pre_nodes, post_nodes)
5452 def CheckPrereq(self):
5453 """Check prerequisites.
5456 - the new node is not already in the config
5458 - its parameters (single/dual homed) matches the cluster
5460 Any errors are signaled by raising errors.OpPrereqError.
5464 hostname = self.hostname
5465 node = hostname.name
5466 primary_ip = self.op.primary_ip = hostname.ip
5467 if self.op.secondary_ip is None:
# On IPv6-primary clusters an explicit IPv4 secondary is mandatory.
5468 if self.primary_ip_family == netutils.IP6Address.family:
5469 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5470 " IPv4 address must be given as secondary",
5472 self.op.secondary_ip = primary_ip
5474 secondary_ip = self.op.secondary_ip
5475 if not netutils.IP4Address.IsValid(secondary_ip):
5476 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5477 " address" % secondary_ip, errors.ECODE_INVAL)
5479 node_list = cfg.GetNodeList()
5480 if not self.op.readd and node in node_list:
5481 raise errors.OpPrereqError("Node %s is already in the configuration" %
5482 node, errors.ECODE_EXISTS)
5483 elif self.op.readd and node not in node_list:
5484 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5487 self.changed_primary_ip = False
# Check IP uniqueness against every existing node; for a re-add the
# node being re-added is allowed to keep its own addresses.
5489 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5490 if self.op.readd and node == existing_node_name:
5491 if existing_node.secondary_ip != secondary_ip:
5492 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5493 " address configuration as before",
5495 if existing_node.primary_ip != primary_ip:
5496 self.changed_primary_ip = True
5500 if (existing_node.primary_ip == primary_ip or
5501 existing_node.secondary_ip == primary_ip or
5502 existing_node.primary_ip == secondary_ip or
5503 existing_node.secondary_ip == secondary_ip):
5504 raise errors.OpPrereqError("New node ip address(es) conflict with"
5505 " existing node %s" % existing_node.name,
5506 errors.ECODE_NOTUNIQUE)
5508 # After this 'if' block, None is no longer a valid value for the
5509 # _capable op attributes
# Re-add: inherit unspecified capability flags from the old node
# object; fresh add: default unspecified flags to True.
5511 old_node = self.cfg.GetNodeInfo(node)
5512 assert old_node is not None, "Can't retrieve locked node %s" % node
5513 for attr in self._NFLAGS:
5514 if getattr(self.op, attr) is None:
5515 setattr(self.op, attr, getattr(old_node, attr))
5517 for attr in self._NFLAGS:
5518 if getattr(self.op, attr) is None:
5519 setattr(self.op, attr, True)
5521 if self.op.readd and not self.op.vm_capable:
5522 pri, sec = cfg.GetNodeInstances(node)
# NOTE(review): guard on an elided line — presumably "if pri or sec:".
5524 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5525 " flag set to false, but it already holds"
5526 " instances" % node,
5529 # check that the type of the node (single versus dual homed) is the
5530 # same as for the master
5531 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5532 master_singlehomed = myself.secondary_ip == myself.primary_ip
5533 newbie_singlehomed = secondary_ip == primary_ip
5534 if master_singlehomed != newbie_singlehomed:
5535 if master_singlehomed:
5536 raise errors.OpPrereqError("The master has no secondary ip but the"
5537 " new node has one",
5540 raise errors.OpPrereqError("The master has a secondary ip but the"
5541 " new node doesn't have one",
5544 # checks reachability
5545 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5546 raise errors.OpPrereqError("Node not reachable by ping",
5547 errors.ECODE_ENVIRON)
5549 if not newbie_singlehomed:
5550 # check reachability from my secondary ip to newbie's secondary ip
5551 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5552 source=myself.secondary_ip):
5553 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5554 " based ping to node daemon port",
5555 errors.ECODE_ENVIRON)
# Decide whether the new node should become a master candidate.
5562 if self.op.master_capable:
5563 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5565 self.master_candidate = False
# Re-add reuses the existing node object; a fresh add builds one.
5568 self.new_node = old_node
5570 node_group = cfg.LookupNodeGroup(self.op.group)
5571 self.new_node = objects.Node(name=node,
5572 primary_ip=primary_ip,
5573 secondary_ip=secondary_ip,
5574 master_candidate=self.master_candidate,
5575 offline=False, drained=False,
5578 if self.op.ndparams:
5579 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5581 if self.op.hv_state:
5582 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5584 if self.op.disk_state:
5585 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5587 def Exec(self, feedback_fn):
5588 """Adds the new node to the cluster.
5591 new_node = self.new_node
5592 node = new_node.name
5594 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5597 # We adding a new node so we assume it's powered
5598 new_node.powered = True
5600 # for re-adds, reset the offline/drained/master-candidate flags;
5601 # we need to reset here, otherwise offline would prevent RPC calls
5602 # later in the procedure; this also means that if the re-add
5603 # fails, we are left with a non-offlined, broken node
5605 new_node.drained = new_node.offline = False # pylint: disable=W0201
5606 self.LogInfo("Readding a node, the offline/drained flags were reset")
5607 # if we demote the node, we do cleanup later in the procedure
5608 new_node.master_candidate = self.master_candidate
5609 if self.changed_primary_ip:
5610 new_node.primary_ip = self.op.primary_ip
5612 # copy the master/vm_capable flags
5613 for attr in self._NFLAGS:
5614 setattr(new_node, attr, getattr(self.op, attr))
5616 # notify the user about any possible mc promotion
5617 if new_node.master_candidate:
5618 self.LogInfo("Node will be a master candidate")
5620 if self.op.ndparams:
5621 new_node.ndparams = self.op.ndparams
5623 new_node.ndparams = {}
5625 if self.op.hv_state:
5626 new_node.hv_state_static = self.new_hv_state
5628 if self.op.disk_state:
5629 new_node.disk_state_static = self.new_disk_state
5631 # check connectivity
5632 result = self.rpc.call_version([node])[node]
5633 result.Raise("Can't get version information from node %s" % node)
5634 if constants.PROTOCOL_VERSION == result.payload:
5635 logging.info("Communication to node %s fine, sw version %s match",
5636 node, result.payload)
5638 raise errors.OpExecError("Version mismatch master version %s,"
5639 " node version %s" %
5640 (constants.PROTOCOL_VERSION, result.payload))
5642 # Add node to our /etc/hosts, and add key to known_hosts
5643 if self.cfg.GetClusterInfo().modify_etc_hosts:
5644 master_node = self.cfg.GetMasterNode()
5645 result = self.rpc.call_etc_hosts_modify(master_node,
5646 constants.ETC_HOSTS_ADD,
5649 result.Raise("Can't update hosts file with new host data")
5651 if new_node.secondary_ip != new_node.primary_ip:
5652 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
# Verify the master can reach the new node over SSH/hostname checks.
5655 node_verify_list = [self.cfg.GetMasterNode()]
5656 node_verify_param = {
5657 constants.NV_NODELIST: ([node], {}),
5658 # TODO: do a node-net-test as well?
5661 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5662 self.cfg.GetClusterName())
5663 for verifier in node_verify_list:
5664 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5665 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5667 for failed in nl_payload:
5668 feedback_fn("ssh/hostname verification failed"
5669 " (checking from %s): %s" %
5670 (verifier, nl_payload[failed]))
5671 raise errors.OpExecError("ssh/hostname verification failed")
# Re-add path: redistribute files, re-register and update config.
5674 _RedistributeAncillaryFiles(self)
5675 self.context.ReaddNode(new_node)
5676 # make sure we redistribute the config
5677 self.cfg.Update(new_node, feedback_fn)
5678 # and make sure the new node will not have old files around
5679 if not new_node.master_candidate:
5680 result = self.rpc.call_node_demote_from_mc(new_node.name)
5681 msg = result.fail_msg
# NOTE(review): the "if msg:" guard is on an elided line.
5683 self.LogWarning("Node failed to demote itself from master"
5684 " candidate status: %s" % msg)
# Fresh-add path: push ancillary files to the new node and register it.
5686 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5687 additional_vm=self.op.vm_capable)
5688 self.context.AddNode(new_node, self.proc.GetECId())
5691 class LUNodeSetParams(LogicalUnit):
5692 """Modifies the parameters of a node.
5694 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5695 to the node role (as _ROLE_*)
5696 @cvar _R2F: a dictionary from node role to tuples of flags
5697 @cvar _FLAGS: a list of attribute names corresponding to the flags
5700 HPATH = "node-modify"
5701 HTYPE = constants.HTYPE_NODE
# The four mutually-exclusive node roles.
5703 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5705 (True, False, False): _ROLE_CANDIDATE,
5706 (False, True, False): _ROLE_DRAINED,
5707 (False, False, True): _ROLE_OFFLINE,
5708 (False, False, False): _ROLE_REGULAR,
5710 _R2F = dict((v, k) for k, v in _F2R.items())
5711 _FLAGS = ["master_candidate", "drained", "offline"]
5713 def CheckArguments(self):
5714 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# At least one modification must be requested, and at most one of
# the mutually-exclusive role flags may be set to True.
5715 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5716 self.op.master_capable, self.op.vm_capable,
5717 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5719 if all_mods.count(None) == len(all_mods):
5720 raise errors.OpPrereqError("Please pass at least one modification",
5722 if all_mods.count(True) > 1:
5723 raise errors.OpPrereqError("Can't set the node into more than one"
5724 " state at the same time",
5727 # Boolean value that tells us whether we might be demoting from MC
5728 self.might_demote = (self.op.master_candidate == False or
5729 self.op.offline == True or
5730 self.op.drained == True or
5731 self.op.master_capable == False)
5733 if self.op.secondary_ip:
5734 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5735 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5736 " address" % self.op.secondary_ip,
# Lock everything when auto-promotion may be needed; instance locks
# are only needed when the secondary IP changes.
5739 self.lock_all = self.op.auto_promote and self.might_demote
5740 self.lock_instances = self.op.secondary_ip is not None
5742 def _InstanceFilter(self, instance):
5743 """Filter for getting affected instances.
# Internally-mirrored (e.g. DRBD) instances that touch this node.
5746 return (instance.disk_template in constants.DTS_INT_MIRROR and
5747 self.op.node_name in instance.all_nodes)
5749 def ExpandNames(self):
5751 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5753 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5755 # Since modifying a node can have severe effects on currently running
5756 # operations the resource lock is at least acquired in shared mode
5757 self.needed_locks[locking.LEVEL_NODE_RES] = \
5758 self.needed_locks[locking.LEVEL_NODE]
5760 # Get node resource and instance locks in shared mode; they are not used
5761 # for anything but read-only access
5762 self.share_locks[locking.LEVEL_NODE_RES] = 1
5763 self.share_locks[locking.LEVEL_INSTANCE] = 1
5765 if self.lock_instances:
5766 self.needed_locks[locking.LEVEL_INSTANCE] = \
5767 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5769 def BuildHooksEnv(self):
5772 This runs on the master node.
5776 "OP_TARGET": self.op.node_name,
5777 "MASTER_CANDIDATE": str(self.op.master_candidate),
5778 "OFFLINE": str(self.op.offline),
5779 "DRAINED": str(self.op.drained),
5780 "MASTER_CAPABLE": str(self.op.master_capable),
5781 "VM_CAPABLE": str(self.op.vm_capable),
5784 def BuildHooksNodes(self):
5785 """Build hooks nodes.
5788 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5791 def CheckPrereq(self):
5792 """Check prerequisites.
5794 This only checks the instance list against the existing names.
5797 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5799 if self.lock_instances:
5800 affected_instances = \
5801 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5803 # Verify instance locks
# Re-evaluate the filter: if the affected-instance set changed since
# locks were taken in ExpandNames, the caller must retry.
5804 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5805 wanted_instances = frozenset(affected_instances.keys())
5806 if wanted_instances - owned_instances:
5807 raise errors.OpPrereqError("Instances affected by changing node %s's"
5808 " secondary IP address have changed since"
5809 " locks were acquired, wanted '%s', have"
5810 " '%s'; retry the operation" %
5812 utils.CommaJoin(wanted_instances),
5813 utils.CommaJoin(owned_instances)),
5816 affected_instances = None
5818 if (self.op.master_candidate is not None or
5819 self.op.drained is not None or
5820 self.op.offline is not None):
5821 # we can't change the master's node flags
5822 if self.op.node_name == self.cfg.GetMasterNode():
5823 raise errors.OpPrereqError("The master role can be changed"
5824 " only via master-failover",
5827 if self.op.master_candidate and not node.master_capable:
5828 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5829 " it a master candidate" % node.name,
5832 if self.op.vm_capable == False:
5833 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
# NOTE(review): guard on an elided line — presumably "if ipri or isec:".
5835 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5836 " the vm_capable flag" % node.name,
5839 if node.master_candidate and self.might_demote and not self.lock_all:
5840 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5841 # check if after removing the current node, we're missing master
5843 (mc_remaining, mc_should, _) = \
5844 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5845 if mc_remaining < mc_should:
5846 raise errors.OpPrereqError("Not enough master candidates, please"
5847 " pass auto promote option to allow"
5848 " promotion", errors.ECODE_STATE)
5850 self.old_flags = old_flags = (node.master_candidate,
5851 node.drained, node.offline)
5852 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5853 self.old_role = old_role = self._F2R[old_flags]
5855 # Check for ineffective changes
5856 for attr in self._FLAGS:
5857 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5858 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5859 setattr(self.op, attr, None)
5861 # Past this point, any flag change to False means a transition
5862 # away from the respective state, as only real changes are kept
5864 # TODO: We might query the real power state if it supports OOB
5865 if _SupportsOob(self.cfg, node):
5866 if self.op.offline is False and not (node.powered or
5867 self.op.powered == True):
5868 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5869 " offline status can be reset") %
5871 elif self.op.powered is not None:
5872 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5873 " as it does not support out-of-band"
5874 " handling") % self.op.node_name)
5876 # If we're being deofflined/drained, we'll MC ourself if needed
5877 if (self.op.drained == False or self.op.offline == False or
5878 (self.op.master_capable and not node.master_capable)):
5879 if _DecideSelfPromotion(self):
5880 self.op.master_candidate = True
5881 self.LogInfo("Auto-promoting node to master candidate")
5883 # If we're no longer master capable, we'll demote ourselves from MC
5884 if self.op.master_capable == False and node.master_candidate:
5885 self.LogInfo("Demoting from master candidate")
5886 self.op.master_candidate = False
# Compute the resulting role from the (now de-duplicated) flags.
5889 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5890 if self.op.master_candidate:
5891 new_role = self._ROLE_CANDIDATE
5892 elif self.op.drained:
5893 new_role = self._ROLE_DRAINED
5894 elif self.op.offline:
5895 new_role = self._ROLE_OFFLINE
5896 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5897 # False is still in new flags, which means we're un-setting (the
5899 new_role = self._ROLE_REGULAR
5900 else: # no new flags, nothing, keep old role
5903 self.new_role = new_role
5905 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5906 # Trying to transition out of offline status
5907 # TODO: Use standard RPC runner, but make sure it works when the node is
5908 # still marked offline
5909 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5911 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5912 " to report its version: %s" %
5913 (node.name, result.fail_msg),
5916 self.LogWarning("Transitioning node from offline to online state"
5917 " without using re-add. Please make sure the node"
5920 if self.op.secondary_ip:
5921 # Ok even without locking, because this can't be changed by any LU
5922 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5923 master_singlehomed = master.secondary_ip == master.primary_ip
5924 if master_singlehomed and self.op.secondary_ip:
5925 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5926 " homed cluster", errors.ECODE_INVAL)
5928 assert not (frozenset(affected_instances) -
5929 self.owned_locks(locking.LEVEL_INSTANCE))
# NOTE(review): elided branch — presumably "if node.offline:".
5932 if affected_instances:
5933 raise errors.OpPrereqError("Cannot change secondary IP address:"
5934 " offline node has instances (%s)"
5935 " configured to use it" %
5936 utils.CommaJoin(affected_instances.keys()))
5938 # On online nodes, check that no instances are running, and that
5939 # the node has the new ip and we can reach it.
5940 for instance in affected_instances.values():
5941 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5942 msg="cannot change secondary ip")
5944 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5945 if master.name != node.name:
5946 # check reachability from master secondary ip to new secondary ip
5947 if not netutils.TcpPing(self.op.secondary_ip,
5948 constants.DEFAULT_NODED_PORT,
5949 source=master.secondary_ip):
5950 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5951 " based ping to node daemon port",
5952 errors.ECODE_ENVIRON)
5954 if self.op.ndparams:
5955 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5956 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5957 self.new_ndparams = new_ndparams
5959 if self.op.hv_state:
5960 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5961 self.node.hv_state_static)
5963 if self.op.disk_state:
5964 self.new_disk_state = \
5965 _MergeAndVerifyDiskState(self.op.disk_state,
5966 self.node.disk_state_static)
5968 def Exec(self, feedback_fn):
# Applies the changes computed in CheckPrereq to the node object and
# returns a list of (field, new-value) pairs ('result').
5973 old_role = self.old_role
5974 new_role = self.new_role
5978 if self.op.ndparams:
5979 node.ndparams = self.new_ndparams
5981 if self.op.powered is not None:
5982 node.powered = self.op.powered
5984 if self.op.hv_state:
5985 node.hv_state_static = self.new_hv_state
5987 if self.op.disk_state:
5988 node.disk_state_static = self.new_disk_state
5990 for attr in ["master_capable", "vm_capable"]:
5991 val = getattr(self.op, attr)
# NOTE(review): elided guard — presumably "if val is not None:".
5993 setattr(node, attr, val)
5994 result.append((attr, str(val)))
5996 if new_role != old_role:
5997 # Tell the node to demote itself, if no longer MC and not offline
5998 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5999 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6001 self.LogWarning("Node failed to demote itself: %s", msg)
6003 new_flags = self._R2F[new_role]
6004 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6006 result.append((desc, str(nf)))
6007 (node.master_candidate, node.drained, node.offline) = new_flags
6009 # we locked all nodes, we adjust the CP before updating this node
6011 _AdjustCandidatePool(self, [node.name])
6013 if self.op.secondary_ip:
6014 node.secondary_ip = self.op.secondary_ip
6015 result.append(("secondary_ip", self.op.secondary_ip))
6017 # this will trigger configuration file update, if needed
6018 self.cfg.Update(node, feedback_fn)
6020 # this will trigger job queue propagation or cleanup if the mc
6022 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6023 self.context.ReaddNode(node)
6028 class LUNodePowercycle(NoHooksLU):
6029 """Powercycles a node.
6034 def CheckArguments(self):
6035 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# Refuse to powercycle the master unless explicitly forced.
6036 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6037 raise errors.OpPrereqError("The node is the master and the force"
6038 " parameter was not set",
6041 def ExpandNames(self):
6042 """Locking for PowercycleNode.
6044 This is a last-resort option and shouldn't block on other
6045 jobs. Therefore, we grab no locks.
6048 self.needed_locks = {}
6050 def Exec(self, feedback_fn):
# Schedules the reboot via RPC; returns the node's reply payload.
6054 result = self.rpc.call_node_powercycle(self.op.node_name,
6055 self.cfg.GetHypervisorType())
6056 result.Raise("Failed to schedule the reboot")
6057 return result.payload
6060 class LUClusterQuery(NoHooksLU):
6061 """Query cluster configuration.
# Read-only snapshot of cluster-wide settings; no locks needed.
6066 def ExpandNames(self):
6067 self.needed_locks = {}
6069 def Exec(self, feedback_fn):
6070 """Return cluster config.
6073 cluster = self.cfg.GetClusterInfo()
6076 # Filter just for enabled hypervisors
6077 for os_name, hv_dict in cluster.os_hvp.items():
6078 os_hvp[os_name] = {}
6079 for hv_name, hv_params in hv_dict.items():
6080 if hv_name in cluster.enabled_hypervisors:
6081 os_hvp[os_name][hv_name] = hv_params
6083 # Convert ip_family to ip_version
6084 primary_ip_version = constants.IP4_VERSION
6085 if cluster.primary_ip_family == netutils.IP6Address.family:
6086 primary_ip_version = constants.IP6_VERSION
# Assemble the result dict returned to the client.
6089 "software_version": constants.RELEASE_VERSION,
6090 "protocol_version": constants.PROTOCOL_VERSION,
6091 "config_version": constants.CONFIG_VERSION,
6092 "os_api_version": max(constants.OS_API_VERSIONS),
6093 "export_version": constants.EXPORT_VERSION,
6094 "architecture": (platform.architecture()[0], platform.machine()),
6095 "name": cluster.cluster_name,
6096 "master": cluster.master_node,
6097 "default_hypervisor": cluster.primary_hypervisor,
6098 "enabled_hypervisors": cluster.enabled_hypervisors,
6099 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6100 for hypervisor_name in cluster.enabled_hypervisors]),
6102 "beparams": cluster.beparams,
6103 "osparams": cluster.osparams,
6104 "ipolicy": cluster.ipolicy,
6105 "nicparams": cluster.nicparams,
6106 "ndparams": cluster.ndparams,
6107 "candidate_pool_size": cluster.candidate_pool_size,
6108 "master_netdev": cluster.master_netdev,
6109 "master_netmask": cluster.master_netmask,
6110 "use_external_mip_script": cluster.use_external_mip_script,
6111 "volume_group_name": cluster.volume_group_name,
6112 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6113 "file_storage_dir": cluster.file_storage_dir,
6114 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6115 "maintain_node_health": cluster.maintain_node_health,
6116 "ctime": cluster.ctime,
6117 "mtime": cluster.mtime,
6118 "uuid": cluster.uuid,
6119 "tags": list(cluster.GetTags()),
6120 "uid_pool": cluster.uid_pool,
6121 "default_iallocator": cluster.default_iallocator,
6122 "reserved_lvs": cluster.reserved_lvs,
6123 "primary_ip_version": primary_ip_version,
6124 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6125 "hidden_os": cluster.hidden_os,
6126 "blacklisted_os": cluster.blacklisted_os,
6132 class LUClusterConfigQuery(NoHooksLU):
6133 """Return configuration values.
# Small, field-by-field config query; supports only the static
# fields listed in _FIELDS_STATIC.
6137 _FIELDS_DYNAMIC = utils.FieldSet()
6138 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6139 "watcher_pause", "volume_group_name")
6141 def CheckArguments(self):
6142 _CheckOutputFields(static=self._FIELDS_STATIC,
6143 dynamic=self._FIELDS_DYNAMIC,
6144 selected=self.op.output_fields)
6146 def ExpandNames(self):
6147 self.needed_locks = {}
6149 def Exec(self, feedback_fn):
6150 """Dump a representation of the cluster config to the standard output.
# Values are appended in the order the fields were requested.
6154 for field in self.op.output_fields:
6155 if field == "cluster_name":
6156 entry = self.cfg.GetClusterName()
6157 elif field == "master_node":
6158 entry = self.cfg.GetMasterNode()
6159 elif field == "drain_flag":
6160 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6161 elif field == "watcher_pause":
6162 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6163 elif field == "volume_group_name":
6164 entry = self.cfg.GetVGName()
6166 raise errors.ParameterError(field)
6167 values.append(entry)
6171 class LUInstanceActivateDisks(NoHooksLU):
6172 """Bring up an instance's disks.
6177 def ExpandNames(self):
6178 self._ExpandAndLockInstance()
# Node locks are computed later from the instance's node list.
6179 self.needed_locks[locking.LEVEL_NODE] = []
6180 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6182 def DeclareLocks(self, level):
6183 if level == locking.LEVEL_NODE:
6184 self._LockInstancesNodes()
6186 def CheckPrereq(self):
6187 """Check prerequisites.
6189 This checks that the instance is in the cluster.
6192 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6193 assert self.instance is not None, \
6194 "Cannot retrieve locked instance %s" % self.op.instance_name
6195 _CheckNodeOnline(self, self.instance.primary_node)
6197 def Exec(self, feedback_fn):
6198 """Activate the disks.
6201 disks_ok, disks_info = \
6202 _AssembleInstanceDisks(self, self.instance,
6203 ignore_size=self.op.ignore_size)
# NOTE(review): "if not disks_ok:" guard is on an elided line.
6205 raise errors.OpExecError("Cannot activate block devices")
6210 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6212 """Prepare the block devices for an instance.
6214 This sets up the block devices on all nodes.
6216 @type lu: L{LogicalUnit}
6217 @param lu: the logical unit on whose behalf we execute
6218 @type instance: L{objects.Instance}
6219 @param instance: the instance for whose disks we assemble
6220 @type disks: list of L{objects.Disk} or None
6221 @param disks: which disks to assemble (or all, if None)
6222 @type ignore_secondaries: boolean
6223 @param ignore_secondaries: if true, errors on secondary nodes
6224 won't result in an error return from the function
6225 @type ignore_size: boolean
6226 @param ignore_size: if true, the current known size of the disk
6227 will not be used during the disk activation, useful for cases
6228 when the size is wrong
6229 @return: False if the operation failed, otherwise a list of
6230 (host, instance_visible_name, node_visible_name)
6231 with the mapping from node devices to instance devices
6236 iname = instance.name
6237 disks = _ExpandCheckDisks(instance, disks)
6239 # With the two passes mechanism we try to reduce the window of
6240 # opportunity for the race condition of switching DRBD to primary
6241 # before handshaking occured, but we do not eliminate it
6243 # The proper fix would be to wait (with some limits) until the
6244 # connection has been made and drbd transitions from WFConnection
6245 # into any other network-connected state (Connected, SyncTarget,
6248 # 1st pass, assemble on all nodes in secondary mode
6249 for idx, inst_disk in enumerate(disks):
6250 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# NOTE(review): "if ignore_size:" guard is on an elided line —
# copying the disk object so UnsetSize doesn't mutate the config copy.
6252 node_disk = node_disk.Copy()
6253 node_disk.UnsetSize()
6254 lu.cfg.SetDiskID(node_disk, node)
6255 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6256 msg = result.fail_msg
6258 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6259 " (is_primary=False, pass=1): %s",
6260 inst_disk.iv_name, node, msg)
6261 if not ignore_secondaries:
6264 # FIXME: race condition on drbd migration to primary
6266 # 2nd pass, do only the primary node
6267 for idx, inst_disk in enumerate(disks):
6270 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# Secondaries were already handled in pass 1; skip them here.
6271 if node != instance.primary_node:
6274 node_disk = node_disk.Copy()
6275 node_disk.UnsetSize()
6276 lu.cfg.SetDiskID(node_disk, node)
6277 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6278 msg = result.fail_msg
6280 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6281 " (is_primary=True, pass=2): %s",
6282 inst_disk.iv_name, node, msg)
6285 dev_path = result.payload
6287 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6289 # leave the disks configured for the primary node
6290 # this is a workaround that would be fixed better by
6291 # improving the logical/physical id handling
6293 lu.cfg.SetDiskID(disk, instance.primary_node)
6295 return disks_ok, device_info
6298 def _StartInstanceDisks(lu, instance, force):
6299 """Start the disks of an instance.
# Assembles all disks; on failure, shuts them down again and raises.
6302 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6303 ignore_secondaries=force)
# NOTE(review): "if not disks_ok:" guard is on an elided line.
6305 _ShutdownInstanceDisks(lu, instance)
6306 if force is not None and not force:
6307 lu.proc.LogWarning("", hint="If the message above refers to a"
6309 " you can retry the operation using '--force'.")
6310 raise errors.OpExecError("Disk consistency error")
6313 class LUInstanceDeactivateDisks(NoHooksLU):
6314 """Shutdown an instance's disks.
6319 def ExpandNames(self):
6320 self._ExpandAndLockInstance()
6321 self.needed_locks[locking.LEVEL_NODE] = []
6322 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6324 def DeclareLocks(self, level):
6325 if level == locking.LEVEL_NODE:
6326 self._LockInstancesNodes()
6328 def CheckPrereq(self):
6329 """Check prerequisites.
6331 This checks that the instance is in the cluster.
6334 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6335 assert self.instance is not None, \
6336 "Cannot retrieve locked instance %s" % self.op.instance_name
6338 def Exec(self, feedback_fn):
6339 """Deactivate the disks
# NOTE(review): an elided condition (presumably self.op.force) selects
# between the forced and the safe (state-checked) shutdown paths.
6342 instance = self.instance
6344 _ShutdownInstanceDisks(self, instance)
6346 _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  @type disks: list of L{objects.Disk} or None
  @param disks: subset of the instance's disks to act on (all if None)
  @raise errors.OpPrereqError: if the instance is not down

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6360 def _ExpandCheckDisks(instance, disks):
6361 """Return the instance disks selected by the disks list
6363 @type disks: list of L{objects.Disk} or None
6364 @param disks: selected disks
6365 @rtype: list of L{objects.Disk}
6366 @return: selected instance disks to act on
6370 return instance.disks
6372 if not set(disks).issubset(instance.disks):
6373 raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  ignored.

  @type disks: list of L{objects.Disk} or None
  @param disks: subset of the instance's disks to act on (all if None)
  @rtype: boolean
  @return: whether all relevant shutdown calls succeeded

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # a primary-node failure counts unless explicitly ignored; a
        # failure elsewhere counts unless that node is marked offline
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: C{int}
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  # a non-int answer means the hypervisor could not report memory
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in the all VGs.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  # delegate each volume group to the per-VG checker; any shortage or
  # RPC failure propagates as OpPrereqError from there
  for vg_name in req_sizes:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, req_sizes[vg_name])
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    # a non-int answer means the VG data could not be retrieved
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU stats
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    # a non-int answer means the hypervisor could not report CPU data
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        # startup failed: release the just-assembled disks
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboot of a running instance is delegated to the node
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot (or the instance is not running): stop, then start
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    # the config is updated first so the instance stays marked down even
    # if the RPC below fails
    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      # always release the disks, even if the OS create scripts failed
      _ShutdownInstanceDisks(self, inst)
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # disk parameters that may be changed while recreating
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def CheckArguments(self):
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    if (self.op.nodes and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
                                   errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        # rename script failures are only warned about, since the
        # instance has already been renamed in the configuration
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  Removes the instance's disks and then drops it from the cluster
  configuration, scheduling the removal of its lock.

  @type ignore_failures: boolean
  @param ignore_failures: whether disk-removal failures are downgraded
      to warnings
  @raise errors.OpExecError: if the disks cannot be removed and
      C{ignore_failures} is false

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # all real work is delegated to an _InstanceQuery helper object
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    # failover is implemented as a migration tasklet with failover=True
    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        # externally mirrored disks can fail over to any node
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7443 class LUInstanceMigrate(LogicalUnit):
7444 """Migrate an instance.
7446 This is migration without shutting down, compared to the failover,
7447 which is done with shutdown.
7450 HPATH = "instance-migrate"
7451 HTYPE = constants.HTYPE_INSTANCE
7454 def ExpandNames(self):
7455 self._ExpandAndLockInstance()
7457 if self.op.target_node is not None:
7458 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7460 self.needed_locks[locking.LEVEL_NODE] = []
7461 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7463 self.needed_locks[locking.LEVEL_NODE] = []
7464 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7467 TLMigrateInstance(self, self.op.instance_name,
7468 cleanup=self.op.cleanup,
7470 fallback=self.op.allow_failover,
7471 allow_runtime_changes=self.op.allow_runtime_changes,
7472 ignore_ipolicy=self.op.ignore_ipolicy)
7473 self.tasklets = [self._migrater]
7475 def DeclareLocks(self, level):
7476 if level == locking.LEVEL_NODE:
7477 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7478 if instance.disk_template in constants.DTS_EXT_MIRROR:
7479 if self.op.target_node is None:
7480 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7482 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7483 self.op.target_node]
7484 del self.recalculate_locks[locking.LEVEL_NODE]
7486 self._LockInstancesNodes()
7487 elif level == locking.LEVEL_NODE_RES:
7489 self.needed_locks[locking.LEVEL_NODE_RES] = \
7490 self.needed_locks[locking.LEVEL_NODE][:]
7492 def BuildHooksEnv(self):
7495 This runs on master, primary and secondary nodes of the instance.
7498 instance = self._migrater.instance
7499 source_node = instance.primary_node
7500 target_node = self.op.target_node
7501 env = _BuildInstanceHookEnvByObject(self, instance)
7503 "MIGRATE_LIVE": self._migrater.live,
7504 "MIGRATE_CLEANUP": self.op.cleanup,
7505 "OLD_PRIMARY": source_node,
7506 "NEW_PRIMARY": target_node,
7507 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7510 if instance.disk_template in constants.DTS_INT_MIRROR:
7511 env["OLD_SECONDARY"] = target_node
7512 env["NEW_SECONDARY"] = source_node
7514 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7518 def BuildHooksNodes(self):
7519 """Build hooks nodes.
7522 instance = self._migrater.instance
7523 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7524 return (nl, nl + [instance.primary_node])
# LU: move an instance to another node by shutting it down and copying the
# disk data over (supported only for plain LV/file-backed disks).
7527 class LUInstanceMove(LogicalUnit):
7528 """Move an instance by data-copying.
7531 HPATH = "instance-move"
7532 HTYPE = constants.HTYPE_INSTANCE
7535 def ExpandNames(self):
7536 self._ExpandAndLockInstance()
7537 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7538 self.op.target_node = target_node
7539 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7540 self.needed_locks[locking.LEVEL_NODE_RES] = []
7541 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7543 def DeclareLocks(self, level):
7544 if level == locking.LEVEL_NODE:
7545 self._LockInstancesNodes(primary_only=True)  # source primary + target
7546 elif level == locking.LEVEL_NODE_RES:
7548 self.needed_locks[locking.LEVEL_NODE_RES] = \
7549 self.needed_locks[locking.LEVEL_NODE][:]
7551 def BuildHooksEnv(self):
7554 This runs on master, primary and secondary nodes of the instance.
7558 "TARGET_NODE": self.op.target_node,
7559 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7561 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7564 def BuildHooksNodes(self):
7565 """Build hooks nodes.
7569 self.cfg.GetMasterNode(),
7570 self.instance.primary_node,
7571 self.op.target_node,
# Validates locked instance/node, rejects mirrored disk layouts, checks the
# target node state, instance policy, memory and bridges.
7575 def CheckPrereq(self):
7576 """Check prerequisites.
7578 This checks that the instance is in the cluster.
7581 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7582 assert self.instance is not None, \
7583 "Cannot retrieve locked instance %s" % self.op.instance_name
7585 node = self.cfg.GetNodeInfo(self.op.target_node)
7586 assert node is not None, \
7587 "Cannot retrieve locked node %s" % self.op.target_node
7589 self.target_node = target_node = node.name
7591 if target_node == instance.primary_node:
7592 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7593 (instance.name, target_node),
7596 bep = self.cfg.GetClusterInfo().FillBE(instance)
7598 for idx, dsk in enumerate(instance.disks):
7599 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7600 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7601 " cannot copy" % idx, errors.ECODE_STATE)
7603 _CheckNodeOnline(self, target_node)
7604 _CheckNodeNotDrained(self, target_node)
7605 _CheckNodeVmCapable(self, target_node)
7606 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7607 self.cfg.GetNodeGroup(node.group))
7608 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7609 ignore=self.op.ignore_ipolicy)
7611 if instance.admin_state == constants.ADMINST_UP:
7612 # check memory requirements on the secondary node
7613 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7614 instance.name, bep[constants.BE_MAXMEM],
7615 instance.hypervisor)
7617 self.LogInfo("Not checking memory on the secondary node as"
7618 " instance will not be started")
7620 # check bridge existance
7621 _CheckInstanceBridgesExist(self, instance, node=target_node)
# Shutdown on source, create disks on target, copy each disk via
# blockdev_export, update config, remove old disks, then start if it was up.
7623 def Exec(self, feedback_fn):
7624 """Move an instance.
7626 The move is done by shutting it down on its present node, copying
7627 the data over (slow) and starting it on the new node.
7630 instance = self.instance
7632 source_node = instance.primary_node
7633 target_node = self.target_node
7635 self.LogInfo("Shutting down instance %s on source node %s",
7636 instance.name, source_node)
7638 assert (self.owned_locks(locking.LEVEL_NODE) ==
7639 self.owned_locks(locking.LEVEL_NODE_RES))
7641 result = self.rpc.call_instance_shutdown(source_node, instance,
7642 self.op.shutdown_timeout)
7643 msg = result.fail_msg
7645 if self.op.ignore_consistency:
7646 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7647 " Proceeding anyway. Please make sure node"
7648 " %s is down. Error details: %s",
7649 instance.name, source_node, source_node, msg)
7651 raise errors.OpExecError("Could not shutdown instance %s on"
7653 (instance.name, source_node, msg))
7655 # create the target disks
7657 _CreateDisks(self, instance, target_node=target_node)
7658 except errors.OpExecError:
7659 self.LogWarning("Device creation failed, reverting...")
7661 _RemoveDisks(self, instance, target_node=target_node)
7663 self.cfg.ReleaseDRBDMinors(instance.name)  # free reserved minors on rollback
7666 cluster_name = self.cfg.GetClusterInfo().cluster_name
7669 # activate, get path, copy the data over
7670 for idx, disk in enumerate(instance.disks):
7671 self.LogInfo("Copying data for disk %d", idx)
7672 result = self.rpc.call_blockdev_assemble(target_node, disk,
7673 instance.name, True, idx)
7675 self.LogWarning("Can't assemble newly created disk %d: %s",
7676 idx, result.fail_msg)
7677 errs.append(result.fail_msg)  # collect; abort after the loop
7679 dev_path = result.payload
7680 result = self.rpc.call_blockdev_export(source_node, disk,
7681 target_node, dev_path,
7684 self.LogWarning("Can't copy data over for disk %d: %s",
7685 idx, result.fail_msg)
7686 errs.append(result.fail_msg)
7690 self.LogWarning("Some disks failed to copy, aborting")
7692 _RemoveDisks(self, instance, target_node=target_node)
7694 self.cfg.ReleaseDRBDMinors(instance.name)
7695 raise errors.OpExecError("Errors during disk copy: %s" %
7698 instance.primary_node = target_node
7699 self.cfg.Update(instance, feedback_fn)  # persist new primary node
7701 self.LogInfo("Removing the disks on the original node")
7702 _RemoveDisks(self, instance, target_node=source_node)
7704 # Only start the instance if it's marked as up
7705 if instance.admin_state == constants.ADMINST_UP:
7706 self.LogInfo("Starting instance %s on node %s",
7707 instance.name, target_node)
7709 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7710 ignore_secondaries=True)
7712 _ShutdownInstanceDisks(self, instance)
7713 raise errors.OpExecError("Can't activate the instance's disks")
7715 result = self.rpc.call_instance_start(target_node,
7716 (instance, None, None), False)
7717 msg = result.fail_msg
7719 _ShutdownInstanceDisks(self, instance)
7720 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7721 (instance.name, target_node, msg))
# LU: migrate all primary instances off a node by submitting one
# OpInstanceMigrate job per instance (returned via ResultWithJobs).
7724 class LUNodeMigrate(LogicalUnit):
7725 """Migrate all instances from a node.
7728 HPATH = "node-migrate"
7729 HTYPE = constants.HTYPE_NODE
7732 def CheckArguments(self):
7735 def ExpandNames(self):
7736 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7738 self.share_locks = _ShareAll()  # shared locks only; real work is in sub-jobs
7739 self.needed_locks = {
7740 locking.LEVEL_NODE: [self.op.node_name],
7743 def BuildHooksEnv(self):
7746 This runs on the master, the primary and all the secondaries.
7750 "NODE_NAME": self.op.node_name,
7751 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7754 def BuildHooksNodes(self):
7755 """Build hooks nodes.
7758 nl = [self.cfg.GetMasterNode()]
7761 def CheckPrereq(self):
7764 def Exec(self, feedback_fn):
7765 # Prepare jobs for migration instances
7766 allow_runtime_changes = self.op.allow_runtime_changes
7768 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7771 iallocator=self.op.iallocator,
7772 target_node=self.op.target_node,
7773 allow_runtime_changes=allow_runtime_changes,
7774 ignore_ipolicy=self.op.ignore_ipolicy)]
7775 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7778 # TODO: Run iallocator in this opcode and pass correct placement options to
7779 # OpInstanceMigrate. Since other jobs can modify the cluster between
7780 # running the iallocator and the actual migration, a good consistency model
7781 # will have to be found.
7783 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7784 frozenset([self.op.node_name]))
7786 return ResultWithJobs(jobs)
7789 class TLMigrateInstance(Tasklet):
7790 """Tasklet class for instance migration.
7793 @ivar live: whether the migration will be done live or non-live;
7794 this variable is initialized only after CheckPrereq has run
7795 @type cleanup: boolean
7796 @ivar cleanup: Whether we clean up from a failed migration
7797 @type iallocator: string
7798 @ivar iallocator: The iallocator used to determine target_node
7799 @type target_node: string
7800 @ivar target_node: If given, the target_node to reallocate the instance to
7801 @type failover: boolean
7802 @ivar failover: Whether operation results in failover or migration
7803 @type fallback: boolean
7804 @ivar fallback: Whether fallback to failover is allowed if migration not
7806 @type ignore_consistency: boolean
7807 @ivar ignore_consistency: Whether we should ignore consistency between source
7809 @type shutdown_timeout: int
7810 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7811 @type ignore_ipolicy: bool
7812 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7817 _MIGRATION_POLL_INTERVAL = 1 # seconds
7818 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
# Note: iallocator and target_node are not stored here; CheckPrereq reads
# them from self.lu.op when resolving the actual target.
7820 def __init__(self, lu, instance_name, cleanup=False,
7821 failover=False, fallback=False,
7822 ignore_consistency=False,
7823 allow_runtime_changes=True,
7824 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7825 ignore_ipolicy=False):
7826 """Initializes this class.
7829 Tasklet.__init__(self, lu)
7832 self.instance_name = instance_name
7833 self.cleanup = cleanup
7834 self.live = False # will be overridden later
7835 self.failover = failover
7836 self.fallback = fallback
7837 self.ignore_consistency = ignore_consistency
7838 self.shutdown_timeout = shutdown_timeout
7839 self.ignore_ipolicy = ignore_ipolicy
7840 self.allow_runtime_changes = allow_runtime_changes
# Resolves the target node (explicit, iallocator, or the DRBD secondary),
# validates instance policy/memory/bridges, may downgrade a migration to a
# failover when allowed, and decides the live/non-live migration mode.
7842 def CheckPrereq(self):
7843 """Check prerequisites.
7845 This checks that the instance is in the cluster.
7848 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7849 instance = self.cfg.GetInstanceInfo(instance_name)
7850 assert instance is not None
7851 self.instance = instance
7852 cluster = self.cfg.GetClusterInfo()
7854 if (not self.cleanup and
7855 not instance.admin_state == constants.ADMINST_UP and
7856 not self.failover and self.fallback):
7857 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7858 " switching to failover")
7859 self.failover = True  # a stopped instance cannot be live-migrated
7861 if instance.disk_template not in constants.DTS_MIRRORED:
7866 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7867 " %s" % (instance.disk_template, text),
7870 if instance.disk_template in constants.DTS_EXT_MIRROR:
7871 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7873 if self.lu.op.iallocator:
7874 self._RunAllocator()  # sets self.target_node
7876 # We set set self.target_node as it is required by
7878 self.target_node = self.lu.op.target_node
7880 # Check that the target node is correct in terms of instance policy
7881 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7882 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7883 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7884 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7885 ignore=self.ignore_ipolicy)
7887 # self.target_node is already populated, either directly or by the
7889 target_node = self.target_node
7890 if self.target_node == instance.primary_node:
7891 raise errors.OpPrereqError("Cannot migrate instance %s"
7892 " to its primary (%s)" %
7893 (instance.name, instance.primary_node))
7895 if len(self.lu.tasklets) == 1:
7896 # It is safe to release locks only when we're the only tasklet
7898 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7899 keep=[instance.primary_node, self.target_node])
7902 secondary_nodes = instance.secondary_nodes
7903 if not secondary_nodes:
7904 raise errors.ConfigurationError("No secondary node but using"
7905 " %s disk template" %
7906 instance.disk_template)
7907 target_node = secondary_nodes[0]  # int-mirror: target is the secondary
7908 if self.lu.op.iallocator or (self.lu.op.target_node and
7909 self.lu.op.target_node != target_node):
7911 text = "failed over"
7914 raise errors.OpPrereqError("Instances with disk template %s cannot"
7915 " be %s to arbitrary nodes"
7916 " (neither an iallocator nor a target"
7917 " node can be passed)" %
7918 (instance.disk_template, text),
7920 nodeinfo = self.cfg.GetNodeInfo(target_node)
7921 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7922 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7923 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7924 ignore=self.ignore_ipolicy)
7926 i_be = cluster.FillBE(instance)
7928 # check memory requirements on the secondary node
7929 if (not self.cleanup and
7930 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7931 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7932 "migrating instance %s" %
7934 i_be[constants.BE_MINMEM],
7935 instance.hypervisor)
7937 self.lu.LogInfo("Not checking memory on the secondary node as"
7938 " instance will not be started")
7940 # check if failover must be forced instead of migration
7941 if (not self.cleanup and not self.failover and
7942 i_be[constants.BE_ALWAYS_FAILOVER]):
7944 self.lu.LogInfo("Instance configured to always failover; fallback"
7946 self.failover = True
7948 raise errors.OpPrereqError("This instance has been configured to"
7949 " always failover, please allow failover",
7952 # check bridge existance
7953 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7955 if not self.cleanup:
7956 _CheckNodeNotDrained(self.lu, target_node)
7957 if not self.failover:
7958 result = self.rpc.call_instance_migratable(instance.primary_node,
7960 if result.fail_msg and self.fallback:
7961 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7963 self.failover = True
7965 result.Raise("Can't migrate, please use failover",
7966 prereq=True, ecode=errors.ECODE_STATE)
7968 assert not (self.failover and self.cleanup)
7970 if not self.failover:
7971 if self.lu.op.live is not None and self.lu.op.mode is not None:
7972 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7973 " parameters are accepted",
7975 if self.lu.op.live is not None:
7977 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7979 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7980 # reset the 'live' parameter to None so that repeated
7981 # invocations of CheckPrereq do not raise an exception
7982 self.lu.op.live = None
7983 elif self.lu.op.mode is None:
7984 # read the default value from the hypervisor
7985 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7986 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7988 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7990 # Failover is never live
7993 if not (self.failover or self.cleanup):
7994 remote_info = self.rpc.call_instance_info(instance.primary_node,
7996 instance.hypervisor)
7997 remote_info.Raise("Error checking instance on node %s" %
7998 instance.primary_node)
7999 instance_running = bool(remote_info.payload)
8000 if instance_running:
8001 self.current_mem = int(remote_info.payload["memory"])  # used for ballooning check in _ExecMigration
# Runs the configured iallocator in relocation mode and stores the chosen
# node in self.target_node.
8003 def _RunAllocator(self):
8004 """Run the allocator based on input opcode.
8007 # FIXME: add a self.ignore_ipolicy option
8008 ial = IAllocator(self.cfg, self.rpc,
8009 mode=constants.IALLOCATOR_MODE_RELOC,
8010 name=self.instance_name,
8011 # TODO See why hail breaks with a single node below
8012 relocate_from=[self.instance.primary_node,
8013 self.instance.primary_node],
8016 ial.Run(self.lu.op.iallocator)
8019 raise errors.OpPrereqError("Can't compute nodes using"
8020 " iallocator '%s': %s" %
8021 (self.lu.op.iallocator, ial.info),
8023 if len(ial.result) != ial.required_nodes:
8024 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8025 " of nodes (%s), required %s" %
8026 (self.lu.op.iallocator, len(ial.result),
8027 ial.required_nodes), errors.ECODE_FAULT)
8028 self.target_node = ial.result[0]  # first returned node is the destination
8029 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8030 self.instance_name, self.lu.op.iallocator,
8031 utils.CommaJoin(ial.result))
# Polls drbd_wait_sync on all involved nodes, reporting the minimum
# completion percentage until every node reports the disks as synced.
8033 def _WaitUntilSync(self):
8034 """Poll with custom rpc for disk sync.
8036 This uses our own step-based rpc call.
8039 self.feedback_fn("* wait until resync is done")
8043 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8045 self.instance.disks)
8047 for node, nres in result.items():
8048 nres.Raise("Cannot resync disks on node %s" % node)
8049 node_done, node_percent = nres.payload
8050 all_done = all_done and node_done  # every node must report done
8051 if node_percent is not None:
8052 min_percent = min(min_percent, node_percent)
8054 if min_percent < 100:
8055 self.feedback_fn(" - progress: %.1f%%" % min_percent)
# Closes the instance's block devices on the given node, demoting its DRBD
# disks to the secondary role there.
8058 def _EnsureSecondary(self, node):
8059 """Demote a node to secondary.
8062 self.feedback_fn("* switching node %s to secondary mode" % node)
8064 for dev in self.instance.disks:
8065 self.cfg.SetDiskID(dev, node)  # resolve logical IDs for this node
8067 result = self.rpc.call_blockdev_close(node, self.instance.name,
8068 self.instance.disks)
8069 result.Raise("Cannot change disk to secondary on node %s" % node)
# Disconnects the instance's DRBD disks from the network on all involved
# nodes (standalone mode), as a step between reconfigurations.
8071 def _GoStandalone(self):
8072 """Disconnect from the network.
8075 self.feedback_fn("* changing into standalone mode")
8076 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8077 self.instance.disks)
8078 for node, nres in result.items():
8079 nres.Raise("Cannot disconnect disks node %s" % node)
# Reattaches the instance's DRBD disks to the network, in dual-master mode
# when multimaster is True (needed during live migration), otherwise
# single-master.
8081 def _GoReconnect(self, multimaster):
8082 """Reconnect to the network.
8088 msg = "single-master"
8089 self.feedback_fn("* changing disks into %s mode" % msg)
8090 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8091 self.instance.disks,
8092 self.instance.name, multimaster)
8093 for node, nres in result.items():
8094 nres.Raise("Cannot change disks config on node %s" % node)
# Recovery path for a failed migration: determine which node actually runs
# the instance, update the config if it moved, then demote the other node
# and bring the disks back to a consistent single-master state.
8096 def _ExecCleanup(self):
8097 """Try to cleanup after a failed migration.
8099 The cleanup is done by:
8100 - check that the instance is running only on one node
8101 (and update the config if needed)
8102 - change disks on its secondary node to secondary
8103 - wait until disks are fully synchronized
8104 - disconnect from the network
8105 - change disks into single-master mode
8106 - wait again until disks are fully synchronized
8109 instance = self.instance
8110 target_node = self.target_node
8111 source_node = self.source_node
8113 # check running on only one node
8114 self.feedback_fn("* checking where the instance actually runs"
8115 " (if this hangs, the hypervisor might be in"
8117 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8118 for node, result in ins_l.items():
8119 result.Raise("Can't contact node %s" % node)
8121 runningon_source = instance.name in ins_l[source_node].payload
8122 runningon_target = instance.name in ins_l[target_node].payload
8124 if runningon_source and runningon_target:
8125 raise errors.OpExecError("Instance seems to be running on two nodes,"
8126 " or the hypervisor is confused; you will have"
8127 " to ensure manually that it runs only on one"
8128 " and restart this operation")
8130 if not (runningon_source or runningon_target):
8131 raise errors.OpExecError("Instance does not seem to be running at all;"
8132 " in this case it's safer to repair by"
8133 " running 'gnt-instance stop' to ensure disk"
8134 " shutdown, and then restarting it")
8136 if runningon_target:
8137 # the migration has actually succeeded, we need to update the config
8138 self.feedback_fn("* instance running on secondary node (%s),"
8139 " updating config" % target_node)
8140 instance.primary_node = target_node
8141 self.cfg.Update(instance, self.feedback_fn)
8142 demoted_node = source_node
8144 self.feedback_fn("* instance confirmed to be running on its"
8145 " primary node (%s)" % source_node)
8146 demoted_node = target_node
8148 if instance.disk_template in constants.DTS_INT_MIRROR:
8149 self._EnsureSecondary(demoted_node)
8151 self._WaitUntilSync()
8152 except errors.OpExecError:
8153 # we ignore here errors, since if the device is standalone, it
8154 # won't be able to sync
8156 self._GoStandalone()
8157 self._GoReconnect(False)  # back to single-master mode
8158 self._WaitUntilSync()
8160 self.feedback_fn("* done")
# Best-effort rollback of the DRBD disk reconfiguration after a failed
# migration; ext-mirror templates need no disk-side revert.  Failures are
# only logged, as the admin must recover manually at that point.
8162 def _RevertDiskStatus(self):
8163 """Try to revert the disk status after a failed migration.
8166 target_node = self.target_node
8167 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8171 self._EnsureSecondary(target_node)
8172 self._GoStandalone()
8173 self._GoReconnect(False)
8174 self._WaitUntilSync()
8175 except errors.OpExecError, err:
8176 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8177 " please try to recover the instance manually;"
8178 " error '%s'" % str(err))
# Aborts an in-flight migration: finalize on the target with success=False,
# then on the source.  Errors are logged but not raised so the disk-status
# revert can still be attempted afterwards.
8180 def _AbortMigration(self):
8181 """Call the hypervisor code to abort a started migration.
8184 instance = self.instance
8185 target_node = self.target_node
8186 source_node = self.source_node
8187 migration_info = self.migration_info
8189 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8193 abort_msg = abort_result.fail_msg
8195 logging.error("Aborting migration failed on target node %s: %s",
8196 target_node, abort_msg)
8197 # Don't raise an exception here, as we stil have to try to revert the
8198 # disk status, even if this step failed.
8200 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8201 instance, False, self.live)
8202 abort_msg = abort_result.fail_msg
8204 logging.error("Aborting migration failed on source node %s: %s",
8205 source_node, abort_msg)
# Full migration sequence: pre-flight checks (hypervisor versions, disk
# consistency, memory ballooning), dual-master disk switch, hypervisor
# migration with progress polling, finalization on both nodes, config
# update, and single-master disk switch; aborts + reverts on any failure.
8207 def _ExecMigration(self):
8208 """Migrate an instance.
8210 The migrate is done by:
8211 - change the disks into dual-master mode
8212 - wait until disks are fully synchronized again
8213 - migrate the instance
8214 - change disks on the new secondary node (the old primary) to secondary
8215 - wait until disks are fully synchronized
8216 - change disks into single-master mode
8219 instance = self.instance
8220 target_node = self.target_node
8221 source_node = self.source_node
8223 # Check for hypervisor version mismatch and warn the user.
8224 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8225 None, [self.instance.hypervisor])
8226 for ninfo in nodeinfo.values():
8227 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8229 (_, _, (src_info, )) = nodeinfo[source_node].payload
8230 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8232 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8233 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8234 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8235 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8236 if src_version != dst_version:
8237 self.feedback_fn("* warning: hypervisor version mismatch between"
8238 " source (%s) and target (%s) node" %
8239 (src_version, dst_version))
8241 self.feedback_fn("* checking disk consistency between source and target")
8242 for (idx, dev) in enumerate(instance.disks):
8243 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8244 raise errors.OpExecError("Disk %s is degraded or not fully"
8245 " synchronized on target node,"
8246 " aborting migration" % idx)
8248 if self.current_mem > self.tgt_free_mem:
8249 if not self.allow_runtime_changes:
8250 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8251 " free memory to fit instance %s on target"
8252 " node %s (have %dMB, need %dMB)" %
8253 (instance.name, target_node,
8254 self.tgt_free_mem, self.current_mem))
8255 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8256 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8259 rpcres.Raise("Cannot modify instance runtime memory")
8261 # First get the migration information from the remote node
8262 result = self.rpc.call_migration_info(source_node, instance)
8263 msg = result.fail_msg
8265 log_err = ("Failed fetching source migration information from %s: %s" %
8267 logging.error(log_err)
8268 raise errors.OpExecError(log_err)
8270 self.migration_info = migration_info = result.payload
8272 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8273 # Then switch the disks to master/master mode
8274 self._EnsureSecondary(target_node)
8275 self._GoStandalone()
8276 self._GoReconnect(True)  # dual-master for live migration
8277 self._WaitUntilSync()
8279 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8280 result = self.rpc.call_accept_instance(target_node,
8283 self.nodes_ip[target_node])
8285 msg = result.fail_msg
8287 logging.error("Instance pre-migration failed, trying to revert"
8288 " disk status: %s", msg)
8289 self.feedback_fn("Pre-migration failed, aborting")
8290 self._AbortMigration()
8291 self._RevertDiskStatus()
8292 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8293 (instance.name, msg))
8295 self.feedback_fn("* migrating instance to %s" % target_node)
8296 result = self.rpc.call_instance_migrate(source_node, instance,
8297 self.nodes_ip[target_node],
8299 msg = result.fail_msg
8301 logging.error("Instance migration failed, trying to revert"
8302 " disk status: %s", msg)
8303 self.feedback_fn("Migration failed, aborting")
8304 self._AbortMigration()
8305 self._RevertDiskStatus()
8306 raise errors.OpExecError("Could not migrate instance %s: %s" %
8307 (instance.name, msg))
8309 self.feedback_fn("* starting memory transfer")
8310 last_feedback = time.time()
8312 result = self.rpc.call_instance_get_migration_status(source_node,
8314 msg = result.fail_msg
8315 ms = result.payload # MigrationStatus instance
8316 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8317 logging.error("Instance migration failed, trying to revert"
8318 " disk status: %s", msg)
8319 self.feedback_fn("Migration failed, aborting")
8320 self._AbortMigration()
8321 self._RevertDiskStatus()
8322 raise errors.OpExecError("Could not migrate instance %s: %s" %
8323 (instance.name, msg))
8325 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8326 self.feedback_fn("* memory transfer complete")
8329 if (utils.TimeoutExpired(last_feedback,
8330 self._MIGRATION_FEEDBACK_INTERVAL) and
8331 ms.transferred_ram is not None):
8332 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8333 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8334 last_feedback = time.time()  # throttle progress messages
8336 time.sleep(self._MIGRATION_POLL_INTERVAL)
8338 result = self.rpc.call_instance_finalize_migration_src(source_node,
8342 msg = result.fail_msg
8344 logging.error("Instance migration succeeded, but finalization failed"
8345 " on the source node: %s", msg)
8346 raise errors.OpExecError("Could not finalize instance migration: %s" %
8349 instance.primary_node = target_node
8351 # distribute new instance config to the other nodes
8352 self.cfg.Update(instance, self.feedback_fn)
8354 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8358 msg = result.fail_msg
8360 logging.error("Instance migration succeeded, but finalization failed"
8361 " on the target node: %s", msg)
8362 raise errors.OpExecError("Could not finalize instance migration: %s" %
8365 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8366 self._EnsureSecondary(source_node)
8367 self._WaitUntilSync()
8368 self._GoStandalone()
8369 self._GoReconnect(False)  # back to single-master mode
8370 self._WaitUntilSync()
8372 # If the instance's disk template is `rbd' and there was a successful
8373 # migration, unmap the device from the source node.
8374 if self.instance.disk_template == constants.DT_RBD:
8375 disks = _ExpandCheckDisks(instance, instance.disks)
8376 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8378 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8379 msg = result.fail_msg
8381 logging.error("Migration was successful, but couldn't unmap the"
8382 " block device %s on source node %s: %s",
8383 disk.iv_name, source_node, msg)
8384 logging.error("You need to unmap the device %s manually on %s",
8385 disk.iv_name, source_node)
8387 self.feedback_fn("* done")
# Failover sequence: verify target disk consistency (unless the primary is
# offline or consistency is ignored), shut down on the source, deactivate
# source disks, switch the configured primary node, then reactivate disks
# and restart on the target if the instance was administratively up.
8389 def _ExecFailover(self):
8390 """Failover an instance.
8392 The failover is done by shutting it down on its present node and
8393 starting it on the secondary.
8396 instance = self.instance
8397 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8399 source_node = instance.primary_node
8400 target_node = self.target_node
8402 if instance.admin_state == constants.ADMINST_UP:
8403 self.feedback_fn("* checking disk consistency between source and target")
8404 for (idx, dev) in enumerate(instance.disks):
8405 # for drbd, these are drbd over lvm
8406 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8407 if primary_node.offline:
8408 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8410 (primary_node.name, idx, target_node))
8411 elif not self.ignore_consistency:
8412 raise errors.OpExecError("Disk %s is degraded on target node,"
8413 " aborting failover" % idx)
8415 self.feedback_fn("* not checking disk consistency as instance is not"
8418 self.feedback_fn("* shutting down instance on source node")
8419 logging.info("Shutting down instance %s on node %s",
8420 instance.name, source_node)
8422 result = self.rpc.call_instance_shutdown(source_node, instance,
8423 self.shutdown_timeout)
8424 msg = result.fail_msg
8426 if self.ignore_consistency or primary_node.offline:
8427 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8428 " proceeding anyway; please make sure node"
8429 " %s is down; error details: %s",
8430 instance.name, source_node, source_node, msg)
8432 raise errors.OpExecError("Could not shutdown instance %s on"
8434 (instance.name, source_node, msg))
8436 self.feedback_fn("* deactivating the instance's disks on source node")
8437 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8438 raise errors.OpExecError("Can't shut down the instance's disks")
8440 instance.primary_node = target_node
8441 # distribute new instance config to the other nodes
8442 self.cfg.Update(instance, self.feedback_fn)
8444 # Only start the instance if it's marked as up
8445 if instance.admin_state == constants.ADMINST_UP:
8446 self.feedback_fn("* activating the instance's disks on target node %s" %
8448 logging.info("Starting instance %s on node %s",
8449 instance.name, target_node)
8451 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8452 ignore_secondaries=True)
8454 _ShutdownInstanceDisks(self.lu, instance)
8455 raise errors.OpExecError("Can't activate the instance's disks")
8457 self.feedback_fn("* starting the instance on the target node %s" %
8459 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8461 msg = result.fail_msg
8463 _ShutdownInstanceDisks(self.lu, instance)
8464 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8465 (instance.name, target_node, msg))
  def Exec(self, feedback_fn):
    """Perform the migration.

    Dispatches to failover, cleanup or live migration depending on the
    flags set on this tasklet.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be change to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  # e.g. the DRBD data/meta LVs must exist on the secondary too
  if device.CreateOnSecondary():
    force_create = True

  # depth-first: children must exist before the parent device
  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution
  @raise errors.OpExecError: via C{result.Raise} if the RPC fails

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  # remember the unique id returned by the backend, if not yet known
  if device.physical_id is None:
    device.physical_id = result.payload
8568 def _GenerateUniqueNames(lu, exts):
8569 """Generate a suitable LV name.
8571 This will generate a logical volume name for the given instance.
8576 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8577 results.append("%s%s" % (new_id, val))
def _ComputeLDParams(disk_template, disk_params):
  """Computes Logical Disk parameters from Disk Template parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters; dict(template_name -> parameters
  @rtype: list(dict)
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order.

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)

  result = list()
  dt_params = disk_params[disk_template]
  if disk_template == constants.DT_DRBD8:
    # first dict: the DRBD device itself
    drbd_params = {
      constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
      constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
      constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
      constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
      constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
      constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
      constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
      constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
      constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
      constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
      constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
      constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
      }

    drbd_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
                       drbd_params)

    result.append(drbd_params)

    # second dict: the data LV backing the DRBD device
    data_params = {
      constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
      }
    data_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       data_params)
    result.append(data_params)

    # third dict: the metadata LV
    meta_params = {
      constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
      }
    meta_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       meta_params)
    result.append(meta_params)

  elif (disk_template == constants.DT_FILE or
        disk_template == constants.DT_SHARED_FILE):
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])

  elif disk_template == constants.DT_PLAIN:
    params = {
      constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
      }
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       params)
    result.append(params)

  elif disk_template == constants.DT_BLOCK:
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])

  elif disk_template == constants.DT_RBD:
    params = {
      constants.LDP_POOL: dt_params[constants.RBD_POOL]
      }
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
                       params)
    result.append(params)

  # NOTE(review): DT_DISKLESS yields an empty list by design
  return result
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
                         meta_params):
  """Generate a drbd8 device complete with its children.

  @param lu: the lu on whose behalf we execute
  @param primary: name of the primary node
  @param secondary: name of the secondary node
  @param size: size of the data LV (and the DRBD device) in mebibytes
  @type vgnames: list of length 2
  @param vgnames: volume groups for the data and meta LVs, in that order
  @type names: list of length 2
  @param names: LV names for the data and meta LVs, in that order
  @param iv_name: the instance-visible name of the device (e.g. "disk/0")
  @param p_minor: DRBD minor on the primary node
  @param s_minor: DRBD minor on the secondary node
  @rtype: L{objects.Disk}
  @return: the DRBD disk object with the two LV children attached

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
                          params=data_params)
  # the metadata LV has a fixed size (DRBD_META_SIZE)
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
                          params=meta_params)
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params=drbd_params)
  return drbd_dev
#: Prefix inserted into generated disk names, per disk template; templates
#: missing from this map do not get generated names at all
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }

#: Maps a disk template to the logical-disk (LD) type used for its devices
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
    secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
    feedback_fn, disk_params,
    _req_file_storage=opcodes.RequireFileStorage,
    _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @param lu: the lu on whose behalf we execute
  @param template_name: one of L{constants.DISK_TEMPLATES}
  @param disk_info: list of dicts describing the requested disks
      (size, mode, optionally vg/metavg/adopt)
  @param base_index: starting index for disk naming (used when adding
      disks to an existing instance)
  @param disk_params: disk template parameters, see L{_ComputeLDParams}
  @rtype: list of L{objects.Disk}
  @return: the generated disk objects, in the same order as C{disk_info}

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []

  ld_params = _ComputeLDParams(template_name, disk_params)

  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_DRBD8:
    # DRBD needs exactly one secondary node and two minors per disk
    drbd_params, data_params, meta_params = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    # each disk gets a data and a meta LV name, interleaved in `names`
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1],
                                      drbd_params, data_params, meta_params)
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  else:
    # all non-mirrored templates: no secondary nodes allowed
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
      _req_file_storage()
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = None
    else:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    # logical_id_fn(idx, disk_index, disk) computes the per-template
    # logical id of each generated disk
    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      logical_id_fn = \
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    disk_index))
    elif template_name == constants.DT_BLOCK:
      logical_id_fn = \
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
    else:
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params=ld_params[0]))

  return disks
8801 def _GetInstanceInfoText(instance):
8802 """Compute that text that should be added to the disk's metadata.
8805 return "originstname+%s" % instance.name
8808 def _CalcEta(time_taken, written, total_size):
8809 """Calculates the ETA based on size written and total size.
8811 @param time_taken: The time taken so far
8812 @param written: amount written so far
8813 @param total_size: The total size of data to be written
8814 @return: The remaining time in seconds
8817 avg_time = time_taken / float(written)
8818 return (total_size - written) * avg_time
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  # pause DRBD sync so the wipe does not fight with the resync traffic
  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # report progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    # always resume sync, even if a wipe RPC raised
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    # explicit target: create only there and treat it as the primary
    pnode = target_node
    all_nodes = [pnode]

  # file-based templates need their containing directory created first
  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    for node in all_nodes:
      # force creation/opening only on the primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks

  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for (idx, device) in enumerate(instance.disks):
    if target_node:
      # only remove the copies on the given node
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best-effort: warn and keep removing the remaining devices
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: list of disk definition dicts (with at least
      L{constants.IDISK_SIZE} and L{constants.IDISK_VG} keys)
  @rtype: dict
  @return: dict mapping volume group name to required size in mebibytes
  @raise errors.ProgrammerError: if the disk template is unknown

  """
  def _compute(disks, payload):
    """Universal algorithm.

    Sums up, per volume group, the disk sizes plus a per-disk payload
    (e.g. DRBD metadata overhead).

    """
    vgs = {}
    for disk in disks:
      # FIX: accumulate on the disk's VG name; the previous code looked up
      # the literal constant key, so per-VG totals were never summed up
      vg_name = disk[constants.IDISK_VG]
      vgs[vg_name] = \
        vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: list of disk definition dicts
  @return: total required size in mebibytes, or None for templates
      without a size requirement
  @raise errors.ProgrammerError: if the disk template is unknown

  """
  plain_sizes = [d[constants.IDISK_SIZE] for d in disks]

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(plain_sizes),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(s + DRBD_META_SIZE for s in plain_sizes),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    constants.DT_RBD: 0,
    }

  try:
    return req_size_dict[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
9044 def _FilterVmNodes(lu, nodenames):
9045 """Filters out non-vm_capable nodes from a list.
9047 @type lu: L{LogicalUnit}
9048 @param lu: the logical unit for which we check
9049 @type nodenames: list
9050 @param nodenames: the list of nodes on which we should check
9052 @return: the list of vm-capable nodes
9055 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9056 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstract the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  # validate against the full parameter set (cluster defaults + overrides)
  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      # offline nodes cannot answer; skip them
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the hypervisor we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      # payload is falsy when the OS was not found and not required
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
9119 class LUInstanceCreate(LogicalUnit):
9120 """Create an instance.
9123 HPATH = "instance-add"
9124 HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    Validates and normalizes the opcode arguments: instance name, NIC and
    disk parameter dicts, adoption consistency, file storage settings and
    per-mode (create/import/remote-import) requirements.

    @raise errors.OpPrereqError: on any invalid argument combination

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        # default the export path to the instance name
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # unknown source node: we must search all nodes for the export
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          # make the relative export path absolute under EXPORT_DIR
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success, sets C{self.op.pnode} (and C{self.op.snode} for
    two-node allocations) from the iallocator's answer.

    @raise errors.OpPrereqError: if the allocator fails or returns an
        unexpected number of nodes

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     spindle_usage=self.be_full[constants.BE_SPINDLE_USAGE],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @rtype: dict
    @return: the hook environment for this instance creation

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
      ))

    return env
  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple of two lists
    @return: (pre-hook nodes, post-hook nodes): master plus all instance nodes

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl
9419 def _ReadExportInfo(self):
9420 """Reads the export information from disk.
9422 It will override the opcode source node and path with the actual
9423 information, if these two were not specified before.
9425 @return: the export information
9428 assert self.op.mode == constants.INSTANCE_IMPORT
9430 src_node = self.op.src_node
9431 src_path = self.op.src_path
9433 if src_node is None:
9434 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9435 exp_list = self.rpc.call_export_list(locked_nodes)
9437 for node in exp_list:
9438 if exp_list[node].fail_msg:
9440 if src_path in exp_list[node].payload:
9442 self.op.src_node = src_node = node
9443 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9447 raise errors.OpPrereqError("No export found for relative path %s" %
9448 src_path, errors.ECODE_INVAL)
9450 _CheckNodeOnline(self, src_node)
9451 result = self.rpc.call_export_info(src_node, src_path)
9452 result.Raise("No export or invalid export found in dir %s" % src_path)
9454 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9455 if not export_info.has_section(constants.INISECT_EXP):
9456 raise errors.ProgrammerError("Corrupted export config",
9457 errors.ECODE_ENVIRON)
9459 ei_version = export_info.get(constants.INISECT_EXP, "version")
9460 if (int(ei_version) != constants.EXPORT_VERSION):
9461 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9462 (ei_version, constants.EXPORT_VERSION),
9463 errors.ECODE_ENVIRON)
9466 def _ReadExportParams(self, einfo):
9467 """Use export parameters as defaults.
9469 In case the opcode doesn't specify (as in override) some instance
9470 parameters, then try to use them from the export information, if
9474 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9476 if self.op.disk_template is None:
9477 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9478 self.op.disk_template = einfo.get(constants.INISECT_INS,
9480 if self.op.disk_template not in constants.DISK_TEMPLATES:
9481 raise errors.OpPrereqError("Disk template specified in configuration"
9482 " file is not one of the allowed values:"
9483 " %s" % " ".join(constants.DISK_TEMPLATES))
9485 raise errors.OpPrereqError("No disk template specified and the export"
9486 " is missing the disk_template information",
9489 if not self.op.disks:
9491 # TODO: import the disk iv_name too
9492 for idx in range(constants.MAX_DISKS):
9493 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9494 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9495 disks.append({constants.IDISK_SIZE: disk_sz})
9496 self.op.disks = disks
9497 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9498 raise errors.OpPrereqError("No disk info specified and the export"
9499 " is missing the disk information",
9502 if not self.op.nics:
9504 for idx in range(constants.MAX_NICS):
9505 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9507 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9508 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9515 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9516 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9518 if (self.op.hypervisor is None and
9519 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9520 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9522 if einfo.has_section(constants.INISECT_HYP):
9523 # use the export parameters but do not override the ones
9524 # specified by the user
9525 for name, value in einfo.items(constants.INISECT_HYP):
9526 if name not in self.op.hvparams:
9527 self.op.hvparams[name] = value
9529 if einfo.has_section(constants.INISECT_BEP):
9530 # use the parameters, without overriding
9531 for name, value in einfo.items(constants.INISECT_BEP):
9532 if name not in self.op.beparams:
9533 self.op.beparams[name] = value
9534 # Compatibility for the old "memory" be param
9535 if name == constants.BE_MEMORY:
9536 if constants.BE_MAXMEM not in self.op.beparams:
9537 self.op.beparams[constants.BE_MAXMEM] = value
9538 if constants.BE_MINMEM not in self.op.beparams:
9539 self.op.beparams[constants.BE_MINMEM] = value
9541 # try to read the parameters old style, from the main section
9542 for name in constants.BES_PARAMETERS:
9543 if (name not in self.op.beparams and
9544 einfo.has_option(constants.INISECT_INS, name)):
9545 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9547 if einfo.has_section(constants.INISECT_OSP):
9548 # use the parameters, without overriding
9549 for name, value in einfo.items(constants.INISECT_OSP):
9550 if name not in self.op.osparams:
9551 self.op.osparams[name] = value
9553 def _RevertToDefaults(self, cluster):
9554 """Revert the instance parameters to the default values.
9558 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9559 for name in self.op.hvparams.keys():
9560 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9561 del self.op.hvparams[name]
9563 be_defs = cluster.SimpleFillBE({})
9564 for name in self.op.beparams.keys():
9565 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9566 del self.op.beparams[name]
9568 nic_defs = cluster.SimpleFillNIC({})
9569 for nic in self.op.nics:
9570 for name in constants.NICS_PARAMETERS:
9571 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9574 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9575 for name in self.op.osparams.keys():
9576 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9577 del self.op.osparams[name]
9579 def _CalculateFileStorageDir(self):
9580 """Calculate final instance file storage dir.
9583 # file storage dir calculation/check
9584 self.instance_file_storage_dir = None
9585 if self.op.disk_template in constants.DTS_FILEBASED:
9586 # build the full file storage dir path
9589 if self.op.disk_template == constants.DT_SHARED_FILE:
9590 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9592 get_fsd_fn = self.cfg.GetFileStorageDir
9594 cfg_storagedir = get_fsd_fn()
9595 if not cfg_storagedir:
9596 raise errors.OpPrereqError("Cluster file storage dir not defined")
9597 joinargs.append(cfg_storagedir)
9599 if self.op.file_storage_dir is not None:
9600 joinargs.append(self.op.file_storage_dir)
9602 joinargs.append(self.op.instance_name)
9604 # pylint: disable=W0142
9605 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9607 def CheckPrereq(self): # pylint: disable=R0914
9608 """Check prerequisites.
9611 self._CalculateFileStorageDir()
9613 if self.op.mode == constants.INSTANCE_IMPORT:
9614 export_info = self._ReadExportInfo()
9615 self._ReadExportParams(export_info)
9617 if (not self.cfg.GetVGName() and
9618 self.op.disk_template not in constants.DTS_NOT_LVM):
9619 raise errors.OpPrereqError("Cluster does not support lvm-based"
9620 " instances", errors.ECODE_STATE)
9622 if (self.op.hypervisor is None or
9623 self.op.hypervisor == constants.VALUE_AUTO):
9624 self.op.hypervisor = self.cfg.GetHypervisorType()
9626 cluster = self.cfg.GetClusterInfo()
9627 enabled_hvs = cluster.enabled_hypervisors
9628 if self.op.hypervisor not in enabled_hvs:
9629 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9630 " cluster (%s)" % (self.op.hypervisor,
9631 ",".join(enabled_hvs)),
9634 # Check tag validity
9635 for tag in self.op.tags:
9636 objects.TaggableObject.ValidateTag(tag)
9638 # check hypervisor parameter syntax (locally)
9639 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9640 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9642 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9643 hv_type.CheckParameterSyntax(filled_hvp)
9644 self.hv_full = filled_hvp
9645 # check that we don't specify global parameters on an instance
9646 _CheckGlobalHvParams(self.op.hvparams)
9648 # fill and remember the beparams dict
9649 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9650 for param, value in self.op.beparams.iteritems():
9651 if value == constants.VALUE_AUTO:
9652 self.op.beparams[param] = default_beparams[param]
9653 objects.UpgradeBeParams(self.op.beparams)
9654 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9655 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9657 # build os parameters
9658 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9660 # now that hvp/bep are in final format, let's reset to defaults,
9662 if self.op.identify_defaults:
9663 self._RevertToDefaults(cluster)
9667 for idx, nic in enumerate(self.op.nics):
9668 nic_mode_req = nic.get(constants.INIC_MODE, None)
9669 nic_mode = nic_mode_req
9670 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9671 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9673 # in routed mode, for the first nic, the default ip is 'auto'
9674 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9675 default_ip_mode = constants.VALUE_AUTO
9677 default_ip_mode = constants.VALUE_NONE
9679 # ip validity checks
9680 ip = nic.get(constants.INIC_IP, default_ip_mode)
9681 if ip is None or ip.lower() == constants.VALUE_NONE:
9683 elif ip.lower() == constants.VALUE_AUTO:
9684 if not self.op.name_check:
9685 raise errors.OpPrereqError("IP address set to auto but name checks"
9686 " have been skipped",
9688 nic_ip = self.hostname1.ip
9690 if not netutils.IPAddress.IsValid(ip):
9691 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9695 # TODO: check the ip address for uniqueness
9696 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9697 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9700 # MAC address verification
9701 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9702 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9703 mac = utils.NormalizeAndValidateMac(mac)
9706 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9707 except errors.ReservationError:
9708 raise errors.OpPrereqError("MAC address %s already in use"
9709 " in cluster" % mac,
9710 errors.ECODE_NOTUNIQUE)
9712 # Build nic parameters
9713 link = nic.get(constants.INIC_LINK, None)
9714 if link == constants.VALUE_AUTO:
9715 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9718 nicparams[constants.NIC_MODE] = nic_mode
9720 nicparams[constants.NIC_LINK] = link
9722 check_params = cluster.SimpleFillNIC(nicparams)
9723 objects.NIC.CheckParameterSyntax(check_params)
9724 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9726 # disk checks/pre-build
9727 default_vg = self.cfg.GetVGName()
9729 for disk in self.op.disks:
9730 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9731 if mode not in constants.DISK_ACCESS_SET:
9732 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9733 mode, errors.ECODE_INVAL)
9734 size = disk.get(constants.IDISK_SIZE, None)
9736 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9739 except (TypeError, ValueError):
9740 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9743 data_vg = disk.get(constants.IDISK_VG, default_vg)
9745 constants.IDISK_SIZE: size,
9746 constants.IDISK_MODE: mode,
9747 constants.IDISK_VG: data_vg,
9749 if constants.IDISK_METAVG in disk:
9750 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9751 if constants.IDISK_ADOPT in disk:
9752 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9753 self.disks.append(new_disk)
9755 if self.op.mode == constants.INSTANCE_IMPORT:
9757 for idx in range(len(self.disks)):
9758 option = "disk%d_dump" % idx
9759 if export_info.has_option(constants.INISECT_INS, option):
9760 # FIXME: are the old os-es, disk sizes, etc. useful?
9761 export_name = export_info.get(constants.INISECT_INS, option)
9762 image = utils.PathJoin(self.op.src_path, export_name)
9763 disk_images.append(image)
9765 disk_images.append(False)
9767 self.src_images = disk_images
9769 old_name = export_info.get(constants.INISECT_INS, "name")
9770 if self.op.instance_name == old_name:
9771 for idx, nic in enumerate(self.nics):
9772 if nic.mac == constants.VALUE_AUTO:
9773 nic_mac_ini = "nic%d_mac" % idx
9774 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9776 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9778 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9779 if self.op.ip_check:
9780 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9781 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9782 (self.check_ip, self.op.instance_name),
9783 errors.ECODE_NOTUNIQUE)
9785 #### mac address generation
9786 # By generating here the mac address both the allocator and the hooks get
9787 # the real final mac address rather than the 'auto' or 'generate' value.
9788 # There is a race condition between the generation and the instance object
9789 # creation, which means that we know the mac is valid now, but we're not
9790 # sure it will be when we actually add the instance. If things go bad
9791 # adding the instance will abort because of a duplicate mac, and the
9792 # creation job will fail.
9793 for nic in self.nics:
9794 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9795 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9799 if self.op.iallocator is not None:
9800 self._RunAllocator()
9802 # Release all unneeded node locks
9803 _ReleaseLocks(self, locking.LEVEL_NODE,
9804 keep=filter(None, [self.op.pnode, self.op.snode,
9806 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9807 keep=filter(None, [self.op.pnode, self.op.snode,
9810 #### node related checks
9812 # check primary node
9813 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9814 assert self.pnode is not None, \
9815 "Cannot retrieve locked node %s" % self.op.pnode
9817 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9818 pnode.name, errors.ECODE_STATE)
9820 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9821 pnode.name, errors.ECODE_STATE)
9822 if not pnode.vm_capable:
9823 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9824 " '%s'" % pnode.name, errors.ECODE_STATE)
9826 self.secondaries = []
9828 # mirror node verification
9829 if self.op.disk_template in constants.DTS_INT_MIRROR:
9830 if self.op.snode == pnode.name:
9831 raise errors.OpPrereqError("The secondary node cannot be the"
9832 " primary node", errors.ECODE_INVAL)
9833 _CheckNodeOnline(self, self.op.snode)
9834 _CheckNodeNotDrained(self, self.op.snode)
9835 _CheckNodeVmCapable(self, self.op.snode)
9836 self.secondaries.append(self.op.snode)
9838 snode = self.cfg.GetNodeInfo(self.op.snode)
9839 if pnode.group != snode.group:
9840 self.LogWarning("The primary and secondary nodes are in two"
9841 " different node groups; the disk parameters"
9842 " from the first disk's node group will be"
9845 nodenames = [pnode.name] + self.secondaries
9847 # Verify instance specs
9849 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9850 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9851 constants.ISPEC_DISK_COUNT: len(self.disks),
9852 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9853 constants.ISPEC_NIC_COUNT: len(self.nics),
9856 group_info = self.cfg.GetNodeGroup(pnode.group)
9857 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9858 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9859 if not self.op.ignore_ipolicy and res:
9860 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9861 " policy: %s") % (pnode.group,
9862 utils.CommaJoin(res)),
9865 # disk parameters (not customizable at instance or node level)
9866 # just use the primary node parameters, ignoring the secondary.
9867 self.diskparams = group_info.diskparams
9869 if not self.adopt_disks:
9870 if self.op.disk_template == constants.DT_RBD:
9871 # _CheckRADOSFreeSpace() is just a placeholder.
9872 # Any function that checks prerequisites can be placed here.
9873 # Check if there is enough space on the RADOS cluster.
9874 _CheckRADOSFreeSpace()
9876 # Check lv size requirements, if not adopting
9877 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9878 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9880 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9881 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9882 disk[constants.IDISK_ADOPT])
9883 for disk in self.disks])
9884 if len(all_lvs) != len(self.disks):
9885 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9887 for lv_name in all_lvs:
9889 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9890 # to ReserveLV uses the same syntax
9891 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9892 except errors.ReservationError:
9893 raise errors.OpPrereqError("LV named %s used by another instance" %
9894 lv_name, errors.ECODE_NOTUNIQUE)
9896 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9897 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9899 node_lvs = self.rpc.call_lv_list([pnode.name],
9900 vg_names.payload.keys())[pnode.name]
9901 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9902 node_lvs = node_lvs.payload
9904 delta = all_lvs.difference(node_lvs.keys())
9906 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9907 utils.CommaJoin(delta),
9909 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9911 raise errors.OpPrereqError("Online logical volumes found, cannot"
9912 " adopt: %s" % utils.CommaJoin(online_lvs),
9914 # update the size of disk based on what is found
9915 for dsk in self.disks:
9916 dsk[constants.IDISK_SIZE] = \
9917 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9918 dsk[constants.IDISK_ADOPT])][0]))
9920 elif self.op.disk_template == constants.DT_BLOCK:
9921 # Normalize and de-duplicate device paths
9922 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9923 for disk in self.disks])
9924 if len(all_disks) != len(self.disks):
9925 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9927 baddisks = [d for d in all_disks
9928 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9930 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9931 " cannot be adopted" %
9932 (", ".join(baddisks),
9933 constants.ADOPTABLE_BLOCKDEV_ROOT),
9936 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9937 list(all_disks))[pnode.name]
9938 node_disks.Raise("Cannot get block device information from node %s" %
9940 node_disks = node_disks.payload
9941 delta = all_disks.difference(node_disks.keys())
9943 raise errors.OpPrereqError("Missing block device(s): %s" %
9944 utils.CommaJoin(delta),
9946 for dsk in self.disks:
9947 dsk[constants.IDISK_SIZE] = \
9948 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9950 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9952 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9953 # check OS parameters (remotely)
9954 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9956 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9958 # memory check on primary node
9959 #TODO(dynmem): use MINMEM for checking
9961 _CheckNodeFreeMemory(self, self.pnode.name,
9962 "creating instance %s" % self.op.instance_name,
9963 self.be_full[constants.BE_MAXMEM],
9966 self.dry_run_result = list(nodenames)
9968 def Exec(self, feedback_fn):
9969 """Create and add the instance to the cluster.
9972 instance = self.op.instance_name
9973 pnode_name = self.pnode.name
9975 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9976 self.owned_locks(locking.LEVEL_NODE)), \
9977 "Node locks differ from node resource locks"
9979 ht_kind = self.op.hypervisor
9980 if ht_kind in constants.HTS_REQ_PORT:
9981 network_port = self.cfg.AllocatePort()
9985 disks = _GenerateDiskTemplate(self,
9986 self.op.disk_template,
9987 instance, pnode_name,
9990 self.instance_file_storage_dir,
9991 self.op.file_driver,
9996 iobj = objects.Instance(name=instance, os=self.op.os_type,
9997 primary_node=pnode_name,
9998 nics=self.nics, disks=disks,
9999 disk_template=self.op.disk_template,
10000 admin_state=constants.ADMINST_DOWN,
10001 network_port=network_port,
10002 beparams=self.op.beparams,
10003 hvparams=self.op.hvparams,
10004 hypervisor=self.op.hypervisor,
10005 osparams=self.op.osparams,
10009 for tag in self.op.tags:
10012 if self.adopt_disks:
10013 if self.op.disk_template == constants.DT_PLAIN:
10014 # rename LVs to the newly-generated names; we need to construct
10015 # 'fake' LV disks with the old data, plus the new unique_id
10016 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10018 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10019 rename_to.append(t_dsk.logical_id)
10020 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10021 self.cfg.SetDiskID(t_dsk, pnode_name)
10022 result = self.rpc.call_blockdev_rename(pnode_name,
10023 zip(tmp_disks, rename_to))
10024 result.Raise("Failed to rename adoped LVs")
10026 feedback_fn("* creating instance disks...")
10028 _CreateDisks(self, iobj)
10029 except errors.OpExecError:
10030 self.LogWarning("Device creation failed, reverting...")
10032 _RemoveDisks(self, iobj)
10034 self.cfg.ReleaseDRBDMinors(instance)
10037 feedback_fn("adding instance %s to cluster config" % instance)
10039 self.cfg.AddInstance(iobj, self.proc.GetECId())
10041 # Declare that we don't want to remove the instance lock anymore, as we've
10042 # added the instance to the config
10043 del self.remove_locks[locking.LEVEL_INSTANCE]
10045 if self.op.mode == constants.INSTANCE_IMPORT:
10046 # Release unused nodes
10047 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10049 # Release all nodes
10050 _ReleaseLocks(self, locking.LEVEL_NODE)
10053 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10054 feedback_fn("* wiping instance disks...")
10056 _WipeDisks(self, iobj)
10057 except errors.OpExecError, err:
10058 logging.exception("Wiping disks failed")
10059 self.LogWarning("Wiping instance disks failed (%s)", err)
10063 # Something is already wrong with the disks, don't do anything else
10065 elif self.op.wait_for_sync:
10066 disk_abort = not _WaitForSync(self, iobj)
10067 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10068 # make sure the disks are not degraded (still sync-ing is ok)
10069 feedback_fn("* checking mirrors status")
10070 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10075 _RemoveDisks(self, iobj)
10076 self.cfg.RemoveInstance(iobj.name)
10077 # Make sure the instance lock gets removed
10078 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10079 raise errors.OpExecError("There are some degraded disks for"
10082 # Release all node resource locks
10083 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10085 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10086 if self.op.mode == constants.INSTANCE_CREATE:
10087 if not self.op.no_install:
10088 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10089 not self.op.wait_for_sync)
10091 feedback_fn("* pausing disk sync to install instance OS")
10092 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10094 for idx, success in enumerate(result.payload):
10096 logging.warn("pause-sync of instance %s for disk %d failed",
10099 feedback_fn("* running the instance OS create scripts...")
10100 # FIXME: pass debug option from opcode to backend
10102 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10103 self.op.debug_level)
10105 feedback_fn("* resuming disk sync")
10106 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10108 for idx, success in enumerate(result.payload):
10110 logging.warn("resume-sync of instance %s for disk %d failed",
10113 os_add_result.Raise("Could not add os for instance %s"
10114 " on node %s" % (instance, pnode_name))
10116 elif self.op.mode == constants.INSTANCE_IMPORT:
10117 feedback_fn("* running the instance OS import scripts...")
10121 for idx, image in enumerate(self.src_images):
10125 # FIXME: pass debug option from opcode to backend
10126 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10127 constants.IEIO_FILE, (image, ),
10128 constants.IEIO_SCRIPT,
10129 (iobj.disks[idx], idx),
10131 transfers.append(dt)
10134 masterd.instance.TransferInstanceData(self, feedback_fn,
10135 self.op.src_node, pnode_name,
10136 self.pnode.secondary_ip,
10138 if not compat.all(import_result):
10139 self.LogWarning("Some disks for instance %s on node %s were not"
10140 " imported successfully" % (instance, pnode_name))
10142 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10143 feedback_fn("* preparing remote import...")
10144 # The source cluster will stop the instance before attempting to make a
10145 # connection. In some cases stopping an instance can take a long time,
10146 # hence the shutdown timeout is added to the connection timeout.
10147 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10148 self.op.source_shutdown_timeout)
10149 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10151 assert iobj.primary_node == self.pnode.name
10153 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10154 self.source_x509_ca,
10155 self._cds, timeouts)
10156 if not compat.all(disk_results):
10157 # TODO: Should the instance still be started, even if some disks
10158 # failed to import (valid for local imports, too)?
10159 self.LogWarning("Some disks for instance %s on node %s were not"
10160 " imported successfully" % (instance, pnode_name))
10162 # Run rename script on newly imported instance
10163 assert iobj.name == instance
10164 feedback_fn("Running rename script for %s" % instance)
10165 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10166 self.source_instance_name,
10167 self.op.debug_level)
10168 if result.fail_msg:
10169 self.LogWarning("Failed to run rename script for %s on node"
10170 " %s: %s" % (instance, pnode_name, result.fail_msg))
10173 # also checked in the prereq part
10174 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10177 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10180 iobj.admin_state = constants.ADMINST_UP
10181 self.cfg.Update(iobj, feedback_fn)
10182 logging.info("Starting instance %s on node %s", instance, pnode_name)
10183 feedback_fn("* starting instance...")
10184 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10186 result.Raise("Could not start instance")
10188 return list(iobj.all_nodes)
10191 def _CheckRADOSFreeSpace():
10192 """Compute disk size requirements inside the RADOS cluster.
10195 # For the RADOS cluster we assume there is always enough space.
10199 class LUInstanceConsole(NoHooksLU):
10200 """Connect to an instance's console.
10202 This is somewhat special in that it returns the command line that
10203 you need to run on the master node in order to connect to the
10209 def ExpandNames(self):
10210 self.share_locks = _ShareAll()
10211 self._ExpandAndLockInstance()
10213 def CheckPrereq(self):
10214 """Check prerequisites.
10216 This checks that the instance is in the cluster.
10219 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10220 assert self.instance is not None, \
10221 "Cannot retrieve locked instance %s" % self.op.instance_name
10222 _CheckNodeOnline(self, self.instance.primary_node)
10224 def Exec(self, feedback_fn):
10225 """Connect to the console of an instance
10228 instance = self.instance
10229 node = instance.primary_node
10231 node_insts = self.rpc.call_instance_list([node],
10232 [instance.hypervisor])[node]
10233 node_insts.Raise("Can't get node information from %s" % node)
10235 if instance.name not in node_insts.payload:
10236 if instance.admin_state == constants.ADMINST_UP:
10237 state = constants.INSTST_ERRORDOWN
10238 elif instance.admin_state == constants.ADMINST_DOWN:
10239 state = constants.INSTST_ADMINDOWN
10241 state = constants.INSTST_ADMINOFFLINE
10242 raise errors.OpExecError("Instance %s is not running (state %s)" %
10243 (instance.name, state))
10245 logging.debug("Connecting to console of %s on %s", instance.name, node)
10247 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10250 def _GetInstanceConsole(cluster, instance):
10251 """Returns console information for an instance.
10253 @type cluster: L{objects.Cluster}
10254 @type instance: L{objects.Instance}
10258 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10259 # beparams and hvparams are passed separately, to avoid editing the
10260 # instance and then saving the defaults in the instance itself.
10261 hvparams = cluster.FillHV(instance)
10262 beparams = cluster.FillBE(instance)
10263 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10265 assert console.instance == instance.name
10266 assert console.Validate()
10268 return console.ToDict()
10271 class LUInstanceReplaceDisks(LogicalUnit):
10272 """Replace the disks of an instance.
10275 HPATH = "mirrors-replace"
10276 HTYPE = constants.HTYPE_INSTANCE
10279 def CheckArguments(self):
10280 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10281 self.op.iallocator)
10283 def ExpandNames(self):
10284 self._ExpandAndLockInstance()
10286 assert locking.LEVEL_NODE not in self.needed_locks
10287 assert locking.LEVEL_NODE_RES not in self.needed_locks
10288 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10290 assert self.op.iallocator is None or self.op.remote_node is None, \
10291 "Conflicting options"
10293 if self.op.remote_node is not None:
10294 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10296 # Warning: do not remove the locking of the new secondary here
10297 # unless DRBD8.AddChildren is changed to work in parallel;
10298 # currently it doesn't since parallel invocations of
10299 # FindUnusedMinor will conflict
10300 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10301 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10303 self.needed_locks[locking.LEVEL_NODE] = []
10304 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10306 if self.op.iallocator is not None:
10307 # iallocator will select a new node in the same group
10308 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10310 self.needed_locks[locking.LEVEL_NODE_RES] = []
10312 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10313 self.op.iallocator, self.op.remote_node,
10314 self.op.disks, False, self.op.early_release,
10315 self.op.ignore_ipolicy)
10317 self.tasklets = [self.replacer]
10319 def DeclareLocks(self, level):
10320 if level == locking.LEVEL_NODEGROUP:
10321 assert self.op.remote_node is None
10322 assert self.op.iallocator is not None
10323 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10325 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10326 # Lock all groups used by instance optimistically; this requires going
10327 # via the node before it's locked, requiring verification later on
10328 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10329 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10331 elif level == locking.LEVEL_NODE:
10332 if self.op.iallocator is not None:
10333 assert self.op.remote_node is None
10334 assert not self.needed_locks[locking.LEVEL_NODE]
10336 # Lock member nodes of all locked groups
10337 self.needed_locks[locking.LEVEL_NODE] = [node_name
10338 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10339 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10341 self._LockInstancesNodes()
10342 elif level == locking.LEVEL_NODE_RES:
10344 self.needed_locks[locking.LEVEL_NODE_RES] = \
10345 self.needed_locks[locking.LEVEL_NODE]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    # NOTE(review): the "env = {" opener, closing "}" and "return env" lines
    # are not visible in this excerpt; the entries below populate the hook
    # environment dictionary
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      # DRBD8 instances have exactly one secondary (checked in CheckPrereq)
      "OLD_SECONDARY": instance.secondary_nodes[0],
    # Add the standard per-instance hook environment variables
    env.update(_BuildInstanceHookEnvByObject(self, instance))
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    # NOTE(review): the "nl = [" opener, closing "]" and the final return of
    # the node list pair are not visible in this excerpt
      self.cfg.GetMasterNode(),
      instance.primary_node,
    # Include the explicitly requested new secondary, if any
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Group locks are only held when the secondary is chosen via iallocator
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    # NOTE(review): presumably guarded by "if owned_groups:" in the full
    # file; opener not visible in this excerpt
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    # Delegate the actual prerequisite checks to the tasklet via the base LU
    return LogicalUnit.CheckPrereq(self)
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    # NOTE(review): "self.mode = mode" and "self.disks = disks" are assigned
    # here in the full file (both attributes are read later, e.g. in
    # _CheckPrereq2); not visible in this excerpt
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy

    # Runtime data, filled in by (_)CheckPrereq(2)
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    Validates the combination of replace mode, explicit remote node and
    iallocator name; presumably decorated with @staticmethod in the full
    file (no self parameter) -- decorator not visible in this excerpt.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      # Changing the secondary requires exactly one node-selection method
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    Runs the named iallocator script in relocation mode and returns the
    selected node name; presumably a @staticmethod in the full file (no
    self parameter).

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    # Relocation must return exactly the required number of nodes
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 # NOTE(review): the "(iallocator_name,"
                                 # tuple opener is not visible in this excerpt
                                 len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    # NOTE(review): the trailing arguments of this call (presumably
    # node_name and the disk index list) are not visible in this excerpt
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      # NOTE(review): the inner "for node in nodes:" loop, the branch for
      # offline nodes and the True/False returns are not visible in this
      # excerpt
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        # Missing payload or an RPC error means the disk is not activated
        elif result.fail_msg or not result.payload:
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    # Disk replacement only makes sense for DRBD8-based instances
    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    # With delayed iallocator the second part is run from Exec instead
    if not self.delay_iallocator:
      self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    # NOTE(review): "else:" opener not visible in this excerpt
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    # NOTE(review): "else:" opener not visible in this excerpt
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # Explicit disk lists are only valid for same-node replacement modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      # Automatic repair can only handle faults on a single node
      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      # NOTE(review): "if faulty_primary:" opener not visible in this excerpt
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
    # NOTE(review): the "nothing faulty" branch and the outer "else:" that
    # presumably encloses the non-automatic modes below are not visible in
    # this excerpt
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        # The new secondary must be usable for instances
        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      # NOTE(review): "else:" opener and the "self.mode)" continuation of the
      # raise below are not visible in this excerpt
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %

    # If not specified all disks should be replaced
    # NOTE(review): "if not self.disks:" opener not visible in this excerpt
      self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      # NOTE(review): the "new_group_info)" continuation of this call is not
      # visible in this excerpt
      ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),

      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    # TODO: compute disk parameters
    primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
    secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
    if primary_node_info.group != secondary_node_info.group:
      self.lu.LogInfo("The instance primary and secondary nodes are in two"
                      " different node groups; the disk parameters of the"
                      " primary node's group will be applied.")

    self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams

    # All involved nodes must be reachable
    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # NOTE(review): the remaining members of this list (presumably
    # other_node and target_node) are not visible in this excerpt
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    # With delayed iallocator the second prereq phase runs here
    if self.delay_iallocator:
      self._CheckPrereq2()

    # Verify owned locks before starting operation
    owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
    assert set(owned_nodes) == set(self.node_secondary_ip), \
      ("Incorrect node locks, owning %s, expected %s" %
       (owned_nodes, self.node_secondary_ip.keys()))
    assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
            self.lu.owned_locks(locking.LEVEL_NODE_RES))

    owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
    assert list(owned_instances) == [self.instance_name], \
      "Instance '%s' not locked" % self.instance_name

    assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
      "Should not own any node group lock at this point"

    # NOTE(review): presumably guarded by "if not self.disks:" with an early
    # return; opener not visible in this excerpt
      feedback_fn("No disks need replacement")

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    # NOTE(review): "if activate_disks:" opener not visible in this excerpt
      _StartInstanceDisks(self.lu, self.instance, True)

    # NOTE(review): "try:" opener not visible in this excerpt
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      # NOTE(review): "else:" opener not visible in this excerpt
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    # NOTE(review): "finally:" and "if activate_disks:" openers not visible
    # in this excerpt
      # Deactivate the instance disks if we're replacing them on a
      # down instance
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    # Verify owned locks
    owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
    nodes = frozenset(self.node_secondary_ip)
    assert ((self.early_release and not owned_nodes) or
            (not self.early_release and not (set(owned_nodes) - nodes))), \
      ("Not owning the correct locks, early_release=%s, owned=%r,"
       " nodes=%r" % (self.early_release, owned_nodes, nodes))
  def _CheckVolumeGroup(self, nodes):
    """Verify the configured volume group exists on all given nodes."""
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    # NOTE(review): presumably guarded by "if not results:"; opener not
    # visible in this excerpt
      raise errors.OpExecError("Can't list volume groups on the nodes")

    # NOTE(review): "for node in nodes:" opener not visible in this excerpt
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        # NOTE(review): the "(vgname, node))" continuation is not visible
        # in this excerpt
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
  def _CheckDisksExistence(self, nodes):
    """Verify each selected disk can be found on all given nodes."""
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      # Only disks selected for replacement are checked
      if idx not in self.disks:
      # NOTE(review): "continue" and the "for node in nodes:" opener are not
      # visible in this excerpt

        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          # NOTE(review): "if not msg:" opener and the "(idx, node, msg))"
          # continuation are not visible in this excerpt
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Abort if any selected disk is degraded on the given node."""
    for idx, dev in enumerate(self.instance.disks):
      # Only disks selected for replacement are checked
      if idx not in self.disks:
      # NOTE(review): "continue", the "(idx, node_name))" continuation and
      # the "ldisk=ldisk):" continuation are not visible in this excerpt

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))
  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    # NOTE(review): "iv_names = {}" initialization and the final
    # "return iv_names" are not visible in this excerpt
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
      # NOTE(review): "continue" not visible in this excerpt

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)

      # New data LV mirrors the existing data child's volume group and size
      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]), params=data_p)
      # Meta LV has the fixed DRBD metadata size
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]), params=meta_p)

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)
  def _CheckDevices(self, node_name, iv_names):
    """Verify all DRBD devices in iv_names exist and are not degraded."""
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        # NOTE(review): "if not msg:" opener and the "(name, msg))"
        # continuation are not visible in this excerpt
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
  def _RemoveOldStorage(self, node_name, iv_names):
    """Best-effort removal of the replaced logical volumes."""
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      # NOTE(review): "for lv in old_lvs:" and "if msg:" openers not visible
      # in this excerpt
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
          # Removal failures are only warned about, not fatal
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    # NOTE(review): "steps_total = 6" is assigned here in the full file;
    # not visible in this excerpt

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    # NOTE(review): the ldisk argument continuation of this call is not
    # visible in this excerpt
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      # NOTE(review): the "old_lvs)" continuation is not visible in this
      # excerpt
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      # NOTE(review): the "rename_old_to_new)" continuation is not visible
      # in this excerpt
      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      # NOTE(review): the "rename_new_to_old)" continuation is not visible
      # in this excerpt
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      # NOTE(review): the "new_lvs)" continuation is not visible in this
      # excerpt
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
      msg = result.fail_msg
      # NOTE(review): the rollback below presumably runs only "if msg:";
      # opener, the "new_lv).fail_msg" continuation, the "if msg2:" guard
      # and the " volumes" string continuation are not visible in this
      # excerpt
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"

        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    # Remaining steps are numbered from 5 onwards
    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    # NOTE(review): "else:" opener not visible in this excerpt
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    # NOTE(review): "steps_total = 6" is assigned here in the full file;
    # not visible in this excerpt
    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: dbrd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    # NOTE(review): "iv_names = {}" initialization not visible in this
    # excerpt
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
      # NOTE(review): the "p_minor = o_minor1" / "else:" / "p_minor =
      # o_minor2" lines are not visible in this excerpt
        assert self.instance.primary_node == o_node2, "Three-node instance?"

      # IDs without and with the network port, see docstring above
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      # NOTE(review): the "new_net_id)" continuation is not visible in this
      # excerpt
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
      drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
      # NOTE(review): the "size=dev.size," keyword line and the "try:"
      # opener are not visible in this excerpt
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              params=drbd_params)
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        # Creation failed: give the minors back before re-raising
        self.cfg.ReleaseDRBDMinors(self.instance.name)

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      # NOTE(review): "if msg:" opener not visible in this excerpt; also
      # note the adjacent string literals below concatenate to "on
      # oldnode:" -- missing space, consider fixing
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           "node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    # NOTE(review): "if msg:" opener not visible in this excerpt
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    # NOTE(review): the "[self.new_node]," and final "False)" argument
    # lines of this call are not visible in this excerpt
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      # NOTE(review): "if msg:" opener and the "to_node, msg," argument
      # continuation are not visible in this excerpt
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Remaining steps are numbered from 5 onwards
    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    # NOTE(review): "else:" opener not visible in this excerpt
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  def CheckArguments(self):
    """Validate node name and that the storage type supports repair."""
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock only the node being repaired."""
    # NOTE(review): the closing "}" of this dict is not visible in this
    # excerpt
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    # NOTE(review): the "try:" opener, the "node_name, True):" call
    # continuation and the final "else: raise" are not visible in this
    # excerpt
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      # With ignore_consistency the fault is downgraded to a warning
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      # NOTE(review): only running instances are checked; "continue" after
      # this guard is not visible in this excerpt
      if inst.admin_state != constants.ADMINST_UP:
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    """Run the consistency-fix operation on the node's storage unit."""
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    # NOTE(review): the "self.op.name," argument line of this call is not
    # visible in this excerpt
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
11233 class LUNodeEvacuate(NoHooksLU):
11234 """Evacuates instances off a list of nodes.
11239 _MODE2IALLOCATOR = {
11240 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11241 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11242 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11244 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11245 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11246 constants.IALLOCATOR_NEVAC_MODES)
  def CheckArguments(self):
    """Validate the opcode's iallocator/remote_node parameter combination.

    Delegates to the shared helper checking the "iallocator" and
    "remote_node" opcode fields.
    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
  def ExpandNames(self):
    """Expand node names and declare the (shared) locks needed."""
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      # The evacuation target cannot be the evacuated node itself
      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      # An explicit target node only works for secondary evacuation
      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks (all shared); instance/group/node levels are filled in
    # by DeclareLocks
    # NOTE(review): the closing "}" of this dict is not visible in this
    # excerpt
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()
  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    # NOTE(review): "else:" opener not visible in this excerpt
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes
  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    # NOTE(review): "else:" opener not visible in this excerpt
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # NOTE(review): the '" instances",' string continuation of this raise
      # is not visible in this excerpt
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)
11321 def DeclareLocks(self, level):
# Instance and node-group locks are computed optimistically and are
# re-verified in CheckPrereq once everything is locked.
11322 if level == locking.LEVEL_INSTANCE:
11323 # Lock instances optimistically, needs verification once node and group
11324 # locks have been acquired
11325 self.needed_locks[locking.LEVEL_INSTANCE] = \
11326 set(i.name for i in self._DetermineInstances())
11328 elif level == locking.LEVEL_NODEGROUP:
11329 # Lock node groups for all potential target nodes optimistically, needs
11330 # verification once nodes have been acquired
11331 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11332 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11334 elif level == locking.LEVEL_NODE:
11335 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11337 def CheckPrereq(self):
# Re-check the optimistically computed node/group/instance sets against
# what was actually locked; any drift means the cluster changed and the
# operation must be retried.
11339 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11340 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11341 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11343 need_nodes = self._DetermineNodes()
11345 if not owned_nodes.issuperset(need_nodes):
# NOTE(review): the concatenated message reads "... current nodes are
# are '%s' ..." — doubled "are" across the fragments; candidate typo fix
# (cannot be patched safely here, an intervening line is not visible).
11346 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11347 " locks were acquired, current nodes are"
11348 " are '%s', used to be '%s'; retry the"
11350 (self.op.node_name,
11351 utils.CommaJoin(need_nodes),
11352 utils.CommaJoin(owned_nodes)),
11353 errors.ECODE_STATE)
11355 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11356 if owned_groups != wanted_groups:
11357 raise errors.OpExecError("Node groups changed since locks were acquired,"
11358 " current groups are '%s', used to be '%s';"
11359 " retry the operation" %
11360 (utils.CommaJoin(wanted_groups),
11361 utils.CommaJoin(owned_groups)))
11363 # Determine affected instances
11364 self.instances = self._DetermineInstances()
11365 self.instance_names = [i.name for i in self.instances]
11367 if set(self.instance_names) != owned_instances:
11368 raise errors.OpExecError("Instances on node '%s' changed since locks"
11369 " were acquired, current instances are '%s',"
11370 " used to be '%s'; retry the operation" %
11371 (self.op.node_name,
11372 utils.CommaJoin(self.instance_names),
11373 utils.CommaJoin(owned_instances)))
11375 if self.instance_names:
11376 self.LogInfo("Evacuating instances from node '%s': %s",
11378 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11380 self.LogInfo("No instances to evacuate from node '%s'",
# The explicit remote node must not already be the primary of any
# instance being evacuated (it would become both primary and secondary).
11383 if self.op.remote_node is not None:
11384 for i in self.instances:
11385 if i.primary_node == self.op.remote_node:
11386 raise errors.OpPrereqError("Node %s is the primary node of"
11387 " instance %s, cannot use it as"
11389 (self.op.remote_node, i.name),
11390 errors.ECODE_INVAL)
11392 def Exec(self, feedback_fn):
# Exactly one of iallocator / remote_node is set (enforced earlier).
11393 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11395 if not self.instance_names:
11396 # No instances to evacuate
# Iallocator path: ask the allocator for a per-instance job list.
11399 elif self.op.iallocator is not None:
11400 # TODO: Implement relocation to other group
11401 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11402 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11403 instances=list(self.instance_names))
11405 ial.Run(self.op.iallocator)
11407 if not ial.success:
11408 raise errors.OpPrereqError("Can't compute node evacuation using"
11409 " iallocator '%s': %s" %
11410 (self.op.iallocator, ial.info),
11411 errors.ECODE_NORES)
11413 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
# Explicit remote node path: one replace-disks (change-secondary) job
# per instance.
11415 elif self.op.remote_node is not None:
11416 assert self.op.mode == constants.NODE_EVAC_SEC
11418 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11419 remote_node=self.op.remote_node,
11421 mode=constants.REPLACE_DISK_CHG,
11422 early_release=self.op.early_release)]
11423 for instance_name in self.instance_names
11427 raise errors.ProgrammerError("No iallocator or remote node")
# Jobs are submitted by the caller via the ResultWithJobs container.
11429 return ResultWithJobs(jobs)
11432 def _SetOpEarlyRelease(early_release, op):
11433 """Sets C{early_release} flag on opcodes if available.
# Opcodes lacking the attribute are tolerated (AttributeError path),
# except OpInstanceReplaceDisks which is asserted to always have it.
11437 op.early_release = early_release
11438 except AttributeError:
11439 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11444 def _NodeEvacDest(use_nodes, group, nodes):
11445 """Returns group or nodes depending on caller's choice.
# Visible branch: when node names are requested, join them with commas;
# the group-returning branch is not visible in this listing.
11449 return utils.CommaJoin(nodes)
11454 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11455 """Unpacks the result of change-group and node-evacuate iallocator requests.
11457 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11458 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11460 @type lu: L{LogicalUnit}
11461 @param lu: Logical unit instance
11462 @type alloc_result: tuple/list
11463 @param alloc_result: Result from iallocator
11464 @type early_release: bool
11465 @param early_release: Whether to release locks early if possible
11466 @type use_nodes: bool
11467 @param use_nodes: Whether to display node names instead of groups
# The iallocator returns (moved, failed, jobs); any failures abort the
# whole operation after being logged.
11470 (moved, failed, jobs) = alloc_result
11473 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11474 for (name, reason) in failed)
11475 lu.LogWarning("Unable to evacuate instances %s", failreason)
11476 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11479 lu.LogInfo("Instances to be moved: %s",
11480 utils.CommaJoin("%s (to %s)" %
11481 (name, _NodeEvacDest(use_nodes, group, nodes))
11482 for (name, group, nodes) in moved))
# Deserialize each opcode and propagate the early_release flag onto it.
11484 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11485 map(opcodes.OpCode.LoadOpCode, ops))
11489 class LUInstanceGrowDisk(LogicalUnit):
11490 """Grow a disk of an instance.
11493 HPATH = "disk-grow"
11494 HTYPE = constants.HTYPE_INSTANCE
# Acquire the instance lock plus (replaced) node and node-resource locks.
11497 def ExpandNames(self):
11498 self._ExpandAndLockInstance()
11499 self.needed_locks[locking.LEVEL_NODE] = []
11500 self.needed_locks[locking.LEVEL_NODE_RES] = []
11501 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11502 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11504 def DeclareLocks(self, level):
11505 if level == locking.LEVEL_NODE:
11506 self._LockInstancesNodes()
11507 elif level == locking.LEVEL_NODE_RES:
# Node-resource locks mirror the node locks (copied, not aliased).
11509 self.needed_locks[locking.LEVEL_NODE_RES] = \
11510 self.needed_locks[locking.LEVEL_NODE][:]
11512 def BuildHooksEnv(self):
11513 """Build hooks env.
11515 This runs on the master, the primary and all the secondaries.
11519 "DISK": self.op.disk,
11520 "AMOUNT": self.op.amount,
11522 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11525 def BuildHooksNodes(self):
11526 """Build hooks nodes.
11529 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11532 def CheckPrereq(self):
11533 """Check prerequisites.
11535 This checks that the instance is in the cluster.
11538 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11539 assert instance is not None, \
11540 "Cannot retrieve locked instance %s" % self.op.instance_name
11541 nodenames = list(instance.all_nodes)
11542 for node in nodenames:
11543 _CheckNodeOnline(self, node)
11545 self.instance = instance
# Only disk templates in DTS_GROWABLE support growing.
11547 if instance.disk_template not in constants.DTS_GROWABLE:
11548 raise errors.OpPrereqError("Instance's disk layout does not support"
11549 " growing", errors.ECODE_INVAL)
11551 self.disk = instance.FindDisk(self.op.disk)
# Free-space check is skipped for file-based templates (see TODO below).
11553 if instance.disk_template not in (constants.DT_FILE,
11554 constants.DT_SHARED_FILE,
11556 # TODO: check the free disk space for file, when that feature will be
11558 _CheckNodesFreeDiskPerVG(self, nodenames,
11559 self.disk.ComputeGrowth(self.op.amount))
11561 def Exec(self, feedback_fn):
11562 """Execute disk grow.
11565 instance = self.instance
11568 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11569 assert (self.owned_locks(locking.LEVEL_NODE) ==
11570 self.owned_locks(locking.LEVEL_NODE_RES))
11572 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11574 raise errors.OpExecError("Cannot activate block device to grow")
11576 feedback_fn("Growing disk %s of instance '%s' by %s" %
11577 (self.op.disk, instance.name,
11578 utils.FormatUnit(self.op.amount, "h")))
11580 # First run all grow ops in dry-run mode
11581 for node in instance.all_nodes:
11582 self.cfg.SetDiskID(disk, node)
11583 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11584 result.Raise("Grow request failed to node %s" % node)
11586 # We know that (as far as we can test) operations across different
11587 # nodes will succeed, time to run it for real
11588 for node in instance.all_nodes:
11589 self.cfg.SetDiskID(disk, node)
11590 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11591 result.Raise("Grow request failed to node %s" % node)
11593 # TODO: Rewrite code to work properly
11594 # DRBD goes into sync mode for a short amount of time after executing the
11595 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11596 # calling "resize" in sync mode fails. Sleeping for a short amount of
11597 # time is a work-around.
11600 disk.RecordGrow(self.op.amount)
11601 self.cfg.Update(instance, feedback_fn)
11603 # Changes have been recorded, release node lock
11604 _ReleaseLocks(self, locking.LEVEL_NODE)
11606 # Downgrade lock while waiting for sync
11607 self.glm.downgrade(locking.LEVEL_INSTANCE)
11609 if self.op.wait_for_sync:
11610 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11612 self.proc.LogWarning("Disk sync-ing has not returned a good"
11613 " status; please check the instance")
# If the instance is not supposed to run, deactivate the grown disk again.
11614 if instance.admin_state != constants.ADMINST_UP:
11615 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11616 elif instance.admin_state != constants.ADMINST_UP:
11617 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11618 " not supposed to be running because no wait for"
11619 " sync mode was requested")
11621 assert self.owned_locks(locking.LEVEL_NODE_RES)
11622 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11625 class LUInstanceQueryData(NoHooksLU):
11626 """Query runtime instance data.
11631 def ExpandNames(self):
11632 self.needed_locks = {}
11634 # Use locking if requested or when non-static information is wanted
11635 if not (self.op.static or self.op.use_locking):
11636 self.LogWarning("Non-static data requested, locks need to be acquired")
11637 self.op.use_locking = True
# With explicit instance names (or no locking) expand names up front;
# otherwise the owned instance locks define the wanted set later.
11639 if self.op.instances or not self.op.use_locking:
11640 # Expand instance names right here
11641 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11643 # Will use acquired locks
11644 self.wanted_names = None
11646 if self.op.use_locking:
11647 self.share_locks = _ShareAll()
11649 if self.wanted_names is None:
11650 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11652 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11654 self.needed_locks[locking.LEVEL_NODE] = []
11655 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11657 def DeclareLocks(self, level):
11658 if self.op.use_locking and level == locking.LEVEL_NODE:
11659 self._LockInstancesNodes()
11661 def CheckPrereq(self):
11662 """Check prerequisites.
11664 This only checks the optional instance list against the existing names.
11667 if self.wanted_names is None:
11668 assert self.op.use_locking, "Locking was not used"
11669 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11671 self.wanted_instances = \
11672 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11674 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11675 """Returns the status of a block device
# Static queries (or missing node) skip the RPC entirely.
11678 if self.op.static or not node:
11681 self.cfg.SetDiskID(dev, node)
11683 result = self.rpc.call_blockdev_find(node, dev)
11687 result.Raise("Can't compute disk status for %s" % instance_name)
11689 status = result.payload
11693 return (status.dev_path, status.major, status.minor,
11694 status.sync_percent, status.estimated_time,
11695 status.is_degraded, status.ldisk_status)
11697 def _ComputeDiskStatus(self, instance, snode, dev):
11698 """Compute block device status.
# For DRBD, derive the secondary node from the disk's logical_id pair.
11701 if dev.dev_type in constants.LDS_DRBD:
11702 # we change the snode then (otherwise we use the one passed in)
11703 if dev.logical_id[0] == instance.primary_node:
11704 snode = dev.logical_id[1]
11706 snode = dev.logical_id[0]
11708 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11709 instance.name, dev)
11710 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
# Children are computed recursively with the same secondary node.
11713 dev_children = map(compat.partial(self._ComputeDiskStatus,
11720 "iv_name": dev.iv_name,
11721 "dev_type": dev.dev_type,
11722 "logical_id": dev.logical_id,
11723 "physical_id": dev.physical_id,
11724 "pstatus": dev_pstatus,
11725 "sstatus": dev_sstatus,
11726 "children": dev_children,
11731 def Exec(self, feedback_fn):
11732 """Gather and return data"""
11735 cluster = self.cfg.GetClusterInfo()
11737 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11738 for i in self.wanted_instances)
11739 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
# Offline primary or static query: no runtime state can be fetched.
11740 if self.op.static or pnode.offline:
11741 remote_state = None
11743 self.LogWarning("Primary node %s is marked offline, returning static"
11744 " information only for instance %s" %
11745 (pnode.name, instance.name))
11747 remote_info = self.rpc.call_instance_info(instance.primary_node,
11749 instance.hypervisor)
11750 remote_info.Raise("Error checking node %s" % instance.primary_node)
11751 remote_info = remote_info.payload
11752 if remote_info and "state" in remote_info:
11753 remote_state = "up"
11755 if instance.admin_state == constants.ADMINST_UP:
11756 remote_state = "down"
11758 remote_state = instance.admin_state
11760 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11763 result[instance.name] = {
11764 "name": instance.name,
11765 "config_state": instance.admin_state,
11766 "run_state": remote_state,
11767 "pnode": instance.primary_node,
11768 "snodes": instance.secondary_nodes,
11770 # this happens to be the same format used for hooks
11771 "nics": _NICListToTuple(self, instance.nics),
11772 "disk_template": instance.disk_template,
11774 "hypervisor": instance.hypervisor,
11775 "network_port": instance.network_port,
11776 "hv_instance": instance.hvparams,
11777 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11778 "be_instance": instance.beparams,
11779 "be_actual": cluster.FillBE(instance),
11780 "os_instance": instance.osparams,
11781 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11782 "serial_no": instance.serial_no,
11783 "mtime": instance.mtime,
11784 "ctime": instance.ctime,
11785 "uuid": instance.uuid,
11791 def PrepareContainerMods(mods, private_fn):
11792 """Prepares a list of container modifications by adding a private data field.
11794 @type mods: list of tuples; (operation, index, parameters)
11795 @param mods: List of modifications
11796 @type private_fn: callable or None
11797 @param private_fn: Callable for constructing a private data field for a
# Each 3-tuple is extended to a 4-tuple carrying a freshly constructed
# private object (fn() per modification).
11802 if private_fn is None:
11807 return [(op, idx, params, fn()) for (op, idx, params) in mods]
11810 #: Type description for changes as returned by L{ApplyContainerMods}'s
# Checks for "None or a list of (non-empty-string, value) pairs".
11812 _TApplyContModsCbChanges = \
11813 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11814 ht.TNonEmptyString,
11819 def ApplyContainerMods(kind, container, chgdesc, mods,
11820 create_fn, modify_fn, remove_fn):
11821 """Applies descriptions in C{mods} to C{container}.
11824 @param kind: One-word item description
11825 @type container: list
11826 @param container: Container to modify
11827 @type chgdesc: None or list
11828 @param chgdesc: List of applied changes
11830 @param mods: Modifications as returned by L{PrepareContainerMods}
11831 @type create_fn: callable
11832 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11833 receives absolute item index, parameters and private data object as added
11834 by L{PrepareContainerMods}, returns tuple containing new item and changes
11836 @type modify_fn: callable
11837 @param modify_fn: Callback for modifying an existing item
11838 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11839 and private data object as added by L{PrepareContainerMods}, returns
11841 @type remove_fn: callable
11842 @param remove_fn: Callback on removing item; receives absolute item index,
11843 item and private data object as added by L{PrepareContainerMods}
11846 for (op, idx, params, private) in mods:
# Only -1 is accepted as a negative index (meaning the last slot).
11849 absidx = len(container) - 1
11851 raise IndexError("Not accepting negative indices other than -1")
11852 elif idx > len(container):
11853 raise IndexError("Got %s index %s, but there are only %s" %
11854 (kind, idx, len(container)))
11860 if op == constants.DDM_ADD:
11861 # Calculate where item will be added
11863 addidx = len(container)
11867 if create_fn is None:
11870 (item, changes) = create_fn(addidx, params, private)
11873 container.append(item)
11876 assert idx <= len(container)
11877 # list.insert does so before the specified index
11878 container.insert(idx, item)
11880 # Retrieve existing item
11882 item = container[absidx]
11884 raise IndexError("Invalid %s index %s" % (kind, idx))
11886 if op == constants.DDM_REMOVE:
11889 if remove_fn is not None:
11890 remove_fn(absidx, item, private)
11892 changes = [("%s/%s" % (kind, absidx), "remove")]
11894 assert container[absidx] == item
11895 del container[absidx]
11896 elif op == constants.DDM_MODIFY:
11897 if modify_fn is not None:
11898 changes = modify_fn(absidx, item, params, private)
11900 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
# Each callback's reported changes must match the declared shape and,
# if a change-description list was given, are appended to it.
11902 assert _TApplyContModsCbChanges(changes)
11904 if not (chgdesc is None or changes is None):
11905 chgdesc.extend(changes)
11908 def _UpdateIvNames(base_index, disks):
11909 """Updates the C{iv_name} attribute of disks.
11911 @type disks: list of L{objects.Disk}
# Renumber sequentially starting from base_index ("disk/<n>").
11914 for (idx, disk) in enumerate(disks):
11915 disk.iv_name = "disk/%s" % (base_index + idx, )
11918 class _InstNicModPrivate:
11919 """Data structure for network interface modifications.
11921 Used by L{LUInstanceSetParams}.
# Plain attribute holder; fields are assigned by the NIC-modification
# helpers (e.g. 'params' and 'filled' in _PrepareNicModification).
11924 def __init__(self):
11929 class LUInstanceSetParams(LogicalUnit):
11930 """Modifies an instances's parameters.
# Hooks are run with the instance-modify path/type.
11933 HPATH = "instance-modify"
11934 HTYPE = constants.HTYPE_INSTANCE
11938 def _UpgradeDiskNicMods(kind, mods, verify_fn):
# Upgrade legacy 2-tuple disk/NIC modifications to the 3-tuple
# (op, index, params) form; only one add/remove is accepted at a time.
11939 assert ht.TList(mods)
11940 assert not mods or len(mods[0]) in (2, 3)
11942 if mods and len(mods[0]) == 2:
11946 for op, params in mods:
11947 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11948 result.append((op, -1, params))
11952 raise errors.OpPrereqError("Only one %s add or remove operation is"
11953 " supported at a time" % kind,
11954 errors.ECODE_INVAL)
# Legacy entries with a non-add/remove op are treated as index + modify.
11956 result.append((constants.DDM_MODIFY, op, params))
11958 assert verify_fn(result)
11965 def _CheckMods(kind, mods, key_types, item_fn):
11966 """Ensures requested disk/NIC modifications are valid.
11969 for (op, _, params) in mods:
11970 assert ht.TDict(params)
# Enforce the per-key parameter types before any semantic checks.
11972 utils.ForceDictType(params, key_types)
11974 if op == constants.DDM_REMOVE:
11976 raise errors.OpPrereqError("No settings should be passed when"
11977 " removing a %s" % kind,
11978 errors.ECODE_INVAL)
11979 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11980 item_fn(op, params)
11982 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11985 def _VerifyDiskModification(op, params):
11986 """Verifies a disk modification.
11989 if op == constants.DDM_ADD:
# New disks default to read-write access and must carry a valid size.
11990 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11991 if mode not in constants.DISK_ACCESS_SET:
11992 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11993 errors.ECODE_INVAL)
11995 size = params.get(constants.IDISK_SIZE, None)
11997 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
11998 constants.IDISK_SIZE, errors.ECODE_INVAL)
12002 except (TypeError, ValueError), err:
12003 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12004 errors.ECODE_INVAL)
12006 params[constants.IDISK_SIZE] = size
# Resizing through modify is rejected; gnt-instance grow-disk is the
# supported path for size changes.
12008 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12009 raise errors.OpPrereqError("Disk size change not possible, use"
12010 " grow-disk", errors.ECODE_INVAL)
12013 def _VerifyNicModification(op, params):
12014 """Verifies a network interface modification.
12017 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
# "none" (case-insensitive) clears the IP; anything else must be valid.
12018 ip = params.get(constants.INIC_IP, None)
12021 elif ip.lower() == constants.VALUE_NONE:
12022 params[constants.INIC_IP] = None
12023 elif not netutils.IPAddress.IsValid(ip):
12024 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12025 errors.ECODE_INVAL)
# Legacy 'bridge' and the newer 'link' parameter are mutually exclusive.
12027 bridge = params.get("bridge", None)
12028 link = params.get(constants.INIC_LINK, None)
12029 if bridge and link:
12030 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12031 " at the same time", errors.ECODE_INVAL)
12032 elif bridge and bridge.lower() == constants.VALUE_NONE:
12033 params["bridge"] = None
12034 elif link and link.lower() == constants.VALUE_NONE:
12035 params[constants.INIC_LINK] = None
12037 if op == constants.DDM_ADD:
# Added NICs default to an auto-generated MAC.
12038 macaddr = params.get(constants.INIC_MAC, None)
12039 if macaddr is None:
12040 params[constants.INIC_MAC] = constants.VALUE_AUTO
12042 if constants.INIC_MAC in params:
12043 macaddr = params[constants.INIC_MAC]
12044 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12045 macaddr = utils.NormalizeAndValidateMac(macaddr)
12047 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12048 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12049 " modifying an existing NIC",
12050 errors.ECODE_INVAL)
12052 def CheckArguments(self):
# At least one parameter group must be modified, otherwise the opcode
# is a no-op and is rejected.
12053 if not (self.op.nics or self.op.disks or self.op.disk_template or
12054 self.op.hvparams or self.op.beparams or self.op.os_name or
12055 self.op.offline is not None or self.op.runtime_mem):
12056 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12058 if self.op.hvparams:
12059 _CheckGlobalHvParams(self.op.hvparams)
# Upgrade legacy disk/NIC modification tuples, then validate them.
12062 self._UpgradeDiskNicMods("disk", self.op.disks,
12063 opcodes.OpInstanceSetParams.TestDiskModifications)
12065 self._UpgradeDiskNicMods("NIC", self.op.nics,
12066 opcodes.OpInstanceSetParams.TestNicModifications)
12068 # Check disk modifications
12069 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12070 self._VerifyDiskModification)
12072 if self.op.disks and self.op.disk_template is not None:
12073 raise errors.OpPrereqError("Disk template conversion and other disk"
12074 " changes not supported at the same time",
12075 errors.ECODE_INVAL)
# Converting to an internally mirrored template needs a secondary node.
12077 if (self.op.disk_template and
12078 self.op.disk_template in constants.DTS_INT_MIRROR and
12079 self.op.remote_node is None):
12080 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12081 " one requires specifying a secondary node",
12082 errors.ECODE_INVAL)
12084 # Check NIC modifications
12085 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12086 self._VerifyNicModification)
12088 def ExpandNames(self):
12089 self._ExpandAndLockInstance()
12090 # Can't even acquire node locks in shared mode as upcoming changes in
12091 # Ganeti 2.6 will start to modify the node object on disk conversion
12092 self.needed_locks[locking.LEVEL_NODE] = []
12093 self.needed_locks[locking.LEVEL_NODE_RES] = []
12094 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12096 def DeclareLocks(self, level):
12097 # TODO: Acquire group lock in shared mode (disk parameters)
12098 if level == locking.LEVEL_NODE:
12099 self._LockInstancesNodes()
# Disk-template conversion also needs the new secondary node locked.
12100 if self.op.disk_template and self.op.remote_node:
12101 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12102 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12103 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
# Node-resource locks mirror the node locks (copied, not aliased).
12105 self.needed_locks[locking.LEVEL_NODE_RES] = \
12106 self.needed_locks[locking.LEVEL_NODE][:]
12108 def BuildHooksEnv(self):
12109 """Build hooks env.
12111 This runs on the master, primary and secondaries.
# Only backend parameters actually being changed are exported.
12115 if constants.BE_MINMEM in self.be_new:
12116 args["minmem"] = self.be_new[constants.BE_MINMEM]
12117 if constants.BE_MAXMEM in self.be_new:
12118 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12119 if constants.BE_VCPUS in self.be_new:
12120 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12121 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12122 # information at all.
12124 if self._new_nics is not None:
# Export NICs with cluster-filled parameters (ip, mac, mode, link).
12127 for nic in self._new_nics:
12128 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12129 mode = nicparams[constants.NIC_MODE]
12130 link = nicparams[constants.NIC_LINK]
12131 nics.append((nic.ip, nic.mac, mode, link))
12133 args["nics"] = nics
12135 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12136 if self.op.disk_template:
12137 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12138 if self.op.runtime_mem:
12139 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12143 def BuildHooksNodes(self):
12144 """Build hooks nodes.
# Hooks run on the master plus all of the instance's nodes.
12147 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12150 def _PrepareNicModification(self, params, private, old_ip, old_params,
# Merge requested NIC parameter changes with the old values, validate
# the result against cluster defaults and reserve/generate the MAC.
12152 update_params_dict = dict([(key, params[key])
12153 for key in constants.NICS_PARAMETERS
# Legacy 'bridge' parameter maps onto the 'link' NIC parameter.
12156 if "bridge" in params:
12157 update_params_dict[constants.NIC_LINK] = params["bridge"]
12159 new_params = _GetUpdatedParams(old_params, update_params_dict)
12160 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12162 new_filled_params = cluster.SimpleFillNIC(new_params)
12163 objects.NIC.CheckParameterSyntax(new_filled_params)
12165 new_mode = new_filled_params[constants.NIC_MODE]
12166 if new_mode == constants.NIC_MODE_BRIDGED:
# Bridged mode: the target bridge must exist on the primary node; the
# failure is either collected as a warning or raised (branch partially
# visible here).
12167 bridge = new_filled_params[constants.NIC_LINK]
12168 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12170 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12172 self.warn.append(msg)
12174 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12176 elif new_mode == constants.NIC_MODE_ROUTED:
# Routed mode requires an IP address.
12177 ip = params.get(constants.INIC_IP, old_ip)
12179 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12180 " on a routed NIC", errors.ECODE_INVAL)
12182 if constants.INIC_MAC in params:
12183 mac = params[constants.INIC_MAC]
12185 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12186 errors.ECODE_INVAL)
12187 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12188 # otherwise generate the MAC address
12189 params[constants.INIC_MAC] = \
12190 self.cfg.GenerateMAC(self.proc.GetECId())
12192 # or validate/reserve the current one
12194 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12195 except errors.ReservationError:
12196 raise errors.OpPrereqError("MAC address '%s' already in use"
12197 " in cluster" % mac,
12198 errors.ECODE_NOTUNIQUE)
# Stash the computed parameter dicts on the private object for Exec.
12200 private.params = new_params
12201 private.filled = new_filled_params
12203 return (None, None)
12205 def CheckPrereq(self):
12206 """Check prerequisites.
12208 This only checks the instance list against the existing names.
12211 # checking the new params on the primary/secondary nodes
12213 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12214 cluster = self.cluster = self.cfg.GetClusterInfo()
12215 assert self.instance is not None, \
12216 "Cannot retrieve locked instance %s" % self.op.instance_name
12217 pnode = instance.primary_node
12218 nodelist = list(instance.all_nodes)
12219 pnode_info = self.cfg.GetNodeInfo(pnode)
12220 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12222 # Prepare disk/NIC modifications
12223 self.diskmod = PrepareContainerMods(self.op.disks, None)
12224 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12227 if self.op.os_name and not self.op.force:
12228 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12229 self.op.force_variant)
12230 instance_os = self.op.os_name
12232 instance_os = instance.os
12234 assert not (self.op.disk_template and self.op.disks), \
12235 "Can't modify disk template and apply disk changes at the same time"
12237 if self.op.disk_template:
12238 if instance.disk_template == self.op.disk_template:
12239 raise errors.OpPrereqError("Instance already has disk template %s" %
12240 instance.disk_template, errors.ECODE_INVAL)
12242 if (instance.disk_template,
12243 self.op.disk_template) not in self._DISK_CONVERSIONS:
12244 raise errors.OpPrereqError("Unsupported disk template conversion from"
12245 " %s to %s" % (instance.disk_template,
12246 self.op.disk_template),
12247 errors.ECODE_INVAL)
12248 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12249 msg="cannot change disk template")
12250 if self.op.disk_template in constants.DTS_INT_MIRROR:
12251 if self.op.remote_node == pnode:
12252 raise errors.OpPrereqError("Given new secondary node %s is the same"
12253 " as the primary node of the instance" %
12254 self.op.remote_node, errors.ECODE_STATE)
12255 _CheckNodeOnline(self, self.op.remote_node)
12256 _CheckNodeNotDrained(self, self.op.remote_node)
12257 # FIXME: here we assume that the old instance type is DT_PLAIN
12258 assert instance.disk_template == constants.DT_PLAIN
12259 disks = [{constants.IDISK_SIZE: d.size,
12260 constants.IDISK_VG: d.logical_id[0]}
12261 for d in instance.disks]
12262 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12263 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12265 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12266 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12267 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12268 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12269 ignore=self.op.ignore_ipolicy)
12270 if pnode_info.group != snode_info.group:
12271 self.LogWarning("The primary and secondary nodes are in two"
12272 " different node groups; the disk parameters"
12273 " from the first disk's node group will be"
12276 # hvparams processing
12277 if self.op.hvparams:
12278 hv_type = instance.hypervisor
12279 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12280 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12281 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12284 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12285 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12286 self.hv_proposed = self.hv_new = hv_new # the new actual values
12287 self.hv_inst = i_hvdict # the new dict (without defaults)
12289 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12291 self.hv_new = self.hv_inst = {}
12293 # beparams processing
12294 if self.op.beparams:
12295 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12297 objects.UpgradeBeParams(i_bedict)
12298 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12299 be_new = cluster.SimpleFillBE(i_bedict)
12300 self.be_proposed = self.be_new = be_new # the new actual values
12301 self.be_inst = i_bedict # the new dict (without defaults)
12303 self.be_new = self.be_inst = {}
12304 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12305 be_old = cluster.FillBE(instance)
12307 # CPU param validation -- checking every time a paramtere is
12308 # changed to cover all cases where either CPU mask or vcpus have
12310 if (constants.BE_VCPUS in self.be_proposed and
12311 constants.HV_CPU_MASK in self.hv_proposed):
12313 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12314 # Verify mask is consistent with number of vCPUs. Can skip this
12315 # test if only 1 entry in the CPU mask, which means same mask
12316 # is applied to all vCPUs.
12317 if (len(cpu_list) > 1 and
12318 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12319 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12321 (self.be_proposed[constants.BE_VCPUS],
12322 self.hv_proposed[constants.HV_CPU_MASK]),
12323 errors.ECODE_INVAL)
12325 # Only perform this test if a new CPU mask is given
12326 if constants.HV_CPU_MASK in self.hv_new:
12327 # Calculate the largest CPU number requested
12328 max_requested_cpu = max(map(max, cpu_list))
12329 # Check that all of the instance's nodes have enough physical CPUs to
12330 # satisfy the requested CPU mask
12331 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12332 max_requested_cpu + 1, instance.hypervisor)
12334 # osparams processing
12335 if self.op.osparams:
12336 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12337 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12338 self.os_inst = i_osdict # the new dict (without defaults)
12344 #TODO(dynmem): do the appropriate check involving MINMEM
12345 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12346 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12347 mem_check_list = [pnode]
12348 if be_new[constants.BE_AUTO_BALANCE]:
12349 # either we changed auto_balance to yes or it was from before
12350 mem_check_list.extend(instance.secondary_nodes)
12351 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12352 instance.hypervisor)
12353 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12354 [instance.hypervisor])
12355 pninfo = nodeinfo[pnode]
12356 msg = pninfo.fail_msg
12358 # Assume the primary node is unreachable and go ahead
12359 self.warn.append("Can't get info from primary node %s: %s" %
12362 (_, _, (pnhvinfo, )) = pninfo.payload
12363 if not isinstance(pnhvinfo.get("memory_free", None), int):
12364 self.warn.append("Node data from primary node %s doesn't contain"
12365 " free memory information" % pnode)
12366 elif instance_info.fail_msg:
12367 self.warn.append("Can't get instance runtime information: %s" %
12368 instance_info.fail_msg)
12370 if instance_info.payload:
12371 current_mem = int(instance_info.payload["memory"])
12373 # Assume instance not running
12374 # (there is a slight race condition here, but it's not very
12375 # probable, and we have no other way to check)
12376 # TODO: Describe race condition
12378 #TODO(dynmem): do the appropriate check involving MINMEM
12379 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12380 pnhvinfo["memory_free"])
12382 raise errors.OpPrereqError("This change will prevent the instance"
12383 " from starting, due to %d MB of memory"
12384 " missing on its primary node" %
12386 errors.ECODE_NORES)
12388 if be_new[constants.BE_AUTO_BALANCE]:
12389 for node, nres in nodeinfo.items():
12390 if node not in instance.secondary_nodes:
12392 nres.Raise("Can't get info from secondary node %s" % node,
12393 prereq=True, ecode=errors.ECODE_STATE)
12394 (_, _, (nhvinfo, )) = nres.payload
12395 if not isinstance(nhvinfo.get("memory_free", None), int):
12396 raise errors.OpPrereqError("Secondary node %s didn't return free"
12397 " memory information" % node,
12398 errors.ECODE_STATE)
12399 #TODO(dynmem): do the appropriate check involving MINMEM
12400 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12401 raise errors.OpPrereqError("This change will prevent the instance"
12402 " from failover to its secondary node"
12403 " %s, due to not enough memory" % node,
12404 errors.ECODE_STATE)
12406 if self.op.runtime_mem:
12407 remote_info = self.rpc.call_instance_info(instance.primary_node,
12409 instance.hypervisor)
12410 remote_info.Raise("Error checking node %s" % instance.primary_node)
12411 if not remote_info.payload: # not running already
12412 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12413 errors.ECODE_STATE)
12415 current_memory = remote_info.payload["memory"]
12416 if (not self.op.force and
12417 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12418 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12419 raise errors.OpPrereqError("Instance %s must have memory between %d"
12420 " and %d MB of memory unless --force is"
12421 " given" % (instance.name,
12422 self.be_proposed[constants.BE_MINMEM],
12423 self.be_proposed[constants.BE_MAXMEM]),
12424 errors.ECODE_INVAL)
12426 if self.op.runtime_mem > current_memory:
12427 _CheckNodeFreeMemory(self, instance.primary_node,
12428 "ballooning memory for instance %s" %
12430 self.op.memory - current_memory,
12431 instance.hypervisor)
12433 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12434 raise errors.OpPrereqError("Disk operations not supported for"
12435 " diskless instances",
12436 errors.ECODE_INVAL)
12438 def _PrepareNicCreate(_, params, private):
12439 return self._PrepareNicModification(params, private, None, {},
12442 def _PrepareNicMod(_, nic, params, private):
12443 return self._PrepareNicModification(params, private, nic.ip,
12444 nic.nicparams, cluster, pnode)
12446 # Verify NIC changes (operating on copy)
12447 nics = instance.nics[:]
12448 ApplyContainerMods("NIC", nics, None, self.nicmod,
12449 _PrepareNicCreate, _PrepareNicMod, None)
12450 if len(nics) > constants.MAX_NICS:
12451 raise errors.OpPrereqError("Instance has too many network interfaces"
12452 " (%d), cannot add more" % constants.MAX_NICS,
12453 errors.ECODE_STATE)
12455 # Verify disk changes (operating on a copy)
12456 disks = instance.disks[:]
12457 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12458 if len(disks) > constants.MAX_DISKS:
12459 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12460 " more" % constants.MAX_DISKS,
12461 errors.ECODE_STATE)
12463 if self.op.offline is not None:
12464 if self.op.offline:
12465 msg = "can't change to offline"
12467 msg = "can't change to online"
12468 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12470 # Pre-compute NIC changes (necessary to use result in hooks)
12471 self._nic_chgdesc = []
12473 # Operate on copies as this is still in prereq
12474 nics = [nic.Copy() for nic in instance.nics]
12475 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12476 self._CreateNewNic, self._ApplyNicMods, None)
12477 self._new_nics = nics
12479 self._new_nics = None
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    Generates the DRBD disk structure on the primary node and the new
    secondary node (C{self.op.remote_node}), creates the missing data/meta
    LVs, renames the original LVs into the DRBD data volumes, brings up the
    DRBD devices and waits for the initial sync.

    NOTE(review): this chunk has lines elided (the closing argument of
    C{_GenerateDiskTemplate}, the tail of the first C{_CreateSingleBlockDev}
    call, and the C{if disk_abort:} guard) -- confirm against the full file.

    @param feedback_fn: callable used to report progress to the caller

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # conversion is only defined starting from the plain template
    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
    info = _GetInstanceInfoText(instance)
    # NOTE(review): "aditional" is a typo, but it is runtime output, so it
    # is deliberately left untouched here.
    feedback_fn("Creating aditional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        # only the primary gets the "force create" flag
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    Promotes each DRBD disk's data LV (first child) to a standalone plain
    disk, updates the configuration, then best-effort removes the obsolete
    volumes on the secondary node and the meta volumes on the primary, and
    returns each DRBD TCP port to the cluster pool.

    NOTE(review): the C{if msg:} guards before the two LogWarning calls
    appear elided from this chunk -- confirm against the full file.

    @param feedback_fn: callable used to report progress to the caller

    """
    instance = self.instance

    # DRBD8 always has exactly one secondary
    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    # the data LV is the first child of each DRBD device
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      # the meta LV is the second child of each DRBD device
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

    # this is a DRBD disk, return its port to the pool
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # Node resource locks will be released by caller
  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    Generates the new disk object from C{params} at index C{idx}, creates
    the block devices on all of the instance's nodes (best effort; failures
    are logged as warnings), and reports the addition as a change entry.

    NOTE(review): several lines are elided from this chunk (the C{else:}
    branch, the C{disk = \\} assignment, the C{try:} around
    C{_CreateBlockDev} and the final C{return}) -- confirm against the
    full file.

    @param idx: index of the new disk in the instance's disk list
    @param params: dict of disk parameters (size, mode, ...)

    """
    instance = self.instance

    # file-based templates derive driver/directory from the first disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
      file_driver = file_path = None

      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      # only the primary node gets the force-create flag
      f_create = (node == instance.primary_node)
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    Only the access mode of the disk is changed; the change is reported as
    a C{"disk.mode/<idx>"} entry.

    NOTE(review): the decorator and the C{return [} line appear elided from
    this chunk -- confirm against the full file.

    """
    disk.mode = params[constants.IDISK_MODE]
      ("disk.mode/%d" % idx, disk.mode),
  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    Walks the disk's node tree and removes the block device on every node
    (best effort; failures are logged as warnings), and for DRBD-backed
    disks returns the TCP port to the cluster pool.

    NOTE(review): the C{if msg:} guard before LogWarning appears elided
    from this chunk -- confirm against the full file.

    @param idx: index of the disk being removed (for messages only)
    @param root: the top-level disk object to remove

    """
    for node, disk in root.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    Builds a L{objects.NIC} from C{params} (MAC required, IP optional) and
    the pre-computed NIC parameters in C{private.params}, and returns it
    together with a human-readable change description using the filled
    (with-defaults) mode/link values.

    NOTE(review): the C{("nic.%d" % idx,} line and the closing bracket of
    the return value appear elided from this chunk.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    nicparams = private.params

    return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
      "add:mac=%s,ip=%s,mode=%s,link=%s" %
      (mac, ip, private.filled[constants.NIC_MODE],
       private.filled[constants.NIC_LINK])),
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    Applies MAC/IP changes directly to the NIC object, replaces its
    nicparams from C{private.params} when new ones were computed, and
    records every applied change as a C{"nic.<key>/<idx>"} entry.

    NOTE(review): the C{changes} initialization, the C{if key in params:}
    and C{if private.filled:} guards and the final C{return} appear elided
    from this chunk.

    """
    for key in [constants.INIC_MAC, constants.INIC_IP]:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

      nic.nicparams = private.params

      for (key, val) in params.items():
        changes.append(("nic.%s/%d" % (key, idx), val))
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    Applies, in order: runtime memory ballooning, disk container changes,
    disk template conversion, NIC changes, hv/be/os parameter updates, OS
    rename, and offline/online admin-state changes, then writes the updated
    instance to the configuration.  Returns the accumulated list of
    (field, new-value) change descriptions.

    NOTE(review): a few lines are elided from this chunk (the C{result}
    initialization, the C{if not r_shut:} guard and the C{try/except}
    around the conversion call) -- confirm against the full file.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # node resource locks are held iff a disk template change was requested
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    instance = self.instance

    # runtime memory: balloon the live instance via RPC
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      check_nodes = set(instance.all_nodes)
      if self.op.remote_node:
        check_nodes.add(self.op.remote_node)
      for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
        owned = self.owned_locks(level)
        assert not (check_nodes - owned), \
          ("Not owning the correct locks, owning %r, expected at least %r" %
           (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      # dispatch to the per-(from, to) conversion method
      mode = (instance.disk_template, self.op.disk_template)
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
        self.cfg.ReleaseDRBDMinors(instance.name)
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams: store the without-defaults dict computed in CheckPrereq
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams: store the without-defaults dict computed in CheckPrereq
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams: store the without-defaults dict computed in CheckPrereq
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"
  # Dispatch table used by Exec for disk template conversions; keys are
  # (current_template, requested_template) pairs, values are the unbound
  # conversion methods defined above.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
class LUInstanceChangeGroup(LogicalUnit):
  """Moves an instance to another node group via the iallocator.

  Computes candidate target groups (either user-requested or all groups not
  currently used by the instance), asks the instance allocator for a change
  plan, and returns the resulting jobs.

  NOTE(review): a few lines are elided from this chunk (e.g. C{REQ_BGL},
  the C{else:} branches in the lock declarations and the C{env = {} }
  initialization in BuildHooksEnv) -- confirm against the full file.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE

  def ExpandNames(self):
    # all locks are shared; group/node locks are declared lazily per level
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

    self._ExpandAndLockInstance()

    # resolve requested group names to UUIDs up front
    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

      "TARGET_GROUPS": " ".join(self.target_uuids),

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # hooks run on the master node only
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    # ask the allocator for a plan moving the instance to one target group
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
class LUBackupQuery(NoHooksLU):
  """Query the exports list.

  Returns, per queried node, the list of exports found there (or C{False}
  when the node could not be contacted).

  NOTE(review): a few lines are elided from this chunk (the C{else:}
  branches, the C{result} initialization and the final C{return}) --
  confirm against the full file.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    # an empty node list means "all nodes"
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    for node in rpcresult:
      # unreachable nodes are reported as False rather than a list
      if rpcresult[node].fail_msg:
        result[node] = False
        result[node] = rpcresult[node].payload
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  For remote exports this generates an X509 key/certificate pair on the
  instance's primary node and returns the handshake data needed by the
  destination cluster; for local exports there is nothing to prepare.

  NOTE(review): the tails of the C{load_certificate} call and of the
  returned dict appear elided from this chunk -- confirm against the
  full file.

  """

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    # cluster domain secret, used to sign the remote-export handshake
    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      # salt for the HMAC over the X509 key name
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,

        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  Supports local exports (snapshot to another node of this cluster) and
  remote exports (encrypted transfer to another cluster, authenticated via
  the cluster domain secret and X509 certificates).

  NOTE(review): several lines are elided throughout this chunk (C{try:}
  statements, C{if msg:} guards, the C{env = {} } and C{failures = []}
  initializations, C{return nl} etc.) -- confirm against the full file.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check the arguments.

    Remote exports additionally require the X509 key name and the
    destination CA; their absence is rejected early.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      # Sad but true, for now we have do lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    # local exports also run hooks on the destination node
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    # removing a running instance without shutting it down first is refused
    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      # remote-export-only attributes stay unset for local mode
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      # for remote mode, target_node carries per-disk destination info
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      for idx, disk_data in enumerate(self.op.target_node):
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

      raise errors.ProgrammerError("Unhandled export mode %r" %

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    # disks of a stopped instance have to be activated for the snapshot
    activate_disks = (instance.admin_state != constants.ADMINST_UP)

      # Activate the instance disks if we'exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,

      helper.CreateSnapshots()
        # restart the instance if it was running and must stay running
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):

        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    # old exports only need cleanup for local mode
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  The instance may already have been removed from the cluster; in that
  case only its FQDN can be used to locate the export.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    Queries all locked nodes for exports matching the instance name and
    removes every match; failures on individual nodes are logged but do
    not abort the operation.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        # Best-effort: a node that cannot be queried is skipped, not fatal
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Name.")
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already, and validates the node/disk parameters, hypervisor/disk
    state and instance policy passed in the opcode.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      # Name not found: this is the expected (good) case
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
    else:
      self.new_hv_state = None

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
    else:
      self.new_disk_state = None

    if self.op.diskparams:
      # Fill in empty dicts for templates not mentioned by the caller, then
      # type-check every template's parameter dict
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
    else:
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams

    if self.op.ipolicy:
      cluster = self.cfg.GetClusterInfo()
      full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
      try:
        objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
      except errors.ConfigurationError, err:
        raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams,
                                  diskparams=self.op.diskparams,
                                  ipolicy=self.op.ipolicy,
                                  hv_state_static=self.new_hv_state,
                                  disk_state_static=self.new_disk_state)

    # check_uuid=False: we generated the UUID ourselves in ExpandNames
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the optimistically-acquired group locks still match
    the nodes' actual groups, and computes which instances would become
    (or remain) split across groups by this assignment.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only assignments that actually move a node to a different group matter
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      # All nodes an instance's disks live on (primary + secondaries)
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      # No names given: return all groups, sorted by name
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    # Read-only query: no locks needed at any level
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData(self._cluster,
                                [self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  Thin wrapper delegating all work to L{_GroupQuery}.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    # At least one of the modifiable fields must be given
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def CheckPrereq(self):
    """Check prerequisites.

    Re-validates the optimistic instance locks and computes the new
    parameter dicts (merged with the group's current values).

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    cluster = self.cfg.GetClusterInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
                                            self.op.ipolicy,
                                            group_policy=True)

      new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
      inst_filter = lambda inst: inst.name in owned_instances
      instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
      violations = \
        _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                                             self.group),
                                      new_ipolicy, instances)

      if violations:
        # Only warn: existing instances violating the new policy do not
        # block the change
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(violations))

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @return: list of (field, new value) pairs describing what changed

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing an (empty) node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # New name not found: this is the expected (good) case
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node and on all member nodes of the group.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new group name

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  """Logical unit for evacuating a node group.

  Uses an iallocator to compute jobs that move all instances off the
  given group onto the target groups.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Re-verifies the optimistically-acquired instance/group/node locks
    and computes the final list of target group UUIDs.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Run the iallocator and return the resulting evacuation jobs.

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.
  It resolves C{self.op.name} according to the tag kind and sets
  C{self.target} to the object being tagged.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the target object (cluster, node, instance or node group)
    matching the requested tag kind.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @return: list of (path, tag) pairs for every tag matching the
      pattern, over the cluster, all instances, nodes and node groups

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag(s) on the target object.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    # All requested tags must currently exist on the target
    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    Sleeps on the master (locally) and/or on the requested nodes (via
    RPC), raising L{errors.OpExecError} on any failure.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
14355 class LUTestJqueue(NoHooksLU):
14356 """Utility LU to test some aspects of the job queue.
14361 # Must be lower than default timeout for WaitForJobChange to see whether it
14362 # notices changed jobs
14363 _CLIENT_CONNECT_TIMEOUT = 20.0
14364 _CLIENT_CONFIRM_TIMEOUT = 60.0
# Opens a temporary AF_UNIX socket, hands its path to the client via cb,
# then waits (with timeouts) for the client to connect and confirm.
14367 def _NotifyUsingSocket(cls, cb, errcls):
14368 """Opens a Unix socket and waits for another program to connect.
14371 @param cb: Callback to send socket name to client
14372 @type errcls: class
14373 @param errcls: Exception class to use for errors
14376 # Using a temporary directory as there's no easy way to create temporary
14377 # sockets without writing a custom loop around tempfile.mktemp and
14379 tmpdir = tempfile.mkdtemp()
14381 tmpsock = utils.PathJoin(tmpdir, "sock")
14383 logging.debug("Creating temporary socket at %s", tmpsock)
14384 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14389 # Send details to client
14392 # Wait for client to connect before continuing
14393 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14395 (conn, _) = sock.accept()
14396 except socket.error, err:
14397 raise errcls("Client didn't connect in time (%s)" % err)
14401 # Remove as soon as client is connected
14402 shutil.rmtree(tmpdir)
14404 # Wait for client to close
14407 # pylint: disable=E1101
14408 # Instance of '_socketobject' has no ... member
14409 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14411 except socket.error, err:
14412 raise errcls("Client failed to confirm notification (%s)" % err)
# Emits an ELOG_JQUEUE_TEST log entry carrying the socket path, test name
# and argument; the job-queue test client picks this up from the job log.
14416 def _SendNotification(self, test, arg, sockname):
14417 """Sends a notification to the client.
14420 @param test: Test name
14421 @param arg: Test argument (depends on test)
14422 @type sockname: string
14423 @param sockname: Socket path
14426 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
# Chooses OpPrereqError vs OpExecError depending on the phase, then runs
# the socket-based notification handshake.
14428 def _Notify(self, prereq, test, arg):
14429 """Notifies the client of a test.
14432 @param prereq: Whether this is a prereq-phase test
14434 @param test: Test name
14435 @param arg: Test argument (depends on test)
14439 errcls = errors.OpPrereqError
14441 errcls = errors.OpExecError
14443 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
# Counts CheckArguments invocations so ExpandNames/Exec can verify the
# expected LU call sequence.
14447 def CheckArguments(self):
14448 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14449 self.expandnames_calls = 0
14451 def ExpandNames(self):
14452 checkargs_calls = getattr(self, "checkargs_calls", 0)
14453 if checkargs_calls < 1:
14454 raise errors.ProgrammerError("CheckArguments was not called")
14456 self.expandnames_calls += 1
14458 if self.op.notify_waitlock:
14459 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14461 self.LogInfo("Expanding names")
14463 # Get lock on master node (just to get a lock, not for a particular reason)
14464 self.needed_locks = {
14465 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14468 def Exec(self, feedback_fn):
14469 if self.expandnames_calls < 1:
14470 raise errors.ProgrammerError("ExpandNames was not called")
14472 if self.op.notify_exec:
14473 self._Notify(False, constants.JQT_EXEC, None)
14475 self.LogInfo("Executing")
# Send each configured log message through feedback_fn, then report the
# total count so the test client can verify delivery.
14477 if self.op.log_messages:
14478 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14479 for idx, msg in enumerate(self.op.log_messages):
14480 self.LogInfo("Sending log message %s", idx + 1)
14481 feedback_fn(constants.JQT_MSGPREFIX + msg)
14482 # Report how many test messages have been sent
14483 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14486 raise errors.OpExecError("Opcode failure was requested")
14491 class IAllocator(object):
14492 """IAllocator framework.
14494 An IAllocator instance has three sets of attributes:
14495 - cfg that is needed to query the cluster
14496 - input data (all members of the _KEYS class attribute are required)
14497 - four buffer attributes (in|out_data|text), that represent the
14498 input (to the external script) in text and data structure format,
14499 and the output from it, again in two formats
14500 - the result variables from the script (success, info, nodes) for
14504 # pylint: disable=R0902
14505 # lots of instance attributes
# Per-mode input fields are validated against _MODE_DATA: exactly the keys
# declared for the requested mode must be passed as kwargs.
14507 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14509 self.rpc = rpc_runner
14510 # init buffer variables
14511 self.in_text = self.out_text = self.in_data = self.out_data = None
14512 # init all input fields so that pylint is happy
14514 self.memory = self.disks = self.disk_template = self.spindle_usage = None
14515 self.os = self.tags = self.nics = self.vcpus = None
14516 self.hypervisor = None
14517 self.relocate_from = None
14519 self.instances = None
14520 self.evac_mode = None
14521 self.target_groups = []
14523 self.required_nodes = None
14524 # init result fields
14525 self.success = self.info = self.result = None
14528 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14530 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14531 " IAllocator" % self.mode)
14533 keyset = [n for (n, _) in keydata]
14536 if key not in keyset:
14537 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14538 " IAllocator" % key)
14539 setattr(self, key, kwargs[key])
14542 if key not in kwargs:
14543 raise errors.ProgrammerError("Missing input parameter '%s' to"
14544 " IAllocator" % key)
14545 self._BuildInputData(compat.partial(fn, self), keydata)
# Builds the mode-independent part of the allocator input: cluster info,
# node groups, static + dynamic node data, and instance data.
14547 def _ComputeClusterData(self):
14548 """Compute the generic allocator input data.
14550 This is the data that is independent of the actual operation.
14554 cluster_info = cfg.GetClusterInfo()
14557 "version": constants.IALLOCATOR_VERSION,
14558 "cluster_name": cfg.GetClusterName(),
14559 "cluster_tags": list(cluster_info.GetTags()),
14560 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14561 "ipolicy": cluster_info.ipolicy,
14563 ninfo = cfg.GetAllNodesInfo()
14564 iinfo = cfg.GetAllInstancesInfo().values()
14565 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
# Only vm_capable nodes are queried for live node/instance information.
14568 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14570 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14571 hypervisor_name = self.hypervisor
14572 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14573 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14575 hypervisor_name = cluster_info.primary_hypervisor
14577 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14580 self.rpc.call_all_instances_info(node_list,
14581 cluster_info.enabled_hypervisors)
14583 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14585 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14586 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14587 i_list, config_ndata)
14588 assert len(data["nodes"]) == len(ninfo), \
14589 "Incomplete node data computed"
14591 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14593 self.in_data = data
# Maps each node-group UUID to its name, allocation policy and effective
# instance policy (group policy layered over the cluster one).
14596 def _ComputeNodeGroupData(cfg):
14597 """Compute node groups data.
14600 cluster = cfg.GetClusterInfo()
14601 ng = dict((guuid, {
14602 "name": gdata.name,
14603 "alloc_policy": gdata.alloc_policy,
14604 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14606 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
# Static, config-only node attributes; dynamic (live) values are layered
# on top by _ComputeDynamicNodeData.
14611 def _ComputeBasicNodeData(cfg, node_cfg):
14612 """Compute global node data.
14615 @returns: a dict of name: (node dict, node config)
14618 # fill in static (config-based) values
14619 node_results = dict((ninfo.name, {
14620 "tags": list(ninfo.GetTags()),
14621 "primary_ip": ninfo.primary_ip,
14622 "secondary_ip": ninfo.secondary_ip,
14623 "offline": ninfo.offline,
14624 "drained": ninfo.drained,
14625 "master_candidate": ninfo.master_candidate,
14626 "group": ninfo.group,
14627 "master_capable": ninfo.master_capable,
14628 "vm_capable": ninfo.vm_capable,
14629 "ndparams": cfg.GetNdParams(ninfo),
14631 for ninfo in node_cfg.values())
14633 return node_results
# Merges live RPC results (memory/disk/cpu totals, per-instance memory
# accounting) into the static node data; offline/drained nodes are skipped.
14636 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14638 """Compute global node data.
14640 @param node_results: the basic node structures as filled from the config
14643 #TODO(dynmem): compute the right data on MAX and MIN memory
14644 # make a copy of the current dict
14645 node_results = dict(node_results)
14646 for nname, nresult in node_data.items():
14647 assert nname in node_results, "Missing basic data for node %s" % nname
14648 ninfo = node_cfg[nname]
14650 if not (ninfo.offline or ninfo.drained):
14651 nresult.Raise("Can't get data for node %s" % nname)
14652 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14654 remote_info = _MakeLegacyNodeInfo(nresult.payload)
# Sanity-check that the node returned all required integer attributes.
14656 for attr in ["memory_total", "memory_free", "memory_dom0",
14657 "vg_size", "vg_free", "cpu_total"]:
14658 if attr not in remote_info:
14659 raise errors.OpExecError("Node '%s' didn't return attribute"
14660 " '%s'" % (nname, attr))
14661 if not isinstance(remote_info[attr], int):
14662 raise errors.OpExecError("Node '%s' returned invalid value"
14664 (nname, attr, remote_info[attr]))
14665 # compute memory used by primary instances
14666 i_p_mem = i_p_up_mem = 0
14667 for iinfo, beinfo in i_list:
14668 if iinfo.primary_node == nname:
14669 i_p_mem += beinfo[constants.BE_MAXMEM]
14670 if iinfo.name not in node_iinfo[nname].payload:
# Adjust free memory downwards when an instance uses less than its
# configured maximum (ballooning headroom is still reserved).
14673 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14674 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14675 remote_info["memory_free"] -= max(0, i_mem_diff)
14677 if iinfo.admin_state == constants.ADMINST_UP:
14678 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14680 # compute memory used by instances
14682 "total_memory": remote_info["memory_total"],
14683 "reserved_memory": remote_info["memory_dom0"],
14684 "free_memory": remote_info["memory_free"],
14685 "total_disk": remote_info["vg_size"],
14686 "free_disk": remote_info["vg_free"],
14687 "total_cpus": remote_info["cpu_total"],
14688 "i_pri_memory": i_p_mem,
14689 "i_pri_up_memory": i_p_up_mem,
# Static data takes precedence over the dynamic values on key collision.
14691 pnr_dyn.update(node_results[nname])
14692 node_results[nname] = pnr_dyn
14694 return node_results
# Serializes per-instance config (NICs, disks, BE params) into the plain
# dict structure expected by the external iallocator script.
14697 def _ComputeInstanceData(cluster_info, i_list):
14698 """Compute global instance data.
14702 for iinfo, beinfo in i_list:
14704 for nic in iinfo.nics:
14705 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14709 "mode": filled_params[constants.NIC_MODE],
14710 "link": filled_params[constants.NIC_LINK],
# Legacy compatibility: bridged NICs also expose the link as "bridge".
14712 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14713 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14714 nic_data.append(nic_dict)
14716 "tags": list(iinfo.GetTags()),
14717 "admin_state": iinfo.admin_state,
14718 "vcpus": beinfo[constants.BE_VCPUS],
14719 "memory": beinfo[constants.BE_MAXMEM],
14720 "spindle_usage": beinfo[constants.BE_SPINDLE_USAGE],
14722 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14724 "disks": [{constants.IDISK_SIZE: dsk.size,
14725 constants.IDISK_MODE: dsk.mode}
14726 for dsk in iinfo.disks],
14727 "disk_template": iinfo.disk_template,
14728 "hypervisor": iinfo.hypervisor,
14730 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14732 instance_data[iinfo.name] = pir
14734 return instance_data
# Request builder for IALLOCATOR_MODE_ALLOC: new-instance placement.
14736 def _AddNewInstance(self):
14737 """Add new instance data to allocator structure.
14739 This in combination with _AllocatorGetClusterData will create the
14740 correct structure needed as input for the allocator.
14742 The checks for the completeness of the opcode must have already been
14746 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
# Internally mirrored templates (e.g. DRBD) need two nodes, others one.
14748 if self.disk_template in constants.DTS_INT_MIRROR:
14749 self.required_nodes = 2
14751 self.required_nodes = 1
14755 "disk_template": self.disk_template,
14758 "vcpus": self.vcpus,
14759 "memory": self.memory,
14760 "spindle_usage": self.spindle_usage,
14761 "disks": self.disks,
14762 "disk_space_total": disk_space,
14764 "required_nodes": self.required_nodes,
14765 "hypervisor": self.hypervisor,
# Request builder for IALLOCATOR_MODE_RELOC: secondary-node relocation of
# an existing mirrored instance.
14770 def _AddRelocateInstance(self):
14771 """Add relocate instance data to allocator structure.
14773 This in combination with _IAllocatorGetClusterData will create the
14774 correct structure needed as input for the allocator.
14776 The checks for the completeness of the opcode must have already been
14780 instance = self.cfg.GetInstanceInfo(self.name)
14781 if instance is None:
14782 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14783 " IAllocator" % self.name)
14785 if instance.disk_template not in constants.DTS_MIRRORED:
14786 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14787 errors.ECODE_INVAL)
14789 if instance.disk_template in constants.DTS_INT_MIRROR and \
14790 len(instance.secondary_nodes) != 1:
14791 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14792 errors.ECODE_STATE)
14794 self.required_nodes = 1
14795 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14796 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14800 "disk_space_total": disk_space,
14801 "required_nodes": self.required_nodes,
14802 "relocate_from": self.relocate_from,
# Request builder for IALLOCATOR_MODE_NODE_EVAC.
14806 def _AddNodeEvacuate(self):
14807 """Get data for node-evacuate requests.
14811 "instances": self.instances,
14812 "evac_mode": self.evac_mode,
# Request builder for IALLOCATOR_MODE_CHG_GROUP.
14815 def _AddChangeGroup(self):
14816 """Get data for node-evacuate requests.
14820 "instances": self.instances,
14821 "target_groups": self.target_groups,
# Combines cluster data with the mode-specific request (built by fn),
# validates each request key against its declared type checker, and
# serializes the whole structure into self.in_text.
14824 def _BuildInputData(self, fn, keydata):
14825 """Build input data structures.
14828 self._ComputeClusterData()
14831 request["type"] = self.mode
14832 for keyname, keytype in keydata:
14833 if keyname not in request:
14834 raise errors.ProgrammerError("Request parameter %s is missing" %
14836 val = request[keyname]
14837 if not keytype(val):
14838 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14839 " validation, value %s, expected"
14840 " type %s" % (keyname, val, keytype))
14841 self.in_data["request"] = request
14843 self.in_text = serializer.Dump(self.in_data)
# Type-check helpers used by the per-mode result validators below.
14845 _STRING_LIST = ht.TListOf(ht.TString)
14846 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14847 # pylint: disable=E1101
14848 # Class '...' has no 'OP_ID' member
14849 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14850 opcodes.OpInstanceMigrate.OP_ID,
14851 opcodes.OpInstanceReplaceDisks.OP_ID])
14855 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14856 ht.TItems([ht.TNonEmptyString,
14857 ht.TNonEmptyString,
14858 ht.TListOf(ht.TNonEmptyString),
14861 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14862 ht.TItems([ht.TNonEmptyString,
14865 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14866 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
# Per-mode table: (request-builder method, [(key, type-check)], result
# validator); consumed by __init__ and _ValidateResult.
14869 constants.IALLOCATOR_MODE_ALLOC:
14872 ("name", ht.TString),
14873 ("memory", ht.TInt),
14874 ("spindle_usage", ht.TInt),
14875 ("disks", ht.TListOf(ht.TDict)),
14876 ("disk_template", ht.TString),
14877 ("os", ht.TString),
14878 ("tags", _STRING_LIST),
14879 ("nics", ht.TListOf(ht.TDict)),
14880 ("vcpus", ht.TInt),
14881 ("hypervisor", ht.TString),
14883 constants.IALLOCATOR_MODE_RELOC:
14884 (_AddRelocateInstance,
14885 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14887 constants.IALLOCATOR_MODE_NODE_EVAC:
14888 (_AddNodeEvacuate, [
14889 ("instances", _STRING_LIST),
14890 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14892 constants.IALLOCATOR_MODE_CHG_GROUP:
14893 (_AddChangeGroup, [
14894 ("instances", _STRING_LIST),
14895 ("target_groups", _STRING_LIST),
# Executes the named iallocator script on the master node via RPC and
# stores/validates its textual output.
14899 def Run(self, name, validate=True, call_fn=None):
14900 """Run an instance allocator and return the results.
14903 if call_fn is None:
14904 call_fn = self.rpc.call_iallocator_runner
14906 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14907 result.Raise("Failure while running the iallocator script")
14909 self.out_text = result.payload
14911 self._ValidateResult()
14913 def _ValidateResult(self):
14914 """Process the allocator results.
14916 This will process and if successful save the result in
14917 self.out_data and the other parameters.
14921 rdict = serializer.Load(self.out_text)
14922 except Exception, err:
14923 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14925 if not isinstance(rdict, dict):
14926 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14928 # TODO: remove backwards compatiblity in later versions
14929 if "nodes" in rdict and "result" not in rdict:
14930 rdict["result"] = rdict["nodes"]
14933 for key in "success", "info", "result":
14934 if key not in rdict:
14935 raise errors.OpExecError("Can't parse iallocator results:"
14936 " missing key '%s'" % key)
14937 setattr(self, key, rdict[key])
14939 if not self._result_check(self.result):
14940 raise errors.OpExecError("Iallocator returned invalid result,"
14941 " expected %s, got %s" %
14942 (self._result_check, self.result),
14943 errors.ECODE_INVAL)
# For relocation, additionally enforce that the returned nodes stay in
# the same node group(s) as the original placement.
14945 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14946 assert self.relocate_from is not None
14947 assert self.required_nodes == 1
14949 node2group = dict((name, ndata["group"])
14950 for (name, ndata) in self.in_data["nodes"].items())
14952 fn = compat.partial(self._NodesToGroups, node2group,
14953 self.in_data["nodegroups"])
14955 instance = self.cfg.GetInstanceInfo(self.name)
14956 request_groups = fn(self.relocate_from + [instance.primary_node])
14957 result_groups = fn(rdict["result"] + [instance.primary_node])
14959 if self.success and not set(result_groups).issubset(request_groups):
14960 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14961 " differ from original groups (%s)" %
14962 (utils.CommaJoin(result_groups),
14963 utils.CommaJoin(request_groups)))
14965 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14966 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14968 self.out_data = rdict
# Maps node names to a sorted list of unique group names; unknown nodes
# are skipped and unknown groups fall back to their UUID.
14971 def _NodesToGroups(node2group, groups, nodes):
14972 """Returns a list of unique group names for a list of nodes.
14974 @type node2group: dict
14975 @param node2group: Map from node name to group UUID
14977 @param groups: Group information
14979 @param nodes: Node names
14986 group_uuid = node2group[node]
14988 # Ignore unknown node
14992 group = groups[group_uuid]
14994 # Can't find group, let's use UUID
14995 group_name = group_uuid
14997 group_name = group["name"]
14999 result.add(group_name)
15001 return sorted(result)
15004 class LUTestAllocator(NoHooksLU):
15005 """Run allocator tests.
15007 This LU runs the allocator tests
# Validates the opcode parameters per allocator mode (alloc/reloc/
# chg-group/node-evac) and per direction (in/out).
15010 def CheckPrereq(self):
15011 """Check prerequisites.
15013 This checks the opcode parameters depending on the director and mode test.
15016 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15017 for attr in ["memory", "disks", "disk_template",
15018 "os", "tags", "nics", "vcpus"]:
15019 if not hasattr(self.op, attr):
15020 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15021 attr, errors.ECODE_INVAL)
# For a test allocation the instance name must NOT already exist.
15022 iname = self.cfg.ExpandInstanceName(self.op.name)
15023 if iname is not None:
15024 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15025 iname, errors.ECODE_EXISTS)
15026 if not isinstance(self.op.nics, list):
15027 raise errors.OpPrereqError("Invalid parameter 'nics'",
15028 errors.ECODE_INVAL)
15029 if not isinstance(self.op.disks, list):
15030 raise errors.OpPrereqError("Invalid parameter 'disks'",
15031 errors.ECODE_INVAL)
# Every disk entry must be a dict with an integer size and a valid
# access mode.
15032 for row in self.op.disks:
15033 if (not isinstance(row, dict) or
15034 constants.IDISK_SIZE not in row or
15035 not isinstance(row[constants.IDISK_SIZE], int) or
15036 constants.IDISK_MODE not in row or
15037 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15038 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15039 " parameter", errors.ECODE_INVAL)
15040 if self.op.hypervisor is None:
15041 self.op.hypervisor = self.cfg.GetHypervisorType()
15042 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
# Relocation requires an existing instance; default relocate_from to its
# current secondary nodes.
15043 fname = _ExpandInstanceName(self.cfg, self.op.name)
15044 self.op.name = fname
15045 self.relocate_from = \
15046 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15047 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15048 constants.IALLOCATOR_MODE_NODE_EVAC):
15049 if not self.op.instances:
15050 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15051 self.op.instances = _GetWantedInstances(self, self.op.instances)
15053 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15054 self.op.mode, errors.ECODE_INVAL)
# Direction "out" actually runs a script, so an allocator name is needed.
15056 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15057 if self.op.allocator is None:
15058 raise errors.OpPrereqError("Missing allocator name",
15059 errors.ECODE_INVAL)
15060 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15061 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15062 self.op.direction, errors.ECODE_INVAL)
# Builds the mode-appropriate IAllocator request; direction "in" returns
# the generated input text, "out" runs the script and returns its output.
15064 def Exec(self, feedback_fn):
15065 """Run the allocator test.
15068 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15069 ial = IAllocator(self.cfg, self.rpc,
15072 memory=self.op.memory,
15073 disks=self.op.disks,
15074 disk_template=self.op.disk_template,
15078 vcpus=self.op.vcpus,
15079 hypervisor=self.op.hypervisor,
15081 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15082 ial = IAllocator(self.cfg, self.rpc,
15085 relocate_from=list(self.relocate_from),
15087 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15088 ial = IAllocator(self.cfg, self.rpc,
15090 instances=self.op.instances,
15091 target_groups=self.op.target_groups)
15092 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15093 ial = IAllocator(self.cfg, self.rpc,
15095 instances=self.op.instances,
15096 evac_mode=self.op.evac_mode)
15098 raise errors.ProgrammerError("Uncatched mode %s in"
15099 " LUTestAllocator.Exec", self.op.mode)
15101 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15102 result = ial.in_text
15104 ial.Run(self.op.allocator, validate=False)
15105 result = ial.out_text
15109 #: Query type implementations
# Maps opcode-accessible query resource types to their query classes; the
# assert guards that the mapping covers exactly constants.QR_VIA_OP.
15111 constants.QR_INSTANCE: _InstanceQuery,
15112 constants.QR_NODE: _NodeQuery,
15113 constants.QR_GROUP: _GroupQuery,
15114 constants.QR_OS: _OsQuery,
15117 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
# Looks up the query class for a resource type, translating an unknown
# name into OpPrereqError instead of a raw KeyError.
15120 def _GetQueryImplementation(name):
15121 """Returns the implemtnation for a query type.
15123 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15127 return _QUERY_IMPL[name]
15129 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15130 errors.ECODE_INVAL)