4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
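As an illustrative sketch only (the keyword argument here is made up, not an
existing field), an LU's Exec could hand follow-up work back like::

  return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
                        message="verification submitted")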
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods no longer need to worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
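As a minimal sketch, an LU that can only compute its node locks once the
instance locks are held could do something like::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()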
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If there are no nodes to return,
318 an empty list should be used (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused-argument and
345 # could-be-a-function pylint warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called from DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
496 def __init__(self, qfilter, fields, use_locking):
497 """Initializes this class.
500 self.use_locking = use_locking
502 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
504 self.requested_data = self.query.RequestedData()
505 self.names = self.query.RequestedNames()
507 # Sort only if no names were requested
508 self.sort_by_name = not self.names
510 self.do_locking = None
513 def _GetNames(self, lu, all_names, lock_level):
514 """Helper function to determine names asked for in the query.
518 names = lu.owned_locks(lock_level)
522 if self.wanted == locking.ALL_SET:
523 assert not self.names
524 # caller didn't specify names, so ordering is not important
525 return utils.NiceSort(names)
527 # caller specified names and we must keep the same order
529 assert not self.do_locking or lu.glm.is_owned(lock_level)
531 missing = set(self.wanted).difference(names)
533 raise errors.OpExecError("Some items were removed before retrieving"
534 " their data: %s" % missing)
536 # Return expanded names
539 def ExpandNames(self, lu):
540 """Expand names for this query.
542 See L{LogicalUnit.ExpandNames}.
545 raise NotImplementedError()
547 def DeclareLocks(self, lu, level):
548 """Declare locks for this query.
550 See L{LogicalUnit.DeclareLocks}.
553 raise NotImplementedError()
555 def _GetQueryData(self, lu):
556 """Collects all data for this query.
558 @return: Query data object
561 raise NotImplementedError()
563 def NewStyleQuery(self, lu):
564 """Collect data and execute query.
567 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
568 sort_by_name=self.sort_by_name)
570 def OldStyleQuery(self, lu):
571 """Collect data and execute query.
574 return self.query.OldStyleQuery(self._GetQueryData(lu),
575 sort_by_name=self.sort_by_name)
579 """Returns a dict declaring all lock levels shared.
582 return dict.fromkeys(locking.LEVELS, 1)
585 def _MakeLegacyNodeInfo(data):
586 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
588 Converts the data into a single dictionary. This is fine for most use cases,
589 but some require information from more than one volume group or hypervisor.
592 (bootid, (vg_info, ), (hv_info, )) = data
594 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
599 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
600 """Checks if the owned node groups are still correct for an instance.
602 @type cfg: L{config.ConfigWriter}
603 @param cfg: The cluster configuration
604 @type instance_name: string
605 @param instance_name: Instance name
606 @type owned_groups: set or frozenset
607 @param owned_groups: List of currently owned node groups
610 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
612 if not owned_groups.issuperset(inst_groups):
613 raise errors.OpPrereqError("Instance %s's node groups changed since"
614 " locks were acquired, current groups are"
615 " are '%s', owning groups '%s'; retry the"
618 utils.CommaJoin(inst_groups),
619 utils.CommaJoin(owned_groups)),
625 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
626 """Checks if the instances in a node group are still correct.
628 @type cfg: L{config.ConfigWriter}
629 @param cfg: The cluster configuration
630 @type group_uuid: string
631 @param group_uuid: Node group UUID
632 @type owned_instances: set or frozenset
633 @param owned_instances: List of currently owned instances
636 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
637 if owned_instances != wanted_instances:
638 raise errors.OpPrereqError("Instances in node group '%s' changed since"
639 " locks were acquired, wanted '%s', have '%s';"
640 " retry the operation" %
642 utils.CommaJoin(wanted_instances),
643 utils.CommaJoin(owned_instances)),
646 return wanted_instances
649 def _SupportsOob(cfg, node):
650 """Tells if node supports OOB.
652 @type cfg: L{config.ConfigWriter}
653 @param cfg: The cluster configuration
654 @type node: L{objects.Node}
655 @param node: The node
656 @return: The OOB script if supported or an empty string otherwise
659 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
662 def _GetWantedNodes(lu, nodes):
663 """Returns list of checked and expanded node names.
665 @type lu: L{LogicalUnit}
666 @param lu: the logical unit on whose behalf we execute
668 @param nodes: list of node names or None for all nodes
670 @return: the list of nodes, sorted
671 @raise errors.ProgrammerError: if the nodes parameter is wrong type
675 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
677 return utils.NiceSort(lu.cfg.GetNodeList())
680 def _GetWantedInstances(lu, instances):
681 """Returns list of checked and expanded instance names.
683 @type lu: L{LogicalUnit}
684 @param lu: the logical unit on whose behalf we execute
685 @type instances: list
686 @param instances: list of instance names or None for all instances
688 @return: the list of instances, sorted
689 @raise errors.OpPrereqError: if the instances parameter is wrong type
690 @raise errors.OpPrereqError: if any of the passed instances is not found
694 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
696 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
700 def _GetUpdatedParams(old_params, update_dict,
701 use_default=True, use_none=False):
702 """Return the new version of a parameter dictionary.
704 @type old_params: dict
705 @param old_params: old parameters
706 @type update_dict: dict
707 @param update_dict: dict containing new parameter values, or
708 constants.VALUE_DEFAULT to reset the parameter to its default
710 @type use_default: boolean
711 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
712 values as 'to be deleted' values
713 @type use_none: boolean
714 @param use_none: whether to recognise C{None} values as 'to be
717 @return: the new parameter dictionary
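As an illustration with made-up keys, assuming the default C{use_default=True}::

  _GetUpdatedParams({"a": 1, "b": 2},
                    {"a": constants.VALUE_DEFAULT, "c": 3})
  # -> {"b": 2, "c": 3}; "a" is reset (removed), "c" is added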
720 params_copy = copy.deepcopy(old_params)
721 for key, val in update_dict.iteritems():
722 if ((use_default and val == constants.VALUE_DEFAULT) or
723 (use_none and val is None)):
729 params_copy[key] = val
733 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
734 """Return the new version of a instance policy.
736 @param group_policy: whether this policy applies to a group and thus
737 we should support removal of policy entries
740 use_none = use_default = group_policy
741 ipolicy = copy.deepcopy(old_ipolicy)
742 for key, value in new_ipolicy.items():
743 if key not in constants.IPOLICY_ALL_KEYS:
744 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
746 if key in constants.IPOLICY_ISPECS:
747 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
748 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
750 use_default=use_default)
752 if not value or value == [constants.VALUE_DEFAULT]:
756 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
757 " on the cluster'" % key,
760 if key in constants.IPOLICY_PARAMETERS:
761 # FIXME: we assume all such values are float
763 ipolicy[key] = float(value)
764 except (TypeError, ValueError), err:
765 raise errors.OpPrereqError("Invalid value for attribute"
766 " '%s': '%s', error: %s" %
767 (key, value, err), errors.ECODE_INVAL)
769 # FIXME: we assume all others are lists; this should be redone
771 ipolicy[key] = list(value)
773 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
774 except errors.ConfigurationError, err:
775 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
780 def _UpdateAndVerifySubDict(base, updates, type_check):
781 """Updates and verifies a dict with sub dicts of the same type.
783 @param base: The dict with the old data
784 @param updates: The dict with the new data
785 @param type_check: Dict suitable to ForceDictType to verify correct types
786 @returns: A new dict with updated and verified values
790 new = _GetUpdatedParams(old, value)
791 utils.ForceDictType(new, type_check)
794 ret = copy.deepcopy(base)
795 ret.update(dict((key, fn(base.get(key, {}), value))
796 for key, value in updates.items()))
800 def _MergeAndVerifyHvState(op_input, obj_input):
801 """Combines the hv state from an opcode with the one of the object
803 @param op_input: The input dict from the opcode
804 @param obj_input: The input dict from the objects
805 @return: The verified and updated dict
809 invalid_hvs = set(op_input) - constants.HYPER_TYPES
811 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
812 " %s" % utils.CommaJoin(invalid_hvs),
814 if obj_input is None:
816 type_check = constants.HVSTS_PARAMETER_TYPES
817 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
822 def _MergeAndVerifyDiskState(op_input, obj_input):
823 """Combines the disk state from an opcode with the one of the object
825 @param op_input: The input dict from the opcode
826 @param obj_input: The input dict from the objects
827 @return: The verified and updated dict
830 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
832 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
833 utils.CommaJoin(invalid_dst),
835 type_check = constants.DSS_PARAMETER_TYPES
836 if obj_input is None:
838 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
840 for key, value in op_input.items())
845 def _ReleaseLocks(lu, level, names=None, keep=None):
846 """Releases locks owned by an LU.
848 @type lu: L{LogicalUnit}
849 @param level: Lock level
850 @type names: list or None
851 @param names: Names of locks to release
852 @type keep: list or None
853 @param keep: Names of locks to retain
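A minimal usage sketch (the C{instance} object is hypothetical here)::

  # keep only the lock on the instance's primary node, release the rest
  _ReleaseLocks(self, locking.LEVEL_NODE, keep=[instance.primary_node])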
856 assert not (keep is not None and names is not None), \
857 "Only one of the 'names' and the 'keep' parameters can be given"
859 if names is not None:
860 should_release = names.__contains__
862 should_release = lambda name: name not in keep
864 should_release = None
866 owned = lu.owned_locks(level)
868 # Not owning any lock at this level, do nothing
875 # Determine which locks to release
877 if should_release(name):
882 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
884 # Release just some locks
885 lu.glm.release(level, names=release)
887 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
890 lu.glm.release(level)
892 assert not lu.glm.is_owned(level), "No locks should be owned"
895 def _MapInstanceDisksToNodes(instances):
896 """Creates a map from (node, volume) to instance name.
898 @type instances: list of L{objects.Instance}
899 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
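For a hypothetical single DRBD instance the result could look like
(node and volume names are purely illustrative)::

  {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
   ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}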
902 return dict(((node, vol), inst.name)
903 for inst in instances
904 for (node, vols) in inst.MapLVsByNode().items()
908 def _RunPostHook(lu, node_name):
909 """Runs the post-hook for an opcode on a single node.
912 hm = lu.proc.BuildHooksManager(lu)
914 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
916 # pylint: disable=W0702
917 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
920 def _CheckOutputFields(static, dynamic, selected):
921 """Checks whether all selected fields are valid.
923 @type static: L{utils.FieldSet}
924 @param static: static fields set
925 @type dynamic: L{utils.FieldSet}
926 @param dynamic: dynamic fields set
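A typical call from an LU's CheckArguments could look like this sketch
(the field names are only illustrative)::

  _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
                     dynamic=utils.FieldSet("free_memory"),
                     selected=self.op.output_fields)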
933 delta = f.NonMatching(selected)
935 raise errors.OpPrereqError("Unknown output fields selected: %s"
936 % ",".join(delta), errors.ECODE_INVAL)
939 def _CheckGlobalHvParams(params):
940 """Validates that given hypervisor params are not global ones.
942 This will ensure that instances don't get customised versions of
946 used_globals = constants.HVC_GLOBALS.intersection(params)
948 msg = ("The following hypervisor parameters are global and cannot"
949 " be customized at instance level, please modify them at"
950 " cluster level: %s" % utils.CommaJoin(used_globals))
951 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
954 def _CheckNodeOnline(lu, node, msg=None):
955 """Ensure that a given node is online.
957 @param lu: the LU on behalf of which we make the check
958 @param node: the node to check
959 @param msg: if passed, should be a message to replace the default one
960 @raise errors.OpPrereqError: if the node is offline
964 msg = "Can't use offline node"
965 if lu.cfg.GetNodeInfo(node).offline:
966 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
969 def _CheckNodeNotDrained(lu, node):
970 """Ensure that a given node is not drained.
972 @param lu: the LU on behalf of which we make the check
973 @param node: the node to check
974 @raise errors.OpPrereqError: if the node is drained
977 if lu.cfg.GetNodeInfo(node).drained:
978 raise errors.OpPrereqError("Can't use drained node %s" % node,
982 def _CheckNodeVmCapable(lu, node):
983 """Ensure that a given node is vm capable.
985 @param lu: the LU on behalf of which we make the check
986 @param node: the node to check
987 @raise errors.OpPrereqError: if the node is not vm capable
990 if not lu.cfg.GetNodeInfo(node).vm_capable:
991 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
995 def _CheckNodeHasOS(lu, node, os_name, force_variant):
996 """Ensure that a node supports a given OS.
998 @param lu: the LU on behalf of which we make the check
999 @param node: the node to check
1000 @param os_name: the OS to query about
1001 @param force_variant: whether to ignore variant errors
1002 @raise errors.OpPrereqError: if the node is not supporting the OS
1005 result = lu.rpc.call_os_get(node, os_name)
1006 result.Raise("OS '%s' not in supported OS list for node %s" %
1008 prereq=True, ecode=errors.ECODE_INVAL)
1009 if not force_variant:
1010 _CheckOSVariant(result.payload, os_name)
1013 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1014 """Ensure that a node has the given secondary ip.
1016 @type lu: L{LogicalUnit}
1017 @param lu: the LU on behalf of which we make the check
1019 @param node: the node to check
1020 @type secondary_ip: string
1021 @param secondary_ip: the ip to check
1022 @type prereq: boolean
1023 @param prereq: whether to throw a prerequisite or an execute error
1024 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1025 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1028 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1029 result.Raise("Failure checking secondary ip on node %s" % node,
1030 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1031 if not result.payload:
1032 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1033 " please fix and re-run this command" % secondary_ip)
1035 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1037 raise errors.OpExecError(msg)
1040 def _GetClusterDomainSecret():
1041 """Reads the cluster domain secret.
1044 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1048 def _CheckInstanceState(lu, instance, req_states, msg=None):
1049 """Ensure that an instance is in one of the required states.
1051 @param lu: the LU on behalf of which we make the check
1052 @param instance: the instance to check
1053 @param msg: if passed, should be a message to replace the default one
1054 @raise errors.OpPrereqError: if the instance is not in the required state
1058 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1059 if instance.admin_state not in req_states:
1060 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1061 (instance.name, instance.admin_state, msg),
1064 if constants.ADMINST_UP not in req_states:
1065 pnode = instance.primary_node
1066 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1067 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1068 prereq=True, ecode=errors.ECODE_ENVIRON)
1070 if instance.name in ins_l.payload:
1071 raise errors.OpPrereqError("Instance %s is running, %s" %
1072 (instance.name, msg), errors.ECODE_STATE)
1075 def _ComputeMinMaxSpec(name, ipolicy, value):
1076 """Computes if value is in the desired range.
1078 @param name: name of the parameter for which we perform the check
1079 @param ipolicy: dictionary containing min, max and std values
1080 @param value: actual value that we want to use
1081 @return: None or element not meeting the criteria
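As a sketch, assuming a hypothetical ipolicy whose memory-size bounds are
128 and 32768::

  _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 64)
  # -> error string naming the parameter and the [128, 32768] range
  _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 1024)
  # -> None (value is within the range)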
1085 if value in [None, constants.VALUE_AUTO]:
1087 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1088 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1089 if value > max_v or min_v > value:
1090 return ("%s value %s is not in range [%s, %s]" %
1091 (name, value, min_v, max_v))
1095 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1096 nic_count, disk_sizes, spindle_use,
1097 _compute_fn=_ComputeMinMaxSpec):
1098 """Verifies ipolicy against provided specs.
1101 @param ipolicy: The ipolicy
1103 @param mem_size: The memory size
1104 @type cpu_count: int
1105 @param cpu_count: Used cpu cores
1106 @type disk_count: int
1107 @param disk_count: Number of disks used
1108 @type nic_count: int
1109 @param nic_count: Number of nics used
1110 @type disk_sizes: list of ints
1111 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1112 @type spindle_use: int
1113 @param spindle_use: The number of spindles this instance uses
1114 @param _compute_fn: The compute function (unittest only)
1115 @return: A list of violations, or an empty list if no violations are found
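A hypothetical call for a 2-disk, 1-NIC instance spec could look like::

  _ComputeIPolicySpecViolation(ipolicy, 1024, 2, 2, 1, [10240, 10240], 1)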
1118 assert disk_count == len(disk_sizes)
1121 (constants.ISPEC_MEM_SIZE, mem_size),
1122 (constants.ISPEC_CPU_COUNT, cpu_count),
1123 (constants.ISPEC_DISK_COUNT, disk_count),
1124 (constants.ISPEC_NIC_COUNT, nic_count),
1125 (constants.ISPEC_SPINDLE_USE, spindle_use),
1126 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1129 (_compute_fn(name, ipolicy, value)
1130 for (name, value) in test_settings))
1133 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1134 _compute_fn=_ComputeIPolicySpecViolation):
1135 """Compute if instance meets the specs of ipolicy.
1138 @param ipolicy: The ipolicy to verify against
1139 @type instance: L{objects.Instance}
1140 @param instance: The instance to verify
1141 @param _compute_fn: The function to verify ipolicy (unittest only)
1142 @see: L{_ComputeIPolicySpecViolation}
1145 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1146 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1147 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1148 disk_count = len(instance.disks)
1149 disk_sizes = [disk.size for disk in instance.disks]
1150 nic_count = len(instance.nics)
1152 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1153 disk_sizes, spindle_use)
1156 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1157 _compute_fn=_ComputeIPolicySpecViolation):
1158 """Compute if instance specs meets the specs of ipolicy.
1161 @param ipolicy: The ipolicy to verify against
1162 @type instance_spec: dict
1163 @param instance_spec: The instance spec to verify
1164 @param _compute_fn: The function to verify ipolicy (unittest only)
1165 @see: L{_ComputeIPolicySpecViolation}
1168 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1169 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1170 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1171 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1172 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1173 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1175 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1176 disk_sizes, spindle_use)
1179 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1181 _compute_fn=_ComputeIPolicyInstanceViolation):
1182 """Compute if instance meets the specs of the new target group.
1184 @param ipolicy: The ipolicy to verify
1185 @param instance: The instance object to verify
1186 @param current_group: The current group of the instance
1187 @param target_group: The new group of the instance
1188 @param _compute_fn: The function to verify ipolicy (unittest only)
1189 @see: L{_ComputeIPolicySpecViolation}
1192 if current_group == target_group:
1195 return _compute_fn(ipolicy, instance)
1198 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1199 _compute_fn=_ComputeIPolicyNodeViolation):
1200 """Checks that the target node is correct in terms of instance policy.
1202 @param ipolicy: The ipolicy to verify
1203 @param instance: The instance object to verify
1204 @param node: The new node the instance is being relocated to
1205 @param ignore: Ignore violations of the ipolicy
1206 @param _compute_fn: The function to verify ipolicy (unittest only)
1207 @see: L{_ComputeIPolicySpecViolation}
1210 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1211 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1214 msg = ("Instance does not meet target node group's (%s) instance"
1215 " policy: %s") % (node.group, utils.CommaJoin(res))
1219 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1222 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1223 """Computes a set of any instances that would violate the new ipolicy.
1225 @param old_ipolicy: The current (still in-place) ipolicy
1226 @param new_ipolicy: The new (to become) ipolicy
1227 @param instances: List of instances to verify
1228 @return: A set of instances which violate the new ipolicy but did not before
1231 return (_ComputeViolatingInstances(old_ipolicy, instances) -
1232 _ComputeViolatingInstances(new_ipolicy, instances))
1235 def _ExpandItemName(fn, name, kind):
1236 """Expand an item name.
1238 @param fn: the function to use for expansion
1239 @param name: requested item name
1240 @param kind: text description ('Node' or 'Instance')
1241 @return: the resolved (full) name
1242 @raise errors.OpPrereqError: if the item is not found
1245 full_name = fn(name)
1246 if full_name is None:
1247 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1252 def _ExpandNodeName(cfg, name):
1253 """Wrapper over L{_ExpandItemName} for nodes."""
1254 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1257 def _ExpandInstanceName(cfg, name):
1258 """Wrapper over L{_ExpandItemName} for instance."""
1259 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1262 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1263 minmem, maxmem, vcpus, nics, disk_template, disks,
1264 bep, hvp, hypervisor_name, tags):
1265 """Builds instance related env variables for hooks
1267 This builds the hook environment from individual variables.
1270 @param name: the name of the instance
1271 @type primary_node: string
1272 @param primary_node: the name of the instance's primary node
1273 @type secondary_nodes: list
1274 @param secondary_nodes: list of secondary nodes as strings
1275 @type os_type: string
1276 @param os_type: the name of the instance's OS
1277 @type status: string
1278 @param status: the desired status of the instance
1279 @type minmem: string
1280 @param minmem: the minimum memory size of the instance
1281 @type maxmem: string
1282 @param maxmem: the maximum memory size of the instance
1284 @param vcpus: the count of VCPUs the instance has
1286 @param nics: list of tuples (ip, mac, mode, link) representing
1287 the NICs the instance has
1288 @type disk_template: string
1289 @param disk_template: the disk template of the instance
1291 @param disks: the list of (size, mode) pairs
1293 @param bep: the backend parameters for the instance
1295 @param hvp: the hypervisor parameters for the instance
1296 @type hypervisor_name: string
1297 @param hypervisor_name: the hypervisor for the instance
1299 @param tags: list of instance tags as strings
1301 @return: the hook environment for this instance
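For a hypothetical one-NIC, one-disk instance the environment built below
contains entries along these lines (values are only illustrative)::

  INSTANCE_NAME=inst1.example.com
  INSTANCE_PRIMARY=node1.example.com
  INSTANCE_NIC_COUNT=1
  INSTANCE_NIC0_MAC=aa:00:00:35:1a:02
  INSTANCE_DISK_COUNT=1
  INSTANCE_DISK0_SIZE=10240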
1306 "INSTANCE_NAME": name,
1307 "INSTANCE_PRIMARY": primary_node,
1308 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1309 "INSTANCE_OS_TYPE": os_type,
1310 "INSTANCE_STATUS": status,
1311 "INSTANCE_MINMEM": minmem,
1312 "INSTANCE_MAXMEM": maxmem,
1313 # TODO(2.7) remove deprecated "memory" value
1314 "INSTANCE_MEMORY": maxmem,
1315 "INSTANCE_VCPUS": vcpus,
1316 "INSTANCE_DISK_TEMPLATE": disk_template,
1317 "INSTANCE_HYPERVISOR": hypervisor_name,
1320 nic_count = len(nics)
1321 for idx, (ip, mac, mode, link) in enumerate(nics):
1324 env["INSTANCE_NIC%d_IP" % idx] = ip
1325 env["INSTANCE_NIC%d_MAC" % idx] = mac
1326 env["INSTANCE_NIC%d_MODE" % idx] = mode
1327 env["INSTANCE_NIC%d_LINK" % idx] = link
1328 if mode == constants.NIC_MODE_BRIDGED:
1329 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1333 env["INSTANCE_NIC_COUNT"] = nic_count
1336 disk_count = len(disks)
1337 for idx, (size, mode) in enumerate(disks):
1338 env["INSTANCE_DISK%d_SIZE" % idx] = size
1339 env["INSTANCE_DISK%d_MODE" % idx] = mode
1343 env["INSTANCE_DISK_COUNT"] = disk_count
1348 env["INSTANCE_TAGS"] = " ".join(tags)
1350 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1351 for key, value in source.items():
1352 env["INSTANCE_%s_%s" % (kind, key)] = value
1357 def _NICListToTuple(lu, nics):
1358 """Build a list of nic information tuples.
1360 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1361 value in LUInstanceQueryData.
1363 @type lu: L{LogicalUnit}
1364 @param lu: the logical unit on whose behalf we execute
1365 @type nics: list of L{objects.NIC}
1366 @param nics: list of nics to convert to hooks tuples
1370 cluster = lu.cfg.GetClusterInfo()
1374 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1375 mode = filled_params[constants.NIC_MODE]
1376 link = filled_params[constants.NIC_LINK]
1377 hooks_nics.append((ip, mac, mode, link))
1381 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1382 """Builds instance related env variables for hooks from an object.
1384 @type lu: L{LogicalUnit}
1385 @param lu: the logical unit on whose behalf we execute
1386 @type instance: L{objects.Instance}
1387 @param instance: the instance for which we should build the
1389 @type override: dict
1390 @param override: dictionary with key/values that will override
1393 @return: the hook environment dictionary
1396 cluster = lu.cfg.GetClusterInfo()
1397 bep = cluster.FillBE(instance)
1398 hvp = cluster.FillHV(instance)
1400 "name": instance.name,
1401 "primary_node": instance.primary_node,
1402 "secondary_nodes": instance.secondary_nodes,
1403 "os_type": instance.os,
1404 "status": instance.admin_state,
1405 "maxmem": bep[constants.BE_MAXMEM],
1406 "minmem": bep[constants.BE_MINMEM],
1407 "vcpus": bep[constants.BE_VCPUS],
1408 "nics": _NICListToTuple(lu, instance.nics),
1409 "disk_template": instance.disk_template,
1410 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1413 "hypervisor_name": instance.hypervisor,
1414 "tags": instance.tags,
1417 args.update(override)
1418 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1421 def _AdjustCandidatePool(lu, exceptions):
1422 """Adjust the candidate pool after node operations.
1425 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1427 lu.LogInfo("Promoted nodes to master candidate role: %s",
1428 utils.CommaJoin(node.name for node in mod_list))
1429 for name in mod_list:
1430 lu.context.ReaddNode(name)
1431 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1433 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1437 def _DecideSelfPromotion(lu, exceptions=None):
1438 """Decide whether I should promote myself as a master candidate.
1441 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1442 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1443 # the new node will increase mc_max by one, so:
1444 mc_should = min(mc_should + 1, cp_size)
1445 return mc_now < mc_should
1448 def _CalculateGroupIPolicy(cluster, group):
1449 """Calculate instance policy for group.
1452 return cluster.SimpleFillIPolicy(group.ipolicy)
1455 def _ComputeViolatingInstances(ipolicy, instances):
1456 """Computes a set of instances who violates given ipolicy.
1458 @param ipolicy: The ipolicy to verify
1459 @type instances: list of L{objects.Instance}
1460 @param instances: List of instances to verify
1461 @return: A frozenset of instance names violating the ipolicy
1464 return frozenset([inst.name for inst in instances
1465 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1468 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1469 """Check that the brigdes needed by a list of nics exist.
1472 cluster = lu.cfg.GetClusterInfo()
1473 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1474 brlist = [params[constants.NIC_LINK] for params in paramslist
1475 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1477 result = lu.rpc.call_bridges_exist(target_node, brlist)
1478 result.Raise("Error checking bridges on destination node '%s'" %
1479 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1482 def _CheckInstanceBridgesExist(lu, instance, node=None):
1483 """Check that the brigdes needed by an instance exist.
1487 node = instance.primary_node
1488 _CheckNicsBridgesExist(lu, instance.nics, node)
1491 def _CheckOSVariant(os_obj, name):
1492 """Check whether an OS name conforms to the os variants specification.
1494 @type os_obj: L{objects.OS}
1495 @param os_obj: OS object to check
1497 @param name: OS name passed by the user, to check for validity
1500 variant = objects.OS.GetVariant(name)
1501 if not os_obj.supported_variants:
1503 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1504 " passed)" % (os_obj.name, variant),
1508 raise errors.OpPrereqError("OS name must include a variant",
1511 if variant not in os_obj.supported_variants:
1512 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1515 def _GetNodeInstancesInner(cfg, fn):
1516 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1519 def _GetNodeInstances(cfg, node_name):
1520 """Returns a list of all primary and secondary instances on a node.
1524 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1527 def _GetNodePrimaryInstances(cfg, node_name):
1528 """Returns primary instances on a node.
1531 return _GetNodeInstancesInner(cfg,
1532 lambda inst: node_name == inst.primary_node)
1535 def _GetNodeSecondaryInstances(cfg, node_name):
1536 """Returns secondary instances on a node.
1539 return _GetNodeInstancesInner(cfg,
1540 lambda inst: node_name in inst.secondary_nodes)
1543 def _GetStorageTypeArgs(cfg, storage_type):
1544 """Returns the arguments for a storage type.
1547 # Special case for file storage
1548 if storage_type == constants.ST_FILE:
1549 # storage.FileStorage wants a list of storage directories
1550 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1555 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1558 for dev in instance.disks:
1559 cfg.SetDiskID(dev, node_name)
1561 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1562 result.Raise("Failed to get disk status from node %s" % node_name,
1563 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1565 for idx, bdev_status in enumerate(result.payload):
1566 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1572 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1573 """Check the sanity of iallocator and node arguments and use the
1574 cluster-wide iallocator if appropriate.
1576 Check that at most one of (iallocator, node) is specified. If none is
1577 specified, then the LU's opcode's iallocator slot is filled with the
1578 cluster-wide default iallocator.
1580 @type iallocator_slot: string
1581 @param iallocator_slot: the name of the opcode iallocator slot
1582 @type node_slot: string
1583 @param node_slot: the name of the opcode target node slot
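A minimal sketch of the intended use from an LU's CheckArguments, assuming the
calling opcode has "iallocator" and "node" slots::

  _CheckIAllocatorOrNode(self, "iallocator", "node")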
1586 node = getattr(lu.op, node_slot, None)
1587 iallocator = getattr(lu.op, iallocator_slot, None)
1589 if node is not None and iallocator is not None:
1590 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1592 elif node is None and iallocator is None:
1593 default_iallocator = lu.cfg.GetDefaultIAllocator()
1594 if default_iallocator:
1595 setattr(lu.op, iallocator_slot, default_iallocator)
1597 raise errors.OpPrereqError("No iallocator or node given and no"
1598 " cluster-wide default iallocator found;"
1599 " please specify either an iallocator or a"
1600 " node, or set a cluster-wide default"
1604 def _GetDefaultIAllocator(cfg, iallocator):
1605 """Decides on which iallocator to use.
1607 @type cfg: L{config.ConfigWriter}
1608 @param cfg: Cluster configuration object
1609 @type iallocator: string or None
1610 @param iallocator: Iallocator specified in opcode
1612 @return: Iallocator name
1616 # Use default iallocator
1617 iallocator = cfg.GetDefaultIAllocator()
1620 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1621 " opcode nor as a cluster-wide default",
1627 class LUClusterPostInit(LogicalUnit):
1628 """Logical unit for running hooks after cluster initialization.
1631 HPATH = "cluster-init"
1632 HTYPE = constants.HTYPE_CLUSTER
1634 def BuildHooksEnv(self):
1639 "OP_TARGET": self.cfg.GetClusterName(),
1642 def BuildHooksNodes(self):
1643 """Build hooks nodes.
1646 return ([], [self.cfg.GetMasterNode()])
1648 def Exec(self, feedback_fn):
1655 class LUClusterDestroy(LogicalUnit):
1656 """Logical unit for destroying the cluster.
1659 HPATH = "cluster-destroy"
1660 HTYPE = constants.HTYPE_CLUSTER
1662 def BuildHooksEnv(self):
1667 "OP_TARGET": self.cfg.GetClusterName(),
1670 def BuildHooksNodes(self):
1671 """Build hooks nodes.
1676 def CheckPrereq(self):
1677 """Check prerequisites.
1679 This checks whether the cluster is empty.
1681 Any errors are signaled by raising errors.OpPrereqError.
1684 master = self.cfg.GetMasterNode()
1686 nodelist = self.cfg.GetNodeList()
1687 if len(nodelist) != 1 or nodelist[0] != master:
1688 raise errors.OpPrereqError("There are still %d node(s) in"
1689 " this cluster." % (len(nodelist) - 1),
1691 instancelist = self.cfg.GetInstanceList()
1693 raise errors.OpPrereqError("There are still %d instance(s) in"
1694 " this cluster." % len(instancelist),
1697 def Exec(self, feedback_fn):
1698 """Destroys the cluster.
1701 master_params = self.cfg.GetMasterNetworkParameters()
1703 # Run post hooks on master node before it's removed
1704 _RunPostHook(self, master_params.name)
1706 ems = self.cfg.GetUseExternalMipScript()
1707 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1710 self.LogWarning("Error disabling the master IP address: %s",
1713 return master_params.name
1716 def _VerifyCertificate(filename):
1717 """Verifies a certificate for L{LUClusterVerifyConfig}.
1719 @type filename: string
1720 @param filename: Path to PEM file
1724 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1725 utils.ReadFile(filename))
1726 except Exception, err: # pylint: disable=W0703
1727 return (LUClusterVerifyConfig.ETYPE_ERROR,
1728 "Failed to load X509 certificate %s: %s" % (filename, err))
1731 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1732 constants.SSL_CERT_EXPIRATION_ERROR)
1735 fnamemsg = "While verifying %s: %s" % (filename, msg)
1740 return (None, fnamemsg)
1741 elif errcode == utils.CERT_WARNING:
1742 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1743 elif errcode == utils.CERT_ERROR:
1744 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1746 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1749 def _GetAllHypervisorParameters(cluster, instances):
1750 """Compute the set of all hypervisor parameters.
1752 @type cluster: L{objects.Cluster}
1753 @param cluster: the cluster object
1754 @type instances: list of L{objects.Instance}
1755 @param instances: additional instances from which to obtain parameters
1756 @rtype: list of (origin, hypervisor, parameters)
1757 @return: a list with all parameters found, indicating the hypervisor they
1758 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1763 for hv_name in cluster.enabled_hypervisors:
1764 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1766 for os_name, os_hvp in cluster.os_hvp.items():
1767 for hv_name, hv_params in os_hvp.items():
1769 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1770 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1772 # TODO: collapse identical parameter values in a single one
1773 for instance in instances:
1774 if instance.hvparams:
1775 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1776 cluster.FillHV(instance)))
1781 class _VerifyErrors(object):
1782 """Mix-in for cluster/group verify LUs.
1784 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1785 self.op and self._feedback_fn to be available.)
1789 ETYPE_FIELD = "code"
1790 ETYPE_ERROR = "ERROR"
1791 ETYPE_WARNING = "WARNING"
1793 def _Error(self, ecode, item, msg, *args, **kwargs):
1794 """Format an error message.
1796 Based on the opcode's error_codes parameter, either format a
1797 parseable error code, or a simpler error string.
1799 This must be called only from Exec and functions called from Exec.
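With the opcode's error_codes option enabled, the emitted line takes the
machine-parseable colon-separated form; the values below are purely
illustrative::

  ERROR:ECLUSTERCFG:cluster:<item>:<message text>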
1802 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1803 itype, etxt, _ = ecode
1804 # first complete the msg
1807 # then format the whole message
1808 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1809 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1815 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1816 # and finally report it via the feedback_fn
1817 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1819 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1820 """Log an error message if the passed condition is True.
1824 or self.op.debug_simulate_errors) # pylint: disable=E1101
1826 # If the error code is in the list of ignored errors, demote the error to a
1828 (_, etxt, _) = ecode
1829 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1830 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1833 self._Error(ecode, *args, **kwargs)
1835 # do not mark the operation as failed when only warnings were raised
1836 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1837 self.bad = self.bad or cond
1840 class LUClusterVerify(NoHooksLU):
1841 """Submits all jobs necessary to verify the cluster.
1846 def ExpandNames(self):
1847 self.needed_locks = {}
1849 def Exec(self, feedback_fn):
1852 if self.op.group_name:
1853 groups = [self.op.group_name]
1854 depends_fn = lambda: None
1856 groups = self.cfg.GetNodeGroupList()
1858 # Verify global configuration
1860 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1863 # Always depend on global verification
1864 depends_fn = lambda: [(-len(jobs), [])]
1866 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1867 ignore_errors=self.op.ignore_errors,
1868 depends=depends_fn())]
1869 for group in groups)
1871 # Fix up all parameters
1872 for op in itertools.chain(*jobs): # pylint: disable=W0142
1873 op.debug_simulate_errors = self.op.debug_simulate_errors
1874 op.verbose = self.op.verbose
1875 op.error_codes = self.op.error_codes
1877 op.skip_checks = self.op.skip_checks
1878 except AttributeError:
1879 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1881 return ResultWithJobs(jobs)
1884 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1885 """Verifies the cluster config.
1890 def _VerifyHVP(self, hvp_data):
1891 """Verifies locally the syntax of the hypervisor parameters.
1894 for item, hv_name, hv_params in hvp_data:
1895 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1898 hv_class = hypervisor.GetHypervisor(hv_name)
1899 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1900 hv_class.CheckParameterSyntax(hv_params)
1901 except errors.GenericError, err:
1902 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1904 def ExpandNames(self):
1905 # Information can be safely retrieved as the BGL is acquired in exclusive
1907 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1908 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1909 self.all_node_info = self.cfg.GetAllNodesInfo()
1910 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1911 self.needed_locks = {}
1913 def Exec(self, feedback_fn):
1914 """Verify integrity of cluster, performing various test on nodes.
1918 self._feedback_fn = feedback_fn
1920 feedback_fn("* Verifying cluster config")
1922 for msg in self.cfg.VerifyConfig():
1923 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1925 feedback_fn("* Verifying cluster certificate files")
1927 for cert_filename in constants.ALL_CERT_FILES:
1928 (errcode, msg) = _VerifyCertificate(cert_filename)
1929 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1931 feedback_fn("* Verifying hypervisor parameters")
1933 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1934 self.all_inst_info.values()))
1936 feedback_fn("* Verifying all nodes belong to an existing group")
1938 # We do this verification here because, should this bogus circumstance
1939 # occur, it would never be caught by VerifyGroup, which only acts on
1940 # nodes/instances reachable from existing node groups.
1942 dangling_nodes = set(node.name for node in self.all_node_info.values()
1943 if node.group not in self.all_group_info)
1945 dangling_instances = {}
1946 no_node_instances = []
1948 for inst in self.all_inst_info.values():
1949 if inst.primary_node in dangling_nodes:
1950 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1951 elif inst.primary_node not in self.all_node_info:
1952 no_node_instances.append(inst.name)
1957 utils.CommaJoin(dangling_instances.get(node.name,
1959 for node in dangling_nodes]
1961 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1963 "the following nodes (and their instances) belong to a non"
1964 " existing group: %s", utils.CommaJoin(pretty_dangling))
1966 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1968 "the following instances have a non-existing primary-node:"
1969 " %s", utils.CommaJoin(no_node_instances))
1974 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1975 """Verifies the status of a node group.
1978 HPATH = "cluster-verify"
1979 HTYPE = constants.HTYPE_CLUSTER
1982 _HOOKS_INDENT_RE = re.compile("^", re.M)
1984 class NodeImage(object):
1985 """A class representing the logical and physical status of a node.
1988 @ivar name: the node name to which this object refers
1989 @ivar volumes: a structure as returned from
1990 L{ganeti.backend.GetVolumeList} (runtime)
1991 @ivar instances: a list of running instances (runtime)
1992 @ivar pinst: list of configured primary instances (config)
1993 @ivar sinst: list of configured secondary instances (config)
1994 @ivar sbp: dictionary of {primary-node: list of instances} for all
1995 instances for which this node is secondary (config)
1996 @ivar mfree: free memory, as reported by hypervisor (runtime)
1997 @ivar dfree: free disk, as reported by the node (runtime)
1998 @ivar offline: the offline status (config)
1999 @type rpc_fail: boolean
2000 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2001 not whether the individual keys were correct) (runtime)
2002 @type lvm_fail: boolean
2003 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2004 @type hyp_fail: boolean
2005 @ivar hyp_fail: whether the RPC call didn't return the instance list
2006 @type ghost: boolean
2007 @ivar ghost: whether this is a known node or not (config)
2008 @type os_fail: boolean
2009 @ivar os_fail: whether the RPC call didn't return valid OS data
2011 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2012 @type vm_capable: boolean
2013 @ivar vm_capable: whether the node can host instances
2016 def __init__(self, offline=False, name=None, vm_capable=True):
2025 self.offline = offline
2026 self.vm_capable = vm_capable
2027 self.rpc_fail = False
2028 self.lvm_fail = False
2029 self.hyp_fail = False
2031 self.os_fail = False
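# Editor's sketch (not in the original source): after the data-collection
# phase a NodeImage for a healthy node typically carries values such as
# (node and instance names made up for illustration):
#
#   nimg = NodeImage(name="node1.example.com", offline=False)
#   nimg.pinst = ["inst1"]                       # configured primary instances
#   nimg.sinst = ["inst2"]                       # configured secondary instances
#   nimg.sbp = {"node2.example.com": ["inst2"]}  # secondaries by primary node
#   nimg.mfree = 2048                            # free memory in MiB (hypervisor)
#   nimg.volumes = {...}                         # as from backend.GetVolumeList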
2034 def ExpandNames(self):
2035 # This raises errors.OpPrereqError on its own:
2036 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2038 # Get instances in node group; this is unsafe and needs verification later
2040 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2042 self.needed_locks = {
2043 locking.LEVEL_INSTANCE: inst_names,
2044 locking.LEVEL_NODEGROUP: [self.group_uuid],
2045 locking.LEVEL_NODE: [],
2048 self.share_locks = _ShareAll()
2050 def DeclareLocks(self, level):
2051 if level == locking.LEVEL_NODE:
2052 # Get members of node group; this is unsafe and needs verification later
2053 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2055 all_inst_info = self.cfg.GetAllInstancesInfo()
2057 # In Exec(), we warn about mirrored instances that have primary and
2058 # secondary living in separate node groups. To fully verify that
2059 # volumes for these instances are healthy, we will need to do an
2060 # extra call to their secondaries. We ensure here those nodes will
2061 # be locked.
2062 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2063 # Important: access only the instances whose lock is owned
2064 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2065 nodes.update(all_inst_info[inst].secondary_nodes)
2067 self.needed_locks[locking.LEVEL_NODE] = nodes
2069 def CheckPrereq(self):
2070 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2071 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2073 group_nodes = set(self.group_info.members)
2075 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2078 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2080 unlocked_instances = \
2081 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2084 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2085 utils.CommaJoin(unlocked_nodes),
2088 if unlocked_instances:
2089 raise errors.OpPrereqError("Missing lock for instances: %s" %
2090 utils.CommaJoin(unlocked_instances),
2093 self.all_node_info = self.cfg.GetAllNodesInfo()
2094 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2096 self.my_node_names = utils.NiceSort(group_nodes)
2097 self.my_inst_names = utils.NiceSort(group_instances)
2099 self.my_node_info = dict((name, self.all_node_info[name])
2100 for name in self.my_node_names)
2102 self.my_inst_info = dict((name, self.all_inst_info[name])
2103 for name in self.my_inst_names)
2105 # We detect here the nodes that will need the extra RPC calls for verifying
2106 # split LV volumes; they should be locked.
2107 extra_lv_nodes = set()
2109 for inst in self.my_inst_info.values():
2110 if inst.disk_template in constants.DTS_INT_MIRROR:
2111 for nname in inst.all_nodes:
2112 if self.all_node_info[nname].group != self.group_uuid:
2113 extra_lv_nodes.add(nname)
2115 unlocked_lv_nodes = \
2116 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2118 if unlocked_lv_nodes:
2119 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2120 utils.CommaJoin(unlocked_lv_nodes),
2122 self.extra_lv_nodes = list(extra_lv_nodes)
2124 def _VerifyNode(self, ninfo, nresult):
2125 """Perform some basic validation on data returned from a node.
2127 - check the result data structure is well formed and has all the
2129 - check ganeti version
2131 @type ninfo: L{objects.Node}
2132 @param ninfo: the node to check
2133 @param nresult: the results from the node
2135 @return: whether overall this call was successful (and we can expect
2136 reasonable values in the response)
2140 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2142 # main result, nresult should be a non-empty dict
2143 test = not nresult or not isinstance(nresult, dict)
2144 _ErrorIf(test, constants.CV_ENODERPC, node,
2145 "unable to verify node: no data returned")
2149 # compares ganeti version
2150 local_version = constants.PROTOCOL_VERSION
2151 remote_version = nresult.get("version", None)
2152 test = not (remote_version and
2153 isinstance(remote_version, (list, tuple)) and
2154 len(remote_version) == 2)
2155 _ErrorIf(test, constants.CV_ENODERPC, node,
2156 "connection to node returned invalid data")
2160 test = local_version != remote_version[0]
2161 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2162 "incompatible protocol versions: master %s,"
2163 " node %s", local_version, remote_version[0])
2167 # node seems compatible, we can actually try to look into its results
2169 # full package version
2170 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2171 constants.CV_ENODEVERSION, node,
2172 "software version mismatch: master %s, node %s",
2173 constants.RELEASE_VERSION, remote_version[1],
2174 code=self.ETYPE_WARNING)
2176 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2177 if ninfo.vm_capable and isinstance(hyp_result, dict):
2178 for hv_name, hv_result in hyp_result.iteritems():
2179 test = hv_result is not None
2180 _ErrorIf(test, constants.CV_ENODEHV, node,
2181 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2183 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2184 if ninfo.vm_capable and isinstance(hvp_result, list):
2185 for item, hv_name, hv_result in hvp_result:
2186 _ErrorIf(True, constants.CV_ENODEHV, node,
2187 "hypervisor %s parameter verify failure (source %s): %s",
2188 hv_name, item, hv_result)
2190 test = nresult.get(constants.NV_NODESETUP,
2191 ["Missing NODESETUP results"])
2192 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2197 def _VerifyNodeTime(self, ninfo, nresult,
2198 nvinfo_starttime, nvinfo_endtime):
2199 """Check the node time.
2201 @type ninfo: L{objects.Node}
2202 @param ninfo: the node to check
2203 @param nresult: the remote results for the node
2204 @param nvinfo_starttime: the start time of the RPC call
2205 @param nvinfo_endtime: the end time of the RPC call
2209 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2211 ntime = nresult.get(constants.NV_TIME, None)
2212 try:
2213 ntime_merged = utils.MergeTime(ntime)
2214 except (ValueError, TypeError):
2215 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2218 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2219 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2220 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2221 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2225 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2226 "Node time diverges by at least %s from master node time",
2229 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2230 """Check the node LVM results.
2232 @type ninfo: L{objects.Node}
2233 @param ninfo: the node to check
2234 @param nresult: the remote results for the node
2235 @param vg_name: the configured VG name
2242 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2244 # checks vg existence and size > 20G
2245 vglist = nresult.get(constants.NV_VGLIST, None)
2246 test = not vglist
2247 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2249 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2250 constants.MIN_VG_SIZE)
2251 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2254 pvlist = nresult.get(constants.NV_PVLIST, None)
2255 test = pvlist is None
2256 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2258 # check that ':' is not present in PV names, since it's a
2259 # special character for lvcreate (denotes the range of PEs to
2260 # use on the PV)
2261 for _, pvname, owner_vg in pvlist:
2262 test = ":" in pvname
2263 _ErrorIf(test, constants.CV_ENODELVM, node,
2264 "Invalid character ':' in PV '%s' of VG '%s'",
2267 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2268 """Check the node bridges.
2270 @type ninfo: L{objects.Node}
2271 @param ninfo: the node to check
2272 @param nresult: the remote results for the node
2273 @param bridges: the expected list of bridges
2280 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2282 missing = nresult.get(constants.NV_BRIDGES, None)
2283 test = not isinstance(missing, list)
2284 _ErrorIf(test, constants.CV_ENODENET, node,
2285 "did not return valid bridge information")
2287 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2288 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2290 def _VerifyNodeUserScripts(self, ninfo, nresult):
2291 """Check the results of user scripts presence and executability on the node
2293 @type ninfo: L{objects.Node}
2294 @param ninfo: the node to check
2295 @param nresult: the remote results for the node
2300 test = not constants.NV_USERSCRIPTS in nresult
2301 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2302 "did not return user scripts information")
2304 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2306 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2307 "user scripts not present or not executable: %s" %
2308 utils.CommaJoin(sorted(broken_scripts)))
2310 def _VerifyNodeNetwork(self, ninfo, nresult):
2311 """Check the node network connectivity results.
2313 @type ninfo: L{objects.Node}
2314 @param ninfo: the node to check
2315 @param nresult: the remote results for the node
2319 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2321 test = constants.NV_NODELIST not in nresult
2322 _ErrorIf(test, constants.CV_ENODESSH, node,
2323 "node hasn't returned node ssh connectivity data")
2325 if nresult[constants.NV_NODELIST]:
2326 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2327 _ErrorIf(True, constants.CV_ENODESSH, node,
2328 "ssh communication with node '%s': %s", a_node, a_msg)
2330 test = constants.NV_NODENETTEST not in nresult
2331 _ErrorIf(test, constants.CV_ENODENET, node,
2332 "node hasn't returned node tcp connectivity data")
2334 if nresult[constants.NV_NODENETTEST]:
2335 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2337 _ErrorIf(True, constants.CV_ENODENET, node,
2338 "tcp communication with node '%s': %s",
2339 anode, nresult[constants.NV_NODENETTEST][anode])
2341 test = constants.NV_MASTERIP not in nresult
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "node hasn't returned node master IP reachability data")
2345 if not nresult[constants.NV_MASTERIP]:
2346 if node == self.master_node:
2347 msg = "the master node cannot reach the master IP (not configured?)"
2348 else:
2349 msg = "cannot reach the master IP"
2350 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2352 def _VerifyInstance(self, instance, instanceconfig, node_image,
2354 """Verify an instance.
2356 This function checks to see if the required block devices are
2357 available on the instance's node.
2360 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2361 node_current = instanceconfig.primary_node
2363 node_vol_should = {}
2364 instanceconfig.MapLVsByNode(node_vol_should)
2366 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2367 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2368 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2370 for node in node_vol_should:
2371 n_img = node_image[node]
2372 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2373 # ignore missing volumes on offline or broken nodes
2375 for volume in node_vol_should[node]:
2376 test = volume not in n_img.volumes
2377 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2378 "volume %s missing on node %s", volume, node)
2380 if instanceconfig.admin_state == constants.ADMINST_UP:
2381 pri_img = node_image[node_current]
2382 test = instance not in pri_img.instances and not pri_img.offline
2383 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2384 "instance not running on its primary node %s",
2387 diskdata = [(nname, success, status, idx)
2388 for (nname, disks) in diskstatus.items()
2389 for idx, (success, status) in enumerate(disks)]
2391 for nname, success, bdev_status, idx in diskdata:
2392 # the 'ghost node' construction in Exec() ensures that we have a
2394 snode = node_image[nname]
2395 bad_snode = snode.ghost or snode.offline
2396 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2397 not success and not bad_snode,
2398 constants.CV_EINSTANCEFAULTYDISK, instance,
2399 "couldn't retrieve status for disk/%s on %s: %s",
2400 idx, nname, bdev_status)
2401 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2402 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2403 constants.CV_EINSTANCEFAULTYDISK, instance,
2404 "disk/%s on %s is faulty", idx, nname)
2406 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2407 """Verify if there are any unknown volumes in the cluster.
2409 The .os, .swap and backup volumes are ignored. All other volumes are
2410 reported as unknown.
2412 @type reserved: L{ganeti.utils.FieldSet}
2413 @param reserved: a FieldSet of reserved volume names
2416 for node, n_img in node_image.items():
2417 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2418 self.all_node_info[node].group != self.group_uuid):
2419 # skip non-healthy nodes
2421 for volume in n_img.volumes:
2422 test = ((node not in node_vol_should or
2423 volume not in node_vol_should[node]) and
2424 not reserved.Matches(volume))
2425 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2426 "volume %s is unknown", volume)
2428 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2429 """Verify N+1 Memory Resilience.
2431 Check that if a single node dies we can still start all the
2432 instances it was primary for.
2435 cluster_info = self.cfg.GetClusterInfo()
2436 for node, n_img in node_image.items():
2437 # This code checks that every node which is now listed as
2438 # secondary has enough memory to host all instances it is
2439 # supposed to should a single other node in the cluster fail.
2440 # FIXME: not ready for failover to an arbitrary node
2441 # FIXME: does not support file-backed instances
2442 # WARNING: we currently take into account down instances as well
2443 # as up ones, considering that even if they're down someone
2444 # might want to start them even in the event of a node failure.
2445 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2446 # we're skipping nodes marked offline and nodes in other groups from
2447 # the N+1 warning, since most likely we don't have good memory
2448 # information from them; we already list instances living on such
2449 # nodes, and that's enough warning
2451 #TODO(dynmem): also consider ballooning out other instances
2452 for prinode, instances in n_img.sbp.items():
2454 for instance in instances:
2455 bep = cluster_info.FillBE(instance_cfg[instance])
2456 if bep[constants.BE_AUTO_BALANCE]:
2457 needed_mem += bep[constants.BE_MINMEM]
2458 test = n_img.mfree < needed_mem
2459 self._ErrorIf(test, constants.CV_ENODEN1, node,
2460 "not enough memory to accomodate instance failovers"
2461 " should node %s fail (%dMiB needed, %dMiB available)",
2462 prinode, needed_mem, n_img.mfree)
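# Editor's example (not in the original source): if two auto-balanced
# instances with BE_MINMEM of 1024 MiB each have their primary on some node
# "nodeA" and their secondary on this node, needed_mem for prinode "nodeA"
# is 2048 MiB; the CV_ENODEN1 error above fires if this node reports mfree
# below that, i.e. it could not absorb a failover of "nodeA".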
2465 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2466 (files_all, files_opt, files_mc, files_vm)):
2467 """Verifies file checksums collected from all nodes.
2469 @param errorif: Callback for reporting errors
2470 @param nodeinfo: List of L{objects.Node} objects
2471 @param master_node: Name of master node
2472 @param all_nvinfo: RPC results
2475 # Define functions determining which nodes to consider for a file
2478 (files_mc, lambda node: (node.master_candidate or
2479 node.name == master_node)),
2480 (files_vm, lambda node: node.vm_capable),
2483 # Build mapping from filename to list of nodes which should have the file
2485 for (files, fn) in files2nodefn:
2487 filenodes = nodeinfo
2489 filenodes = filter(fn, nodeinfo)
2490 nodefiles.update((filename,
2491 frozenset(map(operator.attrgetter("name"), filenodes)))
2492 for filename in files)
2494 assert set(nodefiles) == (files_all | files_mc | files_vm)
2496 fileinfo = dict((filename, {}) for filename in nodefiles)
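# Editor's note (illustrative, not in the original source): at this point
# nodefiles maps each checked filename to the frozenset of node names that
# should have it, and fileinfo is filled in below as
# {filename: {checksum: set(node_names)}}, e.g.
#   {"/var/lib/ganeti/config.data": {"ab12...": set(["node1", "node2"])}}
# (values shown only for illustration).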
2497 ignore_nodes = set()
2499 for node in nodeinfo:
2501 ignore_nodes.add(node.name)
2504 nresult = all_nvinfo[node.name]
2506 if nresult.fail_msg or not nresult.payload:
2509 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2511 test = not (node_files and isinstance(node_files, dict))
2512 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2513 "Node did not return file checksum data")
2515 ignore_nodes.add(node.name)
2518 # Build per-checksum mapping from filename to nodes having it
2519 for (filename, checksum) in node_files.items():
2520 assert filename in nodefiles
2521 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2523 for (filename, checksums) in fileinfo.items():
2524 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2526 # Nodes having the file
2527 with_file = frozenset(node_name
2528 for nodes in fileinfo[filename].values()
2529 for node_name in nodes) - ignore_nodes
2531 expected_nodes = nodefiles[filename] - ignore_nodes
2533 # Nodes missing file
2534 missing_file = expected_nodes - with_file
2536 if filename in files_opt:
2538 errorif(missing_file and missing_file != expected_nodes,
2539 constants.CV_ECLUSTERFILECHECK, None,
2540 "File %s is optional, but it must exist on all or no"
2541 " nodes (not found on %s)",
2542 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2544 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2545 "File %s is missing from node(s) %s", filename,
2546 utils.CommaJoin(utils.NiceSort(missing_file)))
2548 # Warn if a node has a file it shouldn't
2549 unexpected = with_file - expected_nodes
2551 constants.CV_ECLUSTERFILECHECK, None,
2552 "File %s should not exist on node(s) %s",
2553 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2555 # See if there are multiple versions of the file
2556 test = len(checksums) > 1
2558 variants = ["variant %s on %s" %
2559 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2560 for (idx, (checksum, nodes)) in
2561 enumerate(sorted(checksums.items()))]
2565 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2566 "File %s found with %s different checksums (%s)",
2567 filename, len(checksums), "; ".join(variants))
2569 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2571 """Verifies and the node DRBD status.
2573 @type ninfo: L{objects.Node}
2574 @param ninfo: the node to check
2575 @param nresult: the remote results for the node
2576 @param instanceinfo: the dict of instances
2577 @param drbd_helper: the configured DRBD usermode helper
2578 @param drbd_map: the DRBD map as returned by
2579 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2583 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2586 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2587 test = (helper_result is None)
2588 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2589 "no drbd usermode helper returned")
2591 status, payload = helper_result
2593 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2594 "drbd usermode helper check unsuccessful: %s", payload)
2595 test = status and (payload != drbd_helper)
2596 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2597 "wrong drbd usermode helper: %s", payload)
2599 # compute the DRBD minors
2601 for minor, instance in drbd_map[node].items():
2602 test = instance not in instanceinfo
2603 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2604 "ghost instance '%s' in temporary DRBD map", instance)
2605 # ghost instance should not be running, but otherwise we
2606 # don't give double warnings (both ghost instance and
2607 # unallocated minor in use)
2609 node_drbd[minor] = (instance, False)
2611 instance = instanceinfo[instance]
2612 node_drbd[minor] = (instance.name,
2613 instance.admin_state == constants.ADMINST_UP)
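# Editor's note (not in the original source): drbd_map has the shape
# {node_name: {minor_number: instance_name}} (see ComputeDRBDMap), so the
# node_drbd dict built above ends up as {minor: (instance_name, should_be_up)}
# for this node and is compared against the minors actually in use below.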
2615 # and now check them
2616 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2617 test = not isinstance(used_minors, (tuple, list))
2618 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2619 "cannot parse drbd status file: %s", str(used_minors))
2621 # we cannot check drbd status
2624 for minor, (iname, must_exist) in node_drbd.items():
2625 test = minor not in used_minors and must_exist
2626 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2627 "drbd minor %d of instance %s is not active", minor, iname)
2628 for minor in used_minors:
2629 test = minor not in node_drbd
2630 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2631 "unallocated drbd minor %d is in use", minor)
2633 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2634 """Builds the node OS structures.
2636 @type ninfo: L{objects.Node}
2637 @param ninfo: the node to check
2638 @param nresult: the remote results for the node
2639 @param nimg: the node image object
2643 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2645 remote_os = nresult.get(constants.NV_OSLIST, None)
2646 test = (not isinstance(remote_os, list) or
2647 not compat.all(isinstance(v, list) and len(v) == 7
2648 for v in remote_os))
2650 _ErrorIf(test, constants.CV_ENODEOS, node,
2651 "node hasn't returned valid OS data")
2660 for (name, os_path, status, diagnose,
2661 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2663 if name not in os_dict:
2666 # parameters is a list of lists instead of list of tuples due to
2667 # JSON lacking a real tuple type, fix it:
2668 parameters = [tuple(v) for v in parameters]
2669 os_dict[name].append((os_path, status, diagnose,
2670 set(variants), set(parameters), set(api_ver)))
2672 nimg.oslist = os_dict
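# Editor's note (not in the original source): nimg.oslist therefore maps each
# OS name to a list of
#   (path, status, diagnose, set(variants), set(parameters), set(api_versions))
# tuples, one entry per location the OS was found in; a healthy node normally
# has exactly one entry per OS, which _VerifyNodeOS checks below.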
2674 def _VerifyNodeOS(self, ninfo, nimg, base):
2675 """Verifies the node OS list.
2677 @type ninfo: L{objects.Node}
2678 @param ninfo: the node to check
2679 @param nimg: the node image object
2680 @param base: the 'template' node we match against (e.g. from the master)
2684 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2686 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2688 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2689 for os_name, os_data in nimg.oslist.items():
2690 assert os_data, "Empty OS status for OS %s?!" % os_name
2691 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2692 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2693 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2694 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2695 "OS '%s' has multiple entries (first one shadows the rest): %s",
2696 os_name, utils.CommaJoin([v[0] for v in os_data]))
2697 # comparisons with the 'base' image
2698 test = os_name not in base.oslist
2699 _ErrorIf(test, constants.CV_ENODEOS, node,
2700 "Extra OS %s not present on reference node (%s)",
2704 assert base.oslist[os_name], "Base node has empty OS status?"
2705 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2707 # base OS is invalid, skipping
2709 for kind, a, b in [("API version", f_api, b_api),
2710 ("variants list", f_var, b_var),
2711 ("parameters", beautify_params(f_param),
2712 beautify_params(b_param))]:
2713 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2714 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2715 kind, os_name, base.name,
2716 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2718 # check any missing OSes
2719 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2720 _ErrorIf(missing, constants.CV_ENODEOS, node,
2721 "OSes present on reference node %s but missing on this node: %s",
2722 base.name, utils.CommaJoin(missing))
2724 def _VerifyOob(self, ninfo, nresult):
2725 """Verifies out of band functionality of a node.
2727 @type ninfo: L{objects.Node}
2728 @param ninfo: the node to check
2729 @param nresult: the remote results for the node
2733 # We just have to verify the paths on master and/or master candidates
2734 # as the oob helper is invoked on the master
2735 if ((ninfo.master_candidate or ninfo.master_capable) and
2736 constants.NV_OOB_PATHS in nresult):
2737 for path_result in nresult[constants.NV_OOB_PATHS]:
2738 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2740 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2741 """Verifies and updates the node volume data.
2743 This function will update a L{NodeImage}'s internal structures
2744 with data from the remote call.
2746 @type ninfo: L{objects.Node}
2747 @param ninfo: the node to check
2748 @param nresult: the remote results for the node
2749 @param nimg: the node image object
2750 @param vg_name: the configured VG name
2754 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2756 nimg.lvm_fail = True
2757 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2760 elif isinstance(lvdata, basestring):
2761 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2762 utils.SafeEncode(lvdata))
2763 elif not isinstance(lvdata, dict):
2764 _ErrorIf(True, constants.CV_ENODELVM, node,
2765 "rpc call to node failed (lvlist)")
2767 nimg.volumes = lvdata
2768 nimg.lvm_fail = False
2770 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2771 """Verifies and updates the node instance list.
2773 If the listing was successful, then updates this node's instance
2774 list. Otherwise, it marks the RPC call as failed for the instance
2775 list.
2777 @type ninfo: L{objects.Node}
2778 @param ninfo: the node to check
2779 @param nresult: the remote results for the node
2780 @param nimg: the node image object
2783 idata = nresult.get(constants.NV_INSTANCELIST, None)
2784 test = not isinstance(idata, list)
2785 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2786 "rpc call to node failed (instancelist): %s",
2787 utils.SafeEncode(str(idata)))
2789 nimg.hyp_fail = True
2791 nimg.instances = idata
2793 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2794 """Verifies and computes a node information map
2796 @type ninfo: L{objects.Node}
2797 @param ninfo: the node to check
2798 @param nresult: the remote results for the node
2799 @param nimg: the node image object
2800 @param vg_name: the configured VG name
2804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2806 # try to read free memory (from the hypervisor)
2807 hv_info = nresult.get(constants.NV_HVINFO, None)
2808 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2809 _ErrorIf(test, constants.CV_ENODEHV, node,
2810 "rpc call to node failed (hvinfo)")
2813 nimg.mfree = int(hv_info["memory_free"])
2814 except (ValueError, TypeError):
2815 _ErrorIf(True, constants.CV_ENODERPC, node,
2816 "node returned invalid nodeinfo, check hypervisor")
2818 # FIXME: devise a free space model for file based instances as well
2819 if vg_name is not None:
2820 test = (constants.NV_VGLIST not in nresult or
2821 vg_name not in nresult[constants.NV_VGLIST])
2822 _ErrorIf(test, constants.CV_ENODELVM, node,
2823 "node didn't return data for the volume group '%s'"
2824 " - it is either missing or broken", vg_name)
2827 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2828 except (ValueError, TypeError):
2829 _ErrorIf(True, constants.CV_ENODERPC, node,
2830 "node returned invalid LVM info, check LVM status")
2832 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2833 """Gets per-disk status information for all instances.
2835 @type nodelist: list of strings
2836 @param nodelist: Node names
2837 @type node_image: dict of (name, L{objects.Node})
2838 @param node_image: Node objects
2839 @type instanceinfo: dict of (name, L{objects.Instance})
2840 @param instanceinfo: Instance objects
2841 @rtype: {instance: {node: [(success, payload)]}}
2842 @return: a dictionary of per-instance dictionaries with nodes as
2843 keys and disk information as values; the disk information is a
2844 list of tuples (success, payload)
2847 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2850 node_disks_devonly = {}
2851 diskless_instances = set()
2852 diskless = constants.DT_DISKLESS
2854 for nname in nodelist:
2855 node_instances = list(itertools.chain(node_image[nname].pinst,
2856 node_image[nname].sinst))
2857 diskless_instances.update(inst for inst in node_instances
2858 if instanceinfo[inst].disk_template == diskless)
2859 disks = [(inst, disk)
2860 for inst in node_instances
2861 for disk in instanceinfo[inst].disks]
2864 # No need to collect data
2867 node_disks[nname] = disks
2869 # Creating copies as SetDiskID below will modify the objects and that can
2870 # lead to incorrect data returned from nodes
2871 devonly = [dev.Copy() for (_, dev) in disks]
2874 self.cfg.SetDiskID(dev, nname)
2876 node_disks_devonly[nname] = devonly
2878 assert len(node_disks) == len(node_disks_devonly)
2880 # Collect data from all nodes with disks
2881 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2884 assert len(result) == len(node_disks)
2888 for (nname, nres) in result.items():
2889 disks = node_disks[nname]
2892 # No data from this node
2893 data = len(disks) * [(False, "node offline")]
2896 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2897 "while getting disk information: %s", msg)
2899 # No data from this node
2900 data = len(disks) * [(False, msg)]
2903 for idx, i in enumerate(nres.payload):
2904 if isinstance(i, (tuple, list)) and len(i) == 2:
2907 logging.warning("Invalid result from node %s, entry %d: %s",
2909 data.append((False, "Invalid result from the remote node"))
2911 for ((inst, _), status) in zip(disks, data):
2912 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2914 # Add empty entries for diskless instances.
2915 for inst in diskless_instances:
2916 assert inst not in instdisk
2919 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2920 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2921 compat.all(isinstance(s, (tuple, list)) and
2922 len(s) == 2 for s in statuses)
2923 for inst, nnames in instdisk.items()
2924 for nname, statuses in nnames.items())
2925 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2927 return instdisk
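# Editor's note (illustrative, not in the original source): the returned
# instdisk structure looks like
#   {"inst1": {"node1": [(True, status_payload), (False, "disk degraded")]}}
# i.e. per instance, per node, one (success, payload) pair per disk, with
# diskless instances present but mapped to empty dicts (names made up).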
2930 def _SshNodeSelector(group_uuid, all_nodes):
2931 """Create endless iterators for all potential SSH check hosts.
2934 nodes = [node for node in all_nodes
2935 if (node.group != group_uuid and
2937 keyfunc = operator.attrgetter("group")
2939 return map(itertools.cycle,
2940 [sorted(map(operator.attrgetter("name"), names))
2941 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2945 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2946 """Choose which nodes should talk to which other nodes.
2948 We will make nodes contact all nodes in their group, and one node from
2949 every other group.
2951 @warning: This algorithm has a known issue if one node group is much
2952 smaller than others (e.g. just one node). In such a case all other
2953 nodes will talk to the single node.
2956 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2957 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2959 return (online_nodes,
2960 dict((name, sorted([i.next() for i in sel]))
2961 for name in online_nodes))
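# Editor's example (not in the original source): with node groups
# A = {a1, a2} (the group being verified) and B = {b1}, this returns roughly
#   (["a1", "a2"], {"a1": ["b1"], "a2": ["b1"]})
# i.e. every online node of the verified group is additionally asked to
# contact one node picked from each other group (names made up).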
2963 def BuildHooksEnv(self):
2966 Cluster-Verify hooks just ran in the post phase and their failure makes
2967 the output be logged in the verify output and the verification to fail.
2971 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2974 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2975 for node in self.my_node_info.values())
2979 def BuildHooksNodes(self):
2980 """Build hooks nodes.
2983 return ([], self.my_node_names)
2985 def Exec(self, feedback_fn):
2986 """Verify integrity of the node group, performing various test on nodes.
2989 # This method has too many local variables. pylint: disable=R0914
2990 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2992 if not self.my_node_names:
2994 feedback_fn("* Empty node group, skipping verification")
2998 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2999 verbose = self.op.verbose
3000 self._feedback_fn = feedback_fn
3002 vg_name = self.cfg.GetVGName()
3003 drbd_helper = self.cfg.GetDRBDHelper()
3004 cluster = self.cfg.GetClusterInfo()
3005 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3006 hypervisors = cluster.enabled_hypervisors
3007 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3009 i_non_redundant = [] # Non redundant instances
3010 i_non_a_balanced = [] # Non auto-balanced instances
3011 i_offline = 0 # Count of offline instances
3012 n_offline = 0 # Count of offline nodes
3013 n_drained = 0 # Count of nodes being drained
3014 node_vol_should = {}
3016 # FIXME: verify OS list
3019 filemap = _ComputeAncillaryFiles(cluster, False)
3021 # do local checksums
3022 master_node = self.master_node = self.cfg.GetMasterNode()
3023 master_ip = self.cfg.GetMasterIP()
3025 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3028 if self.cfg.GetUseExternalMipScript():
3029 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3031 node_verify_param = {
3032 constants.NV_FILELIST:
3033 utils.UniqueSequence(filename
3034 for files in filemap
3035 for filename in files),
3036 constants.NV_NODELIST:
3037 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3038 self.all_node_info.values()),
3039 constants.NV_HYPERVISOR: hypervisors,
3040 constants.NV_HVPARAMS:
3041 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3042 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3043 for node in node_data_list
3044 if not node.offline],
3045 constants.NV_INSTANCELIST: hypervisors,
3046 constants.NV_VERSION: None,
3047 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3048 constants.NV_NODESETUP: None,
3049 constants.NV_TIME: None,
3050 constants.NV_MASTERIP: (master_node, master_ip),
3051 constants.NV_OSLIST: None,
3052 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3053 constants.NV_USERSCRIPTS: user_scripts,
3056 if vg_name is not None:
3057 node_verify_param[constants.NV_VGLIST] = None
3058 node_verify_param[constants.NV_LVLIST] = vg_name
3059 node_verify_param[constants.NV_PVLIST] = [vg_name]
3060 node_verify_param[constants.NV_DRBDLIST] = None
3063 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3066 # FIXME: this needs to be changed per node-group, not cluster-wide
3068 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3069 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3070 bridges.add(default_nicpp[constants.NIC_LINK])
3071 for instance in self.my_inst_info.values():
3072 for nic in instance.nics:
3073 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3074 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3075 bridges.add(full_nic[constants.NIC_LINK])
3078 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3080 # Build our expected cluster state
3081 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3083 vm_capable=node.vm_capable))
3084 for node in node_data_list)
3088 for node in self.all_node_info.values():
3089 path = _SupportsOob(self.cfg, node)
3090 if path and path not in oob_paths:
3091 oob_paths.append(path)
3094 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3096 for instance in self.my_inst_names:
3097 inst_config = self.my_inst_info[instance]
3099 for nname in inst_config.all_nodes:
3100 if nname not in node_image:
3101 gnode = self.NodeImage(name=nname)
3102 gnode.ghost = (nname not in self.all_node_info)
3103 node_image[nname] = gnode
3105 inst_config.MapLVsByNode(node_vol_should)
3107 pnode = inst_config.primary_node
3108 node_image[pnode].pinst.append(instance)
3110 for snode in inst_config.secondary_nodes:
3111 nimg = node_image[snode]
3112 nimg.sinst.append(instance)
3113 if pnode not in nimg.sbp:
3114 nimg.sbp[pnode] = []
3115 nimg.sbp[pnode].append(instance)
3117 # At this point, we have the in-memory data structures complete,
3118 # except for the runtime information, which we'll gather next
3120 # Due to the way our RPC system works, exact response times cannot be
3121 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3122 # time before and after executing the request, we can at least have a time
3123 # window.
3124 nvinfo_starttime = time.time()
3125 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3127 self.cfg.GetClusterName())
3128 nvinfo_endtime = time.time()
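# Editor's note (not in the original source): nvinfo_starttime and
# nvinfo_endtime bracket the verification RPC; _VerifyNodeTime later flags
# any node whose reported NV_TIME falls outside this window widened by
# constants.NODE_MAX_CLOCK_SKEW on both sides.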
3130 if self.extra_lv_nodes and vg_name is not None:
3132 self.rpc.call_node_verify(self.extra_lv_nodes,
3133 {constants.NV_LVLIST: vg_name},
3134 self.cfg.GetClusterName())
3136 extra_lv_nvinfo = {}
3138 all_drbd_map = self.cfg.ComputeDRBDMap()
3140 feedback_fn("* Gathering disk information (%s nodes)" %
3141 len(self.my_node_names))
3142 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3145 feedback_fn("* Verifying configuration file consistency")
3147 # If not all nodes are being checked, we need to make sure the master node
3148 # and a non-checked vm_capable node are in the list.
3149 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3151 vf_nvinfo = all_nvinfo.copy()
3152 vf_node_info = list(self.my_node_info.values())
3153 additional_nodes = []
3154 if master_node not in self.my_node_info:
3155 additional_nodes.append(master_node)
3156 vf_node_info.append(self.all_node_info[master_node])
3157 # Add the first vm_capable node we find which is not included
3158 for node in absent_nodes:
3159 nodeinfo = self.all_node_info[node]
3160 if nodeinfo.vm_capable and not nodeinfo.offline:
3161 additional_nodes.append(node)
3162 vf_node_info.append(self.all_node_info[node])
3164 key = constants.NV_FILELIST
3165 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3166 {key: node_verify_param[key]},
3167 self.cfg.GetClusterName()))
3169 vf_nvinfo = all_nvinfo
3170 vf_node_info = self.my_node_info.values()
3172 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3174 feedback_fn("* Verifying node status")
3178 for node_i in node_data_list:
3180 nimg = node_image[node]
3184 feedback_fn("* Skipping offline node %s" % (node,))
3188 if node == master_node:
3190 elif node_i.master_candidate:
3191 ntype = "master candidate"
3192 elif node_i.drained:
3198 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3200 msg = all_nvinfo[node].fail_msg
3201 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3204 nimg.rpc_fail = True
3207 nresult = all_nvinfo[node].payload
3209 nimg.call_ok = self._VerifyNode(node_i, nresult)
3210 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3211 self._VerifyNodeNetwork(node_i, nresult)
3212 self._VerifyNodeUserScripts(node_i, nresult)
3213 self._VerifyOob(node_i, nresult)
3216 self._VerifyNodeLVM(node_i, nresult, vg_name)
3217 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3220 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3221 self._UpdateNodeInstances(node_i, nresult, nimg)
3222 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3223 self._UpdateNodeOS(node_i, nresult, nimg)
3225 if not nimg.os_fail:
3226 if refos_img is None:
3228 self._VerifyNodeOS(node_i, nimg, refos_img)
3229 self._VerifyNodeBridges(node_i, nresult, bridges)
3231 # Check whether all running instances are primary for the node. (This
3232 # can no longer be done from _VerifyInstance below, since some of the
3233 # wrong instances could be from other node groups.)
3234 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3236 for inst in non_primary_inst:
3237 # FIXME: investigate best way to handle offline insts
3238 if inst.admin_state == constants.ADMINST_OFFLINE:
3240 feedback_fn("* Skipping offline instance %s" % inst.name)
3243 test = inst in self.all_inst_info
3244 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3245 "instance should not run on node %s", node_i.name)
3246 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3247 "node is running unknown instance %s", inst)
3249 for node, result in extra_lv_nvinfo.items():
3250 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3251 node_image[node], vg_name)
3253 feedback_fn("* Verifying instance status")
3254 for instance in self.my_inst_names:
3256 feedback_fn("* Verifying instance %s" % instance)
3257 inst_config = self.my_inst_info[instance]
3258 self._VerifyInstance(instance, inst_config, node_image,
3260 inst_nodes_offline = []
3262 pnode = inst_config.primary_node
3263 pnode_img = node_image[pnode]
3264 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3265 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3266 " primary node failed", instance)
3268 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3270 constants.CV_EINSTANCEBADNODE, instance,
3271 "instance is marked as running and lives on offline node %s",
3272 inst_config.primary_node)
3274 # If the instance is non-redundant we cannot survive losing its primary
3275 # node, so we are not N+1 compliant. On the other hand we have no disk
3276 # templates with more than one secondary so that situation is not well
3277 # supported either.
3278 # FIXME: does not support file-backed instances
3279 if not inst_config.secondary_nodes:
3280 i_non_redundant.append(instance)
3282 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3283 constants.CV_EINSTANCELAYOUT,
3284 instance, "instance has multiple secondary nodes: %s",
3285 utils.CommaJoin(inst_config.secondary_nodes),
3286 code=self.ETYPE_WARNING)
3288 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3289 pnode = inst_config.primary_node
3290 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3291 instance_groups = {}
3293 for node in instance_nodes:
3294 instance_groups.setdefault(self.all_node_info[node].group,
3298 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3299 # Sort so that we always list the primary node first.
3300 for group, nodes in sorted(instance_groups.items(),
3301 key=lambda (_, nodes): pnode in nodes,
3304 self._ErrorIf(len(instance_groups) > 1,
3305 constants.CV_EINSTANCESPLITGROUPS,
3306 instance, "instance has primary and secondary nodes in"
3307 " different groups: %s", utils.CommaJoin(pretty_list),
3308 code=self.ETYPE_WARNING)
3310 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3311 i_non_a_balanced.append(instance)
3313 for snode in inst_config.secondary_nodes:
3314 s_img = node_image[snode]
3315 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3316 snode, "instance %s, connection to secondary node failed",
3320 inst_nodes_offline.append(snode)
3322 # warn that the instance lives on offline nodes
3323 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3324 "instance has offline secondary node(s) %s",
3325 utils.CommaJoin(inst_nodes_offline))
3326 # ... or ghost/non-vm_capable nodes
3327 for node in inst_config.all_nodes:
3328 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3329 instance, "instance lives on ghost node %s", node)
3330 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3331 instance, "instance lives on non-vm_capable node %s", node)
3333 feedback_fn("* Verifying orphan volumes")
3334 reserved = utils.FieldSet(*cluster.reserved_lvs)
3336 # We will get spurious "unknown volume" warnings if any node of this group
3337 # is secondary for an instance whose primary is in another group. To avoid
3338 # them, we find these instances and add their volumes to node_vol_should.
3339 for inst in self.all_inst_info.values():
3340 for secondary in inst.secondary_nodes:
3341 if (secondary in self.my_node_info
3342 and inst.name not in self.my_inst_info):
3343 inst.MapLVsByNode(node_vol_should)
3346 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3348 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3349 feedback_fn("* Verifying N+1 Memory redundancy")
3350 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3352 feedback_fn("* Other Notes")
3354 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3355 % len(i_non_redundant))
3357 if i_non_a_balanced:
3358 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3359 % len(i_non_a_balanced))
3362 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3365 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3368 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3372 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3373 """Analyze the post-hooks' result
3375 This method analyses the hook result, handles it, and sends some
3376 nicely-formatted feedback back to the user.
3378 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3379 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3380 @param hooks_results: the results of the multi-node hooks rpc call
3381 @param feedback_fn: function used to send feedback back to the caller
3382 @param lu_result: previous Exec result
3383 @return: the new Exec result, based on the previous result
3387 # We only really run POST phase hooks, only for non-empty groups,
3388 # and are only interested in their results
3389 if not self.my_node_names:
3392 elif phase == constants.HOOKS_PHASE_POST:
3393 # Used to change hooks' output to proper indentation
3394 feedback_fn("* Hooks Results")
3395 assert hooks_results, "invalid result from hooks"
3397 for node_name in hooks_results:
3398 res = hooks_results[node_name]
3400 test = msg and not res.offline
3401 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3402 "Communication failure in hooks execution: %s", msg)
3403 if res.offline or msg:
3404 # No need to investigate payload if node is offline or gave
3407 for script, hkr, output in res.payload:
3408 test = hkr == constants.HKR_FAIL
3409 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3410 "Script %s failed, output:", script)
3412 output = self._HOOKS_INDENT_RE.sub(" ", output)
3413 feedback_fn("%s" % output)
3419 class LUClusterVerifyDisks(NoHooksLU):
3420 """Verifies the cluster disks status.
3425 def ExpandNames(self):
3426 self.share_locks = _ShareAll()
3427 self.needed_locks = {
3428 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3431 def Exec(self, feedback_fn):
3432 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3434 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3435 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3436 for group in group_names])
3439 class LUGroupVerifyDisks(NoHooksLU):
3440 """Verifies the status of all disks in a node group.
3445 def ExpandNames(self):
3446 # Raises errors.OpPrereqError on its own if group can't be found
3447 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3449 self.share_locks = _ShareAll()
3450 self.needed_locks = {
3451 locking.LEVEL_INSTANCE: [],
3452 locking.LEVEL_NODEGROUP: [],
3453 locking.LEVEL_NODE: [],
3456 def DeclareLocks(self, level):
3457 if level == locking.LEVEL_INSTANCE:
3458 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3460 # Lock instances optimistically, needs verification once node and group
3461 # locks have been acquired
3462 self.needed_locks[locking.LEVEL_INSTANCE] = \
3463 self.cfg.GetNodeGroupInstances(self.group_uuid)
3465 elif level == locking.LEVEL_NODEGROUP:
3466 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3468 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3469 set([self.group_uuid] +
3470 # Lock all groups used by instances optimistically; this requires
3471 # going via the node before it's locked, requiring verification
3474 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3475 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3477 elif level == locking.LEVEL_NODE:
3478 # This will only lock the nodes in the group to be verified which contain
3480 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3481 self._LockInstancesNodes()
3483 # Lock all nodes in group to be verified
3484 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3485 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3486 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3488 def CheckPrereq(self):
3489 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3490 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3491 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3493 assert self.group_uuid in owned_groups
3495 # Check if locked instances are still correct
3496 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3498 # Get instance information
3499 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3501 # Check if node groups for locked instances are still correct
3502 for (instance_name, inst) in self.instances.items():
3503 assert owned_nodes.issuperset(inst.all_nodes), \
3504 "Instance %s's nodes changed while we kept the lock" % instance_name
3506 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3509 assert self.group_uuid in inst_groups, \
3510 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3512 def Exec(self, feedback_fn):
3513 """Verify integrity of cluster disks.
3515 @rtype: tuple of three items
3516 @return: a tuple of (dict of node-to-node_error, list of instances
3517 which need activate-disks, dict of instance: (node, volume) for
3518 missing volumes)
3522 res_instances = set()
3525 nv_dict = _MapInstanceDisksToNodes([inst
3526 for inst in self.instances.values()
3527 if inst.admin_state == constants.ADMINST_UP])
3530 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3531 set(self.cfg.GetVmCapableNodeList()))
3533 node_lvs = self.rpc.call_lv_list(nodes, [])
3535 for (node, node_res) in node_lvs.items():
3536 if node_res.offline:
3539 msg = node_res.fail_msg
3541 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3542 res_nodes[node] = msg
3545 for lv_name, (_, _, lv_online) in node_res.payload.items():
3546 inst = nv_dict.pop((node, lv_name), None)
3547 if not (lv_online or inst is None):
3548 res_instances.add(inst)
3550 # any leftover items in nv_dict are missing LVs, let's arrange the data
3552 for key, inst in nv_dict.iteritems():
3553 res_missing.setdefault(inst, []).append(list(key))
3555 return (res_nodes, list(res_instances), res_missing)
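# Editor's example (not in the original source): a possible return value is
#   ({"node3": "Error while enumerating LVs"},    # nodes with RPC problems
#    ["inst2"],                                   # instances needing activate-disks
#    {"inst5": [["node1", "xenvg/disk0"]]})       # instances with missing LVs
# (all names made up for illustration).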
3558 class LUClusterRepairDiskSizes(NoHooksLU):
3559 """Verifies the cluster disks sizes.
3564 def ExpandNames(self):
3565 if self.op.instances:
3566 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3567 self.needed_locks = {
3568 locking.LEVEL_NODE_RES: [],
3569 locking.LEVEL_INSTANCE: self.wanted_names,
3571 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3573 self.wanted_names = None
3574 self.needed_locks = {
3575 locking.LEVEL_NODE_RES: locking.ALL_SET,
3576 locking.LEVEL_INSTANCE: locking.ALL_SET,
3578 self.share_locks = {
3579 locking.LEVEL_NODE_RES: 1,
3580 locking.LEVEL_INSTANCE: 0,
3583 def DeclareLocks(self, level):
3584 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3585 self._LockInstancesNodes(primary_only=True, level=level)
3587 def CheckPrereq(self):
3588 """Check prerequisites.
3590 This only checks the optional instance list against the existing names.
3593 if self.wanted_names is None:
3594 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3596 self.wanted_instances = \
3597 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3599 def _EnsureChildSizes(self, disk):
3600 """Ensure children of the disk have the needed disk size.
3602 This is valid mainly for DRBD8 and fixes an issue where the
3603 children have a smaller disk size.
3605 @param disk: an L{ganeti.objects.Disk} object
3608 if disk.dev_type == constants.LD_DRBD8:
3609 assert disk.children, "Empty children for DRBD8?"
3610 fchild = disk.children[0]
3611 mismatch = fchild.size < disk.size
3613 self.LogInfo("Child disk has size %d, parent %d, fixing",
3614 fchild.size, disk.size)
3615 fchild.size = disk.size
3617 # and we recurse on this child only, not on the metadev
3618 return self._EnsureChildSizes(fchild) or mismatch
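# Editor's example (not in the original source): for a DRBD8 disk of size
# 10240 MiB whose data child (children[0]) records only 10112 MiB, the code
# above grows the child to 10240 and returns True so the configuration gets
# rewritten; the metadata child is deliberately left untouched.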
3622 def Exec(self, feedback_fn):
3623 """Verify the size of cluster disks.
3626 # TODO: check child disks too
3627 # TODO: check differences in size between primary/secondary nodes
3629 for instance in self.wanted_instances:
3630 pnode = instance.primary_node
3631 if pnode not in per_node_disks:
3632 per_node_disks[pnode] = []
3633 for idx, disk in enumerate(instance.disks):
3634 per_node_disks[pnode].append((instance, idx, disk))
3636 assert not (frozenset(per_node_disks.keys()) -
3637 self.owned_locks(locking.LEVEL_NODE_RES)), \
3638 "Not owning correct locks"
3639 assert not self.owned_locks(locking.LEVEL_NODE)
3642 for node, dskl in per_node_disks.items():
3643 newl = [v[2].Copy() for v in dskl]
3645 self.cfg.SetDiskID(dsk, node)
3646 result = self.rpc.call_blockdev_getsize(node, newl)
3648 self.LogWarning("Failure in blockdev_getsize call to node"
3649 " %s, ignoring", node)
3651 if len(result.payload) != len(dskl):
3652 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3653 " result.payload=%s", node, len(dskl), result.payload)
3654 self.LogWarning("Invalid result from node %s, ignoring node results",
3657 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3659 self.LogWarning("Disk %d of instance %s did not return size"
3660 " information, ignoring", idx, instance.name)
3662 if not isinstance(size, (int, long)):
3663 self.LogWarning("Disk %d of instance %s did not return valid"
3664 " size information, ignoring", idx, instance.name)
3667 if size != disk.size:
3668 self.LogInfo("Disk %d of instance %s has mismatched size,"
3669 " correcting: recorded %d, actual %d", idx,
3670 instance.name, disk.size, size)
3672 self.cfg.Update(instance, feedback_fn)
3673 changed.append((instance.name, idx, size))
3674 if self._EnsureChildSizes(disk):
3675 self.cfg.Update(instance, feedback_fn)
3676 changed.append((instance.name, idx, disk.size))
3680 class LUClusterRename(LogicalUnit):
3681 """Rename the cluster.
3684 HPATH = "cluster-rename"
3685 HTYPE = constants.HTYPE_CLUSTER
3687 def BuildHooksEnv(self):
3692 "OP_TARGET": self.cfg.GetClusterName(),
3693 "NEW_NAME": self.op.name,
3696 def BuildHooksNodes(self):
3697 """Build hooks nodes.
3700 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3702 def CheckPrereq(self):
3703 """Verify that the passed name is a valid one.
3706 hostname = netutils.GetHostname(name=self.op.name,
3707 family=self.cfg.GetPrimaryIPFamily())
3709 new_name = hostname.name
3710 self.ip = new_ip = hostname.ip
3711 old_name = self.cfg.GetClusterName()
3712 old_ip = self.cfg.GetMasterIP()
3713 if new_name == old_name and new_ip == old_ip:
3714 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3715 " cluster has changed",
3716 errors.ECODE_INVAL)
3717 if new_ip != old_ip:
3718 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3719 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3720 " reachable on the network" %
3721 new_ip, errors.ECODE_NOTUNIQUE)
3723 self.op.name = new_name
3725 def Exec(self, feedback_fn):
3726 """Rename the cluster.
3729 clustername = self.op.name
3730 new_ip = self.ip
3732 # shutdown the master IP
3733 master_params = self.cfg.GetMasterNetworkParameters()
3734 ems = self.cfg.GetUseExternalMipScript()
3735 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3736 master_params, ems)
3737 result.Raise("Could not disable the master role")
3739 try:
3740 cluster = self.cfg.GetClusterInfo()
3741 cluster.cluster_name = clustername
3742 cluster.master_ip = new_ip
3743 self.cfg.Update(cluster, feedback_fn)
3745 # update the known hosts file
3746 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3747 node_list = self.cfg.GetOnlineNodeList()
3748 try:
3749 node_list.remove(master_params.name)
3750 except ValueError:
3751 pass
3752 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3753 finally:
3754 master_params.ip = new_ip
3755 result = self.rpc.call_node_activate_master_ip(master_params.name,
3756 master_params, ems)
3757 msg = result.fail_msg
3758 if msg:
3759 self.LogWarning("Could not re-enable the master role on"
3760 " the master, please restart manually: %s", msg)
3762 return clustername
3765 def _ValidateNetmask(cfg, netmask):
3766 """Checks if a netmask is valid.
3768 @type cfg: L{config.ConfigWriter}
3769 @param cfg: The cluster configuration
3771 @param netmask: the netmask to be verified
3772 @raise errors.OpPrereqError: if the validation fails
3775 ip_family = cfg.GetPrimaryIPFamily()
3776 try:
3777 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3778 except errors.ProgrammerError:
3779 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3780 ip_family, errors.ECODE_INVAL)
3781 if not ipcls.ValidateNetmask(netmask):
3782 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3783 (netmask), errors.ECODE_INVAL)
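# Standalone sketch of the idea behind _ValidateNetmask (this is not the
# Ganeti netutils API): the "netmask" is a CIDR prefix length and the valid
# range depends on the primary IP family.
import socket

_EXAMPLE_MAX_PREFIX = {socket.AF_INET: 32, socket.AF_INET6: 128}

def _example_validate_prefix_length(family, netmask):
  """Return True if netmask is a plausible prefix length for the family."""
  limit = _EXAMPLE_MAX_PREFIX.get(family)
  if limit is None:
    raise ValueError("Unknown address family: %s" % family)
  return isinstance(netmask, int) and 0 < netmask <= limit

# _example_validate_prefix_length(socket.AF_INET, 24)  -> True
# _example_validate_prefix_length(socket.AF_INET6, 64) -> True
# _example_validate_prefix_length(socket.AF_INET, 33)  -> False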
3786 class LUClusterSetParams(LogicalUnit):
3787 """Change the parameters of the cluster.
3790 HPATH = "cluster-modify"
3791 HTYPE = constants.HTYPE_CLUSTER
3794 def CheckArguments(self):
3798 if self.op.uid_pool:
3799 uidpool.CheckUidPool(self.op.uid_pool)
3801 if self.op.add_uids:
3802 uidpool.CheckUidPool(self.op.add_uids)
3804 if self.op.remove_uids:
3805 uidpool.CheckUidPool(self.op.remove_uids)
3807 if self.op.master_netmask is not None:
3808 _ValidateNetmask(self.cfg, self.op.master_netmask)
3810 if self.op.diskparams:
3811 for dt_params in self.op.diskparams.values():
3812 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3814 def ExpandNames(self):
3815 # FIXME: in the future maybe other cluster params won't require checking on
3816 # all nodes to be modified.
3817 self.needed_locks = {
3818 locking.LEVEL_NODE: locking.ALL_SET,
3819 locking.LEVEL_INSTANCE: locking.ALL_SET,
3820 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3822 self.share_locks = {
3823 locking.LEVEL_NODE: 1,
3824 locking.LEVEL_INSTANCE: 1,
3825 locking.LEVEL_NODEGROUP: 1,
3828 def BuildHooksEnv(self):
3833 "OP_TARGET": self.cfg.GetClusterName(),
3834 "NEW_VG_NAME": self.op.vg_name,
3837 def BuildHooksNodes(self):
3838 """Build hooks nodes.
3841 mn = self.cfg.GetMasterNode()
3842 return ([mn], [mn])
3844 def CheckPrereq(self):
3845 """Check prerequisites.
3847 This checks whether the given params don't conflict and
3848 if the given volume group is valid.
3851 if self.op.vg_name is not None and not self.op.vg_name:
3852 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3853 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3854 " instances exist", errors.ECODE_INVAL)
3856 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3857 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3858 raise errors.OpPrereqError("Cannot disable drbd helper while"
3859 " drbd-based instances exist",
3862 node_list = self.owned_locks(locking.LEVEL_NODE)
3864 # if vg_name not None, checks given volume group on all nodes
3865 if self.op.vg_name:
3866 vglist = self.rpc.call_vg_list(node_list)
3867 for node in node_list:
3868 msg = vglist[node].fail_msg
3869 if msg:
3870 # ignoring down node
3871 self.LogWarning("Error while gathering data on node %s"
3872 " (ignoring node): %s", node, msg)
3873 continue
3874 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3875 self.op.vg_name,
3876 constants.MIN_VG_SIZE)
3877 if vgstatus:
3878 raise errors.OpPrereqError("Error on node '%s': %s" %
3879 (node, vgstatus), errors.ECODE_ENVIRON)
3881 if self.op.drbd_helper:
3882 # checks given drbd helper on all nodes
3883 helpers = self.rpc.call_drbd_helper(node_list)
3884 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3885 if ninfo.offline:
3886 self.LogInfo("Not checking drbd helper on offline node %s", node)
3887 continue
3888 msg = helpers[node].fail_msg
3889 if msg:
3890 raise errors.OpPrereqError("Error checking drbd helper on node"
3891 " '%s': %s" % (node, msg),
3892 errors.ECODE_ENVIRON)
3893 node_helper = helpers[node].payload
3894 if node_helper != self.op.drbd_helper:
3895 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3896 (node, node_helper), errors.ECODE_ENVIRON)
3898 self.cluster = cluster = self.cfg.GetClusterInfo()
3899 # validate params changes
3900 if self.op.beparams:
3901 objects.UpgradeBeParams(self.op.beparams)
3902 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3903 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3905 if self.op.ndparams:
3906 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3907 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3909 # TODO: we need a more general way to handle resetting
3910 # cluster-level parameters to default values
3911 if self.new_ndparams["oob_program"] == "":
3912 self.new_ndparams["oob_program"] = \
3913 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3915 if self.op.hv_state:
3916 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3917 self.cluster.hv_state_static)
3918 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3919 for hv, values in new_hv_state.items())
3921 if self.op.disk_state:
3922 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3923 self.cluster.disk_state_static)
3924 self.new_disk_state = \
3925 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3926 for name, values in svalues.items()))
3927 for storage, svalues in new_disk_state.items())
3929 if self.op.ipolicy:
3930 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3931 group_policy=False)
3933 all_instances = self.cfg.GetAllInstancesInfo().values()
3934 violations = set()
3935 for group in self.cfg.GetAllNodeGroupsInfo().values():
3936 instances = frozenset([inst for inst in all_instances
3937 if compat.any(node in group.members
3938 for node in inst.all_nodes)])
3939 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3940 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3941 group),
3942 new_ipolicy, instances)
3944 violations.update(new)
3946 if violations:
3947 self.LogWarning("After the ipolicy change the following instances"
3948 " violate them: %s",
3949 utils.CommaJoin(violations))
3951 if self.op.nicparams:
3952 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3953 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3954 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3955 nic_errors = []
3957 # check all instances for consistency
3958 for instance in self.cfg.GetAllInstancesInfo().values():
3959 for nic_idx, nic in enumerate(instance.nics):
3960 params_copy = copy.deepcopy(nic.nicparams)
3961 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3963 # check parameter syntax
3964 try:
3965 objects.NIC.CheckParameterSyntax(params_filled)
3966 except errors.ConfigurationError, err:
3967 nic_errors.append("Instance %s, nic/%d: %s" %
3968 (instance.name, nic_idx, err))
3970 # if we're moving instances to routed, check that they have an ip
3971 target_mode = params_filled[constants.NIC_MODE]
3972 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3973 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3974 " address" % (instance.name, nic_idx))
3975 if nic_errors:
3976 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3977 "\n".join(nic_errors))
3979 # hypervisor list/parameters
3980 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3981 if self.op.hvparams:
3982 for hv_name, hv_dict in self.op.hvparams.items():
3983 if hv_name not in self.new_hvparams:
3984 self.new_hvparams[hv_name] = hv_dict
3985 else:
3986 self.new_hvparams[hv_name].update(hv_dict)
3988 # disk template parameters
3989 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3990 if self.op.diskparams:
3991 for dt_name, dt_params in self.op.diskparams.items():
3992 if dt_name not in self.new_diskparams:
3993 self.new_diskparams[dt_name] = dt_params
3994 else:
3995 self.new_diskparams[dt_name].update(dt_params)
3997 # os hypervisor parameters
3998 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3999 if self.op.os_hvp:
4000 for os_name, hvs in self.op.os_hvp.items():
4001 if os_name not in self.new_os_hvp:
4002 self.new_os_hvp[os_name] = hvs
4003 else:
4004 for hv_name, hv_dict in hvs.items():
4005 if hv_name not in self.new_os_hvp[os_name]:
4006 self.new_os_hvp[os_name][hv_name] = hv_dict
4007 else:
4008 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4011 self.new_osp = objects.FillDict(cluster.osparams, {})
4012 if self.op.osparams:
4013 for os_name, osp in self.op.osparams.items():
4014 if os_name not in self.new_osp:
4015 self.new_osp[os_name] = {}
4017 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4018 use_none=True)
4020 if not self.new_osp[os_name]:
4021 # we removed all parameters
4022 del self.new_osp[os_name]
4023 else:
4024 # check the parameter validity (remote check)
4025 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4026 os_name, self.new_osp[os_name])
4028 # changes to the hypervisor list
4029 if self.op.enabled_hypervisors is not None:
4030 self.hv_list = self.op.enabled_hypervisors
4031 for hv in self.hv_list:
4032 # if the hypervisor doesn't already exist in the cluster
4033 # hvparams, we initialize it to empty, and then (in both
4034 # cases) we make sure to fill the defaults, as we might not
4035 # have a complete defaults list if the hypervisor wasn't
4037 if hv not in new_hvp:
4038 new_hvp[hv] = {}
4039 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4040 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4041 else:
4042 self.hv_list = cluster.enabled_hypervisors
4044 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4045 # either the enabled list has changed, or the parameters have, validate
4046 for hv_name, hv_params in self.new_hvparams.items():
4047 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4048 (self.op.enabled_hypervisors and
4049 hv_name in self.op.enabled_hypervisors)):
4050 # either this is a new hypervisor, or its parameters have changed
4051 hv_class = hypervisor.GetHypervisor(hv_name)
4052 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4053 hv_class.CheckParameterSyntax(hv_params)
4054 _CheckHVParams(self, node_list, hv_name, hv_params)
4056 if self.op.os_hvp:
4057 # no need to check any newly-enabled hypervisors, since the
4058 # defaults have already been checked in the above code-block
4059 for os_name, os_hvp in self.new_os_hvp.items():
4060 for hv_name, hv_params in os_hvp.items():
4061 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4062 # we need to fill in the new os_hvp on top of the actual hv_p
4063 cluster_defaults = self.new_hvparams.get(hv_name, {})
4064 new_osp = objects.FillDict(cluster_defaults, hv_params)
4065 hv_class = hypervisor.GetHypervisor(hv_name)
4066 hv_class.CheckParameterSyntax(new_osp)
4067 _CheckHVParams(self, node_list, hv_name, new_osp)
4069 if self.op.default_iallocator:
4070 alloc_script = utils.FindFile(self.op.default_iallocator,
4071 constants.IALLOCATOR_SEARCH_PATH,
4072 os.path.isfile)
4073 if alloc_script is None:
4074 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4075 " specified" % self.op.default_iallocator,
4076 errors.ECODE_INVAL)
4078 def Exec(self, feedback_fn):
4079 """Change the parameters of the cluster.
4082 if self.op.vg_name is not None:
4083 new_volume = self.op.vg_name
4084 if not new_volume:
4085 new_volume = None
4086 if new_volume != self.cfg.GetVGName():
4087 self.cfg.SetVGName(new_volume)
4088 else:
4089 feedback_fn("Cluster LVM configuration already in desired"
4090 " state, not changing")
4091 if self.op.drbd_helper is not None:
4092 new_helper = self.op.drbd_helper
4093 if not new_helper:
4094 new_helper = None
4095 if new_helper != self.cfg.GetDRBDHelper():
4096 self.cfg.SetDRBDHelper(new_helper)
4097 else:
4098 feedback_fn("Cluster DRBD helper already in desired state,"
4099 " not changing")
4100 if self.op.hvparams:
4101 self.cluster.hvparams = self.new_hvparams
4102 if self.op.os_hvp:
4103 self.cluster.os_hvp = self.new_os_hvp
4104 if self.op.enabled_hypervisors is not None:
4105 self.cluster.hvparams = self.new_hvparams
4106 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4107 if self.op.beparams:
4108 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4109 if self.op.nicparams:
4110 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4111 if self.op.ipolicy:
4112 self.cluster.ipolicy = self.new_ipolicy
4113 if self.op.osparams:
4114 self.cluster.osparams = self.new_osp
4115 if self.op.ndparams:
4116 self.cluster.ndparams = self.new_ndparams
4117 if self.op.diskparams:
4118 self.cluster.diskparams = self.new_diskparams
4119 if self.op.hv_state:
4120 self.cluster.hv_state_static = self.new_hv_state
4121 if self.op.disk_state:
4122 self.cluster.disk_state_static = self.new_disk_state
4124 if self.op.candidate_pool_size is not None:
4125 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4126 # we need to update the pool size here, otherwise the save will fail
4127 _AdjustCandidatePool(self, [])
4129 if self.op.maintain_node_health is not None:
4130 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4131 feedback_fn("Note: CONFD was disabled at build time, node health"
4132 " maintenance is not useful (still enabling it)")
4133 self.cluster.maintain_node_health = self.op.maintain_node_health
4135 if self.op.prealloc_wipe_disks is not None:
4136 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4138 if self.op.add_uids is not None:
4139 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4141 if self.op.remove_uids is not None:
4142 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4144 if self.op.uid_pool is not None:
4145 self.cluster.uid_pool = self.op.uid_pool
4147 if self.op.default_iallocator is not None:
4148 self.cluster.default_iallocator = self.op.default_iallocator
4150 if self.op.reserved_lvs is not None:
4151 self.cluster.reserved_lvs = self.op.reserved_lvs
4153 if self.op.use_external_mip_script is not None:
4154 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4156 def helper_os(aname, mods, desc):
4157 desc += " OS list"
4158 lst = getattr(self.cluster, aname)
4159 for key, val in mods:
4160 if key == constants.DDM_ADD:
4161 if val in lst:
4162 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4163 else:
4164 lst.append(val)
4165 elif key == constants.DDM_REMOVE:
4166 if val in lst:
4167 lst.remove(val)
4168 else:
4169 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4170 else:
4171 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4173 if self.op.hidden_os:
4174 helper_os("hidden_os", self.op.hidden_os, "hidden")
4176 if self.op.blacklisted_os:
4177 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4179 if self.op.master_netdev:
4180 master_params = self.cfg.GetMasterNetworkParameters()
4181 ems = self.cfg.GetUseExternalMipScript()
4182 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4183 self.cluster.master_netdev)
4184 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4185 master_params, ems)
4186 result.Raise("Could not disable the master ip")
4187 feedback_fn("Changing master_netdev from %s to %s" %
4188 (master_params.netdev, self.op.master_netdev))
4189 self.cluster.master_netdev = self.op.master_netdev
4191 if self.op.master_netmask:
4192 master_params = self.cfg.GetMasterNetworkParameters()
4193 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4194 result = self.rpc.call_node_change_master_netmask(master_params.name,
4195 master_params.netmask,
4196 self.op.master_netmask,
4197 master_params.ip,
4198 master_params.netdev)
4199 if result.fail_msg:
4200 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4201 feedback_fn(msg)
4203 self.cluster.master_netmask = self.op.master_netmask
4205 self.cfg.Update(self.cluster, feedback_fn)
4207 if self.op.master_netdev:
4208 master_params = self.cfg.GetMasterNetworkParameters()
4209 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4210 self.op.master_netdev)
4211 ems = self.cfg.GetUseExternalMipScript()
4212 result = self.rpc.call_node_activate_master_ip(master_params.name,
4213 master_params, ems)
4214 if result.fail_msg:
4215 self.LogWarning("Could not re-enable the master ip on"
4216 " the master, please restart manually: %s",
4217 result.fail_msg)
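# Sketch of the defaults-plus-overrides merge that CheckPrereq above applies
# to hvparams, diskparams, os_hvp and osparams. These helpers are simplified
# stand-ins for objects.FillDict and the per-key update loops, not the real
# implementations.
def _example_fill_dict(defaults, overrides):
  """Return a new dict with overrides layered on top of defaults."""
  result = dict(defaults)
  result.update(overrides)
  return result

def _example_merge_per_key(current, changes):
  """Merge per-hypervisor (or per-disk-template) parameter updates."""
  merged = dict((key, dict(val)) for key, val in current.items())
  for key, val in changes.items():
    if key not in merged:
      merged[key] = dict(val)
    else:
      merged[key].update(val)
  return merged

# Example: cluster-wide kvm defaults plus an override of a single parameter:
#   _example_merge_per_key({"kvm": {"acpi": True, "kernel_path": "/boot/k"}},
#                          {"kvm": {"acpi": False}})
#   -> {"kvm": {"acpi": False, "kernel_path": "/boot/k"}}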
4220 def _UploadHelper(lu, nodes, fname):
4221 """Helper for uploading a file and showing warnings.
4224 if os.path.exists(fname):
4225 result = lu.rpc.call_upload_file(nodes, fname)
4226 for to_node, to_result in result.items():
4227 msg = to_result.fail_msg
4228 if msg:
4229 msg = ("Copy of file %s to node %s failed: %s" %
4230 (fname, to_node, msg))
4231 lu.proc.LogWarning(msg)
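# Standalone sketch of the fan-out-and-warn pattern used by _UploadHelper:
# push one file to many nodes, keep going on per-node failures and only
# collect warnings. upload_fn is a hypothetical callable standing in for the
# upload_file RPC; it returns an error string or None per node.
def _example_upload_to_nodes(nodes, fname, upload_fn):
  warnings = []
  for node in nodes:
    err = upload_fn(node, fname)
    if err:
      warnings.append("Copy of file %s to node %s failed: %s" %
                      (fname, node, err))
  return warnings

# _example_upload_to_nodes(["node2", "node3"], "/etc/hosts",
#                          lambda node, fname: None)  -> []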
4234 def _ComputeAncillaryFiles(cluster, redist):
4235 """Compute files external to Ganeti which need to be consistent.
4237 @type redist: boolean
4238 @param redist: Whether to include files which need to be redistributed
4241 # Compute files for all nodes
4242 files_all = set([
4243 constants.SSH_KNOWN_HOSTS_FILE,
4244 constants.CONFD_HMAC_KEY,
4245 constants.CLUSTER_DOMAIN_SECRET_FILE,
4246 constants.SPICE_CERT_FILE,
4247 constants.SPICE_CACERT_FILE,
4248 constants.RAPI_USERS_FILE,
4249 ])
4251 if not redist:
4252 files_all.update(constants.ALL_CERT_FILES)
4253 files_all.update(ssconf.SimpleStore().GetFileList())
4254 else:
4255 # we need to ship at least the RAPI certificate
4256 files_all.add(constants.RAPI_CERT_FILE)
4258 if cluster.modify_etc_hosts:
4259 files_all.add(constants.ETC_HOSTS)
4261 # Files which are optional, these must:
4262 # - be present in one other category as well
4263 # - either exist or not exist on all nodes of that category (mc, vm all)
4264 files_opt = set([
4265 constants.RAPI_USERS_FILE,
4266 ])
4268 # Files which should only be on master candidates
4269 files_mc = set()
4271 if not redist:
4272 files_mc.add(constants.CLUSTER_CONF_FILE)
4274 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4275 # replication
4276 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4278 # Files which should only be on VM-capable nodes
4279 files_vm = set(filename
4280 for hv_name in cluster.enabled_hypervisors
4281 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4283 files_opt |= set(filename
4284 for hv_name in cluster.enabled_hypervisors
4285 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4287 # Filenames in each category must be unique
4288 all_files_set = files_all | files_mc | files_vm
4289 assert (len(all_files_set) ==
4290 sum(map(len, [files_all, files_mc, files_vm]))), \
4291 "Found file listed in more than one file list"
4293 # Optional files must be present in one other category
4294 assert all_files_set.issuperset(files_opt), \
4295 "Optional file not in a different required list"
4297 return (files_all, files_opt, files_mc, files_vm)
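# Sketch of the two invariants asserted at the end of _ComputeAncillaryFiles:
# no file may appear in more than one category, and every optional file must
# also belong to one of the required categories. Standalone, plain sets only.
def _example_check_file_categories(files_all, files_mc, files_vm, files_opt):
  categories = [files_all, files_mc, files_vm]
  union = set().union(*categories)
  if len(union) != sum(len(c) for c in categories):
    raise AssertionError("Found file listed in more than one file list")
  if not union.issuperset(files_opt):
    raise AssertionError("Optional file not in a different required list")

# _example_check_file_categories({"known_hosts", "rapi.pem"}, {"config.data"},
#                                {"kvm-ifup"}, {"rapi.pem"})  # passes silently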
4300 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4301 """Distribute additional files which are part of the cluster configuration.
4303 ConfigWriter takes care of distributing the config and ssconf files, but
4304 there are more files which should be distributed to all nodes. This function
4305 makes sure those are copied.
4307 @param lu: calling logical unit
4308 @param additional_nodes: list of nodes not in the config to distribute to
4309 @type additional_vm: boolean
4310 @param additional_vm: whether the additional nodes are vm-capable or not
4313 # Gather target nodes
4314 cluster = lu.cfg.GetClusterInfo()
4315 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4317 online_nodes = lu.cfg.GetOnlineNodeList()
4318 vm_nodes = lu.cfg.GetVmCapableNodeList()
4320 if additional_nodes is not None:
4321 online_nodes.extend(additional_nodes)
4322 if additional_vm:
4323 vm_nodes.extend(additional_nodes)
4325 # Never distribute to master node
4326 for nodelist in [online_nodes, vm_nodes]:
4327 if master_info.name in nodelist:
4328 nodelist.remove(master_info.name)
4331 (files_all, _, files_mc, files_vm) = \
4332 _ComputeAncillaryFiles(cluster, True)
4334 # Never re-distribute configuration file from here
4335 assert not (constants.CLUSTER_CONF_FILE in files_all or
4336 constants.CLUSTER_CONF_FILE in files_vm)
4337 assert not files_mc, "Master candidates not handled in this function"
4339 filemap = [
4340 (online_nodes, files_all),
4341 (vm_nodes, files_vm),
4342 ]
4344 # Upload the files
4345 for (node_list, files) in filemap:
4346 for fname in files:
4347 _UploadHelper(lu, node_list, fname)
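# Small sketch of the node-list assembly done by _RedistributeAncillaryFiles:
# start from the online and vm-capable node lists, optionally extend both
# with newly added nodes, and always drop the master (it already holds the
# files). Standalone helper, not the LU code itself.
def _example_distribution_lists(online_nodes, vm_nodes, master,
                                additional_nodes=None, additional_vm=True):
  online = list(online_nodes)
  vm = list(vm_nodes)
  if additional_nodes is not None:
    online.extend(additional_nodes)
    if additional_vm:
      vm.extend(additional_nodes)
  for nodelist in (online, vm):
    if master in nodelist:
      nodelist.remove(master)
  return (online, vm)

# _example_distribution_lists(["master", "node1"], ["master"], "master",
#                             additional_nodes=["node2"])
# -> (["node1", "node2"], ["node2"])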
4350 class LUClusterRedistConf(NoHooksLU):
4351 """Force the redistribution of cluster configuration.
4353 This is a very simple LU.
4358 def ExpandNames(self):
4359 self.needed_locks = {
4360 locking.LEVEL_NODE: locking.ALL_SET,
4362 self.share_locks[locking.LEVEL_NODE] = 1
4364 def Exec(self, feedback_fn):
4365 """Redistribute the configuration.
4368 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4369 _RedistributeAncillaryFiles(self)
4372 class LUClusterActivateMasterIp(NoHooksLU):
4373 """Activate the master IP on the master node.
4376 def Exec(self, feedback_fn):
4377 """Activate the master IP.
4380 master_params = self.cfg.GetMasterNetworkParameters()
4381 ems = self.cfg.GetUseExternalMipScript()
4382 result = self.rpc.call_node_activate_master_ip(master_params.name,
4384 result.Raise("Could not activate the master IP")
4387 class LUClusterDeactivateMasterIp(NoHooksLU):
4388 """Deactivate the master IP on the master node.
4391 def Exec(self, feedback_fn):
4392 """Deactivate the master IP.
4395 master_params = self.cfg.GetMasterNetworkParameters()
4396 ems = self.cfg.GetUseExternalMipScript()
4397 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4399 result.Raise("Could not deactivate the master IP")
4402 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4403 """Sleep and poll for an instance's disk to sync.
4406 if not instance.disks or disks is not None and not disks:
4407 return True
4409 disks = _ExpandCheckDisks(instance, disks)
4411 if not oneshot:
4412 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4414 node = instance.primary_node
4416 for dev in disks:
4417 lu.cfg.SetDiskID(dev, node)
4419 # TODO: Convert to utils.Retry
4421 retries = 0
4422 degr_retries = 10 # in seconds, as we sleep 1 second each time
4423 while True:
4424 max_time = 0
4425 done = True
4426 cumul_degraded = False
4427 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4428 msg = rstats.fail_msg
4429 if msg:
4430 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4431 retries += 1
4432 if retries >= 10:
4433 raise errors.RemoteError("Can't contact node %s for mirror data,"
4434 " aborting." % node)
4435 time.sleep(6)
4436 continue
4437 rstats = rstats.payload
4439 for i, mstat in enumerate(rstats):
4440 if mstat is None:
4441 lu.LogWarning("Can't compute data for node %s/%s",
4442 node, disks[i].iv_name)
4443 continue
4445 cumul_degraded = (cumul_degraded or
4446 (mstat.is_degraded and mstat.sync_percent is None))
4447 if mstat.sync_percent is not None:
4448 done = False
4449 if mstat.estimated_time is not None:
4450 rem_time = ("%s remaining (estimated)" %
4451 utils.FormatSeconds(mstat.estimated_time))
4452 max_time = mstat.estimated_time
4453 else:
4454 rem_time = "no time estimate"
4455 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4456 (disks[i].iv_name, mstat.sync_percent, rem_time))
4458 # if we're done but degraded, let's do a few small retries, to
4459 # make sure we see a stable and not transient situation; therefore
4460 # we force restart of the loop
4461 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4462 logging.info("Degraded disks found, %d retries left", degr_retries)
4463 degr_retries -= 1
4464 time.sleep(1)
4465 continue
4467 if done or oneshot:
4468 break
4470 time.sleep(min(60, max_time))
4472 if done:
4473 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4474 return not cumul_degraded
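# Simplified, standalone version of the polling structure in _WaitForSync:
# poll a status callable until it reports "in sync", tolerating a bounded
# number of transient failures. status_fn is a hypothetical callable that
# returns (done, degraded) and raises on an RPC-style failure.
import time

def _example_wait_for_sync(status_fn, max_failures=10, poll_interval=1):
  failures = 0
  while True:
    try:
      done, degraded = status_fn()
    except EnvironmentError:  # stands in for a failed status RPC
      failures += 1
      if failures >= max_failures:
        raise
      time.sleep(poll_interval)
      continue
    failures = 0
    if done:
      return not degraded
    time.sleep(poll_interval)

# Example: a status source that reports completion on its third poll:
#   state = {"calls": 0}
#   def _status():
#     state["calls"] += 1
#     return (state["calls"] >= 3, False)
#   _example_wait_for_sync(_status, poll_interval=0)  -> True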
4477 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4478 """Check that mirrors are not degraded.
4480 The ldisk parameter, if True, will change the test from the
4481 is_degraded attribute (which represents overall non-ok status for
4482 the device(s)) to the ldisk (representing the local storage status).
4485 lu.cfg.SetDiskID(dev, node)
4487 result = True
4489 if on_primary or dev.AssembleOnSecondary():
4490 rstats = lu.rpc.call_blockdev_find(node, dev)
4491 msg = rstats.fail_msg
4492 if msg:
4493 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4494 result = False
4495 elif not rstats.payload:
4496 lu.LogWarning("Can't find disk on node %s", node)
4497 result = False
4498 else:
4499 if ldisk:
4500 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4501 else:
4502 result = result and not rstats.payload.is_degraded
4504 if dev.children:
4505 for child in dev.children:
4506 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4508 return result
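# Standalone sketch of the recursion in _CheckDiskConsistency: a device is
# consistent only if its own status is acceptable and all of its children
# are. The _ExampleDisk class is a minimal hypothetical stand-in for
# objects.Disk, not the real object.
class _ExampleDisk(object):
  def __init__(self, degraded=False, children=None):
    self.degraded = degraded
    self.children = children or []

def _example_disk_consistent(disk):
  result = not disk.degraded
  for child in disk.children:
    result = result and _example_disk_consistent(child)
  return result

# A DRBD-like device whose data child is degraded is itself inconsistent:
#   _example_disk_consistent(
#       _ExampleDisk(children=[_ExampleDisk(degraded=True), _ExampleDisk()]))
#   -> False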
4511 class LUOobCommand(NoHooksLU):
4512 """Logical unit for OOB handling.
4516 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4518 def ExpandNames(self):
4519 """Gather locks we need.
4522 if self.op.node_names:
4523 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4524 lock_names = self.op.node_names
4526 lock_names = locking.ALL_SET
4528 self.needed_locks = {
4529 locking.LEVEL_NODE: lock_names,
4532 def CheckPrereq(self):
4533 """Check prerequisites.
4536 - the node exists in the configuration
4539 Any errors are signaled by raising errors.OpPrereqError.
4542 self.nodes = []
4543 self.master_node = self.cfg.GetMasterNode()
4545 assert self.op.power_delay >= 0.0
4547 if self.op.node_names:
4548 if (self.op.command in self._SKIP_MASTER and
4549 self.master_node in self.op.node_names):
4550 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4551 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4553 if master_oob_handler:
4554 additional_text = ("run '%s %s %s' if you want to operate on the"
4555 " master regardless") % (master_oob_handler,
4556 self.op.command,
4557 self.master_node)
4558 else:
4559 additional_text = "it does not support out-of-band operations"
4561 raise errors.OpPrereqError(("Operating on the master node %s is not"
4562 " allowed for %s; %s") %
4563 (self.master_node, self.op.command,
4564 additional_text), errors.ECODE_INVAL)
4565 else:
4566 self.op.node_names = self.cfg.GetNodeList()
4567 if self.op.command in self._SKIP_MASTER:
4568 self.op.node_names.remove(self.master_node)
4570 if self.op.command in self._SKIP_MASTER:
4571 assert self.master_node not in self.op.node_names
4573 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4574 if node is None:
4575 raise errors.OpPrereqError("Node %s not found" % node_name,
4576 errors.ECODE_NOENT)
4578 self.nodes.append(node)
4580 if (not self.op.ignore_status and
4581 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4582 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4583 " not marked offline") % node_name,
4586 def Exec(self, feedback_fn):
4587 """Execute OOB and return result if we expect any.
4590 master_node = self.master_node
4591 ret = []
4593 for idx, node in enumerate(utils.NiceSort(self.nodes,
4594 key=lambda node: node.name)):
4595 node_entry = [(constants.RS_NORMAL, node.name)]
4596 ret.append(node_entry)
4598 oob_program = _SupportsOob(self.cfg, node)
4600 if not oob_program:
4601 node_entry.append((constants.RS_UNAVAIL, None))
4602 continue
4604 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4605 self.op.command, oob_program, node.name)
4606 result = self.rpc.call_run_oob(master_node, oob_program,
4607 self.op.command, node.name,
4608 self.op.timeout)
4610 if result.fail_msg:
4611 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4612 node.name, result.fail_msg)
4613 node_entry.append((constants.RS_NODATA, None))
4614 else:
4615 try:
4616 self._CheckPayload(result)
4617 except errors.OpExecError, err:
4618 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4619 node.name, err)
4620 node_entry.append((constants.RS_NODATA, None))
4621 else:
4622 if self.op.command == constants.OOB_HEALTH:
4623 # For health we should log important events
4624 for item, status in result.payload:
4625 if status in [constants.OOB_STATUS_WARNING,
4626 constants.OOB_STATUS_CRITICAL]:
4627 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4628 item, node.name, status)
4630 if self.op.command == constants.OOB_POWER_ON:
4631 node.powered = True
4632 elif self.op.command == constants.OOB_POWER_OFF:
4633 node.powered = False
4634 elif self.op.command == constants.OOB_POWER_STATUS:
4635 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4636 if powered != node.powered:
4637 logging.warning(("Recorded power state (%s) of node '%s' does not"
4638 " match actual power state (%s)"), node.powered,
4639 node.name, powered)
4641 # For configuration changing commands we should update the node
4642 if self.op.command in (constants.OOB_POWER_ON,
4643 constants.OOB_POWER_OFF):
4644 self.cfg.Update(node, feedback_fn)
4646 node_entry.append((constants.RS_NORMAL, result.payload))
4648 if (self.op.command == constants.OOB_POWER_ON and
4649 idx < len(self.nodes) - 1):
4650 time.sleep(self.op.power_delay)
4652 return ret
4654 def _CheckPayload(self, result):
4655 """Checks if the payload is valid.
4657 @param result: RPC result
4658 @raises errors.OpExecError: If payload is not valid
4661 errs = []
4662 if self.op.command == constants.OOB_HEALTH:
4663 if not isinstance(result.payload, list):
4664 errs.append("command 'health' is expected to return a list but got %s" %
4665 type(result.payload))
4666 else:
4667 for item, status in result.payload:
4668 if status not in constants.OOB_STATUSES:
4669 errs.append("health item '%s' has invalid status '%s'" %
4670 (item, status))
4672 if self.op.command == constants.OOB_POWER_STATUS:
4673 if not isinstance(result.payload, dict):
4674 errs.append("power-status is expected to return a dict but got %s" %
4675 type(result.payload))
4677 if self.op.command in [
4678 constants.OOB_POWER_ON,
4679 constants.OOB_POWER_OFF,
4680 constants.OOB_POWER_CYCLE,
4682 if result.payload is not None:
4683 errs.append("%s is expected to not return payload but got '%s'" %
4684 (self.op.command, result.payload))
4686 if errs:
4687 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4688 utils.CommaJoin(errs))
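# Sketch of the per-command payload checks performed by _CheckPayload above,
# written as a standalone function that returns the list of problems instead
# of raising. The command and status strings are illustrative stand-ins for
# the corresponding OOB constants.
def _example_check_oob_payload(command, payload):
  errs = []
  if command == "health":
    if not isinstance(payload, list):
      errs.append("command 'health' is expected to return a list but got %s" %
                  type(payload))
    else:
      for item, status in payload:
        if status not in ("OK", "warning", "critical", "unknown"):
          errs.append("health item '%s' has invalid status '%s'" %
                      (item, status))
  elif command == "power-status":
    if not isinstance(payload, dict):
      errs.append("power-status is expected to return a dict but got %s" %
                  type(payload))
  elif command in ("power-on", "power-off", "power-cycle"):
    if payload is not None:
      errs.append("%s is expected to not return payload but got '%s'" %
                  (command, payload))
  return errs

# _example_check_oob_payload("power-on", None)             -> []
# _example_check_oob_payload("health", [("fan0", "bad")])  -> one error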
4691 class _OsQuery(_QueryBase):
4692 FIELDS = query.OS_FIELDS
4694 def ExpandNames(self, lu):
4695 # Lock all nodes in shared mode
4696 # Temporary removal of locks, should be reverted later
4697 # TODO: reintroduce locks when they are lighter-weight
4698 lu.needed_locks = {}
4699 #self.share_locks[locking.LEVEL_NODE] = 1
4700 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4702 # The following variables interact with _QueryBase._GetNames
4703 if self.names:
4704 self.wanted = self.names
4705 else:
4706 self.wanted = locking.ALL_SET
4708 self.do_locking = self.use_locking
4710 def DeclareLocks(self, lu, level):
4714 def _DiagnoseByOS(rlist):
4715 """Remaps a per-node return list into an a per-os per-node dictionary
4717 @param rlist: a map with node names as keys and OS objects as values
4720 @return: a dictionary with osnames as keys and as value another
4721 map, with nodes as keys and tuples of (path, status, diagnose,
4722 variants, parameters, api_versions) as values, eg::
4724 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4725 (/srv/..., False, "invalid api")],
4726 "node2": [(/srv/..., True, "", [], [])]}
4731 # we build here the list of nodes that didn't fail the RPC (at RPC
4732 # level), so that nodes with a non-responding node daemon don't
4733 # make all OSes invalid
4734 good_nodes = [node_name for node_name in rlist
4735 if not rlist[node_name].fail_msg]
4736 for node_name, nr in rlist.items():
4737 if nr.fail_msg or not nr.payload:
4739 for (name, path, status, diagnose, variants,
4740 params, api_versions) in nr.payload:
4741 if name not in all_os:
4742 # build a list of nodes for this os containing empty lists
4743 # for each node in node_list
4745 for nname in good_nodes:
4746 all_os[name][nname] = []
4747 # convert params from [name, help] to (name, help)
4748 params = [tuple(v) for v in params]
4749 all_os[name][node_name].append((path, status, diagnose,
4750 variants, params, api_versions))
4753 def _GetQueryData(self, lu):
4754 """Computes the list of nodes and their attributes.
4757 # Locking is not used
4758 assert not (compat.any(lu.glm.is_owned(level)
4759 for level in locking.LEVELS
4760 if level != locking.LEVEL_CLUSTER) or
4761 self.do_locking or self.use_locking)
4763 valid_nodes = [node.name
4764 for node in lu.cfg.GetAllNodesInfo().values()
4765 if not node.offline and node.vm_capable]
4766 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4767 cluster = lu.cfg.GetClusterInfo()
4771 for (os_name, os_data) in pol.items():
4772 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4773 hidden=(os_name in cluster.hidden_os),
4774 blacklisted=(os_name in cluster.blacklisted_os))
4778 api_versions = set()
4780 for idx, osl in enumerate(os_data.values()):
4781 info.valid = bool(info.valid and osl and osl[0][1])
4785 (node_variants, node_params, node_api) = osl[0][3:6]
4788 variants.update(node_variants)
4789 parameters.update(node_params)
4790 api_versions.update(node_api)
4792 # Filter out inconsistent values
4793 variants.intersection_update(node_variants)
4794 parameters.intersection_update(node_params)
4795 api_versions.intersection_update(node_api)
4797 info.variants = list(variants)
4798 info.parameters = list(parameters)
4799 info.api_versions = list(api_versions)
4801 data[os_name] = info
4803 # Prepare data in requested order
4804 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4805 if name in data]
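# Standalone sketch of the inversion performed by _DiagnoseByOS: per-node RPC
# results (node -> list of OS entries) are turned into a per-OS map
# (os -> node -> entries), pre-seeded with every responding node so that an
# OS missing from a node shows up as an empty list rather than a missing key.
def _example_diagnose_by_os(per_node_results):
  """per_node_results: dict mapping node name to a list of (os, data) pairs."""
  all_os = {}
  good_nodes = list(per_node_results)
  for node, entries in per_node_results.items():
    for os_name, data in entries:
      if os_name not in all_os:
        all_os[os_name] = dict((n, []) for n in good_nodes)
      all_os[os_name][node].append(data)
  return all_os

# _example_diagnose_by_os({"node1": [("debian-etch", "/srv/os/debian-etch")],
#                          "node2": []})
# -> {"debian-etch": {"node1": ["/srv/os/debian-etch"], "node2": []}}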
4808 class LUOsDiagnose(NoHooksLU):
4809 """Logical unit for OS diagnose/query.
4815 def _BuildFilter(fields, names):
4816 """Builds a filter for querying OSes.
4819 name_filter = qlang.MakeSimpleFilter("name", names)
4821 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4822 # respective field is not requested
4823 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4824 for fname in ["hidden", "blacklisted"]
4825 if fname not in fields]
4826 if "valid" not in fields:
4827 status_filter.append([qlang.OP_TRUE, "valid"])
4829 if status_filter:
4830 status_filter.insert(0, qlang.OP_AND)
4831 else:
4832 status_filter = None
4834 if name_filter and status_filter:
4835 return [qlang.OP_AND, name_filter, status_filter]
4836 elif name_filter:
4837 return name_filter
4838 else:
4839 return status_filter
4841 def CheckArguments(self):
4842 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4843 self.op.output_fields, False)
4845 def ExpandNames(self):
4846 self.oq.ExpandNames(self)
4848 def Exec(self, feedback_fn):
4849 return self.oq.OldStyleQuery(self)
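# Sketch of the legacy filter built by LUOsDiagnose._BuildFilter: hidden,
# blacklisted and invalid OSes are filtered out unless the corresponding
# field was explicitly requested. Filters are plain nested lists; the
# operator strings used here ("&", "|", "!", "?", "=") are illustrative
# stand-ins for the qlang operator constants.
def _example_build_os_filter(fields, names):
  name_filter = (["|"] + [["=", "name", n] for n in names]) if names else None
  status_filter = [["!", ["?", fname]]
                   for fname in ("hidden", "blacklisted")
                   if fname not in fields]
  if "valid" not in fields:
    status_filter.append(["?", "valid"])
  status_filter = (["&"] + status_filter) if status_filter else None
  if name_filter and status_filter:
    return ["&", name_filter, status_filter]
  return name_filter or status_filter

# _example_build_os_filter(["name"], []) ->
#   ["&", ["!", ["?", "hidden"]], ["!", ["?", "blacklisted"]], ["?", "valid"]]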
4852 class LUNodeRemove(LogicalUnit):
4853 """Logical unit for removing a node.
4856 HPATH = "node-remove"
4857 HTYPE = constants.HTYPE_NODE
4859 def BuildHooksEnv(self):
4864 "OP_TARGET": self.op.node_name,
4865 "NODE_NAME": self.op.node_name,
4868 def BuildHooksNodes(self):
4869 """Build hooks nodes.
4871 This doesn't run on the target node in the pre phase as a failed
4872 node would then be impossible to remove.
4875 all_nodes = self.cfg.GetNodeList()
4876 try:
4877 all_nodes.remove(self.op.node_name)
4878 except ValueError:
4879 pass
4880 return (all_nodes, all_nodes)
4882 def CheckPrereq(self):
4883 """Check prerequisites.
4886 - the node exists in the configuration
4887 - it does not have primary or secondary instances
4888 - it's not the master
4890 Any errors are signaled by raising errors.OpPrereqError.
4893 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4894 node = self.cfg.GetNodeInfo(self.op.node_name)
4895 assert node is not None
4897 masternode = self.cfg.GetMasterNode()
4898 if node.name == masternode:
4899 raise errors.OpPrereqError("Node is the master node, failover to another"
4900 " node is required", errors.ECODE_INVAL)
4902 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4903 if node.name in instance.all_nodes:
4904 raise errors.OpPrereqError("Instance %s is still running on the node,"
4905 " please remove first" % instance_name,
4907 self.op.node_name = node.name
4910 def Exec(self, feedback_fn):
4911 """Removes the node from the cluster.
4914 node = self.cfg.GetNodeInfo(self.op.node_name)
4915 logging.info("Stopping the node daemon and removing configs from node %s",
4916 node.name)
4918 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4920 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4921 "Not owning BGL"
4923 # Promote nodes to master candidate as needed
4924 _AdjustCandidatePool(self, exceptions=[node.name])
4925 self.context.RemoveNode(node.name)
4927 # Run post hooks on the node before it's removed
4928 _RunPostHook(self, node.name)
4930 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4931 msg = result.fail_msg
4932 if msg:
4933 self.LogWarning("Errors encountered on the remote node while leaving"
4934 " the cluster: %s", msg)
4936 # Remove node from our /etc/hosts
4937 if self.cfg.GetClusterInfo().modify_etc_hosts:
4938 master_node = self.cfg.GetMasterNode()
4939 result = self.rpc.call_etc_hosts_modify(master_node,
4940 constants.ETC_HOSTS_REMOVE,
4941 node.name, None)
4942 result.Raise("Can't update hosts file with new host data")
4943 _RedistributeAncillaryFiles(self)
4946 class _NodeQuery(_QueryBase):
4947 FIELDS = query.NODE_FIELDS
4949 def ExpandNames(self, lu):
4950 lu.needed_locks = {}
4951 lu.share_locks = _ShareAll()
4953 if self.names:
4954 self.wanted = _GetWantedNodes(lu, self.names)
4955 else:
4956 self.wanted = locking.ALL_SET
4958 self.do_locking = (self.use_locking and
4959 query.NQ_LIVE in self.requested_data)
4961 if self.do_locking:
4962 # If any non-static field is requested we need to lock the nodes
4963 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4965 def DeclareLocks(self, lu, level):
4968 def _GetQueryData(self, lu):
4969 """Computes the list of nodes and their attributes.
4972 all_info = lu.cfg.GetAllNodesInfo()
4974 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4976 # Gather data as requested
4977 if query.NQ_LIVE in self.requested_data:
4978 # filter out non-vm_capable nodes
4979 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4981 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4982 [lu.cfg.GetHypervisorType()])
4983 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4984 for (name, nresult) in node_data.items()
4985 if not nresult.fail_msg and nresult.payload)
4989 if query.NQ_INST in self.requested_data:
4990 node_to_primary = dict([(name, set()) for name in nodenames])
4991 node_to_secondary = dict([(name, set()) for name in nodenames])
4993 inst_data = lu.cfg.GetAllInstancesInfo()
4995 for inst in inst_data.values():
4996 if inst.primary_node in node_to_primary:
4997 node_to_primary[inst.primary_node].add(inst.name)
4998 for secnode in inst.secondary_nodes:
4999 if secnode in node_to_secondary:
5000 node_to_secondary[secnode].add(inst.name)
5002 node_to_primary = None
5003 node_to_secondary = None
5005 if query.NQ_OOB in self.requested_data:
5006 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5007 for name, node in all_info.iteritems())
5011 if query.NQ_GROUP in self.requested_data:
5012 groups = lu.cfg.GetAllNodeGroupsInfo()
5016 return query.NodeQueryData([all_info[name] for name in nodenames],
5017 live_data, lu.cfg.GetMasterNode(),
5018 node_to_primary, node_to_secondary, groups,
5019 oob_support, lu.cfg.GetClusterInfo())
5022 class LUNodeQuery(NoHooksLU):
5023 """Logical unit for querying nodes.
5026 # pylint: disable=W0142
5029 def CheckArguments(self):
5030 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5031 self.op.output_fields, self.op.use_locking)
5033 def ExpandNames(self):
5034 self.nq.ExpandNames(self)
5036 def DeclareLocks(self, level):
5037 self.nq.DeclareLocks(self, level)
5039 def Exec(self, feedback_fn):
5040 return self.nq.OldStyleQuery(self)
5043 class LUNodeQueryvols(NoHooksLU):
5044 """Logical unit for getting volumes on node(s).
5048 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5049 _FIELDS_STATIC = utils.FieldSet("node")
5051 def CheckArguments(self):
5052 _CheckOutputFields(static=self._FIELDS_STATIC,
5053 dynamic=self._FIELDS_DYNAMIC,
5054 selected=self.op.output_fields)
5056 def ExpandNames(self):
5057 self.share_locks = _ShareAll()
5058 self.needed_locks = {}
5060 if not self.op.nodes:
5061 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5063 self.needed_locks[locking.LEVEL_NODE] = \
5064 _GetWantedNodes(self, self.op.nodes)
5066 def Exec(self, feedback_fn):
5067 """Computes the list of nodes and their attributes.
5070 nodenames = self.owned_locks(locking.LEVEL_NODE)
5071 volumes = self.rpc.call_node_volumes(nodenames)
5073 ilist = self.cfg.GetAllInstancesInfo()
5074 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5076 output = []
5077 for node in nodenames:
5078 nresult = volumes[node]
5079 if nresult.offline:
5080 continue
5081 msg = nresult.fail_msg
5082 if msg:
5083 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5084 continue
5086 node_vols = sorted(nresult.payload,
5087 key=operator.itemgetter("dev"))
5089 for vol in node_vols:
5090 node_output = []
5091 for field in self.op.output_fields:
5092 if field == "node":
5093 val = node
5094 elif field == "phys":
5095 val = vol["dev"]
5096 elif field == "vg":
5097 val = vol["vg"]
5098 elif field == "name":
5099 val = vol["name"]
5100 elif field == "size":
5101 val = int(float(vol["size"]))
5102 elif field == "instance":
5103 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5105 raise errors.ParameterError(field)
5106 node_output.append(str(val))
5108 output.append(node_output)
5110 return output
5113 class LUNodeQueryStorage(NoHooksLU):
5114 """Logical unit for getting information on storage units on node(s).
5117 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5120 def CheckArguments(self):
5121 _CheckOutputFields(static=self._FIELDS_STATIC,
5122 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5123 selected=self.op.output_fields)
5125 def ExpandNames(self):
5126 self.share_locks = _ShareAll()
5127 self.needed_locks = {}
5130 self.needed_locks[locking.LEVEL_NODE] = \
5131 _GetWantedNodes(self, self.op.nodes)
5133 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5135 def Exec(self, feedback_fn):
5136 """Computes the list of nodes and their attributes.
5139 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5141 # Always get name to sort by
5142 if constants.SF_NAME in self.op.output_fields:
5143 fields = self.op.output_fields[:]
5145 fields = [constants.SF_NAME] + self.op.output_fields
5147 # Never ask for node or type as it's only known to the LU
5148 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5149 while extra in fields:
5150 fields.remove(extra)
5152 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5153 name_idx = field_idx[constants.SF_NAME]
5155 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5156 data = self.rpc.call_storage_list(self.nodes,
5157 self.op.storage_type, st_args,
5158 self.op.name, fields)
5160 result = []
5162 for node in utils.NiceSort(self.nodes):
5163 nresult = data[node]
5164 if nresult.offline:
5165 continue
5167 msg = nresult.fail_msg
5168 if msg:
5169 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5170 continue
5172 rows = dict([(row[name_idx], row) for row in nresult.payload])
5174 for name in utils.NiceSort(rows.keys()):
5175 row = rows[name]
5176 out = []
5179 for field in self.op.output_fields:
5180 if field == constants.SF_NODE:
5182 elif field == constants.SF_TYPE:
5183 val = self.op.storage_type
5184 elif field in field_idx:
5185 val = row[field_idx[field]]
5186 else:
5187 raise errors.ParameterError(field)
5189 out.append(val)
5191 result.append(out)
5193 return result
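# Sketch of the result shaping in LUNodeQueryStorage.Exec: each node returns
# rows ordered by the fields that were actually queried; the rows are
# re-keyed by the storage unit name and then emitted per requested output
# field, with the node name and storage type filled in by the LU itself.
# Standalone helper with hypothetical field names.
def _example_shape_storage_rows(node, storage_type, fields, payload,
                                output_fields):
  field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
  name_idx = field_idx["name"]
  rows = dict((row[name_idx], row) for row in payload)
  out = []
  for name in sorted(rows):
    row = rows[name]
    entry = []
    for field in output_fields:
      if field == "node":
        val = node
      elif field == "type":
        val = storage_type
      elif field in field_idx:
        val = row[field_idx[field]]
      else:
        raise ValueError("Unknown field: %s" % field)
      entry.append(val)
    out.append(entry)
  return out

# _example_shape_storage_rows("node1", "lvm-vg", ["name", "size"],
#                             [["xenvg", 409600]], ["node", "name", "size"])
# -> [["node1", "xenvg", 409600]]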
5196 class _InstanceQuery(_QueryBase):
5197 FIELDS = query.INSTANCE_FIELDS
5199 def ExpandNames(self, lu):
5200 lu.needed_locks = {}
5201 lu.share_locks = _ShareAll()
5203 if self.names:
5204 self.wanted = _GetWantedInstances(lu, self.names)
5205 else:
5206 self.wanted = locking.ALL_SET
5208 self.do_locking = (self.use_locking and
5209 query.IQ_LIVE in self.requested_data)
5211 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5212 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5213 lu.needed_locks[locking.LEVEL_NODE] = []
5214 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5216 self.do_grouplocks = (self.do_locking and
5217 query.IQ_NODES in self.requested_data)
5219 def DeclareLocks(self, lu, level):
5221 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5222 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5224 # Lock all groups used by instances optimistically; this requires going
5225 # via the node before it's locked, requiring verification later on
5226 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5228 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5229 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5230 elif level == locking.LEVEL_NODE:
5231 lu._LockInstancesNodes() # pylint: disable=W0212
5234 def _CheckGroupLocks(lu):
5235 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5236 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5238 # Check if node groups for locked instances are still correct
5239 for instance_name in owned_instances:
5240 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5242 def _GetQueryData(self, lu):
5243 """Computes the list of instances and their attributes.
5246 if self.do_grouplocks:
5247 self._CheckGroupLocks(lu)
5249 cluster = lu.cfg.GetClusterInfo()
5250 all_info = lu.cfg.GetAllInstancesInfo()
5252 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5254 instance_list = [all_info[name] for name in instance_names]
5255 nodes = frozenset(itertools.chain(*(inst.all_nodes
5256 for inst in instance_list)))
5257 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5260 wrongnode_inst = set()
5262 # Gather data as requested
5263 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5265 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5267 result = node_data[name]
5269 # offline nodes will be in both lists
5270 assert result.fail_msg
5271 offline_nodes.append(name)
5273 bad_nodes.append(name)
5274 elif result.payload:
5275 for inst in result.payload:
5276 if inst in all_info:
5277 if all_info[inst].primary_node == name:
5278 live_data.update(result.payload)
5280 wrongnode_inst.add(inst)
5282 # orphan instance; we don't list it here as we don't
5283 # handle this case yet in the output of instance listing
5284 logging.warning("Orphan instance '%s' found on node %s",
5286 # else no instance is alive
5290 if query.IQ_DISKUSAGE in self.requested_data:
5291 disk_usage = dict((inst.name,
5292 _ComputeDiskSize(inst.disk_template,
5293 [{constants.IDISK_SIZE: disk.size}
5294 for disk in inst.disks]))
5295 for inst in instance_list)
5299 if query.IQ_CONSOLE in self.requested_data:
5301 for inst in instance_list:
5302 if inst.name in live_data:
5303 # Instance is running
5304 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5306 consinfo[inst.name] = None
5307 assert set(consinfo.keys()) == set(instance_names)
5311 if query.IQ_NODES in self.requested_data:
5312 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5314 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5315 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5316 for uuid in set(map(operator.attrgetter("group"),
5322 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5323 disk_usage, offline_nodes, bad_nodes,
5324 live_data, wrongnode_inst, consinfo,
5328 class LUQuery(NoHooksLU):
5329 """Query for resources/items of a certain kind.
5332 # pylint: disable=W0142
5335 def CheckArguments(self):
5336 qcls = _GetQueryImplementation(self.op.what)
5338 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5340 def ExpandNames(self):
5341 self.impl.ExpandNames(self)
5343 def DeclareLocks(self, level):
5344 self.impl.DeclareLocks(self, level)
5346 def Exec(self, feedback_fn):
5347 return self.impl.NewStyleQuery(self)
5350 class LUQueryFields(NoHooksLU):
5351 """Query for resources/items of a certain kind.
5354 # pylint: disable=W0142
5357 def CheckArguments(self):
5358 self.qcls = _GetQueryImplementation(self.op.what)
5360 def ExpandNames(self):
5361 self.needed_locks = {}
5363 def Exec(self, feedback_fn):
5364 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5367 class LUNodeModifyStorage(NoHooksLU):
5368 """Logical unit for modifying a storage volume on a node.
5373 def CheckArguments(self):
5374 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5376 storage_type = self.op.storage_type
5379 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5381 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5382 " modified" % storage_type,
5385 diff = set(self.op.changes.keys()) - modifiable
5387 raise errors.OpPrereqError("The following fields can not be modified for"
5388 " storage units of type '%s': %r" %
5389 (storage_type, list(diff)),
5392 def ExpandNames(self):
5393 self.needed_locks = {
5394 locking.LEVEL_NODE: self.op.node_name,
5397 def Exec(self, feedback_fn):
5398 """Computes the list of nodes and their attributes.
5401 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5402 result = self.rpc.call_storage_modify(self.op.node_name,
5403 self.op.storage_type, st_args,
5404 self.op.name, self.op.changes)
5405 result.Raise("Failed to modify storage unit '%s' on %s" %
5406 (self.op.name, self.op.node_name))
5409 class LUNodeAdd(LogicalUnit):
5410 """Logical unit for adding node to the cluster.
5414 HTYPE = constants.HTYPE_NODE
5415 _NFLAGS = ["master_capable", "vm_capable"]
5417 def CheckArguments(self):
5418 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5419 # validate/normalize the node name
5420 self.hostname = netutils.GetHostname(name=self.op.node_name,
5421 family=self.primary_ip_family)
5422 self.op.node_name = self.hostname.name
5424 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5425 raise errors.OpPrereqError("Cannot readd the master node",
5428 if self.op.readd and self.op.group:
5429 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5430 " being readded", errors.ECODE_INVAL)
5432 def BuildHooksEnv(self):
5435 This will run on all nodes before, and on all nodes + the new node after.
5439 "OP_TARGET": self.op.node_name,
5440 "NODE_NAME": self.op.node_name,
5441 "NODE_PIP": self.op.primary_ip,
5442 "NODE_SIP": self.op.secondary_ip,
5443 "MASTER_CAPABLE": str(self.op.master_capable),
5444 "VM_CAPABLE": str(self.op.vm_capable),
5447 def BuildHooksNodes(self):
5448 """Build hooks nodes.
5451 # Exclude added node
5452 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5453 post_nodes = pre_nodes + [self.op.node_name, ]
5455 return (pre_nodes, post_nodes)
5457 def CheckPrereq(self):
5458 """Check prerequisites.
5461 - the new node is not already in the config
5463 - its parameters (single/dual homed) matches the cluster
5465 Any errors are signaled by raising errors.OpPrereqError.
5469 hostname = self.hostname
5470 node = hostname.name
5471 primary_ip = self.op.primary_ip = hostname.ip
5472 if self.op.secondary_ip is None:
5473 if self.primary_ip_family == netutils.IP6Address.family:
5474 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5475 " IPv4 address must be given as secondary",
5477 self.op.secondary_ip = primary_ip
5479 secondary_ip = self.op.secondary_ip
5480 if not netutils.IP4Address.IsValid(secondary_ip):
5481 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5482 " address" % secondary_ip, errors.ECODE_INVAL)
5484 node_list = cfg.GetNodeList()
5485 if not self.op.readd and node in node_list:
5486 raise errors.OpPrereqError("Node %s is already in the configuration" %
5487 node, errors.ECODE_EXISTS)
5488 elif self.op.readd and node not in node_list:
5489 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5492 self.changed_primary_ip = False
5494 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5495 if self.op.readd and node == existing_node_name:
5496 if existing_node.secondary_ip != secondary_ip:
5497 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5498 " address configuration as before",
5500 if existing_node.primary_ip != primary_ip:
5501 self.changed_primary_ip = True
5505 if (existing_node.primary_ip == primary_ip or
5506 existing_node.secondary_ip == primary_ip or
5507 existing_node.primary_ip == secondary_ip or
5508 existing_node.secondary_ip == secondary_ip):
5509 raise errors.OpPrereqError("New node ip address(es) conflict with"
5510 " existing node %s" % existing_node.name,
5511 errors.ECODE_NOTUNIQUE)
5513 # After this 'if' block, None is no longer a valid value for the
5514 # _capable op attributes
5516 old_node = self.cfg.GetNodeInfo(node)
5517 assert old_node is not None, "Can't retrieve locked node %s" % node
5518 for attr in self._NFLAGS:
5519 if getattr(self.op, attr) is None:
5520 setattr(self.op, attr, getattr(old_node, attr))
5522 for attr in self._NFLAGS:
5523 if getattr(self.op, attr) is None:
5524 setattr(self.op, attr, True)
5526 if self.op.readd and not self.op.vm_capable:
5527 pri, sec = cfg.GetNodeInstances(node)
5529 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5530 " flag set to false, but it already holds"
5531 " instances" % node,
5534 # check that the type of the node (single versus dual homed) is the
5535 # same as for the master
5536 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5537 master_singlehomed = myself.secondary_ip == myself.primary_ip
5538 newbie_singlehomed = secondary_ip == primary_ip
5539 if master_singlehomed != newbie_singlehomed:
5540 if master_singlehomed:
5541 raise errors.OpPrereqError("The master has no secondary ip but the"
5542 " new node has one",
5545 raise errors.OpPrereqError("The master has a secondary ip but the"
5546 " new node doesn't have one",
5549 # checks reachability
5550 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5551 raise errors.OpPrereqError("Node not reachable by ping",
5552 errors.ECODE_ENVIRON)
5554 if not newbie_singlehomed:
5555 # check reachability from my secondary ip to newbie's secondary ip
5556 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5557 source=myself.secondary_ip):
5558 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5559 " based ping to node daemon port",
5560 errors.ECODE_ENVIRON)
5567 if self.op.master_capable:
5568 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5570 self.master_candidate = False
5573 self.new_node = old_node
5575 node_group = cfg.LookupNodeGroup(self.op.group)
5576 self.new_node = objects.Node(name=node,
5577 primary_ip=primary_ip,
5578 secondary_ip=secondary_ip,
5579 master_candidate=self.master_candidate,
5580 offline=False, drained=False,
5583 if self.op.ndparams:
5584 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5586 if self.op.hv_state:
5587 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5589 if self.op.disk_state:
5590 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5592 def Exec(self, feedback_fn):
5593 """Adds the new node to the cluster.
5596 new_node = self.new_node
5597 node = new_node.name
5599 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5602 # We adding a new node so we assume it's powered
5603 new_node.powered = True
5605 # for re-adds, reset the offline/drained/master-candidate flags;
5606 # we need to reset here, otherwise offline would prevent RPC calls
5607 # later in the procedure; this also means that if the re-add
5608 # fails, we are left with a non-offlined, broken node
5610 new_node.drained = new_node.offline = False # pylint: disable=W0201
5611 self.LogInfo("Readding a node, the offline/drained flags were reset")
5612 # if we demote the node, we do cleanup later in the procedure
5613 new_node.master_candidate = self.master_candidate
5614 if self.changed_primary_ip:
5615 new_node.primary_ip = self.op.primary_ip
5617 # copy the master/vm_capable flags
5618 for attr in self._NFLAGS:
5619 setattr(new_node, attr, getattr(self.op, attr))
5621 # notify the user about any possible mc promotion
5622 if new_node.master_candidate:
5623 self.LogInfo("Node will be a master candidate")
5625 if self.op.ndparams:
5626 new_node.ndparams = self.op.ndparams
5628 new_node.ndparams = {}
5630 if self.op.hv_state:
5631 new_node.hv_state_static = self.new_hv_state
5633 if self.op.disk_state:
5634 new_node.disk_state_static = self.new_disk_state
5636 # check connectivity
5637 result = self.rpc.call_version([node])[node]
5638 result.Raise("Can't get version information from node %s" % node)
5639 if constants.PROTOCOL_VERSION == result.payload:
5640 logging.info("Communication to node %s fine, sw version %s match",
5641 node, result.payload)
5643 raise errors.OpExecError("Version mismatch master version %s,"
5644 " node version %s" %
5645 (constants.PROTOCOL_VERSION, result.payload))
5647 # Add node to our /etc/hosts, and add key to known_hosts
5648 if self.cfg.GetClusterInfo().modify_etc_hosts:
5649 master_node = self.cfg.GetMasterNode()
5650 result = self.rpc.call_etc_hosts_modify(master_node,
5651 constants.ETC_HOSTS_ADD,
5654 result.Raise("Can't update hosts file with new host data")
5656 if new_node.secondary_ip != new_node.primary_ip:
5657 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5660 node_verify_list = [self.cfg.GetMasterNode()]
5661 node_verify_param = {
5662 constants.NV_NODELIST: ([node], {}),
5663 # TODO: do a node-net-test as well?
5666 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5667 self.cfg.GetClusterName())
5668 for verifier in node_verify_list:
5669 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5670 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5672 for failed in nl_payload:
5673 feedback_fn("ssh/hostname verification failed"
5674 " (checking from %s): %s" %
5675 (verifier, nl_payload[failed]))
5676 raise errors.OpExecError("ssh/hostname verification failed")
5679 _RedistributeAncillaryFiles(self)
5680 self.context.ReaddNode(new_node)
5681 # make sure we redistribute the config
5682 self.cfg.Update(new_node, feedback_fn)
5683 # and make sure the new node will not have old files around
5684 if not new_node.master_candidate:
5685 result = self.rpc.call_node_demote_from_mc(new_node.name)
5686 msg = result.fail_msg
5688 self.LogWarning("Node failed to demote itself from master"
5689 " candidate status: %s" % msg)
5691 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5692 additional_vm=self.op.vm_capable)
5693 self.context.AddNode(new_node, self.proc.GetECId())
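# Illustrative sketch (not part of the original module): the verification step
# in Exec above sends a minimal parameter dict and reads the per-verifier
# payload back; the names below mirror the variables used there, the hostname
# and message are hypothetical.
#
#   node_verify_param = {constants.NV_NODELIST: ([node], {})}
#   result = self.rpc.call_node_verify([master], node_verify_param, cluster_name)
#   # result[master].payload[constants.NV_NODELIST] is assumed to map each
#   # failed hostname to an error message, e.g. {"node3.example.com": "ssh failed"}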
5696 class LUNodeSetParams(LogicalUnit):
5697 """Modifies the parameters of a node.
5699 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5700 to the node role (as _ROLE_*)
5701 @cvar _R2F: a dictionary from node role to tuples of flags
5702 @cvar _FLAGS: a list of attribute names corresponding to the flags
5705 HPATH = "node-modify"
5706 HTYPE = constants.HTYPE_NODE
5708 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5710 (True, False, False): _ROLE_CANDIDATE,
5711 (False, True, False): _ROLE_DRAINED,
5712 (False, False, True): _ROLE_OFFLINE,
5713 (False, False, False): _ROLE_REGULAR,
5715 _R2F = dict((v, k) for k, v in _F2R.items())
5716 _FLAGS = ["master_candidate", "drained", "offline"]
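# Illustrative sketch (not part of the original module): the _F2R/_R2F tables
# above translate the (master_candidate, drained, offline) flag tuple into a
# single role and back, for example:
#
#   LUNodeSetParams._F2R[(True, False, False)]  # == _ROLE_CANDIDATE
#   LUNodeSetParams._R2F[_ROLE_DRAINED]         # == (False, True, False)
#   # at most one flag may be True at a time; all-False means _ROLE_REGULAR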
5718 def CheckArguments(self):
5719 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5720 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5721 self.op.master_capable, self.op.vm_capable,
5722 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5724 if all_mods.count(None) == len(all_mods):
5725 raise errors.OpPrereqError("Please pass at least one modification",
5727 if all_mods.count(True) > 1:
5728 raise errors.OpPrereqError("Can't set the node into more than one"
5729 " state at the same time",
5732 # Boolean value that tells us whether we might be demoting from MC
5733 self.might_demote = (self.op.master_candidate == False or
5734 self.op.offline == True or
5735 self.op.drained == True or
5736 self.op.master_capable == False)
5738 if self.op.secondary_ip:
5739 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5740 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5741 " address" % self.op.secondary_ip,
5744 self.lock_all = self.op.auto_promote and self.might_demote
5745 self.lock_instances = self.op.secondary_ip is not None
5747 def _InstanceFilter(self, instance):
5748 """Filter for getting affected instances.
5751 return (instance.disk_template in constants.DTS_INT_MIRROR and
5752 self.op.node_name in instance.all_nodes)
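# Illustrative usage sketch (not part of the original module): this predicate
# is handed to the configuration so that only internally mirrored (e.g. DRBD)
# instances that use this node end up locked, as done in ExpandNames and
# CheckPrereq below:
#
#   affected = self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
#   # affected maps instance names to their objects.Instance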
5754 def ExpandNames(self):
5756 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5758 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5760 # Since modifying a node can have severe effects on currently running
5761 # operations, the resource lock is at least acquired in shared mode
5762 self.needed_locks[locking.LEVEL_NODE_RES] = \
5763 self.needed_locks[locking.LEVEL_NODE]
5765 # Get node resource and instance locks in shared mode; they are not used
5766 # for anything but read-only access
5767 self.share_locks[locking.LEVEL_NODE_RES] = 1
5768 self.share_locks[locking.LEVEL_INSTANCE] = 1
5770 if self.lock_instances:
5771 self.needed_locks[locking.LEVEL_INSTANCE] = \
5772 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5774 def BuildHooksEnv(self):
5777 This runs on the master node.
5781 "OP_TARGET": self.op.node_name,
5782 "MASTER_CANDIDATE": str(self.op.master_candidate),
5783 "OFFLINE": str(self.op.offline),
5784 "DRAINED": str(self.op.drained),
5785 "MASTER_CAPABLE": str(self.op.master_capable),
5786 "VM_CAPABLE": str(self.op.vm_capable),
5789 def BuildHooksNodes(self):
5790 """Build hooks nodes.
5793 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5796 def CheckPrereq(self):
5797 """Check prerequisites.
5799 This only checks the instance list against the existing names.
5802 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5804 if self.lock_instances:
5805 affected_instances = \
5806 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5808 # Verify instance locks
5809 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5810 wanted_instances = frozenset(affected_instances.keys())
5811 if wanted_instances - owned_instances:
5812 raise errors.OpPrereqError("Instances affected by changing node %s's"
5813 " secondary IP address have changed since"
5814 " locks were acquired, wanted '%s', have"
5815 " '%s'; retry the operation" %
5817 utils.CommaJoin(wanted_instances),
5818 utils.CommaJoin(owned_instances)),
5821 affected_instances = None
5823 if (self.op.master_candidate is not None or
5824 self.op.drained is not None or
5825 self.op.offline is not None):
5826 # we can't change the master's node flags
5827 if self.op.node_name == self.cfg.GetMasterNode():
5828 raise errors.OpPrereqError("The master role can be changed"
5829 " only via master-failover",
5832 if self.op.master_candidate and not node.master_capable:
5833 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5834 " it a master candidate" % node.name,
5837 if self.op.vm_capable == False:
5838 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5840 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5841 " the vm_capable flag" % node.name,
5844 if node.master_candidate and self.might_demote and not self.lock_all:
5845 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5846 # check if, after removing the current node, we're missing master candidates
5848 (mc_remaining, mc_should, _) = \
5849 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5850 if mc_remaining < mc_should:
5851 raise errors.OpPrereqError("Not enough master candidates, please"
5852 " pass auto promote option to allow"
5853 " promotion", errors.ECODE_STATE)
5855 self.old_flags = old_flags = (node.master_candidate,
5856 node.drained, node.offline)
5857 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5858 self.old_role = old_role = self._F2R[old_flags]
5860 # Check for ineffective changes
5861 for attr in self._FLAGS:
5862 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5863 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5864 setattr(self.op, attr, None)
5866 # Past this point, any flag change to False means a transition
5867 # away from the respective state, as only real changes are kept
5869 # TODO: We might query the real power state if it supports OOB
5870 if _SupportsOob(self.cfg, node):
5871 if self.op.offline is False and not (node.powered or
5872 self.op.powered == True):
5873 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5874 " offline status can be reset") %
5876 elif self.op.powered is not None:
5877 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5878 " as it does not support out-of-band"
5879 " handling") % self.op.node_name)
5881 # If we're being de-offlined/drained, we'll promote ourselves to MC if needed
5882 if (self.op.drained == False or self.op.offline == False or
5883 (self.op.master_capable and not node.master_capable)):
5884 if _DecideSelfPromotion(self):
5885 self.op.master_candidate = True
5886 self.LogInfo("Auto-promoting node to master candidate")
5888 # If we're no longer master capable, we'll demote ourselves from MC
5889 if self.op.master_capable == False and node.master_candidate:
5890 self.LogInfo("Demoting from master candidate")
5891 self.op.master_candidate = False
5894 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5895 if self.op.master_candidate:
5896 new_role = self._ROLE_CANDIDATE
5897 elif self.op.drained:
5898 new_role = self._ROLE_DRAINED
5899 elif self.op.offline:
5900 new_role = self._ROLE_OFFLINE
5901 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5902 # False is still in the new flags, which means we're un-setting (clearing) a flag
5904 new_role = self._ROLE_REGULAR
5905 else: # no new flags, nothing, keep old role
5908 self.new_role = new_role
5910 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5911 # Trying to transition out of offline status
5912 result = self.rpc.call_version([node.name])[node.name]
5914 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5915 " to report its version: %s" %
5916 (node.name, result.fail_msg),
5919 self.LogWarning("Transitioning node from offline to online state"
5920 " without using re-add. Please make sure the node"
5923 if self.op.secondary_ip:
5924 # Ok even without locking, because this can't be changed by any LU
5925 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5926 master_singlehomed = master.secondary_ip == master.primary_ip
5927 if master_singlehomed and self.op.secondary_ip:
5928 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5929 " homed cluster", errors.ECODE_INVAL)
5931 assert not (frozenset(affected_instances) -
5932 self.owned_locks(locking.LEVEL_INSTANCE))
5935 if affected_instances:
5936 raise errors.OpPrereqError("Cannot change secondary IP address:"
5937 " offline node has instances (%s)"
5938 " configured to use it" %
5939 utils.CommaJoin(affected_instances.keys()))
5941 # On online nodes, check that no instances are running, and that
5942 # the node has the new ip and we can reach it.
5943 for instance in affected_instances.values():
5944 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5945 msg="cannot change secondary ip")
5947 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5948 if master.name != node.name:
5949 # check reachability from master secondary ip to new secondary ip
5950 if not netutils.TcpPing(self.op.secondary_ip,
5951 constants.DEFAULT_NODED_PORT,
5952 source=master.secondary_ip):
5953 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5954 " based ping to node daemon port",
5955 errors.ECODE_ENVIRON)
5957 if self.op.ndparams:
5958 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5959 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5960 self.new_ndparams = new_ndparams
5962 if self.op.hv_state:
5963 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5964 self.node.hv_state_static)
5966 if self.op.disk_state:
5967 self.new_disk_state = \
5968 _MergeAndVerifyDiskState(self.op.disk_state,
5969 self.node.disk_state_static)
5971 def Exec(self, feedback_fn):
5976 old_role = self.old_role
5977 new_role = self.new_role
5981 if self.op.ndparams:
5982 node.ndparams = self.new_ndparams
5984 if self.op.powered is not None:
5985 node.powered = self.op.powered
5987 if self.op.hv_state:
5988 node.hv_state_static = self.new_hv_state
5990 if self.op.disk_state:
5991 node.disk_state_static = self.new_disk_state
5993 for attr in ["master_capable", "vm_capable"]:
5994 val = getattr(self.op, attr)
5996 setattr(node, attr, val)
5997 result.append((attr, str(val)))
5999 if new_role != old_role:
6000 # Tell the node to demote itself, if no longer MC and not offline
6001 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6002 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6004 self.LogWarning("Node failed to demote itself: %s", msg)
6006 new_flags = self._R2F[new_role]
6007 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6009 result.append((desc, str(nf)))
6010 (node.master_candidate, node.drained, node.offline) = new_flags
6012 # we locked all nodes, we adjust the CP before updating this node
6014 _AdjustCandidatePool(self, [node.name])
6016 if self.op.secondary_ip:
6017 node.secondary_ip = self.op.secondary_ip
6018 result.append(("secondary_ip", self.op.secondary_ip))
6020 # this will trigger configuration file update, if needed
6021 self.cfg.Update(node, feedback_fn)
6023 # this will trigger job queue propagation or cleanup if the mc flag changed
6025 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6026 self.context.ReaddNode(node)
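# Illustrative sketch (not part of the original module): the "result" list
# built in Exec above accumulates (parameter, new value) pairs for the changes
# actually applied, for example (hypothetical values):
#
#   [("master_candidate", "False"), ("offline", "True"),
#    ("secondary_ip", "192.0.2.10")]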
6031 class LUNodePowercycle(NoHooksLU):
6032 """Powercycles a node.
6037 def CheckArguments(self):
6038 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6039 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6040 raise errors.OpPrereqError("The node is the master and the force"
6041 " parameter was not set",
6044 def ExpandNames(self):
6045 """Locking for PowercycleNode.
6047 This is a last-resort option and shouldn't block on other
6048 jobs. Therefore, we grab no locks.
6051 self.needed_locks = {}
6053 def Exec(self, feedback_fn):
6057 result = self.rpc.call_node_powercycle(self.op.node_name,
6058 self.cfg.GetHypervisorType())
6059 result.Raise("Failed to schedule the reboot")
6060 return result.payload
6063 class LUClusterQuery(NoHooksLU):
6064 """Query cluster configuration.
6069 def ExpandNames(self):
6070 self.needed_locks = {}
6072 def Exec(self, feedback_fn):
6073 """Return cluster config.
6076 cluster = self.cfg.GetClusterInfo()
6079 # Filter just for enabled hypervisors
6080 for os_name, hv_dict in cluster.os_hvp.items():
6081 os_hvp[os_name] = {}
6082 for hv_name, hv_params in hv_dict.items():
6083 if hv_name in cluster.enabled_hypervisors:
6084 os_hvp[os_name][hv_name] = hv_params
6086 # Convert ip_family to ip_version
6087 primary_ip_version = constants.IP4_VERSION
6088 if cluster.primary_ip_family == netutils.IP6Address.family:
6089 primary_ip_version = constants.IP6_VERSION
6092 "software_version": constants.RELEASE_VERSION,
6093 "protocol_version": constants.PROTOCOL_VERSION,
6094 "config_version": constants.CONFIG_VERSION,
6095 "os_api_version": max(constants.OS_API_VERSIONS),
6096 "export_version": constants.EXPORT_VERSION,
6097 "architecture": (platform.architecture()[0], platform.machine()),
6098 "name": cluster.cluster_name,
6099 "master": cluster.master_node,
6100 "default_hypervisor": cluster.primary_hypervisor,
6101 "enabled_hypervisors": cluster.enabled_hypervisors,
6102 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6103 for hypervisor_name in cluster.enabled_hypervisors]),
6105 "beparams": cluster.beparams,
6106 "osparams": cluster.osparams,
6107 "ipolicy": cluster.ipolicy,
6108 "nicparams": cluster.nicparams,
6109 "ndparams": cluster.ndparams,
6110 "candidate_pool_size": cluster.candidate_pool_size,
6111 "master_netdev": cluster.master_netdev,
6112 "master_netmask": cluster.master_netmask,
6113 "use_external_mip_script": cluster.use_external_mip_script,
6114 "volume_group_name": cluster.volume_group_name,
6115 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6116 "file_storage_dir": cluster.file_storage_dir,
6117 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6118 "maintain_node_health": cluster.maintain_node_health,
6119 "ctime": cluster.ctime,
6120 "mtime": cluster.mtime,
6121 "uuid": cluster.uuid,
6122 "tags": list(cluster.GetTags()),
6123 "uid_pool": cluster.uid_pool,
6124 "default_iallocator": cluster.default_iallocator,
6125 "reserved_lvs": cluster.reserved_lvs,
6126 "primary_ip_version": primary_ip_version,
6127 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6128 "hidden_os": cluster.hidden_os,
6129 "blacklisted_os": cluster.blacklisted_os,
6135 class LUClusterConfigQuery(NoHooksLU):
6136 """Return configuration values.
6140 _FIELDS_DYNAMIC = utils.FieldSet()
6141 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6142 "watcher_pause", "volume_group_name")
6144 def CheckArguments(self):
6145 _CheckOutputFields(static=self._FIELDS_STATIC,
6146 dynamic=self._FIELDS_DYNAMIC,
6147 selected=self.op.output_fields)
6149 def ExpandNames(self):
6150 self.needed_locks = {}
6152 def Exec(self, feedback_fn):
6153 """Dump a representation of the cluster config to the standard output.
6157 for field in self.op.output_fields:
6158 if field == "cluster_name":
6159 entry = self.cfg.GetClusterName()
6160 elif field == "master_node":
6161 entry = self.cfg.GetMasterNode()
6162 elif field == "drain_flag":
6163 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6164 elif field == "watcher_pause":
6165 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6166 elif field == "volume_group_name":
6167 entry = self.cfg.GetVGName()
6169 raise errors.ParameterError(field)
6170 values.append(entry)
6174 class LUInstanceActivateDisks(NoHooksLU):
6175 """Bring up an instance's disks.
6180 def ExpandNames(self):
6181 self._ExpandAndLockInstance()
6182 self.needed_locks[locking.LEVEL_NODE] = []
6183 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6185 def DeclareLocks(self, level):
6186 if level == locking.LEVEL_NODE:
6187 self._LockInstancesNodes()
6189 def CheckPrereq(self):
6190 """Check prerequisites.
6192 This checks that the instance is in the cluster.
6195 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6196 assert self.instance is not None, \
6197 "Cannot retrieve locked instance %s" % self.op.instance_name
6198 _CheckNodeOnline(self, self.instance.primary_node)
6200 def Exec(self, feedback_fn):
6201 """Activate the disks.
6204 disks_ok, disks_info = \
6205 _AssembleInstanceDisks(self, self.instance,
6206 ignore_size=self.op.ignore_size)
6208 raise errors.OpExecError("Cannot activate block devices")
6213 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6215 """Prepare the block devices for an instance.
6217 This sets up the block devices on all nodes.
6219 @type lu: L{LogicalUnit}
6220 @param lu: the logical unit on whose behalf we execute
6221 @type instance: L{objects.Instance}
6222 @param instance: the instance for whose disks we assemble
6223 @type disks: list of L{objects.Disk} or None
6224 @param disks: which disks to assemble (or all, if None)
6225 @type ignore_secondaries: boolean
6226 @param ignore_secondaries: if true, errors on secondary nodes
6227 won't result in an error return from the function
6228 @type ignore_size: boolean
6229 @param ignore_size: if true, the current known size of the disk
6230 will not be used during the disk activation, useful for cases
6231 when the size is wrong
6232 @return: a tuple of (disks_ok, device_info); device_info is a list of
6233 (host, instance_visible_name, node_visible_name) tuples
6234 with the mapping from node devices to instance devices
6239 iname = instance.name
6240 disks = _ExpandCheckDisks(instance, disks)
6242 # With the two-pass mechanism we try to reduce the window of
6243 # opportunity for the race condition of switching DRBD to primary
6244 # before handshaking has occurred, but we do not eliminate it
6246 # The proper fix would be to wait (with some limits) until the
6247 # connection has been made and drbd transitions from WFConnection
6248 # into any other network-connected state (Connected, SyncTarget,
6251 # 1st pass, assemble on all nodes in secondary mode
6252 for idx, inst_disk in enumerate(disks):
6253 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6255 node_disk = node_disk.Copy()
6256 node_disk.UnsetSize()
6257 lu.cfg.SetDiskID(node_disk, node)
6258 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6259 msg = result.fail_msg
6261 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6262 " (is_primary=False, pass=1): %s",
6263 inst_disk.iv_name, node, msg)
6264 if not ignore_secondaries:
6267 # FIXME: race condition on drbd migration to primary
6269 # 2nd pass, do only the primary node
6270 for idx, inst_disk in enumerate(disks):
6273 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6274 if node != instance.primary_node:
6277 node_disk = node_disk.Copy()
6278 node_disk.UnsetSize()
6279 lu.cfg.SetDiskID(node_disk, node)
6280 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6281 msg = result.fail_msg
6283 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6284 " (is_primary=True, pass=2): %s",
6285 inst_disk.iv_name, node, msg)
6288 dev_path = result.payload
6290 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6292 # leave the disks configured for the primary node
6293 # this is a workaround that would be fixed better by
6294 # improving the logical/physical id handling
6296 lu.cfg.SetDiskID(disk, instance.primary_node)
6298 return disks_ok, device_info
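# Illustrative usage sketch (not part of the original module): callers such as
# LUInstanceActivateDisks above unpack the return value like this; the node
# name, disk name and device path shown are hypothetical.
#
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in disks_info:
#     pass  # e.g. ("node1.example.com", "disk/0", "/dev/drbd0")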
6301 def _StartInstanceDisks(lu, instance, force):
6302 """Start the disks of an instance.
6305 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6306 ignore_secondaries=force)
6308 _ShutdownInstanceDisks(lu, instance)
6309 if force is not None and not force:
6310 lu.proc.LogWarning("", hint="If the message above refers to a"
6312 " you can retry the operation using '--force'.")
6313 raise errors.OpExecError("Disk consistency error")
6316 class LUInstanceDeactivateDisks(NoHooksLU):
6317 """Shutdown an instance's disks.
6322 def ExpandNames(self):
6323 self._ExpandAndLockInstance()
6324 self.needed_locks[locking.LEVEL_NODE] = []
6325 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6327 def DeclareLocks(self, level):
6328 if level == locking.LEVEL_NODE:
6329 self._LockInstancesNodes()
6331 def CheckPrereq(self):
6332 """Check prerequisites.
6334 This checks that the instance is in the cluster.
6337 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6338 assert self.instance is not None, \
6339 "Cannot retrieve locked instance %s" % self.op.instance_name
6341 def Exec(self, feedback_fn):
6342 """Deactivate the disks
6345 instance = self.instance
6347 _ShutdownInstanceDisks(self, instance)
6349 _SafeShutdownInstanceDisks(self, instance)
6352 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6353 """Shutdown block devices of an instance.
6355 This function checks if an instance is running before calling
6356 _ShutdownInstanceDisks.
6359 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6360 _ShutdownInstanceDisks(lu, instance, disks=disks)
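# Illustrative sketch (not part of the original module): the difference
# between the two shutdown helpers is whether the instance state is checked
# first:
#
#   _SafeShutdownInstanceDisks(lu, instance)  # refuses if the instance is running
#   _ShutdownInstanceDisks(lu, instance)      # shuts down unconditionally (error paths)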
6363 def _ExpandCheckDisks(instance, disks):
6364 """Return the instance disks selected by the disks list
6366 @type disks: list of L{objects.Disk} or None
6367 @param disks: selected disks
6368 @rtype: list of L{objects.Disk}
6369 @return: selected instance disks to act on
6373 return instance.disks
6375 if not set(disks).issubset(instance.disks):
6376 raise errors.ProgrammerError("Can only act on disks belonging to the"
6381 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6382 """Shutdown block devices of an instance.
6384 This does the shutdown on all nodes of the instance.
6386 If the ignore_primary is false, errors on the primary node are
6391 disks = _ExpandCheckDisks(instance, disks)
6394 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6395 lu.cfg.SetDiskID(top_disk, node)
6396 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6397 msg = result.fail_msg
6399 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6400 disk.iv_name, node, msg)
6401 if ((node == instance.primary_node and not ignore_primary) or
6402 (node != instance.primary_node and not result.offline)):
6407 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6408 """Checks if a node has enough free memory.
6410 This function checks if a given node has the needed amount of free
6411 memory. In case the node has less memory or we cannot get the
6412 information from the node, this function raises an OpPrereqError
6415 @type lu: C{LogicalUnit}
6416 @param lu: a logical unit from which we get configuration data
6418 @param node: the node to check
6419 @type reason: C{str}
6420 @param reason: string to use in the error message
6421 @type requested: C{int}
6422 @param requested: the amount of memory in MiB to check for
6423 @type hypervisor_name: C{str}
6424 @param hypervisor_name: the hypervisor to ask for memory stats
6426 @return: node current free memory
6427 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6428 we cannot check the node
6431 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6432 nodeinfo[node].Raise("Can't get data from node %s" % node,
6433 prereq=True, ecode=errors.ECODE_ENVIRON)
6434 (_, _, (hv_info, )) = nodeinfo[node].payload
6436 free_mem = hv_info.get("memory_free", None)
6437 if not isinstance(free_mem, int):
6438 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6439 " was '%s'" % (node, free_mem),
6440 errors.ECODE_ENVIRON)
6441 if requested > free_mem:
6442 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6443 " needed %s MiB, available %s MiB" %
6444 (node, reason, requested, free_mem),
6449 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6450 """Checks if nodes have enough free disk space in the all VGs.
6452 This function check if all given nodes have the needed amount of
6453 free disk. In case any node has less disk or we cannot get the
6454 information from the node, this function raise an OpPrereqError
6457 @type lu: C{LogicalUnit}
6458 @param lu: a logical unit from which we get configuration data
6459 @type nodenames: C{list}
6460 @param nodenames: the list of node names to check
6461 @type req_sizes: C{dict}
6462 @param req_sizes: the hash of vg and corresponding amount of disk in
6464 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6465 or we cannot check the node
6468 for vg, req_size in req_sizes.items():
6469 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
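# Illustrative sketch (not part of the original module): the req_sizes
# argument above maps each volume group to the disk space (in MiB) that must
# be free on every node; node and VG names here are hypothetical.
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1", "node2"], {"xenvg": 10240})
#   # checks that both nodes have at least 10 GiB free in volume group "xenvg"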
6472 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6473 """Checks if nodes have enough free disk space in the specified VG.
6475 This function checks if all given nodes have the needed amount of
6476 free disk. In case any node has less disk or we cannot get the
6477 information from the node, this function raises an OpPrereqError
6480 @type lu: C{LogicalUnit}
6481 @param lu: a logical unit from which we get configuration data
6482 @type nodenames: C{list}
6483 @param nodenames: the list of node names to check
6485 @param vg: the volume group to check
6486 @type requested: C{int}
6487 @param requested: the amount of disk in MiB to check for
6488 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6489 or we cannot check the node
6492 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6493 for node in nodenames:
6494 info = nodeinfo[node]
6495 info.Raise("Cannot get current information from node %s" % node,
6496 prereq=True, ecode=errors.ECODE_ENVIRON)
6497 (_, (vg_info, ), _) = info.payload
6498 vg_free = vg_info.get("vg_free", None)
6499 if not isinstance(vg_free, int):
6500 raise errors.OpPrereqError("Can't compute free disk space on node"
6501 " %s for vg %s, result was '%s'" %
6502 (node, vg, vg_free), errors.ECODE_ENVIRON)
6503 if requested > vg_free:
6504 raise errors.OpPrereqError("Not enough disk space on target node %s"
6505 " vg %s: required %d MiB, available %d MiB" %
6506 (node, vg, requested, vg_free),
6510 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6511 """Checks if nodes have enough physical CPUs
6513 This function checks if all given nodes have the needed number of
6514 physical CPUs. In case any node has fewer CPUs or we cannot get the
6515 information from the node, this function raises an OpPrereqError
6518 @type lu: C{LogicalUnit}
6519 @param lu: a logical unit from which we get configuration data
6520 @type nodenames: C{list}
6521 @param nodenames: the list of node names to check
6522 @type requested: C{int}
6523 @param requested: the minimum acceptable number of physical CPUs
6524 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6525 or we cannot check the node
6528 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6529 for node in nodenames:
6530 info = nodeinfo[node]
6531 info.Raise("Cannot get current information from node %s" % node,
6532 prereq=True, ecode=errors.ECODE_ENVIRON)
6533 (_, _, (hv_info, )) = info.payload
6534 num_cpus = hv_info.get("cpu_total", None)
6535 if not isinstance(num_cpus, int):
6536 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6537 " on node %s, result was '%s'" %
6538 (node, num_cpus), errors.ECODE_ENVIRON)
6539 if requested > num_cpus:
6540 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6541 "required" % (node, num_cpus, requested),
6545 class LUInstanceStartup(LogicalUnit):
6546 """Starts an instance.
6549 HPATH = "instance-start"
6550 HTYPE = constants.HTYPE_INSTANCE
6553 def CheckArguments(self):
6555 if self.op.beparams:
6556 # fill the beparams dict
6557 objects.UpgradeBeParams(self.op.beparams)
6558 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6560 def ExpandNames(self):
6561 self._ExpandAndLockInstance()
6562 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6564 def DeclareLocks(self, level):
6565 if level == locking.LEVEL_NODE_RES:
6566 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6568 def BuildHooksEnv(self):
6571 This runs on master, primary and secondary nodes of the instance.
6575 "FORCE": self.op.force,
6578 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6582 def BuildHooksNodes(self):
6583 """Build hooks nodes.
6586 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6589 def CheckPrereq(self):
6590 """Check prerequisites.
6592 This checks that the instance is in the cluster.
6595 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6596 assert self.instance is not None, \
6597 "Cannot retrieve locked instance %s" % self.op.instance_name
6600 if self.op.hvparams:
6601 # check hypervisor parameter syntax (locally)
6602 cluster = self.cfg.GetClusterInfo()
6603 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6604 filled_hvp = cluster.FillHV(instance)
6605 filled_hvp.update(self.op.hvparams)
6606 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6607 hv_type.CheckParameterSyntax(filled_hvp)
6608 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6610 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6612 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6614 if self.primary_offline and self.op.ignore_offline_nodes:
6615 self.proc.LogWarning("Ignoring offline primary node")
6617 if self.op.hvparams or self.op.beparams:
6618 self.proc.LogWarning("Overridden parameters are ignored")
6620 _CheckNodeOnline(self, instance.primary_node)
6622 bep = self.cfg.GetClusterInfo().FillBE(instance)
6623 bep.update(self.op.beparams)
6625 # check bridges existence
6626 _CheckInstanceBridgesExist(self, instance)
6628 remote_info = self.rpc.call_instance_info(instance.primary_node,
6630 instance.hypervisor)
6631 remote_info.Raise("Error checking node %s" % instance.primary_node,
6632 prereq=True, ecode=errors.ECODE_ENVIRON)
6633 if not remote_info.payload: # not running already
6634 _CheckNodeFreeMemory(self, instance.primary_node,
6635 "starting instance %s" % instance.name,
6636 bep[constants.BE_MINMEM], instance.hypervisor)
6638 def Exec(self, feedback_fn):
6639 """Start the instance.
6642 instance = self.instance
6643 force = self.op.force
6645 if not self.op.no_remember:
6646 self.cfg.MarkInstanceUp(instance.name)
6648 if self.primary_offline:
6649 assert self.op.ignore_offline_nodes
6650 self.proc.LogInfo("Primary node offline, marked instance as started")
6652 node_current = instance.primary_node
6654 _StartInstanceDisks(self, instance, force)
6657 self.rpc.call_instance_start(node_current,
6658 (instance, self.op.hvparams,
6660 self.op.startup_paused)
6661 msg = result.fail_msg
6663 _ShutdownInstanceDisks(self, instance)
6664 raise errors.OpExecError("Could not start instance: %s" % msg)
6667 class LUInstanceReboot(LogicalUnit):
6668 """Reboot an instance.
6671 HPATH = "instance-reboot"
6672 HTYPE = constants.HTYPE_INSTANCE
6675 def ExpandNames(self):
6676 self._ExpandAndLockInstance()
6678 def BuildHooksEnv(self):
6681 This runs on master, primary and secondary nodes of the instance.
6685 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6686 "REBOOT_TYPE": self.op.reboot_type,
6687 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6690 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6694 def BuildHooksNodes(self):
6695 """Build hooks nodes.
6698 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6701 def CheckPrereq(self):
6702 """Check prerequisites.
6704 This checks that the instance is in the cluster.
6707 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6708 assert self.instance is not None, \
6709 "Cannot retrieve locked instance %s" % self.op.instance_name
6710 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6711 _CheckNodeOnline(self, instance.primary_node)
6713 # check bridges existence
6714 _CheckInstanceBridgesExist(self, instance)
6716 def Exec(self, feedback_fn):
6717 """Reboot the instance.
6720 instance = self.instance
6721 ignore_secondaries = self.op.ignore_secondaries
6722 reboot_type = self.op.reboot_type
6724 remote_info = self.rpc.call_instance_info(instance.primary_node,
6726 instance.hypervisor)
6727 remote_info.Raise("Error checking node %s" % instance.primary_node)
6728 instance_running = bool(remote_info.payload)
6730 node_current = instance.primary_node
6732 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6733 constants.INSTANCE_REBOOT_HARD]:
6734 for disk in instance.disks:
6735 self.cfg.SetDiskID(disk, node_current)
6736 result = self.rpc.call_instance_reboot(node_current, instance,
6738 self.op.shutdown_timeout)
6739 result.Raise("Could not reboot instance")
6741 if instance_running:
6742 result = self.rpc.call_instance_shutdown(node_current, instance,
6743 self.op.shutdown_timeout)
6744 result.Raise("Could not shutdown instance for full reboot")
6745 _ShutdownInstanceDisks(self, instance)
6747 self.LogInfo("Instance %s was already stopped, starting now",
6749 _StartInstanceDisks(self, instance, ignore_secondaries)
6750 result = self.rpc.call_instance_start(node_current,
6751 (instance, None, None), False)
6752 msg = result.fail_msg
6754 _ShutdownInstanceDisks(self, instance)
6755 raise errors.OpExecError("Could not start instance for"
6756 " full reboot: %s" % msg)
6758 self.cfg.MarkInstanceUp(instance.name)
6761 class LUInstanceShutdown(LogicalUnit):
6762 """Shutdown an instance.
6765 HPATH = "instance-stop"
6766 HTYPE = constants.HTYPE_INSTANCE
6769 def ExpandNames(self):
6770 self._ExpandAndLockInstance()
6772 def BuildHooksEnv(self):
6775 This runs on master, primary and secondary nodes of the instance.
6778 env = _BuildInstanceHookEnvByObject(self, self.instance)
6779 env["TIMEOUT"] = self.op.timeout
6782 def BuildHooksNodes(self):
6783 """Build hooks nodes.
6786 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6789 def CheckPrereq(self):
6790 """Check prerequisites.
6792 This checks that the instance is in the cluster.
6795 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6796 assert self.instance is not None, \
6797 "Cannot retrieve locked instance %s" % self.op.instance_name
6799 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6801 self.primary_offline = \
6802 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6804 if self.primary_offline and self.op.ignore_offline_nodes:
6805 self.proc.LogWarning("Ignoring offline primary node")
6807 _CheckNodeOnline(self, self.instance.primary_node)
6809 def Exec(self, feedback_fn):
6810 """Shutdown the instance.
6813 instance = self.instance
6814 node_current = instance.primary_node
6815 timeout = self.op.timeout
6817 if not self.op.no_remember:
6818 self.cfg.MarkInstanceDown(instance.name)
6820 if self.primary_offline:
6821 assert self.op.ignore_offline_nodes
6822 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6824 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6825 msg = result.fail_msg
6827 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6829 _ShutdownInstanceDisks(self, instance)
6832 class LUInstanceReinstall(LogicalUnit):
6833 """Reinstall an instance.
6836 HPATH = "instance-reinstall"
6837 HTYPE = constants.HTYPE_INSTANCE
6840 def ExpandNames(self):
6841 self._ExpandAndLockInstance()
6843 def BuildHooksEnv(self):
6846 This runs on master, primary and secondary nodes of the instance.
6849 return _BuildInstanceHookEnvByObject(self, self.instance)
6851 def BuildHooksNodes(self):
6852 """Build hooks nodes.
6855 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6858 def CheckPrereq(self):
6859 """Check prerequisites.
6861 This checks that the instance is in the cluster and is not running.
6864 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6865 assert instance is not None, \
6866 "Cannot retrieve locked instance %s" % self.op.instance_name
6867 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6868 " offline, cannot reinstall")
6869 for node in instance.secondary_nodes:
6870 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6871 " cannot reinstall")
6873 if instance.disk_template == constants.DT_DISKLESS:
6874 raise errors.OpPrereqError("Instance '%s' has no disks" %
6875 self.op.instance_name,
6877 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6879 if self.op.os_type is not None:
6881 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6882 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6883 instance_os = self.op.os_type
6885 instance_os = instance.os
6887 nodelist = list(instance.all_nodes)
6889 if self.op.osparams:
6890 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6891 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6892 self.os_inst = i_osdict # the new dict (without defaults)
6896 self.instance = instance
6898 def Exec(self, feedback_fn):
6899 """Reinstall the instance.
6902 inst = self.instance
6904 if self.op.os_type is not None:
6905 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6906 inst.os = self.op.os_type
6907 # Write to configuration
6908 self.cfg.Update(inst, feedback_fn)
6910 _StartInstanceDisks(self, inst, None)
6912 feedback_fn("Running the instance OS create scripts...")
6913 # FIXME: pass debug option from opcode to backend
6914 result = self.rpc.call_instance_os_add(inst.primary_node,
6915 (inst, self.os_inst), True,
6916 self.op.debug_level)
6917 result.Raise("Could not install OS for instance %s on node %s" %
6918 (inst.name, inst.primary_node))
6920 _ShutdownInstanceDisks(self, inst)
6923 class LUInstanceRecreateDisks(LogicalUnit):
6924 """Recreate an instance's missing disks.
6927 HPATH = "instance-recreate-disks"
6928 HTYPE = constants.HTYPE_INSTANCE
6931 _MODIFYABLE = frozenset([
6932 constants.IDISK_SIZE,
6933 constants.IDISK_MODE,
6936 # New or changed disk parameters may have different semantics
6937 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6938 constants.IDISK_ADOPT,
6940 # TODO: Implement support changing VG while recreating
6942 constants.IDISK_METAVG,
6945 def CheckArguments(self):
6946 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6947 # Normalize and convert deprecated list of disk indices
6948 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
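      # Illustrative sketch (not part of the original module): a deprecated
      # plain index list is normalized into (index, params) pairs here, e.g.
      # [2, 0, 2] becomes [(0, {}), (2, {})]; duplicates collapse in this step,
      # and duplicates in the (index, params) form are rejected just below.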
6950 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6952 raise errors.OpPrereqError("Some disks have been specified more than"
6953 " once: %s" % utils.CommaJoin(duplicates),
6956 for (idx, params) in self.op.disks:
6957 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6958 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6960 raise errors.OpPrereqError("Parameters for disk %s try to change"
6961 " unmodifyable parameter(s): %s" %
6962 (idx, utils.CommaJoin(unsupported)),
6965 def ExpandNames(self):
6966 self._ExpandAndLockInstance()
6967 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6969 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6970 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6972 self.needed_locks[locking.LEVEL_NODE] = []
6973 self.needed_locks[locking.LEVEL_NODE_RES] = []
6975 def DeclareLocks(self, level):
6976 if level == locking.LEVEL_NODE:
6977 # if we replace the nodes, we only need to lock the old primary,
6978 # otherwise we need to lock all nodes for disk re-creation
6979 primary_only = bool(self.op.nodes)
6980 self._LockInstancesNodes(primary_only=primary_only)
6981 elif level == locking.LEVEL_NODE_RES:
6983 self.needed_locks[locking.LEVEL_NODE_RES] = \
6984 self.needed_locks[locking.LEVEL_NODE][:]
6986 def BuildHooksEnv(self):
6989 This runs on master, primary and secondary nodes of the instance.
6992 return _BuildInstanceHookEnvByObject(self, self.instance)
6994 def BuildHooksNodes(self):
6995 """Build hooks nodes.
6998 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7001 def CheckPrereq(self):
7002 """Check prerequisites.
7004 This checks that the instance is in the cluster and is not running.
7007 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7008 assert instance is not None, \
7009 "Cannot retrieve locked instance %s" % self.op.instance_name
7011 if len(self.op.nodes) != len(instance.all_nodes):
7012 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7013 " %d replacement nodes were specified" %
7014 (instance.name, len(instance.all_nodes),
7015 len(self.op.nodes)),
7017 assert instance.disk_template != constants.DT_DRBD8 or \
7018 len(self.op.nodes) == 2
7019 assert instance.disk_template != constants.DT_PLAIN or \
7020 len(self.op.nodes) == 1
7021 primary_node = self.op.nodes[0]
7023 primary_node = instance.primary_node
7024 _CheckNodeOnline(self, primary_node)
7026 if instance.disk_template == constants.DT_DISKLESS:
7027 raise errors.OpPrereqError("Instance '%s' has no disks" %
7028 self.op.instance_name, errors.ECODE_INVAL)
7030 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7032 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7033 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7034 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7035 if not (self.op.nodes and old_pnode.offline):
7036 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7037 msg="cannot recreate disks")
7040 self.disks = dict(self.op.disks)
7042 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7044 maxidx = max(self.disks.keys())
7045 if maxidx >= len(instance.disks):
7046 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7049 if (self.op.nodes and
7050 sorted(self.disks.keys()) != range(len(instance.disks))):
7051 raise errors.OpPrereqError("Can't recreate disks partially and"
7052 " change the nodes at the same time",
7055 self.instance = instance
7057 def Exec(self, feedback_fn):
7058 """Recreate the disks.
7061 instance = self.instance
7063 assert (self.owned_locks(locking.LEVEL_NODE) ==
7064 self.owned_locks(locking.LEVEL_NODE_RES))
7067 mods = [] # keeps track of needed changes
7069 for idx, disk in enumerate(instance.disks):
7071 changes = self.disks[idx]
7073 # Disk should not be recreated
7077 # update secondaries for disks, if needed
7078 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7079 # need to update the nodes and minors
7080 assert len(self.op.nodes) == 2
7081 assert len(disk.logical_id) == 6 # otherwise disk internals
7083 (_, _, old_port, _, _, old_secret) = disk.logical_id
7084 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7085 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7086 new_minors[0], new_minors[1], old_secret)
7087 assert len(disk.logical_id) == len(new_id)
7091 mods.append((idx, new_id, changes))
7093 # now that we have passed all asserts above, we can apply the mods
7094 # in a single run (to avoid partial changes)
7095 for idx, new_id, changes in mods:
7096 disk = instance.disks[idx]
7097 if new_id is not None:
7098 assert disk.dev_type == constants.LD_DRBD8
7099 disk.logical_id = new_id
7101 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7102 mode=changes.get(constants.IDISK_MODE, None))
7104 # change primary node, if needed
7106 instance.primary_node = self.op.nodes[0]
7107 self.LogWarning("Changing the instance's nodes, you will have to"
7108 " remove any disks left on the older nodes manually")
7111 self.cfg.Update(instance, feedback_fn)
7113 _CreateDisks(self, instance, to_skip=to_skip)
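# Illustrative sketch (not part of the original module): for DRBD8 disks the
# logical_id handled in Exec above is a 6-tuple; when nodes are replaced only
# the node names and minors change, while the port and shared secret are
# preserved (all values below are hypothetical):
#
#   old: ("node1", "node2", 11000, 0, 1, "secret")
#   new: ("node3", "node4", 11000, 2, 3, "secret")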
7116 class LUInstanceRename(LogicalUnit):
7117 """Rename an instance.
7120 HPATH = "instance-rename"
7121 HTYPE = constants.HTYPE_INSTANCE
7123 def CheckArguments(self):
7127 if self.op.ip_check and not self.op.name_check:
7128 # TODO: make the ip check more flexible and not depend on the name check
7129 raise errors.OpPrereqError("IP address check requires a name check",
7132 def BuildHooksEnv(self):
7135 This runs on master, primary and secondary nodes of the instance.
7138 env = _BuildInstanceHookEnvByObject(self, self.instance)
7139 env["INSTANCE_NEW_NAME"] = self.op.new_name
7142 def BuildHooksNodes(self):
7143 """Build hooks nodes.
7146 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7149 def CheckPrereq(self):
7150 """Check prerequisites.
7152 This checks that the instance is in the cluster and is not running.
7155 self.op.instance_name = _ExpandInstanceName(self.cfg,
7156 self.op.instance_name)
7157 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7158 assert instance is not None
7159 _CheckNodeOnline(self, instance.primary_node)
7160 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7161 msg="cannot rename")
7162 self.instance = instance
7164 new_name = self.op.new_name
7165 if self.op.name_check:
7166 hostname = netutils.GetHostname(name=new_name)
7167 if hostname.name != new_name:
7168 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7170 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7171 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7172 " same as given hostname '%s'") %
7173 (hostname.name, self.op.new_name),
7175 new_name = self.op.new_name = hostname.name
7176 if (self.op.ip_check and
7177 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7178 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7179 (hostname.ip, new_name),
7180 errors.ECODE_NOTUNIQUE)
7182 instance_list = self.cfg.GetInstanceList()
7183 if new_name in instance_list and new_name != instance.name:
7184 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7185 new_name, errors.ECODE_EXISTS)
7187 def Exec(self, feedback_fn):
7188 """Rename the instance.
7191 inst = self.instance
7192 old_name = inst.name
7194 rename_file_storage = False
7195 if (inst.disk_template in constants.DTS_FILEBASED and
7196 self.op.new_name != inst.name):
7197 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7198 rename_file_storage = True
7200 self.cfg.RenameInstance(inst.name, self.op.new_name)
7201 # Change the instance lock. This is definitely safe while we hold the BGL.
7202 # Otherwise the new lock would have to be added in acquired mode.
7204 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7205 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7207 # re-read the instance from the configuration after rename
7208 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7210 if rename_file_storage:
7211 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7212 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7213 old_file_storage_dir,
7214 new_file_storage_dir)
7215 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7216 " (but the instance has been renamed in Ganeti)" %
7217 (inst.primary_node, old_file_storage_dir,
7218 new_file_storage_dir))
7220 _StartInstanceDisks(self, inst, None)
7222 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7223 old_name, self.op.debug_level)
7224 msg = result.fail_msg
7226 msg = ("Could not run OS rename script for instance %s on node %s"
7227 " (but the instance has been renamed in Ganeti): %s" %
7228 (inst.name, inst.primary_node, msg))
7229 self.proc.LogWarning(msg)
7231 _ShutdownInstanceDisks(self, inst)
7236 class LUInstanceRemove(LogicalUnit):
7237 """Remove an instance.
7240 HPATH = "instance-remove"
7241 HTYPE = constants.HTYPE_INSTANCE
7244 def ExpandNames(self):
7245 self._ExpandAndLockInstance()
7246 self.needed_locks[locking.LEVEL_NODE] = []
7247 self.needed_locks[locking.LEVEL_NODE_RES] = []
7248 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7250 def DeclareLocks(self, level):
7251 if level == locking.LEVEL_NODE:
7252 self._LockInstancesNodes()
7253 elif level == locking.LEVEL_NODE_RES:
7255 self.needed_locks[locking.LEVEL_NODE_RES] = \
7256 self.needed_locks[locking.LEVEL_NODE][:]
7258 def BuildHooksEnv(self):
7261 This runs on master, primary and secondary nodes of the instance.
7264 env = _BuildInstanceHookEnvByObject(self, self.instance)
7265 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7268 def BuildHooksNodes(self):
7269 """Build hooks nodes.
7272 nl = [self.cfg.GetMasterNode()]
7273 nl_post = list(self.instance.all_nodes) + nl
7274 return (nl, nl_post)
7276 def CheckPrereq(self):
7277 """Check prerequisites.
7279 This checks that the instance is in the cluster.
7282 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7283 assert self.instance is not None, \
7284 "Cannot retrieve locked instance %s" % self.op.instance_name
7286 def Exec(self, feedback_fn):
7287 """Remove the instance.
7290 instance = self.instance
7291 logging.info("Shutting down instance %s on node %s",
7292 instance.name, instance.primary_node)
7294 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7295 self.op.shutdown_timeout)
7296 msg = result.fail_msg
7298 if self.op.ignore_failures:
7299 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7301 raise errors.OpExecError("Could not shutdown instance %s on"
7303 (instance.name, instance.primary_node, msg))
7305 assert (self.owned_locks(locking.LEVEL_NODE) ==
7306 self.owned_locks(locking.LEVEL_NODE_RES))
7307 assert not (set(instance.all_nodes) -
7308 self.owned_locks(locking.LEVEL_NODE)), \
7309 "Not owning correct locks"
7311 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7314 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7315 """Utility function to remove an instance.
7318 logging.info("Removing block devices for instance %s", instance.name)
7320 if not _RemoveDisks(lu, instance):
7321 if not ignore_failures:
7322 raise errors.OpExecError("Can't remove instance's disks")
7323 feedback_fn("Warning: can't remove instance's disks")
7325 logging.info("Removing instance %s out of cluster config", instance.name)
7327 lu.cfg.RemoveInstance(instance.name)
7329 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7330 "Instance lock removal conflict"
7332 # Remove lock for the instance
7333 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7336 class LUInstanceQuery(NoHooksLU):
7337 """Logical unit for querying instances.
7340 # pylint: disable=W0142
7343 def CheckArguments(self):
7344 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7345 self.op.output_fields, self.op.use_locking)
7347 def ExpandNames(self):
7348 self.iq.ExpandNames(self)
7350 def DeclareLocks(self, level):
7351 self.iq.DeclareLocks(self, level)
7353 def Exec(self, feedback_fn):
7354 return self.iq.OldStyleQuery(self)
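# Illustrative sketch (not part of the original module): this LU only
# delegates to the query infrastructure; the filter built in CheckArguments is
# assumed to be a simple "name in given list" filter, e.g. (hypothetical
# instance name and fields):
#
#   flt = qlang.MakeSimpleFilter("name", ["inst1.example.com"])
#   iq = _InstanceQuery(flt, ["name", "status"], False)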
7357 class LUInstanceFailover(LogicalUnit):
7358 """Failover an instance.
7361 HPATH = "instance-failover"
7362 HTYPE = constants.HTYPE_INSTANCE
7365 def CheckArguments(self):
7366 """Check the arguments.
7369 self.iallocator = getattr(self.op, "iallocator", None)
7370 self.target_node = getattr(self.op, "target_node", None)
7372 def ExpandNames(self):
7373 self._ExpandAndLockInstance()
7375 if self.op.target_node is not None:
7376 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7378 self.needed_locks[locking.LEVEL_NODE] = []
7379 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7381 self.needed_locks[locking.LEVEL_NODE_RES] = []
7382 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7384 ignore_consistency = self.op.ignore_consistency
7385 shutdown_timeout = self.op.shutdown_timeout
7386 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7389 ignore_consistency=ignore_consistency,
7390 shutdown_timeout=shutdown_timeout,
7391 ignore_ipolicy=self.op.ignore_ipolicy)
7392 self.tasklets = [self._migrater]
7394 def DeclareLocks(self, level):
7395 if level == locking.LEVEL_NODE:
7396 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7397 if instance.disk_template in constants.DTS_EXT_MIRROR:
7398 if self.op.target_node is None:
7399 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7401 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7402 self.op.target_node]
7403 del self.recalculate_locks[locking.LEVEL_NODE]
7405 self._LockInstancesNodes()
7406 elif level == locking.LEVEL_NODE_RES:
7408 self.needed_locks[locking.LEVEL_NODE_RES] = \
7409 self.needed_locks[locking.LEVEL_NODE][:]
7411 def BuildHooksEnv(self):
7414 This runs on master, primary and secondary nodes of the instance.
7417 instance = self._migrater.instance
7418 source_node = instance.primary_node
7419 target_node = self.op.target_node
7421 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7422 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7423 "OLD_PRIMARY": source_node,
7424 "NEW_PRIMARY": target_node,
7427 if instance.disk_template in constants.DTS_INT_MIRROR:
7428 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7429 env["NEW_SECONDARY"] = source_node
7431 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7433 env.update(_BuildInstanceHookEnvByObject(self, instance))
7437 def BuildHooksNodes(self):
7438 """Build hooks nodes.
7441 instance = self._migrater.instance
7442 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7443 return (nl, nl + [instance.primary_node])
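# Illustrative sketch (not part of the original module): for a DRBD-mirrored
# instance failing over from its primary to its secondary, the hooks
# environment built above would contain entries along these lines
# (hypothetical node names and values):
#
#   OLD_PRIMARY   = "node1"      NEW_PRIMARY   = "node2"
#   OLD_SECONDARY = "node2"      NEW_SECONDARY = "node1"
#   IGNORE_CONSISTENCY = False   SHUTDOWN_TIMEOUT = 120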
7446 class LUInstanceMigrate(LogicalUnit):
7447 """Migrate an instance.
7449 This is migration without shutting down the instance, as opposed to
7450 failover, which is done with a shutdown.
7453 HPATH = "instance-migrate"
7454 HTYPE = constants.HTYPE_INSTANCE
7457 def ExpandNames(self):
7458 self._ExpandAndLockInstance()
7460 if self.op.target_node is not None:
7461 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7463 self.needed_locks[locking.LEVEL_NODE] = []
7464 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7466 self.needed_locks[locking.LEVEL_NODE] = []
7467 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7470 TLMigrateInstance(self, self.op.instance_name,
7471 cleanup=self.op.cleanup,
7473 fallback=self.op.allow_failover,
7474 allow_runtime_changes=self.op.allow_runtime_changes,
7475 ignore_ipolicy=self.op.ignore_ipolicy)
7476 self.tasklets = [self._migrater]
7478 def DeclareLocks(self, level):
7479 if level == locking.LEVEL_NODE:
7480 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7481 if instance.disk_template in constants.DTS_EXT_MIRROR:
7482 if self.op.target_node is None:
7483 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7485 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7486 self.op.target_node]
7487 del self.recalculate_locks[locking.LEVEL_NODE]
7489 self._LockInstancesNodes()
7490 elif level == locking.LEVEL_NODE_RES:
7492 self.needed_locks[locking.LEVEL_NODE_RES] = \
7493 self.needed_locks[locking.LEVEL_NODE][:]
7495 def BuildHooksEnv(self):
7498 This runs on master, primary and secondary nodes of the instance.
7501 instance = self._migrater.instance
7502 source_node = instance.primary_node
7503 target_node = self.op.target_node
7504 env = _BuildInstanceHookEnvByObject(self, instance)
7506 "MIGRATE_LIVE": self._migrater.live,
7507 "MIGRATE_CLEANUP": self.op.cleanup,
7508 "OLD_PRIMARY": source_node,
7509 "NEW_PRIMARY": target_node,
7510 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7513 if instance.disk_template in constants.DTS_INT_MIRROR:
7514 env["OLD_SECONDARY"] = target_node
7515 env["NEW_SECONDARY"] = source_node
7517 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7521 def BuildHooksNodes(self):
7522 """Build hooks nodes.
7525 instance = self._migrater.instance
7526 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7527 return (nl, nl + [instance.primary_node])
7530 class LUInstanceMove(LogicalUnit):
7531 """Move an instance by data-copying.
7534 HPATH = "instance-move"
7535 HTYPE = constants.HTYPE_INSTANCE
7538 def ExpandNames(self):
7539 self._ExpandAndLockInstance()
7540 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7541 self.op.target_node = target_node
7542 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7543 self.needed_locks[locking.LEVEL_NODE_RES] = []
7544 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7546 def DeclareLocks(self, level):
7547 if level == locking.LEVEL_NODE:
7548 self._LockInstancesNodes(primary_only=True)
7549 elif level == locking.LEVEL_NODE_RES:
7551 self.needed_locks[locking.LEVEL_NODE_RES] = \
7552 self.needed_locks[locking.LEVEL_NODE][:]
7554 def BuildHooksEnv(self):
7557 This runs on master, primary and secondary nodes of the instance.
7561 "TARGET_NODE": self.op.target_node,
7562 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7564 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7567 def BuildHooksNodes(self):
7568 """Build hooks nodes.
7572 self.cfg.GetMasterNode(),
7573 self.instance.primary_node,
7574 self.op.target_node,
7578 def CheckPrereq(self):
7579 """Check prerequisites.
7581 This checks that the instance is in the cluster.
7584 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7585 assert self.instance is not None, \
7586 "Cannot retrieve locked instance %s" % self.op.instance_name
7588 node = self.cfg.GetNodeInfo(self.op.target_node)
7589 assert node is not None, \
7590 "Cannot retrieve locked node %s" % self.op.target_node
7592 self.target_node = target_node = node.name
7594 if target_node == instance.primary_node:
7595 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7596 (instance.name, target_node),
7599 bep = self.cfg.GetClusterInfo().FillBE(instance)
7601 for idx, dsk in enumerate(instance.disks):
7602 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7603 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7604 " cannot copy" % idx, errors.ECODE_STATE)
7606 _CheckNodeOnline(self, target_node)
7607 _CheckNodeNotDrained(self, target_node)
7608 _CheckNodeVmCapable(self, target_node)
7609 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7610 self.cfg.GetNodeGroup(node.group))
7611 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7612 ignore=self.op.ignore_ipolicy)
7614 if instance.admin_state == constants.ADMINST_UP:
7615 # check memory requirements on the target node
7616 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7617 instance.name, bep[constants.BE_MAXMEM],
7618 instance.hypervisor)
7620 self.LogInfo("Not checking memory on the secondary node as"
7621 " instance will not be started")
7623 # check bridge existence
7624 _CheckInstanceBridgesExist(self, instance, node=target_node)
7626 def Exec(self, feedback_fn):
7627 """Move an instance.
7629 The move is done by shutting it down on its present node, copying
7630 the data over (slow) and starting it on the new node.
7633 instance = self.instance
7635 source_node = instance.primary_node
7636 target_node = self.target_node
7638 self.LogInfo("Shutting down instance %s on source node %s",
7639 instance.name, source_node)
7641 assert (self.owned_locks(locking.LEVEL_NODE) ==
7642 self.owned_locks(locking.LEVEL_NODE_RES))
7644 result = self.rpc.call_instance_shutdown(source_node, instance,
7645 self.op.shutdown_timeout)
7646 msg = result.fail_msg
7648 if self.op.ignore_consistency:
7649 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7650 " Proceeding anyway. Please make sure node"
7651 " %s is down. Error details: %s",
7652 instance.name, source_node, source_node, msg)
7654 raise errors.OpExecError("Could not shutdown instance %s on"
7656 (instance.name, source_node, msg))
7658 # create the target disks
7660 _CreateDisks(self, instance, target_node=target_node)
7661 except errors.OpExecError:
7662 self.LogWarning("Device creation failed, reverting...")
7664 _RemoveDisks(self, instance, target_node=target_node)
7666 self.cfg.ReleaseDRBDMinors(instance.name)
7669 cluster_name = self.cfg.GetClusterInfo().cluster_name
7672 # activate, get path, copy the data over
7673 for idx, disk in enumerate(instance.disks):
7674 self.LogInfo("Copying data for disk %d", idx)
7675 result = self.rpc.call_blockdev_assemble(target_node, disk,
7676 instance.name, True, idx)
7678 self.LogWarning("Can't assemble newly created disk %d: %s",
7679 idx, result.fail_msg)
7680 errs.append(result.fail_msg)
7682 dev_path = result.payload
7683 result = self.rpc.call_blockdev_export(source_node, disk,
7684 target_node, dev_path,
7687 self.LogWarning("Can't copy data over for disk %d: %s",
7688 idx, result.fail_msg)
7689 errs.append(result.fail_msg)
7693 self.LogWarning("Some disks failed to copy, aborting")
7695 _RemoveDisks(self, instance, target_node=target_node)
7697 self.cfg.ReleaseDRBDMinors(instance.name)
7698 raise errors.OpExecError("Errors during disk copy: %s" %
7701 instance.primary_node = target_node
7702 self.cfg.Update(instance, feedback_fn)
7704 self.LogInfo("Removing the disks on the original node")
7705 _RemoveDisks(self, instance, target_node=source_node)
7707 # Only start the instance if it's marked as up
7708 if instance.admin_state == constants.ADMINST_UP:
7709 self.LogInfo("Starting instance %s on node %s",
7710 instance.name, target_node)
7712 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7713 ignore_secondaries=True)
7715 _ShutdownInstanceDisks(self, instance)
7716 raise errors.OpExecError("Can't activate the instance's disks")
7718 result = self.rpc.call_instance_start(target_node,
7719 (instance, None, None), False)
7720 msg = result.fail_msg
7722 _ShutdownInstanceDisks(self, instance)
7723 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7724 (instance.name, target_node, msg))
7727 class LUNodeMigrate(LogicalUnit):
7728 """Migrate all instances from a node.
7731 HPATH = "node-migrate"
7732 HTYPE = constants.HTYPE_NODE
7735 def CheckArguments(self):
7738 def ExpandNames(self):
7739 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7741 self.share_locks = _ShareAll()
7742 self.needed_locks = {
7743 locking.LEVEL_NODE: [self.op.node_name],
7746 def BuildHooksEnv(self):
7749 This runs on the master, the primary and all the secondaries.
7753 "NODE_NAME": self.op.node_name,
7754 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7757 def BuildHooksNodes(self):
7758 """Build hooks nodes.
7761 nl = [self.cfg.GetMasterNode()]
7764 def CheckPrereq(self):
7767 def Exec(self, feedback_fn):
7768 # Prepare jobs for migration instances
7769 allow_runtime_changes = self.op.allow_runtime_changes
7771 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7774 iallocator=self.op.iallocator,
7775 target_node=self.op.target_node,
7776 allow_runtime_changes=allow_runtime_changes,
7777 ignore_ipolicy=self.op.ignore_ipolicy)]
7778 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
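# Illustrative shape of the value built above (instance names are made up):
# jobs == [[opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#          [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)]]
# i.e. one single-opcode job per primary instance on self.op.node_name, so the
# migrations can be scheduled and can fail independently of each other.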
7781 # TODO: Run iallocator in this opcode and pass correct placement options to
7782 # OpInstanceMigrate. Since other jobs can modify the cluster between
7783 # running the iallocator and the actual migration, a good consistency model
7784 # will have to be found.
7786 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7787 frozenset([self.op.node_name]))
7789 return ResultWithJobs(jobs)
7792 class TLMigrateInstance(Tasklet):
7793 """Tasklet class for instance migration.
7796 @ivar live: whether the migration will be done live or non-live;
7797 this variable is initialized only after CheckPrereq has run
7798 @type cleanup: boolean
7799 @ivar cleanup: Whether we clean up from a failed migration
7800 @type iallocator: string
7801 @ivar iallocator: The iallocator used to determine target_node
7802 @type target_node: string
7803 @ivar target_node: If given, the target_node to reallocate the instance to
7804 @type failover: boolean
7805 @ivar failover: Whether operation results in failover or migration
7806 @type fallback: boolean
7807 @ivar fallback: Whether fallback to failover is allowed if migration not
7809 @type ignore_consistency: boolean
7810 @ivar ignore_consistency: Whether we should ignore consistency between source
7812 @type shutdown_timeout: int
7813 @ivar shutdown_timeout: In case of failover, timeout of the shutdown
7814 @type ignore_ipolicy: bool
7815 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7820 _MIGRATION_POLL_INTERVAL = 1 # seconds
7821 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7823 def __init__(self, lu, instance_name, cleanup=False,
7824 failover=False, fallback=False,
7825 ignore_consistency=False,
7826 allow_runtime_changes=True,
7827 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7828 ignore_ipolicy=False):
7829 """Initializes this class.
7832 Tasklet.__init__(self, lu)
7835 self.instance_name = instance_name
7836 self.cleanup = cleanup
7837 self.live = False # will be overridden later
7838 self.failover = failover
7839 self.fallback = fallback
7840 self.ignore_consistency = ignore_consistency
7841 self.shutdown_timeout = shutdown_timeout
7842 self.ignore_ipolicy = ignore_ipolicy
7843 self.allow_runtime_changes = allow_runtime_changes
7845 def CheckPrereq(self):
7846 """Check prerequisites.
7848 This checks that the instance is in the cluster.
7851 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7852 instance = self.cfg.GetInstanceInfo(instance_name)
7853 assert instance is not None
7854 self.instance = instance
7855 cluster = self.cfg.GetClusterInfo()
7857 if (not self.cleanup and
7858 not instance.admin_state == constants.ADMINST_UP and
7859 not self.failover and self.fallback):
7860 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7861 " switching to failover")
7862 self.failover = True
7864 if instance.disk_template not in constants.DTS_MIRRORED:
7869 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7870 " %s" % (instance.disk_template, text),
7873 if instance.disk_template in constants.DTS_EXT_MIRROR:
7874 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7876 if self.lu.op.iallocator:
7877 self._RunAllocator()
7879 # We set self.target_node as it is required by
7881 self.target_node = self.lu.op.target_node
7883 # Check that the target node is correct in terms of instance policy
7884 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7885 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7886 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7887 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7888 ignore=self.ignore_ipolicy)
7890 # self.target_node is already populated, either directly or by the
7892 target_node = self.target_node
7893 if self.target_node == instance.primary_node:
7894 raise errors.OpPrereqError("Cannot migrate instance %s"
7895 " to its primary (%s)" %
7896 (instance.name, instance.primary_node))
7898 if len(self.lu.tasklets) == 1:
7899 # It is safe to release locks only when we're the only tasklet
7901 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7902 keep=[instance.primary_node, self.target_node])
7905 secondary_nodes = instance.secondary_nodes
7906 if not secondary_nodes:
7907 raise errors.ConfigurationError("No secondary node but using"
7908 " %s disk template" %
7909 instance.disk_template)
7910 target_node = secondary_nodes[0]
7911 if self.lu.op.iallocator or (self.lu.op.target_node and
7912 self.lu.op.target_node != target_node):
7914 text = "failed over"
7917 raise errors.OpPrereqError("Instances with disk template %s cannot"
7918 " be %s to arbitrary nodes"
7919 " (neither an iallocator nor a target"
7920 " node can be passed)" %
7921 (instance.disk_template, text),
7923 nodeinfo = self.cfg.GetNodeInfo(target_node)
7924 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7925 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7926 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7927 ignore=self.ignore_ipolicy)
7929 i_be = cluster.FillBE(instance)
7931 # check memory requirements on the target node
7932 if (not self.cleanup and
7933 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7934 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7935 "migrating instance %s" %
7937 i_be[constants.BE_MINMEM],
7938 instance.hypervisor)
7940 self.lu.LogInfo("Not checking memory on the secondary node as"
7941 " instance will not be started")
7943 # check if failover must be forced instead of migration
7944 if (not self.cleanup and not self.failover and
7945 i_be[constants.BE_ALWAYS_FAILOVER]):
7947 self.lu.LogInfo("Instance configured to always failover; fallback"
7949 self.failover = True
7951 raise errors.OpPrereqError("This instance has been configured to"
7952 " always failover, please allow failover",
7955 # check bridge existence
7956 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7958 if not self.cleanup:
7959 _CheckNodeNotDrained(self.lu, target_node)
7960 if not self.failover:
7961 result = self.rpc.call_instance_migratable(instance.primary_node,
7963 if result.fail_msg and self.fallback:
7964 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7966 self.failover = True
7968 result.Raise("Can't migrate, please use failover",
7969 prereq=True, ecode=errors.ECODE_STATE)
7971 assert not (self.failover and self.cleanup)
7973 if not self.failover:
7974 if self.lu.op.live is not None and self.lu.op.mode is not None:
7975 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7976 " parameters are accepted",
7978 if self.lu.op.live is not None:
7980 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7982 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7983 # reset the 'live' parameter to None so that repeated
7984 # invocations of CheckPrereq do not raise an exception
7985 self.lu.op.live = None
7986 elif self.lu.op.mode is None:
7987 # read the default value from the hypervisor
7988 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7989 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7991 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7993 # Failover is never live
7996 if not (self.failover or self.cleanup):
7997 remote_info = self.rpc.call_instance_info(instance.primary_node,
7999 instance.hypervisor)
8000 remote_info.Raise("Error checking instance on node %s" %
8001 instance.primary_node)
8002 instance_running = bool(remote_info.payload)
8003 if instance_running:
8004 self.current_mem = int(remote_info.payload["memory"])
8006 def _RunAllocator(self):
8007 """Run the allocator based on input opcode.
8010 # FIXME: add a self.ignore_ipolicy option
8011 ial = IAllocator(self.cfg, self.rpc,
8012 mode=constants.IALLOCATOR_MODE_RELOC,
8013 name=self.instance_name,
8014 relocate_from=[self.instance.primary_node],
8017 ial.Run(self.lu.op.iallocator)
8020 raise errors.OpPrereqError("Can't compute nodes using"
8021 " iallocator '%s': %s" %
8022 (self.lu.op.iallocator, ial.info),
8024 if len(ial.result) != ial.required_nodes:
8025 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8026 " of nodes (%s), required %s" %
8027 (self.lu.op.iallocator, len(ial.result),
8028 ial.required_nodes), errors.ECODE_FAULT)
8029 self.target_node = ial.result[0]
8030 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8031 self.instance_name, self.lu.op.iallocator,
8032 utils.CommaJoin(ial.result))
8034 def _WaitUntilSync(self):
8035 """Poll with custom rpc for disk sync.
8037 This uses our own step-based rpc call.
8040 self.feedback_fn("* wait until resync is done")
8044 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8046 self.instance.disks)
8048 for node, nres in result.items():
8049 nres.Raise("Cannot resync disks on node %s" % node)
8050 node_done, node_percent = nres.payload
8051 all_done = all_done and node_done
8052 if node_percent is not None:
8053 min_percent = min(min_percent, node_percent)
8055 if min_percent < 100:
8056 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8059 def _EnsureSecondary(self, node):
8060 """Demote a node to secondary.
8063 self.feedback_fn("* switching node %s to secondary mode" % node)
8065 for dev in self.instance.disks:
8066 self.cfg.SetDiskID(dev, node)
8068 result = self.rpc.call_blockdev_close(node, self.instance.name,
8069 self.instance.disks)
8070 result.Raise("Cannot change disk to secondary on node %s" % node)
8072 def _GoStandalone(self):
8073 """Disconnect from the network.
8076 self.feedback_fn("* changing into standalone mode")
8077 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8078 self.instance.disks)
8079 for node, nres in result.items():
8080 nres.Raise("Cannot disconnect disks node %s" % node)
8082 def _GoReconnect(self, multimaster):
8083 """Reconnect to the network.
8089 msg = "single-master"
8090 self.feedback_fn("* changing disks into %s mode" % msg)
8091 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8092 self.instance.disks,
8093 self.instance.name, multimaster)
8094 for node, nres in result.items():
8095 nres.Raise("Cannot change disks config on node %s" % node)
8097 def _ExecCleanup(self):
8098 """Try to cleanup after a failed migration.
8100 The cleanup is done by:
8101 - check that the instance is running only on one node
8102 (and update the config if needed)
8103 - change disks on its secondary node to secondary
8104 - wait until disks are fully synchronized
8105 - disconnect from the network
8106 - change disks into single-master mode
8107 - wait again until disks are fully synchronized
8110 instance = self.instance
8111 target_node = self.target_node
8112 source_node = self.source_node
8114 # check running on only one node
8115 self.feedback_fn("* checking where the instance actually runs"
8116 " (if this hangs, the hypervisor might be in"
8118 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8119 for node, result in ins_l.items():
8120 result.Raise("Can't contact node %s" % node)
8122 runningon_source = instance.name in ins_l[source_node].payload
8123 runningon_target = instance.name in ins_l[target_node].payload
8125 if runningon_source and runningon_target:
8126 raise errors.OpExecError("Instance seems to be running on two nodes,"
8127 " or the hypervisor is confused; you will have"
8128 " to ensure manually that it runs only on one"
8129 " and restart this operation")
8131 if not (runningon_source or runningon_target):
8132 raise errors.OpExecError("Instance does not seem to be running at all;"
8133 " in this case it's safer to repair by"
8134 " running 'gnt-instance stop' to ensure disk"
8135 " shutdown, and then restarting it")
8137 if runningon_target:
8138 # the migration has actually succeeded, we need to update the config
8139 self.feedback_fn("* instance running on secondary node (%s),"
8140 " updating config" % target_node)
8141 instance.primary_node = target_node
8142 self.cfg.Update(instance, self.feedback_fn)
8143 demoted_node = source_node
8145 self.feedback_fn("* instance confirmed to be running on its"
8146 " primary node (%s)" % source_node)
8147 demoted_node = target_node
8149 if instance.disk_template in constants.DTS_INT_MIRROR:
8150 self._EnsureSecondary(demoted_node)
8152 self._WaitUntilSync()
8153 except errors.OpExecError:
8154 # we ignore errors here, since if the device is standalone, it
8155 # won't be able to sync
8157 self._GoStandalone()
8158 self._GoReconnect(False)
8159 self._WaitUntilSync()
8161 self.feedback_fn("* done")
8163 def _RevertDiskStatus(self):
8164 """Try to revert the disk status after a failed migration.
8167 target_node = self.target_node
8168 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8172 self._EnsureSecondary(target_node)
8173 self._GoStandalone()
8174 self._GoReconnect(False)
8175 self._WaitUntilSync()
8176 except errors.OpExecError, err:
8177 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8178 " please try to recover the instance manually;"
8179 " error '%s'" % str(err))
8181 def _AbortMigration(self):
8182 """Call the hypervisor code to abort a started migration.
8185 instance = self.instance
8186 target_node = self.target_node
8187 source_node = self.source_node
8188 migration_info = self.migration_info
8190 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8194 abort_msg = abort_result.fail_msg
8196 logging.error("Aborting migration failed on target node %s: %s",
8197 target_node, abort_msg)
8198 # Don't raise an exception here, as we still have to try to revert the
8199 # disk status, even if this step failed.
8201 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8202 instance, False, self.live)
8203 abort_msg = abort_result.fail_msg
8205 logging.error("Aborting migration failed on source node %s: %s",
8206 source_node, abort_msg)
8208 def _ExecMigration(self):
8209 """Migrate an instance.
8211 The migrate is done by:
8212 - change the disks into dual-master mode
8213 - wait until disks are fully synchronized again
8214 - migrate the instance
8215 - change disks on the new secondary node (the old primary) to secondary
8216 - wait until disks are fully synchronized
8217 - change disks into single-master mode
8220 instance = self.instance
8221 target_node = self.target_node
8222 source_node = self.source_node
8224 # Check for hypervisor version mismatch and warn the user.
8225 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8226 None, [self.instance.hypervisor])
8227 for ninfo in nodeinfo.values():
8228 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8230 (_, _, (src_info, )) = nodeinfo[source_node].payload
8231 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8233 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8234 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8235 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8236 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8237 if src_version != dst_version:
8238 self.feedback_fn("* warning: hypervisor version mismatch between"
8239 " source (%s) and target (%s) node" %
8240 (src_version, dst_version))
8242 self.feedback_fn("* checking disk consistency between source and target")
8243 for (idx, dev) in enumerate(instance.disks):
8244 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8245 raise errors.OpExecError("Disk %s is degraded or not fully"
8246 " synchronized on target node,"
8247 " aborting migration" % idx)
8249 if self.current_mem > self.tgt_free_mem:
8250 if not self.allow_runtime_changes:
8251 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8252 " free memory to fit instance %s on target"
8253 " node %s (have %dMB, need %dMB)" %
8254 (instance.name, target_node,
8255 self.tgt_free_mem, self.current_mem))
8256 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8257 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8260 rpcres.Raise("Cannot modify instance runtime memory")
8262 # First get the migration information from the remote node
8263 result = self.rpc.call_migration_info(source_node, instance)
8264 msg = result.fail_msg
8266 log_err = ("Failed fetching source migration information from %s: %s" %
8268 logging.error(log_err)
8269 raise errors.OpExecError(log_err)
8271 self.migration_info = migration_info = result.payload
8273 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8274 # Then switch the disks to master/master mode
8275 self._EnsureSecondary(target_node)
8276 self._GoStandalone()
8277 self._GoReconnect(True)
8278 self._WaitUntilSync()
8280 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8281 result = self.rpc.call_accept_instance(target_node,
8284 self.nodes_ip[target_node])
8286 msg = result.fail_msg
8288 logging.error("Instance pre-migration failed, trying to revert"
8289 " disk status: %s", msg)
8290 self.feedback_fn("Pre-migration failed, aborting")
8291 self._AbortMigration()
8292 self._RevertDiskStatus()
8293 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8294 (instance.name, msg))
8296 self.feedback_fn("* migrating instance to %s" % target_node)
8297 result = self.rpc.call_instance_migrate(source_node, instance,
8298 self.nodes_ip[target_node],
8300 msg = result.fail_msg
8302 logging.error("Instance migration failed, trying to revert"
8303 " disk status: %s", msg)
8304 self.feedback_fn("Migration failed, aborting")
8305 self._AbortMigration()
8306 self._RevertDiskStatus()
8307 raise errors.OpExecError("Could not migrate instance %s: %s" %
8308 (instance.name, msg))
8310 self.feedback_fn("* starting memory transfer")
8311 last_feedback = time.time()
8313 result = self.rpc.call_instance_get_migration_status(source_node,
8315 msg = result.fail_msg
8316 ms = result.payload # MigrationStatus instance
8317 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8318 logging.error("Instance migration failed, trying to revert"
8319 " disk status: %s", msg)
8320 self.feedback_fn("Migration failed, aborting")
8321 self._AbortMigration()
8322 self._RevertDiskStatus()
8323 raise errors.OpExecError("Could not migrate instance %s: %s" %
8324 (instance.name, msg))
8326 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8327 self.feedback_fn("* memory transfer complete")
8330 if (utils.TimeoutExpired(last_feedback,
8331 self._MIGRATION_FEEDBACK_INTERVAL) and
8332 ms.transferred_ram is not None):
8333 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8334 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8335 last_feedback = time.time()
8337 time.sleep(self._MIGRATION_POLL_INTERVAL)
8339 result = self.rpc.call_instance_finalize_migration_src(source_node,
8343 msg = result.fail_msg
8345 logging.error("Instance migration succeeded, but finalization failed"
8346 " on the source node: %s", msg)
8347 raise errors.OpExecError("Could not finalize instance migration: %s" %
8350 instance.primary_node = target_node
8352 # distribute new instance config to the other nodes
8353 self.cfg.Update(instance, self.feedback_fn)
8355 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8359 msg = result.fail_msg
8361 logging.error("Instance migration succeeded, but finalization failed"
8362 " on the target node: %s", msg)
8363 raise errors.OpExecError("Could not finalize instance migration: %s" %
8366 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8367 self._EnsureSecondary(source_node)
8368 self._WaitUntilSync()
8369 self._GoStandalone()
8370 self._GoReconnect(False)
8371 self._WaitUntilSync()
8373 # If the instance's disk template is `rbd' and there was a successful
8374 # migration, unmap the device from the source node.
8375 if self.instance.disk_template == constants.DT_RBD:
8376 disks = _ExpandCheckDisks(instance, instance.disks)
8377 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8379 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8380 msg = result.fail_msg
8382 logging.error("Migration was successful, but couldn't unmap the"
8383 " block device %s on source node %s: %s",
8384 disk.iv_name, source_node, msg)
8385 logging.error("You need to unmap the device %s manually on %s",
8386 disk.iv_name, source_node)
8388 self.feedback_fn("* done")
8390 def _ExecFailover(self):
8391 """Failover an instance.
8393 The failover is done by shutting it down on its present node and
8394 starting it on the secondary.
8397 instance = self.instance
8398 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8400 source_node = instance.primary_node
8401 target_node = self.target_node
8403 if instance.admin_state == constants.ADMINST_UP:
8404 self.feedback_fn("* checking disk consistency between source and target")
8405 for (idx, dev) in enumerate(instance.disks):
8406 # for drbd, these are drbd over lvm
8407 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8408 if primary_node.offline:
8409 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8411 (primary_node.name, idx, target_node))
8412 elif not self.ignore_consistency:
8413 raise errors.OpExecError("Disk %s is degraded on target node,"
8414 " aborting failover" % idx)
8416 self.feedback_fn("* not checking disk consistency as instance is not"
8419 self.feedback_fn("* shutting down instance on source node")
8420 logging.info("Shutting down instance %s on node %s",
8421 instance.name, source_node)
8423 result = self.rpc.call_instance_shutdown(source_node, instance,
8424 self.shutdown_timeout)
8425 msg = result.fail_msg
8427 if self.ignore_consistency or primary_node.offline:
8428 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8429 " proceeding anyway; please make sure node"
8430 " %s is down; error details: %s",
8431 instance.name, source_node, source_node, msg)
8433 raise errors.OpExecError("Could not shutdown instance %s on"
8435 (instance.name, source_node, msg))
8437 self.feedback_fn("* deactivating the instance's disks on source node")
8438 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8439 raise errors.OpExecError("Can't shut down the instance's disks")
8441 instance.primary_node = target_node
8442 # distribute new instance config to the other nodes
8443 self.cfg.Update(instance, self.feedback_fn)
8445 # Only start the instance if it's marked as up
8446 if instance.admin_state == constants.ADMINST_UP:
8447 self.feedback_fn("* activating the instance's disks on target node %s" %
8449 logging.info("Starting instance %s on node %s",
8450 instance.name, target_node)
8452 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8453 ignore_secondaries=True)
8455 _ShutdownInstanceDisks(self.lu, instance)
8456 raise errors.OpExecError("Can't activate the instance's disks")
8458 self.feedback_fn("* starting the instance on the target node %s" %
8460 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8462 msg = result.fail_msg
8464 _ShutdownInstanceDisks(self.lu, instance)
8465 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8466 (instance.name, target_node, msg))
8468 def Exec(self, feedback_fn):
8469 """Perform the migration.
8472 self.feedback_fn = feedback_fn
8473 self.source_node = self.instance.primary_node
8475 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8476 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8477 self.target_node = self.instance.secondary_nodes[0]
8478 # Otherwise self.target_node has been populated either
8479 # directly, or through an iallocator.
8481 self.all_nodes = [self.source_node, self.target_node]
8482 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8483 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8486 feedback_fn("Failover instance %s" % self.instance.name)
8487 self._ExecFailover()
8489 feedback_fn("Migrating instance %s" % self.instance.name)
8492 return self._ExecCleanup()
8494 return self._ExecMigration()
8497 def _CreateBlockDev(lu, node, instance, device, force_create,
8499 """Create a tree of block devices on a given node.
8501 If this device type has to be created on secondaries, create it and
8504 If not, just recurse to children keeping the same 'force' value.
8506 @param lu: the lu on whose behalf we execute
8507 @param node: the node on which to create the device
8508 @type instance: L{objects.Instance}
8509 @param instance: the instance which owns the device
8510 @type device: L{objects.Disk}
8511 @param device: the device to create
8512 @type force_create: boolean
8513 @param force_create: whether to force creation of this device; this
8514 will be changed to True whenever we find a device which has
8515 CreateOnSecondary() attribute
8516 @param info: the extra 'metadata' we should attach to the device
8517 (this will be represented as a LVM tag)
8518 @type force_open: boolean
8519 @param force_open: this parameter will be passed to the
8520 L{backend.BlockdevCreate} function where it specifies
8521 whether we run on primary or not, and it affects both
8522 the child assembly and the device's own Open() execution
8525 if device.CreateOnSecondary():
8529 for child in device.children:
8530 _CreateBlockDev(lu, node, instance, child, force_create,
8533 if not force_create:
8536 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8539 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8540 """Create a single block device on a given node.
8542 This will not recurse over children of the device, so they must be
8545 @param lu: the lu on whose behalf we execute
8546 @param node: the node on which to create the device
8547 @type instance: L{objects.Instance}
8548 @param instance: the instance which owns the device
8549 @type device: L{objects.Disk}
8550 @param device: the device to create
8551 @param info: the extra 'metadata' we should attach to the device
8552 (this will be represented as a LVM tag)
8553 @type force_open: boolean
8554 @param force_open: this parameter will be passed to the
8555 L{backend.BlockdevCreate} function where it specifies
8556 whether we run on primary or not, and it affects both
8557 the child assembly and the device's own Open() execution
8560 lu.cfg.SetDiskID(device, node)
8561 result = lu.rpc.call_blockdev_create(node, device, device.size,
8562 instance.name, force_open, info)
8563 result.Raise("Can't create block device %s on"
8564 " node %s for instance %s" % (device, node, instance.name))
8565 if device.physical_id is None:
8566 device.physical_id = result.payload
8569 def _GenerateUniqueNames(lu, exts):
8570 """Generate a suitable LV name.
8572 This will generate a logical volume name for the given instance.
8577 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8578 results.append("%s%s" % (new_id, val))
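# Illustrative outcome (UUIDs are placeholders): each requested extension gets
# its own freshly generated unique ID, so exts == [".disk0", ".disk1"] could
# yield ["<uuid1>.disk0", "<uuid2>.disk1"].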
8582 def _ComputeLDParams(disk_template, disk_params):
8583 """Computes Logical Disk parameters from Disk Template parameters.
8585 @type disk_template: string
8586 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8587 @type disk_params: dict
8588 @param disk_params: disk template parameters; dict(template_name -> parameters
8590 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8591 contains the LD parameters of the node. The tree is flattened in-order.
8594 if disk_template not in constants.DISK_TEMPLATES:
8595 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8598 dt_params = disk_params[disk_template]
8599 if disk_template == constants.DT_DRBD8:
8601 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8602 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8603 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8604 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8605 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8606 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8607 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8608 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8609 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8610 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8611 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8612 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8616 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8619 result.append(drbd_params)
8623 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8626 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8628 result.append(data_params)
8632 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8635 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8637 result.append(meta_params)
8639 elif (disk_template == constants.DT_FILE or
8640 disk_template == constants.DT_SHARED_FILE):
8641 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8643 elif disk_template == constants.DT_PLAIN:
8645 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8648 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8650 result.append(params)
8652 elif disk_template == constants.DT_BLOCK:
8653 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8655 elif disk_template == constants.DT_RBD:
8657 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8660 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8662 result.append(params)
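# Note on the structure returned by _ComputeLDParams, as described in its
# docstring: for constants.DT_DRBD8 the list holds three dicts in flattened
# in-order -- the DRBD8 device's parameters first, then the data LV's, then
# the metadata LV's; the single-device templates (plain, file, blockdev, rbd)
# return a one-element list.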
8667 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8668 iv_name, p_minor, s_minor, drbd_params, data_params,
8670 """Generate a drbd8 device complete with its children.
8673 assert len(vgnames) == len(names) == 2
8674 port = lu.cfg.AllocatePort()
8675 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8677 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8678 logical_id=(vgnames[0], names[0]),
8680 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8681 logical_id=(vgnames[1], names[1]),
8683 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8684 logical_id=(primary, secondary, port,
8687 children=[dev_data, dev_meta],
8688 iv_name=iv_name, params=drbd_params)
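# Illustrative result (names and sizes are hypothetical): for a 1024 MB
# request this builds an LD_DRBD8 disk such as "disk/0" whose two children
# are a 1024 MB data LV and a DRBD_META_SIZE MB metadata LV, placed in the
# requested data and metadata volume groups.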
8692 _DISK_TEMPLATE_NAME_PREFIX = {
8693 constants.DT_PLAIN: "",
8694 constants.DT_RBD: ".rbd",
8698 _DISK_TEMPLATE_DEVICE_TYPE = {
8699 constants.DT_PLAIN: constants.LD_LV,
8700 constants.DT_FILE: constants.LD_FILE,
8701 constants.DT_SHARED_FILE: constants.LD_FILE,
8702 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8703 constants.DT_RBD: constants.LD_RBD,
8707 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8708 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8709 feedback_fn, disk_params,
8710 _req_file_storage=opcodes.RequireFileStorage,
8711 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8712 """Generate the entire disk layout for a given template type.
8715 #TODO: compute space requirements
8717 vgname = lu.cfg.GetVGName()
8718 disk_count = len(disk_info)
8720 ld_params = _ComputeLDParams(template_name, disk_params)
8722 if template_name == constants.DT_DISKLESS:
8724 elif template_name == constants.DT_DRBD8:
8725 drbd_params, data_params, meta_params = ld_params
8726 if len(secondary_nodes) != 1:
8727 raise errors.ProgrammerError("Wrong template configuration")
8728 remote_node = secondary_nodes[0]
8729 minors = lu.cfg.AllocateDRBDMinor(
8730 [primary_node, remote_node] * len(disk_info), instance_name)
8733 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8734 for i in range(disk_count)]):
8735 names.append(lv_prefix + "_data")
8736 names.append(lv_prefix + "_meta")
8737 for idx, disk in enumerate(disk_info):
8738 disk_index = idx + base_index
8739 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8740 data_vg = disk.get(constants.IDISK_VG, vgname)
8741 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8742 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8743 disk[constants.IDISK_SIZE],
8745 names[idx * 2:idx * 2 + 2],
8746 "disk/%d" % disk_index,
8747 minors[idx * 2], minors[idx * 2 + 1],
8748 drbd_params, data_params, meta_params)
8749 disk_dev.mode = disk[constants.IDISK_MODE]
8750 disks.append(disk_dev)
8753 raise errors.ProgrammerError("Wrong template configuration")
8755 if template_name == constants.DT_FILE:
8757 elif template_name == constants.DT_SHARED_FILE:
8758 _req_shr_file_storage()
8760 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8761 if name_prefix is None:
8764 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8765 (name_prefix, base_index + i)
8766 for i in range(disk_count)])
8768 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8770 if template_name == constants.DT_PLAIN:
8771 def logical_id_fn(idx, _, disk):
8772 vg = disk.get(constants.IDISK_VG, vgname)
8773 return (vg, names[idx])
8774 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8776 lambda _, disk_index, disk: (file_driver,
8777 "%s/disk%d" % (file_storage_dir,
8779 elif template_name == constants.DT_BLOCK:
8781 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8782 disk[constants.IDISK_ADOPT])
8783 elif template_name == constants.DT_RBD:
8784 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8786 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8788 for idx, disk in enumerate(disk_info):
8789 disk_index = idx + base_index
8790 size = disk[constants.IDISK_SIZE]
8791 feedback_fn("* disk %s, size %s" %
8792 (disk_index, utils.FormatUnit(size, "h")))
8793 disks.append(objects.Disk(dev_type=dev_type, size=size,
8794 logical_id=logical_id_fn(idx, disk_index, disk),
8795 iv_name="disk/%d" % disk_index,
8796 mode=disk[constants.IDISK_MODE],
8797 params=ld_params[0]))
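# For reference, the logical_id shapes produced above (values illustrative):
# DT_PLAIN -> (vg_name, lv_name); DT_FILE/DT_SHARED_FILE ->
# (file_driver, "<file_storage_dir>/disk0"); DT_BLOCK ->
# (constants.BLOCKDEV_DRIVER_MANUAL, adopted_device_path); DT_RBD ->
# ("rbd", rbd_volume_name).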
8802 def _GetInstanceInfoText(instance):
8803 """Compute that text that should be added to the disk's metadata.
8806 return "originstname+%s" % instance.name
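# For example, an instance named "web1.example.com" (hypothetical) yields
# "originstname+web1.example.com"; the string is attached to the disks as an
# LVM tag via the "info" argument of the block device creation helpers above.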
8809 def _CalcEta(time_taken, written, total_size):
8810 """Calculates the ETA based on size written and total size.
8812 @param time_taken: The time taken so far
8813 @param written: amount written so far
8814 @param total_size: The total size of data to be written
8815 @return: The remaining time in seconds
8818 avg_time = time_taken / float(written)
8819 return (total_size - written) * avg_time
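# Worked example (illustrative figures): with 4096 MB of a 10240 MB disk
# written in 120 seconds, avg_time is 120 / 4096 s/MB and the ETA is
# (10240 - 4096) * 120 / 4096 = 180 seconds.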
8822 def _WipeDisks(lu, instance):
8823 """Wipes instance disks.
8825 @type lu: L{LogicalUnit}
8826 @param lu: the logical unit on whose behalf we execute
8827 @type instance: L{objects.Instance}
8828 @param instance: the instance whose disks we should wipe
8829 @return: the success of the wipe
8832 node = instance.primary_node
8834 for device in instance.disks:
8835 lu.cfg.SetDiskID(device, node)
8837 logging.info("Pause sync of instance %s disks", instance.name)
8838 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8840 for idx, success in enumerate(result.payload):
8842 logging.warn("pause-sync of instance %s for disks %d failed",
8846 for idx, device in enumerate(instance.disks):
8847 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
8848 # at most MAX_WIPE_CHUNK
8849 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8850 constants.MIN_WIPE_CHUNK_PERCENT)
8851 # we _must_ make this an int, otherwise rounding errors will
8853 wipe_chunk_size = int(wipe_chunk_size)
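# Illustrative sizing, assuming the shipped defaults of
# MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MB: a 5000 MB disk is
# wiped in 500 MB chunks, while a 100000 MB disk would ask for 10000 MB and is
# therefore capped at 1024 MB per call.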
8855 lu.LogInfo("* Wiping disk %d", idx)
8856 logging.info("Wiping disk %d for instance %s, node %s using"
8857 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8862 start_time = time.time()
8864 while offset < size:
8865 wipe_size = min(wipe_chunk_size, size - offset)
8866 logging.debug("Wiping disk %d, offset %s, chunk %s",
8867 idx, offset, wipe_size)
8868 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8869 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8870 (idx, offset, wipe_size))
8873 if now - last_output >= 60:
8874 eta = _CalcEta(now - start_time, offset, size)
8875 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8876 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8879 logging.info("Resume sync of instance %s disks", instance.name)
8881 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8883 for idx, success in enumerate(result.payload):
8885 lu.LogWarning("Resume sync of disk %d failed, please have a"
8886 " look at the status and troubleshoot the issue", idx)
8887 logging.warn("resume-sync of instance %s for disks %d failed",
8891 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8892 """Create all disks for an instance.
8894 This abstracts away some work from AddInstance.
8896 @type lu: L{LogicalUnit}
8897 @param lu: the logical unit on whose behalf we execute
8898 @type instance: L{objects.Instance}
8899 @param instance: the instance whose disks we should create
8901 @param to_skip: list of indices to skip
8902 @type target_node: string
8903 @param target_node: if passed, overrides the target node for creation
8905 @return: the success of the creation
8908 info = _GetInstanceInfoText(instance)
8909 if target_node is None:
8910 pnode = instance.primary_node
8911 all_nodes = instance.all_nodes
8916 if instance.disk_template in constants.DTS_FILEBASED:
8917 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8918 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8920 result.Raise("Failed to create directory '%s' on"
8921 " node %s" % (file_storage_dir, pnode))
8923 # Note: this needs to be kept in sync with adding of disks in
8924 # LUInstanceSetParams
8925 for idx, device in enumerate(instance.disks):
8926 if to_skip and idx in to_skip:
8928 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8930 for node in all_nodes:
8931 f_create = node == pnode
8932 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8935 def _RemoveDisks(lu, instance, target_node=None):
8936 """Remove all disks for an instance.
8938 This abstracts away some work from `AddInstance()` and
8939 `RemoveInstance()`. Note that in case some of the devices couldn't
8940 be removed, the removal will continue with the other ones (compare
8941 with `_CreateDisks()`).
8943 @type lu: L{LogicalUnit}
8944 @param lu: the logical unit on whose behalf we execute
8945 @type instance: L{objects.Instance}
8946 @param instance: the instance whose disks we should remove
8947 @type target_node: string
8948 @param target_node: used to override the node on which to remove the disks
8950 @return: the success of the removal
8953 logging.info("Removing block devices for instance %s", instance.name)
8956 for (idx, device) in enumerate(instance.disks):
8958 edata = [(target_node, device)]
8960 edata = device.ComputeNodeTree(instance.primary_node)
8961 for node, disk in edata:
8962 lu.cfg.SetDiskID(disk, node)
8963 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8965 lu.LogWarning("Could not remove disk %s on node %s,"
8966 " continuing anyway: %s", idx, node, msg)
8969 # if this is a DRBD disk, return its port to the pool
8970 if device.dev_type in constants.LDS_DRBD:
8971 tcp_port = device.logical_id[2]
8972 lu.cfg.AddTcpUdpPort(tcp_port)
8974 if instance.disk_template == constants.DT_FILE:
8975 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8979 tgt = instance.primary_node
8980 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8982 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8983 file_storage_dir, instance.primary_node, result.fail_msg)
8989 def _ComputeDiskSizePerVG(disk_template, disks):
8990 """Compute disk size requirements in the volume group
8993 def _compute(disks, payload):
8994 """Universal algorithm.
8999 vgs[disk[constants.IDISK_VG]] = \
9000 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9004 # Required free disk space as a function of disk and swap space
9006 constants.DT_DISKLESS: {},
9007 constants.DT_PLAIN: _compute(disks, 0),
9008 # 128 MB are added for drbd metadata for each disk
9009 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9010 constants.DT_FILE: {},
9011 constants.DT_SHARED_FILE: {},
9014 if disk_template not in req_size_dict:
9015 raise errors.ProgrammerError("Disk template '%s' size requirement"
9016 " is unknown" % disk_template)
9018 return req_size_dict[disk_template]
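# Example (VG name is hypothetical): a single 1024 MB disk in VG "xenvg"
# yields {"xenvg": 1024} for DT_PLAIN and {"xenvg": 1024 + DRBD_META_SIZE}
# for DT_DRBD8; diskless and file-based templates need no VG space at all.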
9021 def _ComputeDiskSize(disk_template, disks):
9022 """Compute disk size requirements in the volume group
9025 # Required free disk space as a function of disk and swap space
9027 constants.DT_DISKLESS: None,
9028 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9029 # 128 MB are added for drbd metadata for each disk
9031 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9032 constants.DT_FILE: None,
9033 constants.DT_SHARED_FILE: 0,
9034 constants.DT_BLOCK: 0,
9035 constants.DT_RBD: 0,
9038 if disk_template not in req_size_dict:
9039 raise errors.ProgrammerError("Disk template '%s' size requirement"
9040 " is unknown" % disk_template)
9042 return req_size_dict[disk_template]
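# Example: two 1024 MB disks need 2048 MB under DT_PLAIN and
# 2 * (1024 + 128) = 2304 MB under DT_DRBD8, given the 128 MB of DRBD
# metadata added per disk as noted above.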
9045 def _FilterVmNodes(lu, nodenames):
9046 """Filters out non-vm_capable nodes from a list.
9048 @type lu: L{LogicalUnit}
9049 @param lu: the logical unit for which we check
9050 @type nodenames: list
9051 @param nodenames: the list of nodes on which we should check
9053 @return: the list of vm-capable nodes
9056 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9057 return [name for name in nodenames if name not in vm_nodes]
9060 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9061 """Hypervisor parameter validation.
9063 This function abstracts the hypervisor parameter validation to be
9064 used in both instance create and instance modify.
9066 @type lu: L{LogicalUnit}
9067 @param lu: the logical unit for which we check
9068 @type nodenames: list
9069 @param nodenames: the list of nodes on which we should check
9070 @type hvname: string
9071 @param hvname: the name of the hypervisor we should use
9072 @type hvparams: dict
9073 @param hvparams: the parameters which we need to check
9074 @raise errors.OpPrereqError: if the parameters are not valid
9077 nodenames = _FilterVmNodes(lu, nodenames)
9079 cluster = lu.cfg.GetClusterInfo()
9080 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9082 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9083 for node in nodenames:
9087 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9090 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9091 """OS parameters validation.
9093 @type lu: L{LogicalUnit}
9094 @param lu: the logical unit for which we check
9095 @type required: boolean
9096 @param required: whether the validation should fail if the OS is not
9098 @type nodenames: list
9099 @param nodenames: the list of nodes on which we should check
9100 @type osname: string
9101 @param osname: the name of the OS we should use
9102 @type osparams: dict
9103 @param osparams: the parameters which we need to check
9104 @raise errors.OpPrereqError: if the parameters are not valid
9107 nodenames = _FilterVmNodes(lu, nodenames)
9108 result = lu.rpc.call_os_validate(nodenames, required, osname,
9109 [constants.OS_VALIDATE_PARAMETERS],
9111 for node, nres in result.items():
9112 # we don't check for offline cases since this should be run only
9113 # against the master node and/or an instance's nodes
9114 nres.Raise("OS Parameters validation failed on node %s" % node)
9115 if not nres.payload:
9116 lu.LogInfo("OS %s not found on node %s, validation skipped",
9120 class LUInstanceCreate(LogicalUnit):
9121 """Create an instance.
9124 HPATH = "instance-add"
9125 HTYPE = constants.HTYPE_INSTANCE
9128 def CheckArguments(self):
9132 # do not require name_check to ease forward/backward compatibility
9134 if self.op.no_install and self.op.start:
9135 self.LogInfo("No-installation mode selected, disabling startup")
9136 self.op.start = False
9137 # validate/normalize the instance name
9138 self.op.instance_name = \
9139 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9141 if self.op.ip_check and not self.op.name_check:
9142 # TODO: make the ip check more flexible and not depend on the name check
9143 raise errors.OpPrereqError("Cannot do IP address check without a name"
9144 " check", errors.ECODE_INVAL)
9146 # check nics' parameter names
9147 for nic in self.op.nics:
9148 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9150 # check disks. parameter names and consistent adopt/no-adopt strategy
9151 has_adopt = has_no_adopt = False
9152 for disk in self.op.disks:
9153 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9154 if constants.IDISK_ADOPT in disk:
9158 if has_adopt and has_no_adopt:
9159 raise errors.OpPrereqError("Either all disks are adopted or none is",
9162 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9163 raise errors.OpPrereqError("Disk adoption is not supported for the"
9164 " '%s' disk template" %
9165 self.op.disk_template,
9167 if self.op.iallocator is not None:
9168 raise errors.OpPrereqError("Disk adoption not allowed with an"
9169 " iallocator script", errors.ECODE_INVAL)
9170 if self.op.mode == constants.INSTANCE_IMPORT:
9171 raise errors.OpPrereqError("Disk adoption not allowed for"
9172 " instance import", errors.ECODE_INVAL)
9174 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9175 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9176 " but no 'adopt' parameter given" %
9177 self.op.disk_template,
9180 self.adopt_disks = has_adopt
9182 # instance name verification
9183 if self.op.name_check:
9184 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9185 self.op.instance_name = self.hostname1.name
9186 # used in CheckPrereq for ip ping check
9187 self.check_ip = self.hostname1.ip
9189 self.check_ip = None
9191 # file storage checks
9192 if (self.op.file_driver and
9193 not self.op.file_driver in constants.FILE_DRIVER):
9194 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9195 self.op.file_driver, errors.ECODE_INVAL)
9197 if self.op.disk_template == constants.DT_FILE:
9198 opcodes.RequireFileStorage()
9199 elif self.op.disk_template == constants.DT_SHARED_FILE:
9200 opcodes.RequireSharedFileStorage()
9202 ### Node/iallocator related checks
9203 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9205 if self.op.pnode is not None:
9206 if self.op.disk_template in constants.DTS_INT_MIRROR:
9207 if self.op.snode is None:
9208 raise errors.OpPrereqError("The networked disk templates need"
9209 " a mirror node", errors.ECODE_INVAL)
9211 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9213 self.op.snode = None
9215 self._cds = _GetClusterDomainSecret()
9217 if self.op.mode == constants.INSTANCE_IMPORT:
9218 # On import force_variant must be True, because if we forced it at
9219 # initial install, our only chance when importing it back is that it
9221 self.op.force_variant = True
9223 if self.op.no_install:
9224 self.LogInfo("No-installation mode has no effect during import")
9226 elif self.op.mode == constants.INSTANCE_CREATE:
9227 if self.op.os_type is None:
9228 raise errors.OpPrereqError("No guest OS specified",
9230 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9231 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9232 " installation" % self.op.os_type,
9234 if self.op.disk_template is None:
9235 raise errors.OpPrereqError("No disk template specified",
9238 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9239 # Check handshake to ensure both clusters have the same domain secret
9240 src_handshake = self.op.source_handshake
9241 if not src_handshake:
9242 raise errors.OpPrereqError("Missing source handshake",
9245 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9248 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9251 # Load and check source CA
9252 self.source_x509_ca_pem = self.op.source_x509_ca
9253 if not self.source_x509_ca_pem:
9254 raise errors.OpPrereqError("Missing source X509 CA",
9258 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9260 except OpenSSL.crypto.Error, err:
9261 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9262 (err, ), errors.ECODE_INVAL)
9264 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9265 if errcode is not None:
9266 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9269 self.source_x509_ca = cert
9271 src_instance_name = self.op.source_instance_name
9272 if not src_instance_name:
9273 raise errors.OpPrereqError("Missing source instance name",
9276 self.source_instance_name = \
9277 netutils.GetHostname(name=src_instance_name).name
9280 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9281 self.op.mode, errors.ECODE_INVAL)
9283 def ExpandNames(self):
9284 """ExpandNames for CreateInstance.
9286 Figure out the right locks for instance creation.
9289 self.needed_locks = {}
9291 instance_name = self.op.instance_name
9292 # this is just a preventive check, but someone might still add this
9293 # instance in the meantime, and creation will fail at lock-add time
9294 if instance_name in self.cfg.GetInstanceList():
9295 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9296 instance_name, errors.ECODE_EXISTS)
9298 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9300 if self.op.iallocator:
9301 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9302 # specifying a group on instance creation and then selecting nodes from
9303 # that group
9304 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9305 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9306 else:
9307 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9308 nodelist = [self.op.pnode]
9309 if self.op.snode is not None:
9310 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9311 nodelist.append(self.op.snode)
9312 self.needed_locks[locking.LEVEL_NODE] = nodelist
9313 # Lock resources of instance's primary and secondary nodes (copy to
9314 # prevent accidental modification)
9315 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9317 # in case of import lock the source node too
9318 if self.op.mode == constants.INSTANCE_IMPORT:
9319 src_node = self.op.src_node
9320 src_path = self.op.src_path
9322 if src_path is None:
9323 self.op.src_path = src_path = self.op.instance_name
9325 if src_node is None:
9326 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9327 self.op.src_node = None
9328 if os.path.isabs(src_path):
9329 raise errors.OpPrereqError("Importing an instance from a path"
9330 " requires a source node option",
9333 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9334 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9335 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9336 if not os.path.isabs(src_path):
9337 self.op.src_path = src_path = \
9338 utils.PathJoin(constants.EXPORT_DIR, src_path)
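# Illustration of the two locking strategies above; the node names, OS name
# and disk size below are assumptions made up only for this example, and the
# exact gnt-instance syntax may differ between Ganeti versions:
#
#   # explicit placement: only the named primary/secondary nodes are locked
#   gnt-instance add -t drbd -n node1.example.com:node2.example.com \
#     -o debian-image -s 10G instance1.example.com
#
#   # allocator-driven placement: all node locks are taken here and the
#   # iallocator (e.g. "hail") picks the nodes later, in CheckPrereq
#   gnt-instance add -t drbd -I hail -o debian-image -s 10G \
#     instance1.example.com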
9340 def _RunAllocator(self):
9341 """Run the allocator based on input opcode.
9344 nics = [n.ToDict() for n in self.nics]
9345 ial = IAllocator(self.cfg, self.rpc,
9346 mode=constants.IALLOCATOR_MODE_ALLOC,
9347 name=self.op.instance_name,
9348 disk_template=self.op.disk_template,
9349 tags=self.op.tags,
9350 os=self.op.os_type,
9351 vcpus=self.be_full[constants.BE_VCPUS],
9352 memory=self.be_full[constants.BE_MAXMEM],
9353 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9354 disks=self.disks,
9355 nics=nics,
9356 hypervisor=self.op.hypervisor,
9357 )
9359 ial.Run(self.op.iallocator)
9362 raise errors.OpPrereqError("Can't compute nodes using"
9363 " iallocator '%s': %s" %
9364 (self.op.iallocator, ial.info),
9366 if len(ial.result) != ial.required_nodes:
9367 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9368 " of nodes (%s), required %s" %
9369 (self.op.iallocator, len(ial.result),
9370 ial.required_nodes), errors.ECODE_FAULT)
9371 self.op.pnode = ial.result[0]
9372 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9373 self.op.instance_name, self.op.iallocator,
9374 utils.CommaJoin(ial.result))
9375 if ial.required_nodes == 2:
9376 self.op.snode = ial.result[1]
9378 def BuildHooksEnv(self):
9379 """Build hooks env.
9381 This runs on master, primary and secondary nodes of the instance.
9383 """
9384 env = {
9385 "ADD_MODE": self.op.mode,
9386 }
9387 if self.op.mode == constants.INSTANCE_IMPORT:
9388 env["SRC_NODE"] = self.op.src_node
9389 env["SRC_PATH"] = self.op.src_path
9390 env["SRC_IMAGES"] = self.src_images
9392 env.update(_BuildInstanceHookEnv(
9393 name=self.op.instance_name,
9394 primary_node=self.op.pnode,
9395 secondary_nodes=self.secondaries,
9396 status=self.op.start,
9397 os_type=self.op.os_type,
9398 minmem=self.be_full[constants.BE_MINMEM],
9399 maxmem=self.be_full[constants.BE_MAXMEM],
9400 vcpus=self.be_full[constants.BE_VCPUS],
9401 nics=_NICListToTuple(self, self.nics),
9402 disk_template=self.op.disk_template,
9403 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9404 for d in self.disks],
9405 bep=self.be_full,
9406 hvp=self.hv_full,
9407 hypervisor_name=self.op.hypervisor,
9408 tags=self.op.tags,
9409 ))
9411 return env
9413 def BuildHooksNodes(self):
9414 """Build hooks nodes.
9417 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9420 def _ReadExportInfo(self):
9421 """Reads the export information from disk.
9423 It will override the opcode source node and path with the actual
9424 information, if these two were not specified before.
9426 @return: the export information
9428 """
9429 assert self.op.mode == constants.INSTANCE_IMPORT
9431 src_node = self.op.src_node
9432 src_path = self.op.src_path
9434 if src_node is None:
9435 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9436 exp_list = self.rpc.call_export_list(locked_nodes)
9437 found = False
9438 for node in exp_list:
9439 if exp_list[node].fail_msg:
9440 continue
9441 if src_path in exp_list[node].payload:
9442 found = True
9443 self.op.src_node = src_node = node
9444 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9445 src_path)
9446 break
9447 if not found:
9448 raise errors.OpPrereqError("No export found for relative path %s" %
9449 src_path, errors.ECODE_INVAL)
9451 _CheckNodeOnline(self, src_node)
9452 result = self.rpc.call_export_info(src_node, src_path)
9453 result.Raise("No export or invalid export found in dir %s" % src_path)
9455 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9456 if not export_info.has_section(constants.INISECT_EXP):
9457 raise errors.ProgrammerError("Corrupted export config",
9458 errors.ECODE_ENVIRON)
9460 ei_version = export_info.get(constants.INISECT_EXP, "version")
9461 if (int(ei_version) != constants.EXPORT_VERSION):
9462 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9463 (ei_version, constants.EXPORT_VERSION),
9464 errors.ECODE_ENVIRON)
9466 return export_info
9467 def _ReadExportParams(self, einfo):
9468 """Use export parameters as defaults.
9470 In case the opcode doesn't specify (as in override) some instance
9471 parameters, then try to use them from the export information, if
9472 that declares them.
9474 """
9475 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9477 if self.op.disk_template is None:
9478 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9479 self.op.disk_template = einfo.get(constants.INISECT_INS,
9480 "disk_template")
9481 if self.op.disk_template not in constants.DISK_TEMPLATES:
9482 raise errors.OpPrereqError("Disk template specified in configuration"
9483 " file is not one of the allowed values:"
9484 " %s" % " ".join(constants.DISK_TEMPLATES))
9485 else:
9486 raise errors.OpPrereqError("No disk template specified and the export"
9487 " is missing the disk_template information",
9488 errors.ECODE_INVAL)
9490 if not self.op.disks:
9491 disks = []
9492 # TODO: import the disk iv_name too
9493 for idx in range(constants.MAX_DISKS):
9494 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9495 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9496 disks.append({constants.IDISK_SIZE: disk_sz})
9497 self.op.disks = disks
9498 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9499 raise errors.OpPrereqError("No disk info specified and the export"
9500 " is missing the disk information",
9503 if not self.op.nics:
9504 nics = []
9505 for idx in range(constants.MAX_NICS):
9506 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9507 ndict = {}
9508 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9509 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9510 ndict[name] = v
9511 nics.append(ndict)
9512 else:
9513 break
9514 self.op.nics = nics
9516 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9517 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9519 if (self.op.hypervisor is None and
9520 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9521 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9523 if einfo.has_section(constants.INISECT_HYP):
9524 # use the export parameters but do not override the ones
9525 # specified by the user
9526 for name, value in einfo.items(constants.INISECT_HYP):
9527 if name not in self.op.hvparams:
9528 self.op.hvparams[name] = value
9530 if einfo.has_section(constants.INISECT_BEP):
9531 # use the parameters, without overriding
9532 for name, value in einfo.items(constants.INISECT_BEP):
9533 if name not in self.op.beparams:
9534 self.op.beparams[name] = value
9535 # Compatibility for the old "memory" be param
9536 if name == constants.BE_MEMORY:
9537 if constants.BE_MAXMEM not in self.op.beparams:
9538 self.op.beparams[constants.BE_MAXMEM] = value
9539 if constants.BE_MINMEM not in self.op.beparams:
9540 self.op.beparams[constants.BE_MINMEM] = value
9541 else:
9542 # try to read the parameters old style, from the main section
9543 for name in constants.BES_PARAMETERS:
9544 if (name not in self.op.beparams and
9545 einfo.has_option(constants.INISECT_INS, name)):
9546 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9548 if einfo.has_section(constants.INISECT_OSP):
9549 # use the parameters, without overriding
9550 for name, value in einfo.items(constants.INISECT_OSP):
9551 if name not in self.op.osparams:
9552 self.op.osparams[name] = value
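# The precedence rule used throughout _ReadExportParams can be summarised by
# this minimal, self-contained sketch (plain Python; the function and the
# parameter names are invented purely for illustration):
#
#   def fill_from_export(op_params, export_params):
#       """Return op_params extended with export values, never overriding
#       values the user supplied explicitly in the opcode."""
#       merged = dict(export_params)
#       merged.update(op_params)   # opcode values always win
#       return merged
#
#   fill_from_export({"kernel_path": "/vmlinuz"},
#                    {"kernel_path": "/boot/k", "root_path": "/dev/sda"})
#   # -> {"kernel_path": "/vmlinuz", "root_path": "/dev/sda"}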
9554 def _RevertToDefaults(self, cluster):
9555 """Revert the instance parameters to the default values.
9559 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9560 for name in self.op.hvparams.keys():
9561 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9562 del self.op.hvparams[name]
9564 be_defs = cluster.SimpleFillBE({})
9565 for name in self.op.beparams.keys():
9566 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9567 del self.op.beparams[name]
9569 nic_defs = cluster.SimpleFillNIC({})
9570 for nic in self.op.nics:
9571 for name in constants.NICS_PARAMETERS:
9572 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9573 del nic[name]
9575 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9576 for name in self.op.osparams.keys():
9577 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9578 del self.op.osparams[name]
9580 def _CalculateFileStorageDir(self):
9581 """Calculate final instance file storage dir.
9584 # file storage dir calculation/check
9585 self.instance_file_storage_dir = None
9586 if self.op.disk_template in constants.DTS_FILEBASED:
9587 # build the full file storage dir path
9588 joinargs = []
9590 if self.op.disk_template == constants.DT_SHARED_FILE:
9591 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9592 else:
9593 get_fsd_fn = self.cfg.GetFileStorageDir
9595 cfg_storagedir = get_fsd_fn()
9596 if not cfg_storagedir:
9597 raise errors.OpPrereqError("Cluster file storage dir not defined")
9598 joinargs.append(cfg_storagedir)
9600 if self.op.file_storage_dir is not None:
9601 joinargs.append(self.op.file_storage_dir)
9603 joinargs.append(self.op.instance_name)
9605 # pylint: disable=W0142
9606 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
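# Example of the resulting path composition (the cluster-level storage dir
# and the names below are assumptions used only for illustration):
#   cfg_storagedir           = "/srv/ganeti/file-storage"
#   self.op.file_storage_dir = "web"                      (optional)
#   self.op.instance_name    = "inst1.example.com"
#   => self.instance_file_storage_dir ==
#        "/srv/ganeti/file-storage/web/inst1.example.com"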
9608 def CheckPrereq(self): # pylint: disable=R0914
9609 """Check prerequisites.
9612 self._CalculateFileStorageDir()
9614 if self.op.mode == constants.INSTANCE_IMPORT:
9615 export_info = self._ReadExportInfo()
9616 self._ReadExportParams(export_info)
9618 if (not self.cfg.GetVGName() and
9619 self.op.disk_template not in constants.DTS_NOT_LVM):
9620 raise errors.OpPrereqError("Cluster does not support lvm-based"
9621 " instances", errors.ECODE_STATE)
9623 if (self.op.hypervisor is None or
9624 self.op.hypervisor == constants.VALUE_AUTO):
9625 self.op.hypervisor = self.cfg.GetHypervisorType()
9627 cluster = self.cfg.GetClusterInfo()
9628 enabled_hvs = cluster.enabled_hypervisors
9629 if self.op.hypervisor not in enabled_hvs:
9630 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9631 " cluster (%s)" % (self.op.hypervisor,
9632 ",".join(enabled_hvs)),
9635 # Check tag validity
9636 for tag in self.op.tags:
9637 objects.TaggableObject.ValidateTag(tag)
9639 # check hypervisor parameter syntax (locally)
9640 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9641 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9642 self.op.hvparams)
9643 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9644 hv_type.CheckParameterSyntax(filled_hvp)
9645 self.hv_full = filled_hvp
9646 # check that we don't specify global parameters on an instance
9647 _CheckGlobalHvParams(self.op.hvparams)
9649 # fill and remember the beparams dict
9650 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9651 for param, value in self.op.beparams.iteritems():
9652 if value == constants.VALUE_AUTO:
9653 self.op.beparams[param] = default_beparams[param]
9654 objects.UpgradeBeParams(self.op.beparams)
9655 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9656 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9658 # build os parameters
9659 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9661 # now that hvp/bep are in final format, let's reset to defaults,
9662 # if told to do so
9663 if self.op.identify_defaults:
9664 self._RevertToDefaults(cluster)
9666 # NIC buildup
9667 self.nics = []
9668 for idx, nic in enumerate(self.op.nics):
9669 nic_mode_req = nic.get(constants.INIC_MODE, None)
9670 nic_mode = nic_mode_req
9671 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9672 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9674 # in routed mode, for the first nic, the default ip is 'auto'
9675 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9676 default_ip_mode = constants.VALUE_AUTO
9677 else:
9678 default_ip_mode = constants.VALUE_NONE
9680 # ip validity checks
9681 ip = nic.get(constants.INIC_IP, default_ip_mode)
9682 if ip is None or ip.lower() == constants.VALUE_NONE:
9683 nic_ip = None
9684 elif ip.lower() == constants.VALUE_AUTO:
9685 if not self.op.name_check:
9686 raise errors.OpPrereqError("IP address set to auto but name checks"
9687 " have been skipped",
9688 errors.ECODE_INVAL)
9689 nic_ip = self.hostname1.ip
9690 else:
9691 if not netutils.IPAddress.IsValid(ip):
9692 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9693 errors.ECODE_INVAL)
9694 nic_ip = ip
9696 # TODO: check the ip address for uniqueness
9697 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9698 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9701 # MAC address verification
9702 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9703 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9704 mac = utils.NormalizeAndValidateMac(mac)
9706 try:
9707 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9708 except errors.ReservationError:
9709 raise errors.OpPrereqError("MAC address %s already in use"
9710 " in cluster" % mac,
9711 errors.ECODE_NOTUNIQUE)
9713 # Build nic parameters
9714 link = nic.get(constants.INIC_LINK, None)
9715 if link == constants.VALUE_AUTO:
9716 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9717 nicparams = {}
9718 if nic_mode_req:
9719 nicparams[constants.NIC_MODE] = nic_mode
9720 if link:
9721 nicparams[constants.NIC_LINK] = link
9723 check_params = cluster.SimpleFillNIC(nicparams)
9724 objects.NIC.CheckParameterSyntax(check_params)
9725 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9727 # disk checks/pre-build
9728 default_vg = self.cfg.GetVGName()
9729 self.disks = []
9730 for disk in self.op.disks:
9731 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9732 if mode not in constants.DISK_ACCESS_SET:
9733 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9734 mode, errors.ECODE_INVAL)
9735 size = disk.get(constants.IDISK_SIZE, None)
9736 if size is None:
9737 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9738 try:
9739 size = int(size)
9740 except (TypeError, ValueError):
9741 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9742 errors.ECODE_INVAL)
9744 data_vg = disk.get(constants.IDISK_VG, default_vg)
9745 new_disk = {
9746 constants.IDISK_SIZE: size,
9747 constants.IDISK_MODE: mode,
9748 constants.IDISK_VG: data_vg,
9749 }
9750 if constants.IDISK_METAVG in disk:
9751 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9752 if constants.IDISK_ADOPT in disk:
9753 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9754 self.disks.append(new_disk)
9756 if self.op.mode == constants.INSTANCE_IMPORT:
9757 disk_images = []
9758 for idx in range(len(self.disks)):
9759 option = "disk%d_dump" % idx
9760 if export_info.has_option(constants.INISECT_INS, option):
9761 # FIXME: are the old os-es, disk sizes, etc. useful?
9762 export_name = export_info.get(constants.INISECT_INS, option)
9763 image = utils.PathJoin(self.op.src_path, export_name)
9764 disk_images.append(image)
9765 else:
9766 disk_images.append(False)
9768 self.src_images = disk_images
9770 old_name = export_info.get(constants.INISECT_INS, "name")
9771 if self.op.instance_name == old_name:
9772 for idx, nic in enumerate(self.nics):
9773 if nic.mac == constants.VALUE_AUTO:
9774 nic_mac_ini = "nic%d_mac" % idx
9775 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9777 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9779 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9780 if self.op.ip_check:
9781 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9782 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9783 (self.check_ip, self.op.instance_name),
9784 errors.ECODE_NOTUNIQUE)
9786 #### mac address generation
9787 # By generating here the mac address both the allocator and the hooks get
9788 # the real final mac address rather than the 'auto' or 'generate' value.
9789 # There is a race condition between the generation and the instance object
9790 # creation, which means that we know the mac is valid now, but we're not
9791 # sure it will be when we actually add the instance. If things go bad
9792 # adding the instance will abort because of a duplicate mac, and the
9793 # creation job will fail.
9794 for nic in self.nics:
9795 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9796 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9800 if self.op.iallocator is not None:
9801 self._RunAllocator()
9803 # Release all unneeded node locks
9804 _ReleaseLocks(self, locking.LEVEL_NODE,
9805 keep=filter(None, [self.op.pnode, self.op.snode,
9806 self.op.src_node]))
9807 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9808 keep=filter(None, [self.op.pnode, self.op.snode,
9809 self.op.src_node]))
9811 #### node related checks
9813 # check primary node
9814 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9815 assert self.pnode is not None, \
9816 "Cannot retrieve locked node %s" % self.op.pnode
9818 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9819 pnode.name, errors.ECODE_STATE)
9821 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9822 pnode.name, errors.ECODE_STATE)
9823 if not pnode.vm_capable:
9824 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9825 " '%s'" % pnode.name, errors.ECODE_STATE)
9827 self.secondaries = []
9829 # mirror node verification
9830 if self.op.disk_template in constants.DTS_INT_MIRROR:
9831 if self.op.snode == pnode.name:
9832 raise errors.OpPrereqError("The secondary node cannot be the"
9833 " primary node", errors.ECODE_INVAL)
9834 _CheckNodeOnline(self, self.op.snode)
9835 _CheckNodeNotDrained(self, self.op.snode)
9836 _CheckNodeVmCapable(self, self.op.snode)
9837 self.secondaries.append(self.op.snode)
9839 snode = self.cfg.GetNodeInfo(self.op.snode)
9840 if pnode.group != snode.group:
9841 self.LogWarning("The primary and secondary nodes are in two"
9842 " different node groups; the disk parameters"
9843 " from the first disk's node group will be"
9846 nodenames = [pnode.name] + self.secondaries
9848 # Verify instance specs
9849 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9850 ispec = {
9851 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9852 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9853 constants.ISPEC_DISK_COUNT: len(self.disks),
9854 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9855 constants.ISPEC_NIC_COUNT: len(self.nics),
9856 constants.ISPEC_SPINDLE_USE: spindle_use,
9857 }
9859 group_info = self.cfg.GetNodeGroup(pnode.group)
9860 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9861 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9862 if not self.op.ignore_ipolicy and res:
9863 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9864 " policy: %s") % (pnode.group,
9865 utils.CommaJoin(res)),
9866 errors.ECODE_INVAL)
9868 # disk parameters (not customizable at instance or node level)
9869 # just use the primary node parameters, ignoring the secondary.
9870 self.diskparams = group_info.diskparams
9872 if not self.adopt_disks:
9873 if self.op.disk_template == constants.DT_RBD:
9874 # _CheckRADOSFreeSpace() is just a placeholder.
9875 # Any function that checks prerequisites can be placed here.
9876 # Check if there is enough space on the RADOS cluster.
9877 _CheckRADOSFreeSpace()
9879 # Check lv size requirements, if not adopting
9880 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9881 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9883 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9884 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9885 disk[constants.IDISK_ADOPT])
9886 for disk in self.disks])
9887 if len(all_lvs) != len(self.disks):
9888 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9890 for lv_name in all_lvs:
9892 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9893 # to ReserveLV uses the same syntax
9894 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9895 except errors.ReservationError:
9896 raise errors.OpPrereqError("LV named %s used by another instance" %
9897 lv_name, errors.ECODE_NOTUNIQUE)
9899 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9900 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9902 node_lvs = self.rpc.call_lv_list([pnode.name],
9903 vg_names.payload.keys())[pnode.name]
9904 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9905 node_lvs = node_lvs.payload
9907 delta = all_lvs.difference(node_lvs.keys())
9908 if delta:
9909 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9910 utils.CommaJoin(delta),
9911 errors.ECODE_INVAL)
9912 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9913 if online_lvs:
9914 raise errors.OpPrereqError("Online logical volumes found, cannot"
9915 " adopt: %s" % utils.CommaJoin(online_lvs),
9916 errors.ECODE_STATE)
9917 # update the size of disk based on what is found
9918 for dsk in self.disks:
9919 dsk[constants.IDISK_SIZE] = \
9920 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9921 dsk[constants.IDISK_ADOPT])][0]))
9923 elif self.op.disk_template == constants.DT_BLOCK:
9924 # Normalize and de-duplicate device paths
9925 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9926 for disk in self.disks])
9927 if len(all_disks) != len(self.disks):
9928 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9930 baddisks = [d for d in all_disks
9931 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9933 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9934 " cannot be adopted" %
9935 (", ".join(baddisks),
9936 constants.ADOPTABLE_BLOCKDEV_ROOT),
9939 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9940 list(all_disks))[pnode.name]
9941 node_disks.Raise("Cannot get block device information from node %s" %
9942 pnode.name)
9943 node_disks = node_disks.payload
9944 delta = all_disks.difference(node_disks.keys())
9945 if delta:
9946 raise errors.OpPrereqError("Missing block device(s): %s" %
9947 utils.CommaJoin(delta),
9948 errors.ECODE_INVAL)
9949 for dsk in self.disks:
9950 dsk[constants.IDISK_SIZE] = \
9951 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9953 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9955 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9956 # check OS parameters (remotely)
9957 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9959 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9961 # memory check on primary node
9962 #TODO(dynmem): use MINMEM for checking
9963 if self.op.start:
9964 _CheckNodeFreeMemory(self, self.pnode.name,
9965 "creating instance %s" % self.op.instance_name,
9966 self.be_full[constants.BE_MAXMEM],
9967 self.op.hypervisor)
9969 self.dry_run_result = list(nodenames)
9971 def Exec(self, feedback_fn):
9972 """Create and add the instance to the cluster.
9975 instance = self.op.instance_name
9976 pnode_name = self.pnode.name
9978 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9979 self.owned_locks(locking.LEVEL_NODE)), \
9980 "Node locks differ from node resource locks"
9982 ht_kind = self.op.hypervisor
9983 if ht_kind in constants.HTS_REQ_PORT:
9984 network_port = self.cfg.AllocatePort()
9985 else:
9986 network_port = None
9988 disks = _GenerateDiskTemplate(self,
9989 self.op.disk_template,
9990 instance, pnode_name,
9991 self.secondaries,
9992 self.disks,
9993 self.instance_file_storage_dir,
9994 self.op.file_driver,
9995 0,
9996 feedback_fn,
9997 self.diskparams)
9999 iobj = objects.Instance(name=instance, os=self.op.os_type,
10000 primary_node=pnode_name,
10001 nics=self.nics, disks=disks,
10002 disk_template=self.op.disk_template,
10003 admin_state=constants.ADMINST_DOWN,
10004 network_port=network_port,
10005 beparams=self.op.beparams,
10006 hvparams=self.op.hvparams,
10007 hypervisor=self.op.hypervisor,
10008 osparams=self.op.osparams,
10009 )
10011 if self.op.tags:
10012 for tag in self.op.tags:
10013 iobj.AddTag(tag)
10015 if self.adopt_disks:
10016 if self.op.disk_template == constants.DT_PLAIN:
10017 # rename LVs to the newly-generated names; we need to construct
10018 # 'fake' LV disks with the old data, plus the new unique_id
10019 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10020 rename_to = []
10021 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10022 rename_to.append(t_dsk.logical_id)
10023 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10024 self.cfg.SetDiskID(t_dsk, pnode_name)
10025 result = self.rpc.call_blockdev_rename(pnode_name,
10026 zip(tmp_disks, rename_to))
10027 result.Raise("Failed to rename adoped LVs")
10029 feedback_fn("* creating instance disks...")
10031 _CreateDisks(self, iobj)
10032 except errors.OpExecError:
10033 self.LogWarning("Device creation failed, reverting...")
10035 _RemoveDisks(self, iobj)
10037 self.cfg.ReleaseDRBDMinors(instance)
10040 feedback_fn("adding instance %s to cluster config" % instance)
10042 self.cfg.AddInstance(iobj, self.proc.GetECId())
10044 # Declare that we don't want to remove the instance lock anymore, as we've
10045 # added the instance to the config
10046 del self.remove_locks[locking.LEVEL_INSTANCE]
10048 if self.op.mode == constants.INSTANCE_IMPORT:
10049 # Release unused nodes
10050 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10051 else:
10052 # Release all nodes
10053 _ReleaseLocks(self, locking.LEVEL_NODE)
10055 disk_abort = False
10056 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10057 feedback_fn("* wiping instance disks...")
10058 try:
10059 _WipeDisks(self, iobj)
10060 except errors.OpExecError, err:
10061 logging.exception("Wiping disks failed")
10062 self.LogWarning("Wiping instance disks failed (%s)", err)
10063 disk_abort = True
10065 if disk_abort:
10066 # Something is already wrong with the disks, don't do anything else
10067 pass
10068 elif self.op.wait_for_sync:
10069 disk_abort = not _WaitForSync(self, iobj)
10070 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10071 # make sure the disks are not degraded (still sync-ing is ok)
10072 feedback_fn("* checking mirrors status")
10073 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10074 else:
10075 disk_abort = False
10077 if disk_abort:
10078 _RemoveDisks(self, iobj)
10079 self.cfg.RemoveInstance(iobj.name)
10080 # Make sure the instance lock gets removed
10081 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10082 raise errors.OpExecError("There are some degraded disks for"
10085 # Release all node resource locks
10086 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10088 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10089 if self.op.mode == constants.INSTANCE_CREATE:
10090 if not self.op.no_install:
10091 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10092 not self.op.wait_for_sync)
10093 if pause_sync:
10094 feedback_fn("* pausing disk sync to install instance OS")
10095 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10096 (iobj.disks, iobj), True)
10097 for idx, success in enumerate(result.payload):
10098 if not success:
10099 logging.warn("pause-sync of instance %s for disk %d failed",
10100 instance, idx)
10102 feedback_fn("* running the instance OS create scripts...")
10103 # FIXME: pass debug option from opcode to backend
10104 os_add_result = \
10105 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10106 self.op.debug_level)
10107 if pause_sync:
10108 feedback_fn("* resuming disk sync")
10109 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10110 (iobj.disks, iobj), False)
10111 for idx, success in enumerate(result.payload):
10112 if not success:
10113 logging.warn("resume-sync of instance %s for disk %d failed",
10114 instance, idx)
10116 os_add_result.Raise("Could not add os for instance %s"
10117 " on node %s" % (instance, pnode_name))
10119 elif self.op.mode == constants.INSTANCE_IMPORT:
10120 feedback_fn("* running the instance OS import scripts...")
10124 for idx, image in enumerate(self.src_images):
10128 # FIXME: pass debug option from opcode to backend
10129 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10130 constants.IEIO_FILE, (image, ),
10131 constants.IEIO_SCRIPT,
10132 (iobj.disks[idx], idx),
10133 None)
10134 transfers.append(dt)
10136 import_result = \
10137 masterd.instance.TransferInstanceData(self, feedback_fn,
10138 self.op.src_node, pnode_name,
10139 self.pnode.secondary_ip,
10140 iobj, transfers)
10141 if not compat.all(import_result):
10142 self.LogWarning("Some disks for instance %s on node %s were not"
10143 " imported successfully" % (instance, pnode_name))
10145 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10146 feedback_fn("* preparing remote import...")
10147 # The source cluster will stop the instance before attempting to make a
10148 # connection. In some cases stopping an instance can take a long time,
10149 # hence the shutdown timeout is added to the connection timeout.
10150 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10151 self.op.source_shutdown_timeout)
10152 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10154 assert iobj.primary_node == self.pnode.name
10155 disk_results = \
10156 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10157 self.source_x509_ca,
10158 self._cds, timeouts)
10159 if not compat.all(disk_results):
10160 # TODO: Should the instance still be started, even if some disks
10161 # failed to import (valid for local imports, too)?
10162 self.LogWarning("Some disks for instance %s on node %s were not"
10163 " imported successfully" % (instance, pnode_name))
10165 # Run rename script on newly imported instance
10166 assert iobj.name == instance
10167 feedback_fn("Running rename script for %s" % instance)
10168 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10169 self.source_instance_name,
10170 self.op.debug_level)
10171 if result.fail_msg:
10172 self.LogWarning("Failed to run rename script for %s on node"
10173 " %s: %s" % (instance, pnode_name, result.fail_msg))
10175 else:
10176 # also checked in the prereq part
10177 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10178 % self.op.mode)
10180 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10182 if self.op.start:
10183 iobj.admin_state = constants.ADMINST_UP
10184 self.cfg.Update(iobj, feedback_fn)
10185 logging.info("Starting instance %s on node %s", instance, pnode_name)
10186 feedback_fn("* starting instance...")
10187 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10188 False)
10189 result.Raise("Could not start instance")
10191 return list(iobj.all_nodes)
10194 def _CheckRADOSFreeSpace():
10195 """Compute disk size requirements inside the RADOS cluster.
10198 # For the RADOS cluster we assume there is always enough space.
10202 class LUInstanceConsole(NoHooksLU):
10203 """Connect to an instance's console.
10205 This is somewhat special in that it returns the command line that
10206 you need to run on the master node in order to connect to the
10207 console.
10209 """
10210 REQ_BGL = False
10212 def ExpandNames(self):
10213 self.share_locks = _ShareAll()
10214 self._ExpandAndLockInstance()
10216 def CheckPrereq(self):
10217 """Check prerequisites.
10219 This checks that the instance is in the cluster.
10221 """
10222 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10223 assert self.instance is not None, \
10224 "Cannot retrieve locked instance %s" % self.op.instance_name
10225 _CheckNodeOnline(self, self.instance.primary_node)
10227 def Exec(self, feedback_fn):
10228 """Connect to the console of an instance
10231 instance = self.instance
10232 node = instance.primary_node
10234 node_insts = self.rpc.call_instance_list([node],
10235 [instance.hypervisor])[node]
10236 node_insts.Raise("Can't get node information from %s" % node)
10238 if instance.name not in node_insts.payload:
10239 if instance.admin_state == constants.ADMINST_UP:
10240 state = constants.INSTST_ERRORDOWN
10241 elif instance.admin_state == constants.ADMINST_DOWN:
10242 state = constants.INSTST_ADMINDOWN
10243 else:
10244 state = constants.INSTST_ADMINOFFLINE
10245 raise errors.OpExecError("Instance %s is not running (state %s)" %
10246 (instance.name, state))
10248 logging.debug("Connecting to console of %s on %s", instance.name, node)
10250 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10253 def _GetInstanceConsole(cluster, instance):
10254 """Returns console information for an instance.
10256 @type cluster: L{objects.Cluster}
10257 @type instance: L{objects.Instance}
10260 """
10261 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10262 # beparams and hvparams are passed separately, to avoid editing the
10263 # instance and then saving the defaults in the instance itself.
10264 hvparams = cluster.FillHV(instance)
10265 beparams = cluster.FillBE(instance)
10266 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10268 assert console.instance == instance.name
10269 assert console.Validate()
10271 return console.ToDict()
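# A minimal usage sketch, assuming a cluster object and a locked instance are
# already available (as in LUInstanceConsole.Exec above); the exact keys in
# the returned dict depend on the hypervisor's GetInstanceConsole():
#
#   cluster = self.cfg.GetClusterInfo()
#   console_dict = _GetInstanceConsole(cluster, self.instance)
#   # console_dict is the console object serialized via ToDict(), describing
#   # how to reach the instance console from the master node.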
10274 class LUInstanceReplaceDisks(LogicalUnit):
10275 """Replace the disks of an instance.
10278 HPATH = "mirrors-replace"
10279 HTYPE = constants.HTYPE_INSTANCE
10280 REQ_BGL = False
10282 def CheckArguments(self):
10283 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10284 self.op.iallocator)
10286 def ExpandNames(self):
10287 self._ExpandAndLockInstance()
10289 assert locking.LEVEL_NODE not in self.needed_locks
10290 assert locking.LEVEL_NODE_RES not in self.needed_locks
10291 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10293 assert self.op.iallocator is None or self.op.remote_node is None, \
10294 "Conflicting options"
10296 if self.op.remote_node is not None:
10297 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10299 # Warning: do not remove the locking of the new secondary here
10300 # unless DRBD8.AddChildren is changed to work in parallel;
10301 # currently it doesn't since parallel invocations of
10302 # FindUnusedMinor will conflict
10303 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10304 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10305 else:
10306 self.needed_locks[locking.LEVEL_NODE] = []
10307 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10309 if self.op.iallocator is not None:
10310 # iallocator will select a new node in the same group
10311 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10313 self.needed_locks[locking.LEVEL_NODE_RES] = []
10315 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10316 self.op.iallocator, self.op.remote_node,
10317 self.op.disks, False, self.op.early_release,
10318 self.op.ignore_ipolicy)
10320 self.tasklets = [self.replacer]
10322 def DeclareLocks(self, level):
10323 if level == locking.LEVEL_NODEGROUP:
10324 assert self.op.remote_node is None
10325 assert self.op.iallocator is not None
10326 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10328 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10329 # Lock all groups used by instance optimistically; this requires going
10330 # via the node before it's locked, requiring verification later on
10331 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10332 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10334 elif level == locking.LEVEL_NODE:
10335 if self.op.iallocator is not None:
10336 assert self.op.remote_node is None
10337 assert not self.needed_locks[locking.LEVEL_NODE]
10339 # Lock member nodes of all locked groups
10340 self.needed_locks[locking.LEVEL_NODE] = [node_name
10341 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10342 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10344 self._LockInstancesNodes()
10345 elif level == locking.LEVEL_NODE_RES:
10346 # Reuse node locks
10347 self.needed_locks[locking.LEVEL_NODE_RES] = \
10348 self.needed_locks[locking.LEVEL_NODE]
10350 def BuildHooksEnv(self):
10351 """Build hooks env.
10353 This runs on the master, the primary and all the secondaries.
10355 """
10356 instance = self.replacer.instance
10357 env = {
10358 "MODE": self.op.mode,
10359 "NEW_SECONDARY": self.op.remote_node,
10360 "OLD_SECONDARY": instance.secondary_nodes[0],
10361 }
10362 env.update(_BuildInstanceHookEnvByObject(self, instance))
10363 return env
10365 def BuildHooksNodes(self):
10366 """Build hooks nodes.
10369 instance = self.replacer.instance
10371 self.cfg.GetMasterNode(),
10372 instance.primary_node,
10374 if self.op.remote_node is not None:
10375 nl.append(self.op.remote_node)
10378 def CheckPrereq(self):
10379 """Check prerequisites.
10382 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10383 self.op.iallocator is None)
10385 # Verify if node group locks are still correct
10386 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10387 if owned_groups:
10388 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10390 return LogicalUnit.CheckPrereq(self)
10393 class TLReplaceDisks(Tasklet):
10394 """Replaces disks for an instance.
10396 Note: Locking is not within the scope of this class.
10398 """
10399 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10400 disks, delay_iallocator, early_release, ignore_ipolicy):
10401 """Initializes this class.
10404 Tasklet.__init__(self, lu)
10407 self.instance_name = instance_name
10408 self.mode = mode
10409 self.iallocator_name = iallocator_name
10410 self.remote_node = remote_node
10411 self.disks = disks
10412 self.delay_iallocator = delay_iallocator
10413 self.early_release = early_release
10414 self.ignore_ipolicy = ignore_ipolicy
10416 # Runtime data
10417 self.instance = None
10418 self.new_node = None
10419 self.target_node = None
10420 self.other_node = None
10421 self.remote_node_info = None
10422 self.node_secondary_ip = None
10424 @staticmethod
10425 def CheckArguments(mode, remote_node, iallocator):
10426 """Helper function for users of this class.
10429 # check for valid parameter combination
10430 if mode == constants.REPLACE_DISK_CHG:
10431 if remote_node is None and iallocator is None:
10432 raise errors.OpPrereqError("When changing the secondary either an"
10433 " iallocator script must be used or the"
10434 " new node given", errors.ECODE_INVAL)
10436 if remote_node is not None and iallocator is not None:
10437 raise errors.OpPrereqError("Give either the iallocator or the new"
10438 " secondary, not both", errors.ECODE_INVAL)
10440 elif remote_node is not None or iallocator is not None:
10441 # Not replacing the secondary
10442 raise errors.OpPrereqError("The iallocator and new node options can"
10443 " only be used when changing the"
10444 " secondary node", errors.ECODE_INVAL)
10446 @staticmethod
10447 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10448 """Compute a new secondary node using an IAllocator.
10451 ial = IAllocator(lu.cfg, lu.rpc,
10452 mode=constants.IALLOCATOR_MODE_RELOC,
10453 name=instance_name,
10454 relocate_from=list(relocate_from))
10456 ial.Run(iallocator_name)
10458 if not ial.success:
10459 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10460 " %s" % (iallocator_name, ial.info),
10461 errors.ECODE_NORES)
10463 if len(ial.result) != ial.required_nodes:
10464 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10465 " of nodes (%s), required %s" %
10466 (iallocator_name,
10467 len(ial.result), ial.required_nodes),
10468 errors.ECODE_FAULT)
10470 remote_node_name = ial.result[0]
10472 lu.LogInfo("Selected new secondary for instance '%s': %s",
10473 instance_name, remote_node_name)
10475 return remote_node_name
10477 def _FindFaultyDisks(self, node_name):
10478 """Wrapper for L{_FindFaultyInstanceDisks}.
10481 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10484 def _CheckDisksActivated(self, instance):
10485 """Checks if the instance disks are activated.
10487 @param instance: The instance to check disks
10488 @return: True if they are activated, False otherwise
10490 """
10491 nodes = instance.all_nodes
10493 for idx, dev in enumerate(instance.disks):
10494 for node in nodes:
10495 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10496 self.cfg.SetDiskID(dev, node)
10498 result = self.rpc.call_blockdev_find(node, dev)
10500 if result.offline:
10501 continue
10502 elif result.fail_msg or not result.payload:
10503 return False
10505 return True
10507 def CheckPrereq(self):
10508 """Check prerequisites.
10510 This checks that the instance is in the cluster.
10512 """
10513 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10514 assert instance is not None, \
10515 "Cannot retrieve locked instance %s" % self.instance_name
10517 if instance.disk_template != constants.DT_DRBD8:
10518 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10519 " instances", errors.ECODE_INVAL)
10521 if len(instance.secondary_nodes) != 1:
10522 raise errors.OpPrereqError("The instance has a strange layout,"
10523 " expected one secondary but found %d" %
10524 len(instance.secondary_nodes),
10525 errors.ECODE_FAULT)
10527 if not self.delay_iallocator:
10528 self._CheckPrereq2()
10530 def _CheckPrereq2(self):
10531 """Check prerequisites, second part.
10533 This function should always be part of CheckPrereq. It was separated and is
10534 now called from Exec because during node evacuation iallocator was only
10535 called with an unmodified cluster model, not taking planned changes into
10536 account.
10538 """
10539 instance = self.instance
10540 secondary_node = instance.secondary_nodes[0]
10542 if self.iallocator_name is None:
10543 remote_node = self.remote_node
10544 else:
10545 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10546 instance.name, instance.secondary_nodes)
10548 if remote_node is None:
10549 self.remote_node_info = None
10550 else:
10551 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10552 "Remote node '%s' is not locked" % remote_node
10554 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10555 assert self.remote_node_info is not None, \
10556 "Cannot retrieve locked node %s" % remote_node
10558 if remote_node == self.instance.primary_node:
10559 raise errors.OpPrereqError("The specified node is the primary node of"
10560 " the instance", errors.ECODE_INVAL)
10562 if remote_node == secondary_node:
10563 raise errors.OpPrereqError("The specified node is already the"
10564 " secondary node of the instance",
10565 errors.ECODE_INVAL)
10567 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10568 constants.REPLACE_DISK_CHG):
10569 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10570 errors.ECODE_INVAL)
10572 if self.mode == constants.REPLACE_DISK_AUTO:
10573 if not self._CheckDisksActivated(instance):
10574 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10575 " first" % self.instance_name,
10576 errors.ECODE_STATE)
10577 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10578 faulty_secondary = self._FindFaultyDisks(secondary_node)
10580 if faulty_primary and faulty_secondary:
10581 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10582 " one node and can not be repaired"
10583 " automatically" % self.instance_name,
10584 errors.ECODE_STATE)
10586 if faulty_primary:
10587 self.disks = faulty_primary
10588 self.target_node = instance.primary_node
10589 self.other_node = secondary_node
10590 check_nodes = [self.target_node, self.other_node]
10591 elif faulty_secondary:
10592 self.disks = faulty_secondary
10593 self.target_node = secondary_node
10594 self.other_node = instance.primary_node
10595 check_nodes = [self.target_node, self.other_node]
10596 else:
10597 self.disks = []
10598 check_nodes = []
10600 else:
10601 # Non-automatic modes
10602 if self.mode == constants.REPLACE_DISK_PRI:
10603 self.target_node = instance.primary_node
10604 self.other_node = secondary_node
10605 check_nodes = [self.target_node, self.other_node]
10607 elif self.mode == constants.REPLACE_DISK_SEC:
10608 self.target_node = secondary_node
10609 self.other_node = instance.primary_node
10610 check_nodes = [self.target_node, self.other_node]
10612 elif self.mode == constants.REPLACE_DISK_CHG:
10613 self.new_node = remote_node
10614 self.other_node = instance.primary_node
10615 self.target_node = secondary_node
10616 check_nodes = [self.new_node, self.other_node]
10618 _CheckNodeNotDrained(self.lu, remote_node)
10619 _CheckNodeVmCapable(self.lu, remote_node)
10621 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10622 assert old_node_info is not None
10623 if old_node_info.offline and not self.early_release:
10624 # doesn't make sense to delay the release
10625 self.early_release = True
10626 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10627 " early-release mode", secondary_node)
10630 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10633 # If not specified all disks should be replaced
10634 if not self.disks:
10635 self.disks = range(len(self.instance.disks))
10637 # TODO: This is ugly, but right now we can't distinguish between internal
10638 # submitted opcode and external one. We should fix that.
10639 if self.remote_node_info:
10640 # We change the node, lets verify it still meets instance policy
10641 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10642 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10643 new_group_info)
10644 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10645 ignore=self.ignore_ipolicy)
10647 # TODO: compute disk parameters
10648 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10649 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10650 if primary_node_info.group != secondary_node_info.group:
10651 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10652 " different node groups; the disk parameters of the"
10653 " primary node's group will be applied.")
10655 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10657 for node in check_nodes:
10658 _CheckNodeOnline(self.lu, node)
10660 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10661 self.other_node,
10662 self.target_node]
10663 if node_name is not None)
10665 # Release unneeded node and node resource locks
10666 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10667 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10669 # Release any owned node group
10670 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10671 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10673 # Check whether disks are valid
10674 for disk_idx in self.disks:
10675 instance.FindDisk(disk_idx)
10677 # Get secondary node IP addresses
10678 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10679 in self.cfg.GetMultiNodeInfo(touched_nodes))
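# The role assignment computed above can be summarised by this illustrative
# mapping (a reading aid only, not part of the tasklet; tuple order is
# target_node, other_node, new_node):
#
#   ROLES = {
#       constants.REPLACE_DISK_PRI: ("primary", "secondary", None),
#       constants.REPLACE_DISK_SEC: ("secondary", "primary", None),
#       constants.REPLACE_DISK_CHG: ("old secondary", "primary", "new node"),
#   }
#   # REPLACE_DISK_AUTO picks target/other from whichever side actually has
#   # the faulty disks (primary or secondary, but never both).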
10681 def Exec(self, feedback_fn):
10682 """Execute disk replacement.
10684 This dispatches the disk replacement to the appropriate handler.
10686 """
10687 if self.delay_iallocator:
10688 self._CheckPrereq2()
10691 # Verify owned locks before starting operation
10692 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10693 assert set(owned_nodes) == set(self.node_secondary_ip), \
10694 ("Incorrect node locks, owning %s, expected %s" %
10695 (owned_nodes, self.node_secondary_ip.keys()))
10696 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10697 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10699 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10700 assert list(owned_instances) == [self.instance_name], \
10701 "Instance '%s' not locked" % self.instance_name
10703 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10704 "Should not own any node group lock at this point"
10707 feedback_fn("No disks need replacement")
10710 feedback_fn("Replacing disk(s) %s for %s" %
10711 (utils.CommaJoin(self.disks), self.instance.name))
10713 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10715 # Activate the instance disks if we're replacing them on a down instance
10716 if activate_disks:
10717 _StartInstanceDisks(self.lu, self.instance, True)
10719 try:
10720 # Should we replace the secondary node?
10721 if self.new_node is not None:
10722 fn = self._ExecDrbd8Secondary
10723 else:
10724 fn = self._ExecDrbd8DiskOnly
10726 result = fn(feedback_fn)
10727 finally:
10728 # Deactivate the instance disks if we're replacing them on a
10729 # down instance
10730 if activate_disks:
10731 _SafeShutdownInstanceDisks(self.lu, self.instance)
10733 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10735 if __debug__:
10736 # Verify owned locks
10737 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10738 nodes = frozenset(self.node_secondary_ip)
10739 assert ((self.early_release and not owned_nodes) or
10740 (not self.early_release and not (set(owned_nodes) - nodes))), \
10741 ("Not owning the correct locks, early_release=%s, owned=%r,"
10742 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10746 def _CheckVolumeGroup(self, nodes):
10747 self.lu.LogInfo("Checking volume groups")
10749 vgname = self.cfg.GetVGName()
10751 # Make sure volume group exists on all involved nodes
10752 results = self.rpc.call_vg_list(nodes)
10753 if not results:
10754 raise errors.OpExecError("Can't list volume groups on the nodes")
10756 for node in nodes:
10757 res = results[node]
10758 res.Raise("Error checking node %s" % node)
10759 if vgname not in res.payload:
10760 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10761 (vgname, node))
10763 def _CheckDisksExistence(self, nodes):
10764 # Check disk existence
10765 for idx, dev in enumerate(self.instance.disks):
10766 if idx not in self.disks:
10767 continue
10769 for node in nodes:
10770 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10771 self.cfg.SetDiskID(dev, node)
10773 result = self.rpc.call_blockdev_find(node, dev)
10775 msg = result.fail_msg
10776 if msg or not result.payload:
10777 if not msg:
10778 msg = "disk not found"
10779 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10780 (idx, node, msg))
10782 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10783 for idx, dev in enumerate(self.instance.disks):
10784 if idx not in self.disks:
10785 continue
10787 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10788 (idx, node_name))
10790 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10791 ldisk=ldisk):
10792 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10793 " replace disks for instance %s" %
10794 (node_name, self.instance.name))
10796 def _CreateNewStorage(self, node_name):
10797 """Create new storage on the primary or secondary node.
10799 This is only used for same-node replaces, not for changing the
10800 secondary node, hence we don't want to modify the existing disk.
10802 """
10803 iv_names = {}
10805 for idx, dev in enumerate(self.instance.disks):
10806 if idx not in self.disks:
10807 continue
10809 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10811 self.cfg.SetDiskID(dev, node_name)
10813 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10814 names = _GenerateUniqueNames(self.lu, lv_names)
10816 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10818 vg_data = dev.children[0].logical_id[0]
10819 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10820 logical_id=(vg_data, names[0]), params=data_p)
10821 vg_meta = dev.children[1].logical_id[0]
10822 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10823 logical_id=(vg_meta, names[1]), params=meta_p)
10825 new_lvs = [lv_data, lv_meta]
10826 old_lvs = [child.Copy() for child in dev.children]
10827 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10829 # we pass force_create=True to force the LVM creation
10830 for new_lv in new_lvs:
10831 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10832 _GetInstanceInfoText(self.instance), False)
10834 return iv_names
10836 def _CheckDevices(self, node_name, iv_names):
10837 for name, (dev, _, _) in iv_names.iteritems():
10838 self.cfg.SetDiskID(dev, node_name)
10840 result = self.rpc.call_blockdev_find(node_name, dev)
10842 msg = result.fail_msg
10843 if msg or not result.payload:
10845 msg = "disk not found"
10846 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10849 if result.payload.is_degraded:
10850 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10852 def _RemoveOldStorage(self, node_name, iv_names):
10853 for name, (_, old_lvs, _) in iv_names.iteritems():
10854 self.lu.LogInfo("Remove logical volumes for %s" % name)
10856 for lv in old_lvs:
10857 self.cfg.SetDiskID(lv, node_name)
10859 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10860 if msg:
10861 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10862 hint="remove unused LVs manually")
10864 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10865 """Replace a disk on the primary or secondary for DRBD 8.
10867 The algorithm for replace is quite complicated:
10869 1. for each disk to be replaced:
10871 1. create new LVs on the target node with unique names
10872 1. detach old LVs from the drbd device
10873 1. rename old LVs to name_replaced.<time_t>
10874 1. rename new LVs to old LVs
10875 1. attach the new LVs (with the old names now) to the drbd device
10877 1. wait for sync across all devices
10879 1. for each modified disk:
10881 1. remove old LVs (which have the name name_replaced.<time_t>)
10883 Failures are not very well handled.
10885 """
10886 steps_total = 6
10888 # Step: check device activation
10889 self.lu.LogStep(1, steps_total, "Check device existence")
10890 self._CheckDisksExistence([self.other_node, self.target_node])
10891 self._CheckVolumeGroup([self.target_node, self.other_node])
10893 # Step: check other node consistency
10894 self.lu.LogStep(2, steps_total, "Check peer consistency")
10895 self._CheckDisksConsistency(self.other_node,
10896 self.other_node == self.instance.primary_node,
10897 False)
10899 # Step: create new storage
10900 self.lu.LogStep(3, steps_total, "Allocate new storage")
10901 iv_names = self._CreateNewStorage(self.target_node)
10903 # Step: for each lv, detach+rename*2+attach
10904 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10905 for dev, old_lvs, new_lvs in iv_names.itervalues():
10906 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10908 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10909 old_lvs)
10910 result.Raise("Can't detach drbd from local storage on node"
10911 " %s for device %s" % (self.target_node, dev.iv_name))
10913 #cfg.Update(instance)
10915 # ok, we created the new LVs, so now we know we have the needed
10916 # storage; as such, we proceed on the target node to rename
10917 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10918 # using the assumption that logical_id == physical_id (which in
10919 # turn is the unique_id on that node)
10921 # FIXME(iustin): use a better name for the replaced LVs
10922 temp_suffix = int(time.time())
10923 ren_fn = lambda d, suff: (d.physical_id[0],
10924 d.physical_id[1] + "_replaced-%s" % suff)
10926 # Build the rename list based on what LVs exist on the node
10927 rename_old_to_new = []
10928 for to_ren in old_lvs:
10929 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10930 if not result.fail_msg and result.payload:
10932 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10934 self.lu.LogInfo("Renaming the old LVs on the target node")
10935 result = self.rpc.call_blockdev_rename(self.target_node,
10936 rename_old_to_new)
10937 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10939 # Now we rename the new LVs to the old LVs
10940 self.lu.LogInfo("Renaming the new LVs on the target node")
10941 rename_new_to_old = [(new, old.physical_id)
10942 for old, new in zip(old_lvs, new_lvs)]
10943 result = self.rpc.call_blockdev_rename(self.target_node,
10944 rename_new_to_old)
10945 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10947 # Intermediate steps of in memory modifications
10948 for old, new in zip(old_lvs, new_lvs):
10949 new.logical_id = old.logical_id
10950 self.cfg.SetDiskID(new, self.target_node)
10952 # We need to modify old_lvs so that removal later removes the
10953 # right LVs, not the newly added ones; note that old_lvs is a
10954 # copy here
10955 for disk in old_lvs:
10956 disk.logical_id = ren_fn(disk, temp_suffix)
10957 self.cfg.SetDiskID(disk, self.target_node)
10959 # Now that the new lvs have the old name, we can add them to the device
10960 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10961 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10962 new_lvs)
10963 msg = result.fail_msg
10964 if msg:
10965 for new_lv in new_lvs:
10966 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10967 new_lv).fail_msg
10968 if msg2:
10969 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10970 hint=("cleanup manually the unused logical"
10971 " volumes"))
10972 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10974 cstep = itertools.count(5)
10976 if self.early_release:
10977 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10978 self._RemoveOldStorage(self.target_node, iv_names)
10979 # TODO: Check if releasing locks early still makes sense
10980 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10981 else:
10982 # Release all resource locks except those used by the instance
10983 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10984 keep=self.node_secondary_ip.keys())
10986 # Release all node locks while waiting for sync
10987 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10989 # TODO: Can the instance lock be downgraded here? Take the optional disk
10990 # shutdown in the caller into consideration.
10992 # Wait for sync
10993 # This can fail as the old devices are degraded and _WaitForSync
10994 # does a combined result over all disks, so we don't check its return value
10995 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10996 _WaitForSync(self.lu, self.instance)
10998 # Check all devices manually
10999 self._CheckDevices(self.instance.primary_node, iv_names)
11001 # Step: remove old storage
11002 if not self.early_release:
11003 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11004 self._RemoveOldStorage(self.target_node, iv_names)
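# Editor's note: a minimal, self-contained sketch (not Ganeti code; all names
# below are illustrative) of the swap-by-rename trick used above: the old LVs
# are first moved aside under a "_replaced-<suffix>" name, then the new LVs
# are renamed to the old names, so the DRBD device can re-adopt its children
# under unchanged identifiers.
def _SwapLvNamesSketch(old_lvs, new_lvs, rename_fn, suffix):
  """Illustrative only; old_lvs/new_lvs are lists of (vg, lv_name) tuples."""
  # step 1: move the old volumes out of the way
  rename_fn([((vg, name), (vg, "%s_replaced-%s" % (name, suffix)))
             for (vg, name) in old_lvs])
  # step 2: give the new volumes the original names
  rename_fn([(new, old) for (old, new) in zip(old_lvs, new_lvs)])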
11006 def _ExecDrbd8Secondary(self, feedback_fn):
11007 """Replace the secondary node for DRBD 8.
11009 The algorithm for replace is quite complicated:
11010 - for all disks of the instance:
11011 - create new LVs on the new node with same names
11012 - shutdown the drbd device on the old secondary
11013 - disconnect the drbd network on the primary
11014 - create the drbd device on the new secondary
11015 - network attach the drbd on the primary, using an artifice:
11016 the drbd code for Attach() will connect to the network if it
11017 finds a device which is connected to the good local disks but
11018 not network enabled
11019 - wait for sync across all devices
11020 - remove all disks from the old secondary
11022 Failures are not very well handled.
11024 """
11026 steps_total = 6
11027 pnode = self.instance.primary_node
11029 # Step: check device activation
11030 self.lu.LogStep(1, steps_total, "Check device existence")
11031 self._CheckDisksExistence([self.instance.primary_node])
11032 self._CheckVolumeGroup([self.instance.primary_node])
11034 # Step: check other node consistency
11035 self.lu.LogStep(2, steps_total, "Check peer consistency")
11036 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11038 # Step: create new storage
11039 self.lu.LogStep(3, steps_total, "Allocate new storage")
11040 for idx, dev in enumerate(self.instance.disks):
11041 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11042 (self.new_node, idx))
11043 # we pass force_create=True to force LVM creation
11044 for new_lv in dev.children:
11045 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11046 _GetInstanceInfoText(self.instance), False)
11048 # Step 4: drbd minors and drbd setup changes
11049 # after this, we must manually remove the drbd minors on both the
11050 # error and the success paths
11051 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11052 minors = self.cfg.AllocateDRBDMinor([self.new_node
11053 for dev in self.instance.disks],
11054 self.instance.name)
11055 logging.debug("Allocated minors %r", minors)
11057 iv_names = {}
11058 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11059 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11060 (self.new_node, idx))
11061 # create new devices on new_node; note that we create two IDs:
11062 # one without port, so the drbd will be activated without
11063 # networking information on the new node at this stage, and one
11064 # with network, for the latter activation in step 4
11065 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11066 if self.instance.primary_node == o_node1:
11067 p_minor = o_minor1
11068 else:
11069 assert self.instance.primary_node == o_node2, "Three-node instance?"
11070 p_minor = o_minor2
11072 new_alone_id = (self.instance.primary_node, self.new_node, None,
11073 p_minor, new_minor, o_secret)
11074 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11075 p_minor, new_minor, o_secret)
11077 iv_names[idx] = (dev, dev.children, new_net_id)
11078 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11079 new_net_id)
11080 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11081 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11082 logical_id=new_alone_id,
11083 children=dev.children,
11084 size=dev.size,
11085 params=drbd_params)
11086 try:
11087 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11088 _GetInstanceInfoText(self.instance), False)
11089 except errors.GenericError:
11090 self.cfg.ReleaseDRBDMinors(self.instance.name)
11091 raise
11093 # We have new devices, shutdown the drbd on the old secondary
11094 for idx, dev in enumerate(self.instance.disks):
11095 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11096 self.cfg.SetDiskID(dev, self.target_node)
11097 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11098 if msg:
11099 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11100 " node: %s" % (idx, msg),
11101 hint=("Please cleanup this device manually as"
11102 " soon as possible"))
11104 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11105 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11106 self.instance.disks)[pnode]
11108 msg = result.fail_msg
11109 if msg:
11110 # detaches didn't succeed (unlikely)
11111 self.cfg.ReleaseDRBDMinors(self.instance.name)
11112 raise errors.OpExecError("Can't detach the disks from the network on"
11113 " old node: %s" % (msg,))
11115 # if we managed to detach at least one, we update all the disks of
11116 # the instance to point to the new secondary
11117 self.lu.LogInfo("Updating instance configuration")
11118 for dev, _, new_logical_id in iv_names.itervalues():
11119 dev.logical_id = new_logical_id
11120 self.cfg.SetDiskID(dev, self.instance.primary_node)
11122 self.cfg.Update(self.instance, feedback_fn)
11124 # Release all node locks (the configuration has been updated)
11125 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11127 # and now perform the drbd attach
11128 self.lu.LogInfo("Attaching primary drbds to new secondary"
11129 " (standalone => connected)")
11130 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11131 self.new_node],
11132 self.node_secondary_ip,
11133 self.instance.disks,
11134 self.instance.name,
11135 False)
11136 for to_node, to_result in result.items():
11137 msg = to_result.fail_msg
11138 if msg:
11139 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11140 to_node, msg,
11141 hint=("please do a gnt-instance info to see the"
11142 " status of disks"))
11144 cstep = itertools.count(5)
11146 if self.early_release:
11147 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11148 self._RemoveOldStorage(self.target_node, iv_names)
11149 # TODO: Check if releasing locks early still makes sense
11150 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11152 # Release all resource locks except those used by the instance
11153 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11154 keep=self.node_secondary_ip.keys())
11156 # TODO: Can the instance lock be downgraded here? Take the optional disk
11157 # shutdown in the caller into consideration.
11160 # This can fail as the old devices are degraded and _WaitForSync
11161 # does a combined result over all disks, so we don't check its return value
11162 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11163 _WaitForSync(self.lu, self.instance)
11165 # Check all devices manually
11166 self._CheckDevices(self.instance.primary_node, iv_names)
11168 # Step: remove old storage
11169 if not self.early_release:
11170 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11171 self._RemoveOldStorage(self.target_node, iv_names)
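# Editor's note: illustrative helper (not Ganeti code) condensing the two
# logical_id tuples built per disk in _ExecDrbd8Secondary above. The field
# order follows the (node_a, node_b, port, minor_a, minor_b, secret) layout
# unpacked from dev.logical_id; the "alone" variant carries no port, so the
# new DRBD device starts unconnected and is only attached to the network in
# the later standalone => connected step.
def _MakeDrbdIdsSketch(primary, new_secondary, port, p_minor, new_minor, secret):
  new_alone_id = (primary, new_secondary, None, p_minor, new_minor, secret)
  new_net_id = (primary, new_secondary, port, p_minor, new_minor, secret)
  return (new_alone_id, new_net_id)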
11174 class LURepairNodeStorage(NoHooksLU):
11175 """Repairs the volume group on a node.
11177 """
11178 REQ_BGL = False
11180 def CheckArguments(self):
11181 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11183 storage_type = self.op.storage_type
11185 if (constants.SO_FIX_CONSISTENCY not in
11186 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11187 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11188 " repaired" % storage_type,
11189 errors.ECODE_INVAL)
11191 def ExpandNames(self):
11192 self.needed_locks = {
11193 locking.LEVEL_NODE: [self.op.node_name],
11194 }
11196 def _CheckFaultyDisks(self, instance, node_name):
11197 """Ensure faulty disks abort the opcode or at least warn."""
11198 try:
11199 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11200 node_name, True):
11201 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11202 " node '%s'" % (instance.name, node_name),
11203 errors.ECODE_STATE)
11204 except errors.OpPrereqError, err:
11205 if self.op.ignore_consistency:
11206 self.proc.LogWarning(str(err.args[0]))
11207 else:
11208 raise
11210 def CheckPrereq(self):
11211 """Check prerequisites.
11213 """
11214 # Check whether any instance on this node has faulty disks
11215 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11216 if inst.admin_state != constants.ADMINST_UP:
11217 continue
11218 check_nodes = set(inst.all_nodes)
11219 check_nodes.discard(self.op.node_name)
11220 for inst_node_name in check_nodes:
11221 self._CheckFaultyDisks(inst, inst_node_name)
11223 def Exec(self, feedback_fn):
11224 feedback_fn("Repairing storage unit '%s' on %s ..." %
11225 (self.op.name, self.op.node_name))
11227 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11228 result = self.rpc.call_storage_execute(self.op.node_name,
11229 self.op.storage_type, st_args,
11230 self.op.name,
11231 constants.SO_FIX_CONSISTENCY)
11232 result.Raise("Failed to repair storage unit '%s' on %s" %
11233 (self.op.name, self.op.node_name))
11236 class LUNodeEvacuate(NoHooksLU):
11237 """Evacuates instances off a list of nodes.
11239 """
11240 REQ_BGL = False
11242 _MODE2IALLOCATOR = {
11243 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11244 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11245 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11246 }
11247 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11248 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11249 constants.IALLOCATOR_NEVAC_MODES)
11251 def CheckArguments(self):
11252 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11254 def ExpandNames(self):
11255 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11257 if self.op.remote_node is not None:
11258 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11259 assert self.op.remote_node
11261 if self.op.remote_node == self.op.node_name:
11262 raise errors.OpPrereqError("Can not use evacuated node as a new"
11263 " secondary node", errors.ECODE_INVAL)
11265 if self.op.mode != constants.NODE_EVAC_SEC:
11266 raise errors.OpPrereqError("Without the use of an iallocator only"
11267 " secondary instances can be evacuated",
11268 errors.ECODE_INVAL)
11271 self.share_locks = _ShareAll()
11272 self.needed_locks = {
11273 locking.LEVEL_INSTANCE: [],
11274 locking.LEVEL_NODEGROUP: [],
11275 locking.LEVEL_NODE: [],
11276 }
11278 # Determine nodes (via group) optimistically, needs verification once locks
11279 # have been acquired
11280 self.lock_nodes = self._DetermineNodes()
11282 def _DetermineNodes(self):
11283 """Gets the list of nodes to operate on.
11285 """
11286 if self.op.remote_node is None:
11287 # Iallocator will choose any node(s) in the same group
11288 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11289 else:
11290 group_nodes = frozenset([self.op.remote_node])
11292 # Determine nodes to be locked
11293 return set([self.op.node_name]) | group_nodes
11295 def _DetermineInstances(self):
11296 """Builds list of instances to operate on.
11298 """
11299 assert self.op.mode in constants.NODE_EVAC_MODES
11301 if self.op.mode == constants.NODE_EVAC_PRI:
11302 # Primary instances only
11303 inst_fn = _GetNodePrimaryInstances
11304 assert self.op.remote_node is None, \
11305 "Evacuating primary instances requires iallocator"
11306 elif self.op.mode == constants.NODE_EVAC_SEC:
11307 # Secondary instances only
11308 inst_fn = _GetNodeSecondaryInstances
11310 else:
11311 assert self.op.mode == constants.NODE_EVAC_ALL
11312 inst_fn = _GetNodeInstances
11313 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11315 raise errors.OpPrereqError("Due to an issue with the iallocator"
11316 " interface it is not possible to evacuate"
11317 " all instances at once; specify explicitly"
11318 " whether to evacuate primary or secondary"
11319 " instances",
11320 errors.ECODE_INVAL)
11322 return inst_fn(self.cfg, self.op.node_name)
11324 def DeclareLocks(self, level):
11325 if level == locking.LEVEL_INSTANCE:
11326 # Lock instances optimistically, needs verification once node and group
11327 # locks have been acquired
11328 self.needed_locks[locking.LEVEL_INSTANCE] = \
11329 set(i.name for i in self._DetermineInstances())
11331 elif level == locking.LEVEL_NODEGROUP:
11332 # Lock node groups for all potential target nodes optimistically, needs
11333 # verification once nodes have been acquired
11334 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11335 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11337 elif level == locking.LEVEL_NODE:
11338 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11340 def CheckPrereq(self):
11342 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11343 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11344 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11346 need_nodes = self._DetermineNodes()
11348 if not owned_nodes.issuperset(need_nodes):
11349 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11350 " locks were acquired, current nodes"
11351 " are '%s', used to be '%s'; retry the"
11352 " operation" %
11353 (self.op.node_name,
11354 utils.CommaJoin(need_nodes),
11355 utils.CommaJoin(owned_nodes)),
11356 errors.ECODE_STATE)
11358 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11359 if owned_groups != wanted_groups:
11360 raise errors.OpExecError("Node groups changed since locks were acquired,"
11361 " current groups are '%s', used to be '%s';"
11362 " retry the operation" %
11363 (utils.CommaJoin(wanted_groups),
11364 utils.CommaJoin(owned_groups)))
11366 # Determine affected instances
11367 self.instances = self._DetermineInstances()
11368 self.instance_names = [i.name for i in self.instances]
11370 if set(self.instance_names) != owned_instances:
11371 raise errors.OpExecError("Instances on node '%s' changed since locks"
11372 " were acquired, current instances are '%s',"
11373 " used to be '%s'; retry the operation" %
11374 (self.op.node_name,
11375 utils.CommaJoin(self.instance_names),
11376 utils.CommaJoin(owned_instances)))
11378 if self.instance_names:
11379 self.LogInfo("Evacuating instances from node '%s': %s",
11380 self.op.node_name,
11381 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11382 else:
11383 self.LogInfo("No instances to evacuate from node '%s'",
11384 self.op.node_name)
11386 if self.op.remote_node is not None:
11387 for i in self.instances:
11388 if i.primary_node == self.op.remote_node:
11389 raise errors.OpPrereqError("Node %s is the primary node of"
11390 " instance %s, cannot use it as"
11391 " secondary" %
11392 (self.op.remote_node, i.name),
11393 errors.ECODE_INVAL)
11395 def Exec(self, feedback_fn):
11396 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11398 if not self.instance_names:
11399 # No instances to evacuate
11400 jobs = []
11402 elif self.op.iallocator is not None:
11403 # TODO: Implement relocation to other group
11404 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11405 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11406 instances=list(self.instance_names))
11408 ial.Run(self.op.iallocator)
11410 if not ial.success:
11411 raise errors.OpPrereqError("Can't compute node evacuation using"
11412 " iallocator '%s': %s" %
11413 (self.op.iallocator, ial.info),
11414 errors.ECODE_NORES)
11416 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11418 elif self.op.remote_node is not None:
11419 assert self.op.mode == constants.NODE_EVAC_SEC
11420 jobs = [
11421 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11422 remote_node=self.op.remote_node,
11423 disks=[],
11424 mode=constants.REPLACE_DISK_CHG,
11425 early_release=self.op.early_release)]
11426 for instance_name in self.instance_names]
11429 else:
11430 raise errors.ProgrammerError("No iallocator or remote node")
11432 return ResultWithJobs(jobs)
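# Editor's note: the value handed to ResultWithJobs above is a list of jobs,
# each job itself being a list of opcodes; the remote_node branch builds one
# single-opcode job per instance. A stand-alone sketch with plain dicts in
# place of opcode objects (field names below are illustrative only):
def _BuildPerInstanceJobsSketch(instance_names, remote_node, early_release):
  return [[{"op": "replace-disks",
            "instance_name": name,
            "remote_node": remote_node,
            "disks": [],
            "early_release": early_release}]
          for name in instance_names]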
11435 def _SetOpEarlyRelease(early_release, op):
11436 """Sets C{early_release} flag on opcodes if available.
11438 """
11439 try:
11440 op.early_release = early_release
11441 except AttributeError:
11442 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11444 return op
11447 def _NodeEvacDest(use_nodes, group, nodes):
11448 """Returns group or nodes depending on caller's choice.
11450 """
11451 if use_nodes:
11452 return utils.CommaJoin(nodes)
11453 else:
11454 return group
11457 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11458 """Unpacks the result of change-group and node-evacuate iallocator requests.
11460 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11461 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11463 @type lu: L{LogicalUnit}
11464 @param lu: Logical unit instance
11465 @type alloc_result: tuple/list
11466 @param alloc_result: Result from iallocator
11467 @type early_release: bool
11468 @param early_release: Whether to release locks early if possible
11469 @type use_nodes: bool
11470 @param use_nodes: Whether to display node names instead of groups
11472 """
11473 (moved, failed, jobs) = alloc_result
11475 if failed:
11476 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11477 for (name, reason) in failed)
11478 lu.LogWarning("Unable to evacuate instances %s", failreason)
11479 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11481 if moved:
11482 lu.LogInfo("Instances to be moved: %s",
11483 utils.CommaJoin("%s (to %s)" %
11484 (name, _NodeEvacDest(use_nodes, group, nodes))
11485 for (name, group, nodes) in moved))
11487 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11488 map(opcodes.OpCode.LoadOpCode, ops))
11489 for ops in jobs]
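# Editor's note: stand-alone sketch (not Ganeti code) of how the node-evacuate
# iallocator result is consumed by _LoadNodeEvacResult above: "failed" is a
# list of (name, reason) pairs collapsed into one error, "moved" is only
# reported, and each entry of "jobs" is a list of serialized opcodes that the
# real code re-instantiates via opcodes.OpCode.LoadOpCode.
def _SummarizeEvacResultSketch(alloc_result):
  (moved, failed, jobs) = alloc_result
  if failed:
    raise RuntimeError("Unable to evacuate instances %s" %
                       ", ".join("%s (%s)" % (name, reason)
                                 for (name, reason) in failed))
  return {"moved": [name for (name, _, _) in moved],
          "job_count": len(jobs)}

# Example: _SummarizeEvacResultSketch(([("inst1", "grp1", ["nodeB"])], [], [[]]))
# returns {"moved": ["inst1"], "job_count": 1}.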
11492 class LUInstanceGrowDisk(LogicalUnit):
11493 """Grow a disk of an instance.
11495 """
11496 HPATH = "disk-grow"
11497 HTYPE = constants.HTYPE_INSTANCE
11498 REQ_BGL = False
11500 def ExpandNames(self):
11501 self._ExpandAndLockInstance()
11502 self.needed_locks[locking.LEVEL_NODE] = []
11503 self.needed_locks[locking.LEVEL_NODE_RES] = []
11504 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11505 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11507 def DeclareLocks(self, level):
11508 if level == locking.LEVEL_NODE:
11509 self._LockInstancesNodes()
11510 elif level == locking.LEVEL_NODE_RES:
11512 self.needed_locks[locking.LEVEL_NODE_RES] = \
11513 self.needed_locks[locking.LEVEL_NODE][:]
11515 def BuildHooksEnv(self):
11516 """Build hooks env.
11518 This runs on the master, the primary and all the secondaries.
11520 """
11521 env = {
11522 "DISK": self.op.disk,
11523 "AMOUNT": self.op.amount,
11524 }
11525 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11526 return env
11528 def BuildHooksNodes(self):
11529 """Build hooks nodes.
11531 """
11532 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11533 return (nl, nl)
11535 def CheckPrereq(self):
11536 """Check prerequisites.
11538 This checks that the instance is in the cluster.
11540 """
11541 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11542 assert instance is not None, \
11543 "Cannot retrieve locked instance %s" % self.op.instance_name
11544 nodenames = list(instance.all_nodes)
11545 for node in nodenames:
11546 _CheckNodeOnline(self, node)
11548 self.instance = instance
11550 if instance.disk_template not in constants.DTS_GROWABLE:
11551 raise errors.OpPrereqError("Instance's disk layout does not support"
11552 " growing", errors.ECODE_INVAL)
11554 self.disk = instance.FindDisk(self.op.disk)
11556 if instance.disk_template not in (constants.DT_FILE,
11557 constants.DT_SHARED_FILE,
11558 constants.DT_RBD):
11559 # TODO: check the free disk space for file, when that feature will be
11560 # supported
11561 _CheckNodesFreeDiskPerVG(self, nodenames,
11562 self.disk.ComputeGrowth(self.op.amount))
11564 def Exec(self, feedback_fn):
11565 """Execute disk grow.
11567 """
11568 instance = self.instance
11569 disk = self.disk
11571 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11572 assert (self.owned_locks(locking.LEVEL_NODE) ==
11573 self.owned_locks(locking.LEVEL_NODE_RES))
11575 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11576 if not disks_ok:
11577 raise errors.OpExecError("Cannot activate block device to grow")
11579 feedback_fn("Growing disk %s of instance '%s' by %s" %
11580 (self.op.disk, instance.name,
11581 utils.FormatUnit(self.op.amount, "h")))
11583 # First run all grow ops in dry-run mode
11584 for node in instance.all_nodes:
11585 self.cfg.SetDiskID(disk, node)
11586 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11587 result.Raise("Grow request failed to node %s" % node)
11589 # We know that (as far as we can test) operations across different
11590 # nodes will succeed, time to run it for real
11591 for node in instance.all_nodes:
11592 self.cfg.SetDiskID(disk, node)
11593 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11594 result.Raise("Grow request failed to node %s" % node)
11596 # TODO: Rewrite code to work properly
11597 # DRBD goes into sync mode for a short amount of time after executing the
11598 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11599 # calling "resize" in sync mode fails. Sleeping for a short amount of
11600 # time is a work-around.
11602 time.sleep(5)
11603 disk.RecordGrow(self.op.amount)
11604 self.cfg.Update(instance, feedback_fn)
11606 # Changes have been recorded, release node lock
11607 _ReleaseLocks(self, locking.LEVEL_NODE)
11609 # Downgrade lock while waiting for sync
11610 self.glm.downgrade(locking.LEVEL_INSTANCE)
11612 if self.op.wait_for_sync:
11613 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11614 if disk_abort:
11615 self.proc.LogWarning("Disk sync-ing has not returned a good"
11616 " status; please check the instance")
11617 if instance.admin_state != constants.ADMINST_UP:
11618 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11619 elif instance.admin_state != constants.ADMINST_UP:
11620 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11621 " not supposed to be running because no wait for"
11622 " sync mode was requested")
11624 assert self.owned_locks(locking.LEVEL_NODE_RES)
11625 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
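# Editor's note: generic illustration (not Ganeti code) of the two-phase
# pattern used in LUInstanceGrowDisk.Exec above: every node is first asked to
# perform the grow as a dry run, and only if all dry runs succeed is the real
# grow executed, so a single incapable node aborts the operation before any
# device is changed.
def _TwoPhaseGrowSketch(nodes, grow_fn):
  """grow_fn(node, dryrun) is expected to raise on failure."""
  for node in nodes:      # phase 1: validate the operation everywhere
    grow_fn(node, True)
  for node in nodes:      # phase 2: apply it for real
    grow_fn(node, False)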
11628 class LUInstanceQueryData(NoHooksLU):
11629 """Query runtime instance data.
11631 """
11632 REQ_BGL = False
11634 def ExpandNames(self):
11635 self.needed_locks = {}
11637 # Use locking if requested or when non-static information is wanted
11638 if not (self.op.static or self.op.use_locking):
11639 self.LogWarning("Non-static data requested, locks need to be acquired")
11640 self.op.use_locking = True
11642 if self.op.instances or not self.op.use_locking:
11643 # Expand instance names right here
11644 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11645 else:
11646 # Will use acquired locks
11647 self.wanted_names = None
11649 if self.op.use_locking:
11650 self.share_locks = _ShareAll()
11652 if self.wanted_names is None:
11653 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11654 else:
11655 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11657 self.needed_locks[locking.LEVEL_NODE] = []
11658 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11660 def DeclareLocks(self, level):
11661 if self.op.use_locking and level == locking.LEVEL_NODE:
11662 self._LockInstancesNodes()
11664 def CheckPrereq(self):
11665 """Check prerequisites.
11667 This only checks the optional instance list against the existing names.
11669 """
11670 if self.wanted_names is None:
11671 assert self.op.use_locking, "Locking was not used"
11672 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11674 self.wanted_instances = \
11675 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11677 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11678 """Returns the status of a block device
11680 """
11681 if self.op.static or not node:
11682 return None
11684 self.cfg.SetDiskID(dev, node)
11686 result = self.rpc.call_blockdev_find(node, dev)
11687 if result.offline:
11688 return None
11690 result.Raise("Can't compute disk status for %s" % instance_name)
11692 status = result.payload
11693 if status is None:
11694 return None
11696 return (status.dev_path, status.major, status.minor,
11697 status.sync_percent, status.estimated_time,
11698 status.is_degraded, status.ldisk_status)
11700 def _ComputeDiskStatus(self, instance, snode, dev):
11701 """Compute block device status.
11703 """
11704 if dev.dev_type in constants.LDS_DRBD:
11705 # we change the snode then (otherwise we use the one passed in)
11706 if dev.logical_id[0] == instance.primary_node:
11707 snode = dev.logical_id[1]
11708 else:
11709 snode = dev.logical_id[0]
11711 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11712 instance.name, dev)
11713 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11715 if dev.children:
11716 dev_children = map(compat.partial(self._ComputeDiskStatus,
11717 instance, snode),
11718 dev.children)
11719 else:
11720 dev_children = []
11722 return {
11723 "iv_name": dev.iv_name,
11724 "dev_type": dev.dev_type,
11725 "logical_id": dev.logical_id,
11726 "physical_id": dev.physical_id,
11727 "pstatus": dev_pstatus,
11728 "sstatus": dev_sstatus,
11729 "children": dev_children,
11734 def Exec(self, feedback_fn):
11735 """Gather and return data"""
11736 result = {}
11738 cluster = self.cfg.GetClusterInfo()
11740 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11741 for i in self.wanted_instances)
11742 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11743 if self.op.static or pnode.offline:
11744 remote_state = None
11745 if pnode.offline:
11746 self.LogWarning("Primary node %s is marked offline, returning static"
11747 " information only for instance %s" %
11748 (pnode.name, instance.name))
11749 else:
11750 remote_info = self.rpc.call_instance_info(instance.primary_node,
11751 instance.name,
11752 instance.hypervisor)
11753 remote_info.Raise("Error checking node %s" % instance.primary_node)
11754 remote_info = remote_info.payload
11755 if remote_info and "state" in remote_info:
11756 remote_state = "up"
11757 else:
11758 if instance.admin_state == constants.ADMINST_UP:
11759 remote_state = "down"
11760 else:
11761 remote_state = instance.admin_state
11763 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11764 instance.disks)
11766 result[instance.name] = {
11767 "name": instance.name,
11768 "config_state": instance.admin_state,
11769 "run_state": remote_state,
11770 "pnode": instance.primary_node,
11771 "snodes": instance.secondary_nodes,
11773 # this happens to be the same format used for hooks
11774 "nics": _NICListToTuple(self, instance.nics),
11775 "disk_template": instance.disk_template,
11776 "disks": disks,
11777 "hypervisor": instance.hypervisor,
11778 "network_port": instance.network_port,
11779 "hv_instance": instance.hvparams,
11780 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11781 "be_instance": instance.beparams,
11782 "be_actual": cluster.FillBE(instance),
11783 "os_instance": instance.osparams,
11784 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11785 "serial_no": instance.serial_no,
11786 "mtime": instance.mtime,
11787 "ctime": instance.ctime,
11788 "uuid": instance.uuid,
11789 }
11791 return result
11794 def PrepareContainerMods(mods, private_fn):
11795 """Prepares a list of container modifications by adding a private data field.
11797 @type mods: list of tuples; (operation, index, parameters)
11798 @param mods: List of modifications
11799 @type private_fn: callable or None
11800 @param private_fn: Callable for constructing a private data field for a
11801 modification
11803 """
11805 if private_fn is None:
11806 fn = lambda: None
11807 else:
11808 fn = private_fn
11810 return [(op, idx, params, fn()) for (op, idx, params) in mods]
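# Editor's note: a quick, self-contained illustration of what
# PrepareContainerMods produces; the tuples below are made up for the example.
# Each (op, index, params) modification simply gains a fourth,
# per-modification private slot (a fresh object from private_fn, or None).
def _PrepareContainerModsExample():
  mods = [("add", -1, {"size": 1024}), ("modify", 0, {"mode": "ro"})]
  with_private = [(op, idx, params, dict()) for (op, idx, params) in mods]
  without_private = [(op, idx, params, None) for (op, idx, params) in mods]
  return (with_private, without_private)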
11813 #: Type description for changes as returned by L{ApplyContainerMods}'s
11814 #: callbacks
11815 _TApplyContModsCbChanges = \
11816 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11817 ht.TNonEmptyString,
11818 ht.TAny,
11819 ])))
11822 def ApplyContainerMods(kind, container, chgdesc, mods,
11823 create_fn, modify_fn, remove_fn):
11824 """Applies descriptions in C{mods} to C{container}.
11827 @param kind: One-word item description
11828 @type container: list
11829 @param container: Container to modify
11830 @type chgdesc: None or list
11831 @param chgdesc: List of applied changes
11833 @param mods: Modifications as returned by L{PrepareContainerMods}
11834 @type create_fn: callable
11835 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11836 receives absolute item index, parameters and private data object as added
11837 by L{PrepareContainerMods}, returns tuple containing new item and changes
11839 @type modify_fn: callable
11840 @param modify_fn: Callback for modifying an existing item
11841 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11842 and private data object as added by L{PrepareContainerMods}, returns
11843 changes as list
11844 @type remove_fn: callable
11845 @param remove_fn: Callback on removing item; receives absolute item index,
11846 item and private data object as added by L{PrepareContainerMods}
11848 """
11849 for (op, idx, params, private) in mods:
11850 if idx == -1:
11851 # Append
11852 absidx = len(container) - 1
11853 elif idx < 0:
11854 raise IndexError("Not accepting negative indices other than -1")
11855 elif idx > len(container):
11856 raise IndexError("Got %s index %s, but there are only %s" %
11857 (kind, idx, len(container)))
11858 else:
11859 absidx = idx
11861 changes = None
11863 if op == constants.DDM_ADD:
11864 # Calculate where item will be added
11865 if idx == -1:
11866 addidx = len(container)
11867 else:
11868 addidx = idx
11870 if create_fn is None:
11871 item = params
11872 else:
11873 (item, changes) = create_fn(addidx, params, private)
11875 if idx == -1:
11876 container.append(item)
11878 else:
11879 assert idx <= len(container)
11880 # list.insert does so before the specified index
11881 container.insert(idx, item)
11882 else:
11883 # Retrieve existing item
11884 try:
11885 item = container[absidx]
11886 except IndexError:
11887 raise IndexError("Invalid %s index %s" % (kind, idx))
11889 if op == constants.DDM_REMOVE:
11892 if remove_fn is not None:
11893 remove_fn(absidx, item, private)
11895 changes = [("%s/%s" % (kind, absidx), "remove")]
11897 assert container[absidx] == item
11898 del container[absidx]
11899 elif op == constants.DDM_MODIFY:
11900 if modify_fn is not None:
11901 changes = modify_fn(absidx, item, params, private)
11902 else:
11903 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11905 assert _TApplyContModsCbChanges(changes)
11907 if not (chgdesc is None or changes is None):
11908 chgdesc.extend(changes)
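# Editor's note: self-contained toy (deliberately much simpler than
# ApplyContainerMods above) showing the index semantics it implements: -1
# means "append" for an add and "last item" for modify/remove, any other
# negative index is rejected, and explicit indexes address the item directly.
def _ApplyListModsToy(container, mods):
  for (op, idx, value) in mods:
    if idx == -1:
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    else:
      absidx = idx
    if op == "add":
      if idx == -1:
        container.append(value)
      else:
        container.insert(idx, value)
    elif op == "remove":
      del container[absidx]
    elif op == "modify":
      container[absidx] = value
  return container

# Example: _ApplyListModsToy(["a", "b"], [("add", -1, "c"), ("remove", 0, None)])
# returns ["b", "c"].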
11911 def _UpdateIvNames(base_index, disks):
11912 """Updates the C{iv_name} attribute of disks.
11914 @type disks: list of L{objects.Disk}
11916 """
11917 for (idx, disk) in enumerate(disks):
11918 disk.iv_name = "disk/%s" % (base_index + idx, )
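# Editor's note: tiny illustration (illustrative class, not objects.Disk) of
# the renumbering done by _UpdateIvNames above: after disks are added or
# removed, iv_name is rewritten so that "disk/N" always matches the disk's
# position counted from base_index.
class _FakeDiskForExample(object):
  def __init__(self):
    self.iv_name = None

def _UpdateIvNamesExample():
  disks = [_FakeDiskForExample(), _FakeDiskForExample()]
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (0 + idx, )
  return [d.iv_name for d in disks]   # ["disk/0", "disk/1"]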
11921 class _InstNicModPrivate:
11922 """Data structure for network interface modifications.
11924 Used by L{LUInstanceSetParams}.
11926 """
11927 def __init__(self):
11928 self.params = None
11929 self.filled = None
11932 class LUInstanceSetParams(LogicalUnit):
11933 """Modifies an instance's parameters.
11935 """
11936 HPATH = "instance-modify"
11937 HTYPE = constants.HTYPE_INSTANCE
11938 REQ_BGL = False
11940 @staticmethod
11941 def _UpgradeDiskNicMods(kind, mods, verify_fn):
11942 assert ht.TList(mods)
11943 assert not mods or len(mods[0]) in (2, 3)
11945 if mods and len(mods[0]) == 2:
11946 result = []
11948 addremove = 0
11949 for op, params in mods:
11950 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11951 result.append((op, -1, params))
11952 addremove += 1
11954 if addremove > 1:
11955 raise errors.OpPrereqError("Only one %s add or remove operation is"
11956 " supported at a time" % kind,
11957 errors.ECODE_INVAL)
11958 else:
11959 result.append((constants.DDM_MODIFY, op, params))
11961 assert verify_fn(result)
11962 else:
11963 result = mods
11965 return result
11967 @staticmethod
11968 def _CheckMods(kind, mods, key_types, item_fn):
11969 """Ensures requested disk/NIC modifications are valid.
11971 """
11972 for (op, _, params) in mods:
11973 assert ht.TDict(params)
11975 utils.ForceDictType(params, key_types)
11977 if op == constants.DDM_REMOVE:
11978 if params:
11979 raise errors.OpPrereqError("No settings should be passed when"
11980 " removing a %s" % kind,
11981 errors.ECODE_INVAL)
11982 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11983 item_fn(op, params)
11984 else:
11985 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11987 @staticmethod
11988 def _VerifyDiskModification(op, params):
11989 """Verifies a disk modification.
11991 """
11992 if op == constants.DDM_ADD:
11993 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11994 if mode not in constants.DISK_ACCESS_SET:
11995 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11996 errors.ECODE_INVAL)
11998 size = params.get(constants.IDISK_SIZE, None)
11999 if size is None:
12000 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12001 constants.IDISK_SIZE, errors.ECODE_INVAL)
12003 try:
12004 size = int(size)
12005 except (TypeError, ValueError), err:
12006 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12007 errors.ECODE_INVAL)
12009 params[constants.IDISK_SIZE] = size
12011 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12012 raise errors.OpPrereqError("Disk size change not possible, use"
12013 " grow-disk", errors.ECODE_INVAL)
12015 @staticmethod
12016 def _VerifyNicModification(op, params):
12017 """Verifies a network interface modification.
12019 """
12020 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12021 ip = params.get(constants.INIC_IP, None)
12022 if ip is None:
12023 pass
12024 elif ip.lower() == constants.VALUE_NONE:
12025 params[constants.INIC_IP] = None
12026 elif not netutils.IPAddress.IsValid(ip):
12027 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12028 errors.ECODE_INVAL)
12030 bridge = params.get("bridge", None)
12031 link = params.get(constants.INIC_LINK, None)
12032 if bridge and link:
12033 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12034 " at the same time", errors.ECODE_INVAL)
12035 elif bridge and bridge.lower() == constants.VALUE_NONE:
12036 params["bridge"] = None
12037 elif link and link.lower() == constants.VALUE_NONE:
12038 params[constants.INIC_LINK] = None
12040 if op == constants.DDM_ADD:
12041 macaddr = params.get(constants.INIC_MAC, None)
12042 if macaddr is None:
12043 params[constants.INIC_MAC] = constants.VALUE_AUTO
12045 if constants.INIC_MAC in params:
12046 macaddr = params[constants.INIC_MAC]
12047 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12048 macaddr = utils.NormalizeAndValidateMac(macaddr)
12050 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12051 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12052 " modifying an existing NIC",
12053 errors.ECODE_INVAL)
12055 def CheckArguments(self):
12056 if not (self.op.nics or self.op.disks or self.op.disk_template or
12057 self.op.hvparams or self.op.beparams or self.op.os_name or
12058 self.op.offline is not None or self.op.runtime_mem):
12059 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12061 if self.op.hvparams:
12062 _CheckGlobalHvParams(self.op.hvparams)
12064 self.op.disks = \
12065 self._UpgradeDiskNicMods("disk", self.op.disks,
12066 opcodes.OpInstanceSetParams.TestDiskModifications)
12067 self.op.nics = \
12068 self._UpgradeDiskNicMods("NIC", self.op.nics,
12069 opcodes.OpInstanceSetParams.TestNicModifications)
12071 # Check disk modifications
12072 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12073 self._VerifyDiskModification)
12075 if self.op.disks and self.op.disk_template is not None:
12076 raise errors.OpPrereqError("Disk template conversion and other disk"
12077 " changes not supported at the same time",
12078 errors.ECODE_INVAL)
12080 if (self.op.disk_template and
12081 self.op.disk_template in constants.DTS_INT_MIRROR and
12082 self.op.remote_node is None):
12083 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12084 " one requires specifying a secondary node",
12085 errors.ECODE_INVAL)
12087 # Check NIC modifications
12088 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12089 self._VerifyNicModification)
12091 def ExpandNames(self):
12092 self._ExpandAndLockInstance()
12093 # Can't even acquire node locks in shared mode as upcoming changes in
12094 # Ganeti 2.6 will start to modify the node object on disk conversion
12095 self.needed_locks[locking.LEVEL_NODE] = []
12096 self.needed_locks[locking.LEVEL_NODE_RES] = []
12097 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12099 def DeclareLocks(self, level):
12100 # TODO: Acquire group lock in shared mode (disk parameters)
12101 if level == locking.LEVEL_NODE:
12102 self._LockInstancesNodes()
12103 if self.op.disk_template and self.op.remote_node:
12104 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12105 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12106 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12108 self.needed_locks[locking.LEVEL_NODE_RES] = \
12109 self.needed_locks[locking.LEVEL_NODE][:]
12111 def BuildHooksEnv(self):
12112 """Build hooks env.
12114 This runs on the master, primary and secondaries.
12116 """
12117 args = {}
12118 if constants.BE_MINMEM in self.be_new:
12119 args["minmem"] = self.be_new[constants.BE_MINMEM]
12120 if constants.BE_MAXMEM in self.be_new:
12121 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12122 if constants.BE_VCPUS in self.be_new:
12123 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12124 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12125 # information at all.
12127 if self._new_nics is not None:
12128 nics = []
12130 for nic in self._new_nics:
12131 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12132 mode = nicparams[constants.NIC_MODE]
12133 link = nicparams[constants.NIC_LINK]
12134 nics.append((nic.ip, nic.mac, mode, link))
12136 args["nics"] = nics
12138 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12139 if self.op.disk_template:
12140 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12141 if self.op.runtime_mem:
12142 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12144 return env
12146 def BuildHooksNodes(self):
12147 """Build hooks nodes.
12149 """
12150 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12151 return (nl, nl)
12153 def _PrepareNicModification(self, params, private, old_ip, old_params,
12154 cluster, pnode):
12155 update_params_dict = dict([(key, params[key])
12156 for key in constants.NICS_PARAMETERS
12157 if key in params])
12159 if "bridge" in params:
12160 update_params_dict[constants.NIC_LINK] = params["bridge"]
12162 new_params = _GetUpdatedParams(old_params, update_params_dict)
12163 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12165 new_filled_params = cluster.SimpleFillNIC(new_params)
12166 objects.NIC.CheckParameterSyntax(new_filled_params)
12168 new_mode = new_filled_params[constants.NIC_MODE]
12169 if new_mode == constants.NIC_MODE_BRIDGED:
12170 bridge = new_filled_params[constants.NIC_LINK]
12171 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12172 if msg:
12173 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12174 if self.op.force:
12175 self.warn.append(msg)
12176 else:
12177 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12179 elif new_mode == constants.NIC_MODE_ROUTED:
12180 ip = params.get(constants.INIC_IP, old_ip)
12181 if ip is None:
12182 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12183 " on a routed NIC", errors.ECODE_INVAL)
12185 if constants.INIC_MAC in params:
12186 mac = params[constants.INIC_MAC]
12187 if mac is None:
12188 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12189 errors.ECODE_INVAL)
12190 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12191 # otherwise generate the MAC address
12192 params[constants.INIC_MAC] = \
12193 self.cfg.GenerateMAC(self.proc.GetECId())
12194 else:
12195 # or validate/reserve the current one
12196 try:
12197 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12198 except errors.ReservationError:
12199 raise errors.OpPrereqError("MAC address '%s' already in use"
12200 " in cluster" % mac,
12201 errors.ECODE_NOTUNIQUE)
12203 private.params = new_params
12204 private.filled = new_filled_params
12206 return (None, None)
12208 def CheckPrereq(self):
12209 """Check prerequisites.
12211 This only checks the instance list against the existing names.
12213 """
12214 # checking the new params on the primary/secondary nodes
12216 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12217 cluster = self.cluster = self.cfg.GetClusterInfo()
12218 assert self.instance is not None, \
12219 "Cannot retrieve locked instance %s" % self.op.instance_name
12220 pnode = instance.primary_node
12221 nodelist = list(instance.all_nodes)
12222 pnode_info = self.cfg.GetNodeInfo(pnode)
12223 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12225 # Prepare disk/NIC modifications
12226 self.diskmod = PrepareContainerMods(self.op.disks, None)
12227 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12230 if self.op.os_name and not self.op.force:
12231 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12232 self.op.force_variant)
12233 instance_os = self.op.os_name
12235 instance_os = instance.os
12237 assert not (self.op.disk_template and self.op.disks), \
12238 "Can't modify disk template and apply disk changes at the same time"
12240 if self.op.disk_template:
12241 if instance.disk_template == self.op.disk_template:
12242 raise errors.OpPrereqError("Instance already has disk template %s" %
12243 instance.disk_template, errors.ECODE_INVAL)
12245 if (instance.disk_template,
12246 self.op.disk_template) not in self._DISK_CONVERSIONS:
12247 raise errors.OpPrereqError("Unsupported disk template conversion from"
12248 " %s to %s" % (instance.disk_template,
12249 self.op.disk_template),
12250 errors.ECODE_INVAL)
12251 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12252 msg="cannot change disk template")
12253 if self.op.disk_template in constants.DTS_INT_MIRROR:
12254 if self.op.remote_node == pnode:
12255 raise errors.OpPrereqError("Given new secondary node %s is the same"
12256 " as the primary node of the instance" %
12257 self.op.remote_node, errors.ECODE_STATE)
12258 _CheckNodeOnline(self, self.op.remote_node)
12259 _CheckNodeNotDrained(self, self.op.remote_node)
12260 # FIXME: here we assume that the old instance type is DT_PLAIN
12261 assert instance.disk_template == constants.DT_PLAIN
12262 disks = [{constants.IDISK_SIZE: d.size,
12263 constants.IDISK_VG: d.logical_id[0]}
12264 for d in instance.disks]
12265 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12266 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12268 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12269 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12270 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12271 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12272 ignore=self.op.ignore_ipolicy)
12273 if pnode_info.group != snode_info.group:
12274 self.LogWarning("The primary and secondary nodes are in two"
12275 " different node groups; the disk parameters"
12276 " from the first disk's node group will be"
12277 " used")
12279 # hvparams processing
12280 if self.op.hvparams:
12281 hv_type = instance.hypervisor
12282 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12283 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12284 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12287 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12288 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12289 self.hv_proposed = self.hv_new = hv_new # the new actual values
12290 self.hv_inst = i_hvdict # the new dict (without defaults)
12291 else:
12292 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12293 instance.hvparams)
12294 self.hv_new = self.hv_inst = {}
12296 # beparams processing
12297 if self.op.beparams:
12298 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12299 use_none=True)
12300 objects.UpgradeBeParams(i_bedict)
12301 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12302 be_new = cluster.SimpleFillBE(i_bedict)
12303 self.be_proposed = self.be_new = be_new # the new actual values
12304 self.be_inst = i_bedict # the new dict (without defaults)
12305 else:
12306 self.be_new = self.be_inst = {}
12307 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12308 be_old = cluster.FillBE(instance)
12310 # CPU param validation -- checking every time a parameter is
12311 # changed to cover all cases where either CPU mask or vcpus have
12312 # changed
12313 if (constants.BE_VCPUS in self.be_proposed and
12314 constants.HV_CPU_MASK in self.hv_proposed):
12315 cpu_list = \
12316 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12317 # Verify mask is consistent with number of vCPUs. Can skip this
12318 # test if only 1 entry in the CPU mask, which means same mask
12319 # is applied to all vCPUs.
12320 if (len(cpu_list) > 1 and
12321 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12322 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12323 " CPU mask [%s]" %
12324 (self.be_proposed[constants.BE_VCPUS],
12325 self.hv_proposed[constants.HV_CPU_MASK]),
12326 errors.ECODE_INVAL)
12328 # Only perform this test if a new CPU mask is given
12329 if constants.HV_CPU_MASK in self.hv_new:
12330 # Calculate the largest CPU number requested
12331 max_requested_cpu = max(map(max, cpu_list))
12332 # Check that all of the instance's nodes have enough physical CPUs to
12333 # satisfy the requested CPU mask
12334 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12335 max_requested_cpu + 1, instance.hypervisor)
12337 # osparams processing
12338 if self.op.osparams:
12339 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12340 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12340 self.os_inst = i_osdict # the new dict (without defaults)
12341 else:
12342 self.os_inst = {}
12344 self.warn = []
12347 #TODO(dynmem): do the appropriate check involving MINMEM
12348 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12349 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12350 mem_check_list = [pnode]
12351 if be_new[constants.BE_AUTO_BALANCE]:
12352 # either we changed auto_balance to yes or it was from before
12353 mem_check_list.extend(instance.secondary_nodes)
12354 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12355 instance.hypervisor)
12356 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12357 [instance.hypervisor])
12358 pninfo = nodeinfo[pnode]
12359 msg = pninfo.fail_msg
12360 if msg:
12361 # Assume the primary node is unreachable and go ahead
12362 self.warn.append("Can't get info from primary node %s: %s" %
12363 (pnode, msg))
12364 else:
12365 (_, _, (pnhvinfo, )) = pninfo.payload
12366 if not isinstance(pnhvinfo.get("memory_free", None), int):
12367 self.warn.append("Node data from primary node %s doesn't contain"
12368 " free memory information" % pnode)
12369 elif instance_info.fail_msg:
12370 self.warn.append("Can't get instance runtime information: %s" %
12371 instance_info.fail_msg)
12372 else:
12373 if instance_info.payload:
12374 current_mem = int(instance_info.payload["memory"])
12375 else:
12376 # Assume instance not running
12377 # (there is a slight race condition here, but it's not very
12378 # probable, and we have no other way to check)
12379 # TODO: Describe race condition
12380 current_mem = 0
12381 #TODO(dynmem): do the appropriate check involving MINMEM
12382 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12383 pnhvinfo["memory_free"])
12384 if miss_mem > 0:
12385 raise errors.OpPrereqError("This change will prevent the instance"
12386 " from starting, due to %d MB of memory"
12387 " missing on its primary node" %
12388 miss_mem,
12389 errors.ECODE_NORES)
12391 if be_new[constants.BE_AUTO_BALANCE]:
12392 for node, nres in nodeinfo.items():
12393 if node not in instance.secondary_nodes:
12394 continue
12395 nres.Raise("Can't get info from secondary node %s" % node,
12396 prereq=True, ecode=errors.ECODE_STATE)
12397 (_, _, (nhvinfo, )) = nres.payload
12398 if not isinstance(nhvinfo.get("memory_free", None), int):
12399 raise errors.OpPrereqError("Secondary node %s didn't return free"
12400 " memory information" % node,
12401 errors.ECODE_STATE)
12402 #TODO(dynmem): do the appropriate check involving MINMEM
12403 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12404 raise errors.OpPrereqError("This change will prevent the instance"
12405 " from failover to its secondary node"
12406 " %s, due to not enough memory" % node,
12407 errors.ECODE_STATE)
12409 if self.op.runtime_mem:
12410 remote_info = self.rpc.call_instance_info(instance.primary_node,
12411 instance.name,
12412 instance.hypervisor)
12413 remote_info.Raise("Error checking node %s" % instance.primary_node)
12414 if not remote_info.payload: # not running already
12415 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12416 errors.ECODE_STATE)
12418 current_memory = remote_info.payload["memory"]
12419 if (not self.op.force and
12420 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12421 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12422 raise errors.OpPrereqError("Instance %s must have memory between %d"
12423 " and %d MB of memory unless --force is"
12424 " given" % (instance.name,
12425 self.be_proposed[constants.BE_MINMEM],
12426 self.be_proposed[constants.BE_MAXMEM]),
12427 errors.ECODE_INVAL)
12429 if self.op.runtime_mem > current_memory:
12430 _CheckNodeFreeMemory(self, instance.primary_node,
12431 "ballooning memory for instance %s" %
12432 instance.name,
12433 self.op.runtime_mem - current_memory,
12434 instance.hypervisor)
12436 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12437 raise errors.OpPrereqError("Disk operations not supported for"
12438 " diskless instances",
12439 errors.ECODE_INVAL)
12441 def _PrepareNicCreate(_, params, private):
12442 return self._PrepareNicModification(params, private, None, {},
12443 cluster, pnode)
12445 def _PrepareNicMod(_, nic, params, private):
12446 return self._PrepareNicModification(params, private, nic.ip,
12447 nic.nicparams, cluster, pnode)
12449 # Verify NIC changes (operating on copy)
12450 nics = instance.nics[:]
12451 ApplyContainerMods("NIC", nics, None, self.nicmod,
12452 _PrepareNicCreate, _PrepareNicMod, None)
12453 if len(nics) > constants.MAX_NICS:
12454 raise errors.OpPrereqError("Instance has too many network interfaces"
12455 " (%d), cannot add more" % constants.MAX_NICS,
12456 errors.ECODE_STATE)
12458 # Verify disk changes (operating on a copy)
12459 disks = instance.disks[:]
12460 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12461 if len(disks) > constants.MAX_DISKS:
12462 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12463 " more" % constants.MAX_DISKS,
12464 errors.ECODE_STATE)
12466 if self.op.offline is not None:
12467 if self.op.offline:
12468 msg = "can't change to offline"
12470 msg = "can't change to online"
12471 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12473 # Pre-compute NIC changes (necessary to use result in hooks)
12474 self._nic_chgdesc = []
12475 if self.nicmod:
12476 # Operate on copies as this is still in prereq
12477 nics = [nic.Copy() for nic in instance.nics]
12478 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12479 self._CreateNewNic, self._ApplyNicMods, None)
12480 self._new_nics = nics
12481 else:
12482 self._new_nics = None
12484 def _ConvertPlainToDrbd(self, feedback_fn):
12485 """Converts an instance from plain to drbd.
12487 """
12488 feedback_fn("Converting template to drbd")
12489 instance = self.instance
12490 pnode = instance.primary_node
12491 snode = self.op.remote_node
12493 assert instance.disk_template == constants.DT_PLAIN
12495 # create a fake disk info for _GenerateDiskTemplate
12496 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12497 constants.IDISK_VG: d.logical_id[0]}
12498 for d in instance.disks]
12499 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12500 instance.name, pnode, [snode],
12501 disk_info, None, None, 0, feedback_fn,
12502 self.diskparams)
12503 info = _GetInstanceInfoText(instance)
12504 feedback_fn("Creating additional volumes...")
12505 # first, create the missing data and meta devices
12506 for disk in new_disks:
12507 # unfortunately this is... not too nice
12508 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12509 info, True)
12510 for child in disk.children:
12511 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12512 # at this stage, all new LVs have been created, we can rename the
12513 # old ones
12514 feedback_fn("Renaming original volumes...")
12515 rename_list = [(o, n.children[0].logical_id)
12516 for (o, n) in zip(instance.disks, new_disks)]
12517 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12518 result.Raise("Failed to rename original LVs")
12520 feedback_fn("Initializing DRBD devices...")
12521 # all child devices are in place, we can now create the DRBD devices
12522 for disk in new_disks:
12523 for node in [pnode, snode]:
12524 f_create = node == pnode
12525 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12527 # at this point, the instance has been modified
12528 instance.disk_template = constants.DT_DRBD8
12529 instance.disks = new_disks
12530 self.cfg.Update(instance, feedback_fn)
12532 # Release node locks while waiting for sync
12533 _ReleaseLocks(self, locking.LEVEL_NODE)
12535 # disks are created, waiting for sync
12536 disk_abort = not _WaitForSync(self, instance,
12537 oneshot=not self.op.wait_for_sync)
12538 if disk_abort:
12539 raise errors.OpExecError("There are some degraded disks for"
12540 " this instance, please cleanup manually")
12542 # Node resource locks will be released by caller
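# Editor's note: high-level recap (illustrative only, not Ganeti code) of the
# plain -> drbd conversion performed by _ConvertPlainToDrbd above, written
# over plain dicts; create_fn and rename_fn stand in for _CreateSingleBlockDev
# and the blockdev_rename RPC.
def _PlainToDrbdSketch(old_lvs, new_disks, pnode, snode, create_fn, rename_fn):
  # 1. create the missing meta device on the primary and every child on the
  #    future secondary
  for disk in new_disks:
    create_fn(pnode, disk["children"][1])
    for child in disk["children"]:
      create_fn(snode, child)
  # 2. the original LVs take over the identity of the new data children
  rename_fn([(old, new["children"][0]["logical_id"])
             for (old, new) in zip(old_lvs, new_disks)])
  # 3. finally create the DRBD devices themselves on both nodes
  for disk in new_disks:
    for node in (pnode, snode):
      create_fn(node, disk)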
12544 def _ConvertDrbdToPlain(self, feedback_fn):
12545 """Converts an instance from drbd to plain.
12547 """
12548 instance = self.instance
12550 assert len(instance.secondary_nodes) == 1
12551 assert instance.disk_template == constants.DT_DRBD8
12553 pnode = instance.primary_node
12554 snode = instance.secondary_nodes[0]
12555 feedback_fn("Converting template to plain")
12557 old_disks = instance.disks
12558 new_disks = [d.children[0] for d in old_disks]
12560 # copy over size and mode
12561 for parent, child in zip(old_disks, new_disks):
12562 child.size = parent.size
12563 child.mode = parent.mode
12565 # update instance structure
12566 instance.disks = new_disks
12567 instance.disk_template = constants.DT_PLAIN
12568 self.cfg.Update(instance, feedback_fn)
12570 # Release locks in case removing disks takes a while
12571 _ReleaseLocks(self, locking.LEVEL_NODE)
12573 feedback_fn("Removing volumes on the secondary node...")
12574 for disk in old_disks:
12575 self.cfg.SetDiskID(disk, snode)
12576 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12577 if msg:
12578 self.LogWarning("Could not remove block device %s on node %s,"
12579 " continuing anyway: %s", disk.iv_name, snode, msg)
12581 feedback_fn("Removing unneeded volumes on the primary node...")
12582 for idx, disk in enumerate(old_disks):
12583 meta = disk.children[1]
12584 self.cfg.SetDiskID(meta, pnode)
12585 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12586 if msg:
12587 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12588 " continuing anyway: %s", idx, pnode, msg)
12590 # this is a DRBD disk, return its port to the pool
12591 for disk in old_disks:
12592 tcp_port = disk.logical_id[2]
12593 self.cfg.AddTcpUdpPort(tcp_port)
12595 # Node resource locks will be released by caller
12597 def _CreateNewDisk(self, idx, params, _):
12598 """Creates a new disk.
12600 """
12601 instance = self.instance
12604 if instance.disk_template in constants.DTS_FILEBASED:
12605 (file_driver, file_path) = instance.disks[0].logical_id
12606 file_path = os.path.dirname(file_path)
12607 else:
12608 file_driver = file_path = None
12610 disk = \
12611 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12612 instance.primary_node, instance.secondary_nodes,
12613 [params], file_path, file_driver, idx,
12614 self.Log, self.diskparams)[0]
12616 info = _GetInstanceInfoText(instance)
12618 logging.info("Creating volume %s for instance %s",
12619 disk.iv_name, instance.name)
12620 # Note: this needs to be kept in sync with _CreateDisks
12622 for node in instance.all_nodes:
12623 f_create = (node == instance.primary_node)
12624 try:
12625 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12626 except errors.OpExecError, err:
12627 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12628 disk.iv_name, disk, node, err)
12630 return (disk, [
12631 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12632 ])
12634 @staticmethod
12635 def _ModifyDisk(idx, disk, params, _):
12636 """Modifies a disk.
12638 """
12639 disk.mode = params[constants.IDISK_MODE]
12640 return [
12642 ("disk.mode/%d" % idx, disk.mode),
12643 ]
12645 def _RemoveDisk(self, idx, root, _):
12646 """Removes a disk.
12648 """
12649 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12650 self.cfg.SetDiskID(disk, node)
12651 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12652 if msg:
12653 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12654 " continuing anyway", idx, node, msg)
12656 # if this is a DRBD disk, return its port to the pool
12657 if root.dev_type in constants.LDS_DRBD:
12658 self.cfg.AddTcpUdpPort(root.logical_id[2])
12660 @staticmethod
12661 def _CreateNewNic(idx, params, private):
12662 """Creates data structure for a new network interface.
12664 """
12665 mac = params[constants.INIC_MAC]
12666 ip = params.get(constants.INIC_IP, None)
12667 nicparams = private.params
12669 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12670 ("nic.%d" % idx,
12671 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12672 (mac, ip, private.filled[constants.NIC_MODE],
12673 private.filled[constants.NIC_LINK])),
12674 ])
12676 @staticmethod
12677 def _ApplyNicMods(idx, nic, params, private):
12678 """Modifies a network interface.
12680 """
12681 changes = []
12683 for key in [constants.INIC_MAC, constants.INIC_IP]:
12684 if key in params:
12685 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12686 setattr(nic, key, params[key])
12688 if private.params:
12689 nic.nicparams = private.params
12691 for (key, val) in params.items():
12692 changes.append(("nic.%s/%d" % (key, idx), val))
12694 return changes
12696 def Exec(self, feedback_fn):
12697 """Modifies an instance.
12699 All parameters take effect only at the next restart of the instance.
12701 """
12702 # Process here the warnings from CheckPrereq, as we don't have a
12703 # feedback_fn there.
12704 # TODO: Replace with self.LogWarning
12705 for warn in self.warn:
12706 feedback_fn("WARNING: %s" % warn)
12708 assert ((self.op.disk_template is None) ^
12709 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12710 "Not owning any node resource locks"
12713 instance = self.instance
12716 if self.op.runtime_mem:
12717 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12719 self.op.runtime_mem)
12720 rpcres.Raise("Cannot modify instance runtime memory")
12721 result.append(("runtime_memory", self.op.runtime_mem))
12723 # Apply disk changes
12724 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12725 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12726 _UpdateIvNames(0, instance.disks)
12728 if self.op.disk_template:
12730 check_nodes = set(instance.all_nodes)
12731 if self.op.remote_node:
12732 check_nodes.add(self.op.remote_node)
12733 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12734 owned = self.owned_locks(level)
12735 assert not (check_nodes - owned), \
12736 ("Not owning the correct locks, owning %r, expected at least %r" %
12737 (owned, check_nodes))
12739 r_shut = _ShutdownInstanceDisks(self, instance)
12740 if not r_shut:
12741 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12742 " proceed with disk template conversion")
12743 mode = (instance.disk_template, self.op.disk_template)
12744 try:
12745 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12746 finally:
12747 self.cfg.ReleaseDRBDMinors(instance.name)
12749 result.append(("disk_template", self.op.disk_template))
12751 assert instance.disk_template == self.op.disk_template, \
12752 ("Expected disk template '%s', found '%s'" %
12753 (self.op.disk_template, instance.disk_template))
12755 # Release node and resource locks if there are any (they might already have
12756 # been released during disk conversion)
12757 _ReleaseLocks(self, locking.LEVEL_NODE)
12758 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12760 # Apply NIC changes
12761 if self._new_nics is not None:
12762 instance.nics = self._new_nics
12763 result.extend(self._nic_chgdesc)
12766 if self.op.hvparams:
12767 instance.hvparams = self.hv_inst
12768 for key, val in self.op.hvparams.iteritems():
12769 result.append(("hv/%s" % key, val))
12772 if self.op.beparams:
12773 instance.beparams = self.be_inst
12774 for key, val in self.op.beparams.iteritems():
12775 result.append(("be/%s" % key, val))
12778 if self.op.os_name:
12779 instance.os = self.op.os_name
12782 if self.op.osparams:
12783 instance.osparams = self.os_inst
12784 for key, val in self.op.osparams.iteritems():
12785 result.append(("os/%s" % key, val))
12787 if self.op.offline is None:
12788 # Ignore
12789 pass
12790 elif self.op.offline:
12791 # Mark instance as offline
12792 self.cfg.MarkInstanceOffline(instance.name)
12793 result.append(("admin_state", constants.ADMINST_OFFLINE))
12794 else:
12795 # Mark instance as online, but stopped
12796 self.cfg.MarkInstanceDown(instance.name)
12797 result.append(("admin_state", constants.ADMINST_DOWN))
12799 self.cfg.Update(instance, feedback_fn)
12801 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12802 self.owned_locks(locking.LEVEL_NODE)), \
12803 "All node locks should have been released by now"
12807 _DISK_CONVERSIONS = {
12808 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12809 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12810 }
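# Illustrative sketch (not part of the original module): how the dispatch
# table above is consulted from LUInstanceSetParams.Exec. The names "lu" and
# "feedback_fn" below stand for the usual LU instance and feedback callback
# and are assumptions for this example only.
#
#   mode = (lu.instance.disk_template, lu.op.disk_template)
#   conv_fn = LUInstanceSetParams._DISK_CONVERSIONS.get(mode)
#   if conv_fn is None:
#     raise errors.OpExecError("Unsupported disk template conversion"
#                              " %s -> %s" % mode)
#   conv_fn(lu, feedback_fn)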
12813 class LUInstanceChangeGroup(LogicalUnit):
12814 HPATH = "instance-change-group"
12815 HTYPE = constants.HTYPE_INSTANCE
12818 def ExpandNames(self):
12819 self.share_locks = _ShareAll()
12820 self.needed_locks = {
12821 locking.LEVEL_NODEGROUP: [],
12822 locking.LEVEL_NODE: [],
12825 self._ExpandAndLockInstance()
12827 if self.op.target_groups:
12828 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12829 self.op.target_groups)
12831 self.req_target_uuids = None
12833 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12835 def DeclareLocks(self, level):
12836 if level == locking.LEVEL_NODEGROUP:
12837 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12839 if self.req_target_uuids:
12840 lock_groups = set(self.req_target_uuids)
12842 # Lock all groups used by instance optimistically; this requires going
12843 # via the node before it's locked, requiring verification later on
12844 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12845 lock_groups.update(instance_groups)
12847 # No target groups, need to lock all of them
12848 lock_groups = locking.ALL_SET
12850 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12852 elif level == locking.LEVEL_NODE:
12853 if self.req_target_uuids:
12854 # Lock all nodes used by instances
12855 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12856 self._LockInstancesNodes()
12858 # Lock all nodes in all potential target groups
12859 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12860 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12861 member_nodes = [node_name
12862 for group in lock_groups
12863 for node_name in self.cfg.GetNodeGroup(group).members]
12864 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12866 # Lock all nodes as all groups are potential targets
12867 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12869 def CheckPrereq(self):
12870 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12871 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12872 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12874 assert (self.req_target_uuids is None or
12875 owned_groups.issuperset(self.req_target_uuids))
12876 assert owned_instances == set([self.op.instance_name])
12878 # Get instance information
12879 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12881 # Check if node groups for locked instance are still correct
12882 assert owned_nodes.issuperset(self.instance.all_nodes), \
12883 ("Instance %s's nodes changed while we kept the lock" %
12884 self.op.instance_name)
12886 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12889 if self.req_target_uuids:
12890 # User requested specific target groups
12891 self.target_uuids = self.req_target_uuids
12893 # All groups except those used by the instance are potential targets
12894 self.target_uuids = owned_groups - inst_groups
12896 conflicting_groups = self.target_uuids & inst_groups
12897 if conflicting_groups:
12898 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12899 " used by the instance '%s'" %
12900 (utils.CommaJoin(conflicting_groups),
12901 self.op.instance_name),
12902 errors.ECODE_INVAL)
12904 if not self.target_uuids:
12905 raise errors.OpPrereqError("There are no possible target groups",
12906 errors.ECODE_INVAL)
12908 def BuildHooksEnv(self):
12909 """Build hooks env.
12912 assert self.target_uuids
12915 "TARGET_GROUPS": " ".join(self.target_uuids),
12918 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12922 def BuildHooksNodes(self):
12923 """Build hooks nodes.
12926 mn = self.cfg.GetMasterNode()
12927 return ([mn], [mn])
12929 def Exec(self, feedback_fn):
12930 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12932 assert instances == [self.op.instance_name], "Instance not locked"
12934 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12935 instances=instances, target_groups=list(self.target_uuids))
12937 ial.Run(self.op.iallocator)
12939 if not ial.success:
12940 raise errors.OpPrereqError("Can't compute solution for changing group of"
12941 " instance '%s' using iallocator '%s': %s" %
12942 (self.op.instance_name, self.op.iallocator,
12944 errors.ECODE_NORES)
12946 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12948 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12949 " instance '%s'", len(jobs), self.op.instance_name)
12951 return ResultWithJobs(jobs)
12954 class LUBackupQuery(NoHooksLU):
12955 """Query the exports list
12960 def ExpandNames(self):
12961 self.needed_locks = {}
12962 self.share_locks[locking.LEVEL_NODE] = 1
12963 if not self.op.nodes:
12964 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12965 else:
12966 self.needed_locks[locking.LEVEL_NODE] = \
12967 _GetWantedNodes(self, self.op.nodes)
12969 def Exec(self, feedback_fn):
12970 """Compute the list of all the exported system images.
12973 @return: a dictionary with the structure node->(export-list)
12974 where export-list is a list of the instances exported on
12978 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12979 rpcresult = self.rpc.call_export_list(self.nodes)
12980 result = {}
12981 for node in rpcresult:
12982 if rpcresult[node].fail_msg:
12983 result[node] = False
12984 else:
12985 result[node] = rpcresult[node].payload
12987 return result
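# Hedged example (node and instance names invented): the dictionary returned
# by LUBackupQuery.Exec maps each queried node either to False (the export
# list RPC failed on that node) or to the list of exports found there:
#
#   {
#     "node1.example.com": ["inst1.example.com", "inst2.example.com"],
#     "node2.example.com": False,
#   }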
12990 class LUBackupPrepare(NoHooksLU):
12991 """Prepares an instance for an export and returns useful information.
12996 def ExpandNames(self):
12997 self._ExpandAndLockInstance()
12999 def CheckPrereq(self):
13000 """Check prerequisites.
13003 instance_name = self.op.instance_name
13005 self.instance = self.cfg.GetInstanceInfo(instance_name)
13006 assert self.instance is not None, \
13007 "Cannot retrieve locked instance %s" % self.op.instance_name
13008 _CheckNodeOnline(self, self.instance.primary_node)
13010 self._cds = _GetClusterDomainSecret()
13012 def Exec(self, feedback_fn):
13013 """Prepares an instance for an export.
13016 instance = self.instance
13018 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13019 salt = utils.GenerateSecret(8)
13021 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13022 result = self.rpc.call_x509_cert_create(instance.primary_node,
13023 constants.RIE_CERT_VALIDITY)
13024 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13026 (name, cert_pem) = result.payload
13028 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13032 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13033 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13035 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13041 class LUBackupExport(LogicalUnit):
13042 """Export an instance to an image in the cluster.
13045 HPATH = "instance-export"
13046 HTYPE = constants.HTYPE_INSTANCE
13049 def CheckArguments(self):
13050 """Check the arguments.
13053 self.x509_key_name = self.op.x509_key_name
13054 self.dest_x509_ca_pem = self.op.destination_x509_ca
13056 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13057 if not self.x509_key_name:
13058 raise errors.OpPrereqError("Missing X509 key name for encryption",
13059 errors.ECODE_INVAL)
13061 if not self.dest_x509_ca_pem:
13062 raise errors.OpPrereqError("Missing destination X509 CA",
13063 errors.ECODE_INVAL)
13065 def ExpandNames(self):
13066 self._ExpandAndLockInstance()
13068 # Lock all nodes for local exports
13069 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13070 # FIXME: lock only instance primary and destination node
13072 # Sad but true, for now we have to lock all nodes, as we don't know where
13073 # the previous export might be, and in this LU we search for it and
13074 # remove it from its current node. In the future we could fix this by:
13075 # - making a tasklet to search (share-lock all), then create the
13076 # new one, then one to remove, after
13077 # - removing the removal operation altogether
13078 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13080 def DeclareLocks(self, level):
13081 """Last minute lock declaration."""
13082 # All nodes are locked anyway, so nothing to do here.
13084 def BuildHooksEnv(self):
13085 """Build hooks env.
13087 This will run on the master, primary node and target node.
13091 "EXPORT_MODE": self.op.mode,
13092 "EXPORT_NODE": self.op.target_node,
13093 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13094 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13095 # TODO: Generic function for boolean env variables
13096 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13099 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13103 def BuildHooksNodes(self):
13104 """Build hooks nodes.
13107 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13109 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13110 nl.append(self.op.target_node)
13114 def CheckPrereq(self):
13115 """Check prerequisites.
13117 This checks that the instance and node names are valid.
13120 instance_name = self.op.instance_name
13122 self.instance = self.cfg.GetInstanceInfo(instance_name)
13123 assert self.instance is not None, \
13124 "Cannot retrieve locked instance %s" % self.op.instance_name
13125 _CheckNodeOnline(self, self.instance.primary_node)
13127 if (self.op.remove_instance and
13128 self.instance.admin_state == constants.ADMINST_UP and
13129 not self.op.shutdown):
13130 raise errors.OpPrereqError("Can not remove instance without shutting it"
13133 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13134 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13135 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13136 assert self.dst_node is not None
13138 _CheckNodeOnline(self, self.dst_node.name)
13139 _CheckNodeNotDrained(self, self.dst_node.name)
13142 self.dest_disk_info = None
13143 self.dest_x509_ca = None
13145 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13146 self.dst_node = None
13148 if len(self.op.target_node) != len(self.instance.disks):
13149 raise errors.OpPrereqError(("Received destination information for %s"
13150 " disks, but instance %s has %s disks") %
13151 (len(self.op.target_node), instance_name,
13152 len(self.instance.disks)),
13153 errors.ECODE_INVAL)
13155 cds = _GetClusterDomainSecret()
13157 # Check X509 key name
13158 try:
13159 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13160 except (TypeError, ValueError), err:
13161 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13163 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13164 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13165 errors.ECODE_INVAL)
13167 # Load and verify CA
13168 try:
13169 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13170 except OpenSSL.crypto.Error, err:
13171 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13172 (err, ), errors.ECODE_INVAL)
13174 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13175 if errcode is not None:
13176 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13177 (msg, ), errors.ECODE_INVAL)
13179 self.dest_x509_ca = cert
13181 # Verify target information
13182 disk_info = []
13183 for idx, disk_data in enumerate(self.op.target_node):
13184 try:
13185 (host, port, magic) = \
13186 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13187 except errors.GenericError, err:
13188 raise errors.OpPrereqError("Target info for disk %s: %s" %
13189 (idx, err), errors.ECODE_INVAL)
13191 disk_info.append((host, port, magic))
13193 assert len(disk_info) == len(self.op.target_node)
13194 self.dest_disk_info = disk_info
13197 raise errors.ProgrammerError("Unhandled export mode %r" %
13200 # instance disk type verification
13201 # TODO: Implement export support for file-based disks
13202 for disk in self.instance.disks:
13203 if disk.dev_type == constants.LD_FILE:
13204 raise errors.OpPrereqError("Export not supported for instances with"
13205 " file-based disks", errors.ECODE_INVAL)
13207 def _CleanupExports(self, feedback_fn):
13208 """Removes exports of current instance from all other nodes.
13210 If an instance in a cluster with nodes A..D was exported to node C, its
13211 exports will be removed from the nodes A, B and D.
13214 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13216 nodelist = self.cfg.GetNodeList()
13217 nodelist.remove(self.dst_node.name)
13219 # on one-node clusters nodelist will be empty after the removal
13220 # if we proceed the backup would be removed because OpBackupQuery
13221 # substitutes an empty list with the full cluster node list.
13222 iname = self.instance.name
13224 feedback_fn("Removing old exports for instance %s" % iname)
13225 exportlist = self.rpc.call_export_list(nodelist)
13226 for node in exportlist:
13227 if exportlist[node].fail_msg:
13228 continue
13229 if iname in exportlist[node].payload:
13230 msg = self.rpc.call_export_remove(node, iname).fail_msg
13231 if msg:
13232 self.LogWarning("Could not remove older export for instance %s"
13233 " on node %s: %s", iname, node, msg)
13235 def Exec(self, feedback_fn):
13236 """Export an instance to an image in the cluster.
13239 assert self.op.mode in constants.EXPORT_MODES
13241 instance = self.instance
13242 src_node = instance.primary_node
13244 if self.op.shutdown:
13245 # shutdown the instance, but not the disks
13246 feedback_fn("Shutting down instance %s" % instance.name)
13247 result = self.rpc.call_instance_shutdown(src_node, instance,
13248 self.op.shutdown_timeout)
13249 # TODO: Maybe ignore failures if ignore_remove_failures is set
13250 result.Raise("Could not shutdown instance %s on"
13251 " node %s" % (instance.name, src_node))
13253 # set the disks ID correctly since call_instance_start needs the
13254 # correct drbd minor to create the symlinks
13255 for disk in instance.disks:
13256 self.cfg.SetDiskID(disk, src_node)
13258 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13260 if activate_disks:
13261 # Activate the instance disks if we're exporting a stopped instance
13262 feedback_fn("Activating disks for %s" % instance.name)
13263 _StartInstanceDisks(self, instance, None)
13266 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13269 helper.CreateSnapshots()
13271 if (self.op.shutdown and
13272 instance.admin_state == constants.ADMINST_UP and
13273 not self.op.remove_instance):
13274 assert not activate_disks
13275 feedback_fn("Starting instance %s" % instance.name)
13276 result = self.rpc.call_instance_start(src_node,
13277 (instance, None, None), False)
13278 msg = result.fail_msg
13279 if msg:
13280 feedback_fn("Failed to start instance: %s" % msg)
13281 _ShutdownInstanceDisks(self, instance)
13282 raise errors.OpExecError("Could not start instance: %s" % msg)
13284 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13285 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13286 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13287 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13288 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13290 (key_name, _, _) = self.x509_key_name
13293 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13296 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13297 key_name, dest_ca_pem,
13302 # Check for backwards compatibility
13303 assert len(dresults) == len(instance.disks)
13304 assert compat.all(isinstance(i, bool) for i in dresults), \
13305 "Not all results are boolean: %r" % dresults
13309 feedback_fn("Deactivating disks for %s" % instance.name)
13310 _ShutdownInstanceDisks(self, instance)
13312 if not (compat.all(dresults) and fin_resu):
13313 failures = []
13314 if not fin_resu:
13315 failures.append("export finalization")
13316 if not compat.all(dresults):
13317 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13318 if not dsk)
13319 failures.append("disk export: disk(s) %s" % fdsk)
13321 raise errors.OpExecError("Export failed, errors in %s" %
13322 utils.CommaJoin(failures))
13324 # At this point, the export was successful, we can cleanup/finish
13326 # Remove instance if requested
13327 if self.op.remove_instance:
13328 feedback_fn("Removing instance %s" % instance.name)
13329 _RemoveInstance(self, feedback_fn, instance,
13330 self.op.ignore_remove_failures)
13332 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13333 self._CleanupExports(feedback_fn)
13335 return fin_resu, dresults
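# Illustrative note (not part of the original code): a caller could interpret
# the (fin_resu, dresults) pair returned above as follows; "fin_resu" covers
# export finalization and "dresults" holds one boolean per instance disk.
#
#   (fin_resu, dresults) = ...   # value returned by LUBackupExport.Exec
#   for idx, ok in enumerate(dresults):
#     if not ok:
#       logging.warning("Disk %d was not exported successfully", idx)
#   if not fin_resu:
#     logging.warning("Export finalization failed")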
13338 class LUBackupRemove(NoHooksLU):
13339 """Remove exports related to the named instance.
13344 def ExpandNames(self):
13345 self.needed_locks = {}
13346 # We need all nodes to be locked in order for RemoveExport to work, but we
13347 # don't need to lock the instance itself, as nothing will happen to it (and
13348 # we can remove exports also for a removed instance)
13349 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13351 def Exec(self, feedback_fn):
13352 """Remove any export.
13355 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13356 # If the instance was not found we'll try with the name that was passed in.
13357 # This will only work if it was an FQDN, though.
13358 fqdn_warn = False
13359 if not instance_name:
13360 fqdn_warn = True
13361 instance_name = self.op.instance_name
13363 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13364 exportlist = self.rpc.call_export_list(locked_nodes)
13365 found = False
13366 for node in exportlist:
13367 msg = exportlist[node].fail_msg
13368 if msg:
13369 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13370 continue
13371 if instance_name in exportlist[node].payload:
13372 found = True
13373 result = self.rpc.call_export_remove(node, instance_name)
13374 msg = result.fail_msg
13375 if msg:
13376 logging.error("Could not remove export for instance %s"
13377 " on node %s: %s", instance_name, node, msg)
13379 if fqdn_warn and not found:
13380 feedback_fn("Export not found. If trying to remove an export belonging"
13381 " to a deleted instance please use its Fully Qualified"
13382 " Domain Name.")
13385 class LUGroupAdd(LogicalUnit):
13386 """Logical unit for creating node groups.
13389 HPATH = "group-add"
13390 HTYPE = constants.HTYPE_GROUP
13393 def ExpandNames(self):
13394 # We need the new group's UUID here so that we can create and acquire the
13395 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13396 # that it should not check whether the UUID exists in the configuration.
13397 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13398 self.needed_locks = {}
13399 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13401 def CheckPrereq(self):
13402 """Check prerequisites.
13404 This checks that the given group name is not an existing node group
13408 try:
13409 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13410 except errors.OpPrereqError:
13411 pass
13412 else:
13413 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13414 " node group (UUID: %s)" %
13415 (self.op.group_name, existing_uuid),
13416 errors.ECODE_EXISTS)
13418 if self.op.ndparams:
13419 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13421 if self.op.hv_state:
13422 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13424 self.new_hv_state = None
13426 if self.op.disk_state:
13427 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13429 self.new_disk_state = None
13431 if self.op.diskparams:
13432 for templ in constants.DISK_TEMPLATES:
13433 if templ not in self.op.diskparams:
13434 self.op.diskparams[templ] = {}
13435 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13436 else:
13437 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13439 if self.op.ipolicy:
13440 cluster = self.cfg.GetClusterInfo()
13441 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13442 try:
13443 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13444 except errors.ConfigurationError, err:
13445 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13446 errors.ECODE_INVAL)
13448 def BuildHooksEnv(self):
13449 """Build hooks env.
13453 "GROUP_NAME": self.op.group_name,
13456 def BuildHooksNodes(self):
13457 """Build hooks nodes.
13460 mn = self.cfg.GetMasterNode()
13461 return ([mn], [mn])
13463 def Exec(self, feedback_fn):
13464 """Add the node group to the cluster.
13467 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13468 uuid=self.group_uuid,
13469 alloc_policy=self.op.alloc_policy,
13470 ndparams=self.op.ndparams,
13471 diskparams=self.op.diskparams,
13472 ipolicy=self.op.ipolicy,
13473 hv_state_static=self.new_hv_state,
13474 disk_state_static=self.new_disk_state)
13476 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13477 del self.remove_locks[locking.LEVEL_NODEGROUP]
13480 class LUGroupAssignNodes(NoHooksLU):
13481 """Logical unit for assigning nodes to groups.
13486 def ExpandNames(self):
13487 # These raise errors.OpPrereqError on their own:
13488 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13489 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13491 # We want to lock all the affected nodes and groups. We have readily
13492 # available the list of nodes, and the *destination* group. To gather the
13493 # list of "source" groups, we need to fetch node information later on.
13494 self.needed_locks = {
13495 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13496 locking.LEVEL_NODE: self.op.nodes,
13499 def DeclareLocks(self, level):
13500 if level == locking.LEVEL_NODEGROUP:
13501 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13503 # Try to get all affected nodes' groups without having the group or node
13504 # lock yet. Needs verification later in the code flow.
13505 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13507 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13509 def CheckPrereq(self):
13510 """Check prerequisites.
13513 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13514 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13515 frozenset(self.op.nodes))
13517 expected_locks = (set([self.group_uuid]) |
13518 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13519 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13520 if actual_locks != expected_locks:
13521 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13522 " current groups are '%s', used to be '%s'" %
13523 (utils.CommaJoin(expected_locks),
13524 utils.CommaJoin(actual_locks)))
13526 self.node_data = self.cfg.GetAllNodesInfo()
13527 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13528 instance_data = self.cfg.GetAllInstancesInfo()
13530 if self.group is None:
13531 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13532 (self.op.group_name, self.group_uuid))
13534 (new_splits, previous_splits) = \
13535 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13536 for node in self.op.nodes],
13537 self.node_data, instance_data)
13539 if new_splits:
13540 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13542 if not self.op.force:
13543 raise errors.OpExecError("The following instances get split by this"
13544 " change and --force was not given: %s" %
13545 fmt_new_splits)
13546 else:
13547 self.LogWarning("This operation will split the following instances: %s",
13548 fmt_new_splits)
13550 if previous_splits:
13551 self.LogWarning("In addition, these already-split instances continue"
13552 " to be split across groups: %s",
13553 utils.CommaJoin(utils.NiceSort(previous_splits)))
13555 def Exec(self, feedback_fn):
13556 """Assign nodes to a new group.
13559 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13561 self.cfg.AssignGroupNodes(mods)
13563 @staticmethod
13564 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13565 """Check for split instances after a node assignment.
13567 This method considers a series of node assignments as an atomic operation,
13568 and returns information about split instances after applying the set of
13571 In particular, it returns information about newly split instances, and
13572 instances that were already split, and remain so after the change.
13574 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13577 @type changes: list of (node_name, new_group_uuid) pairs.
13578 @param changes: list of node assignments to consider.
13579 @param node_data: a dict with data for all nodes
13580 @param instance_data: a dict with all instances to consider
13581 @rtype: a two-tuple
13582 @return: a list of instances that were previously okay and result split as a
13583 consequence of this change, and a list of instances that were previously
13584 split and this change does not fix.
13587 changed_nodes = dict((node, group) for node, group in changes
13588 if node_data[node].group != group)
13590 all_split_instances = set()
13591 previously_split_instances = set()
13593 def InstanceNodes(instance):
13594 return [instance.primary_node] + list(instance.secondary_nodes)
13596 for inst in instance_data.values():
13597 if inst.disk_template not in constants.DTS_INT_MIRROR:
13600 instance_nodes = InstanceNodes(inst)
13602 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13603 previously_split_instances.add(inst.name)
13605 if len(set(changed_nodes.get(node, node_data[node].group)
13606 for node in instance_nodes)) > 1:
13607 all_split_instances.add(inst.name)
13609 return (list(all_split_instances - previously_split_instances),
13610 list(previously_split_instances & all_split_instances))
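# Worked example (hedged, all names invented): assume a DRBD instance "inst1"
# with primary on "node1" and secondary on "node2", both currently in group
# "g1". Reassigning only node2 to group "g2" makes inst1 newly split:
#
#   changes = [("node2", "g2")]
#   (new, previous) = \
#     LUGroupAssignNodes.CheckAssignmentForSplitInstances(changes, node_data,
#                                                         instance_data)
#   # new == ["inst1"], previous == []
#
# where node_data maps node names to objects with a .group attribute and
# instance_data maps instance names to instance objects, as documented above.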
13613 class _GroupQuery(_QueryBase):
13614 FIELDS = query.GROUP_FIELDS
13616 def ExpandNames(self, lu):
13617 lu.needed_locks = {}
13619 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13620 self._cluster = lu.cfg.GetClusterInfo()
13621 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13623 if not self.names:
13624 self.wanted = [name_to_uuid[name]
13625 for name in utils.NiceSort(name_to_uuid.keys())]
13626 else:
13627 # Accept names to be either names or UUIDs.
13628 missing = []
13629 self.wanted = []
13630 all_uuid = frozenset(self._all_groups.keys())
13632 for name in self.names:
13633 if name in all_uuid:
13634 self.wanted.append(name)
13635 elif name in name_to_uuid:
13636 self.wanted.append(name_to_uuid[name])
13637 else:
13638 missing.append(name)
13640 if missing:
13641 raise errors.OpPrereqError("Some groups do not exist: %s" %
13642 utils.CommaJoin(missing),
13643 errors.ECODE_NOENT)
13645 def DeclareLocks(self, lu, level):
13648 def _GetQueryData(self, lu):
13649 """Computes the list of node groups and their attributes.
13652 do_nodes = query.GQ_NODE in self.requested_data
13653 do_instances = query.GQ_INST in self.requested_data
13655 group_to_nodes = None
13656 group_to_instances = None
13658 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13659 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13660 # latter GetAllInstancesInfo() is not enough, for we have to go through
13661 # instance->node. Hence, we will need to process nodes even if we only need
13662 # instance information.
13663 if do_nodes or do_instances:
13664 all_nodes = lu.cfg.GetAllNodesInfo()
13665 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13666 node_to_group = {}
13668 for node in all_nodes.values():
13669 if node.group in group_to_nodes:
13670 group_to_nodes[node.group].append(node.name)
13671 node_to_group[node.name] = node.group
13673 if do_instances:
13674 all_instances = lu.cfg.GetAllInstancesInfo()
13675 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13677 for instance in all_instances.values():
13678 node = instance.primary_node
13679 if node in node_to_group:
13680 group_to_instances[node_to_group[node]].append(instance.name)
13682 if not do_nodes:
13683 # Do not pass on node information if it was not requested.
13684 group_to_nodes = None
13686 return query.GroupQueryData(self._cluster,
13687 [self._all_groups[uuid]
13688 for uuid in self.wanted],
13689 group_to_nodes, group_to_instances)
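# Hedged example (UUIDs and names invented) of the intermediate mappings built
# above; both are None when the corresponding GQ_NODE/GQ_INST data was not
# requested:
#
#   group_to_nodes = {
#     "uuid-group1": ["node1", "node2"],
#     "uuid-group2": ["node3"],
#   }
#   group_to_instances = {
#     "uuid-group1": ["inst1"],   # keyed by the group of the primary node
#     "uuid-group2": [],
#   }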
13692 class LUGroupQuery(NoHooksLU):
13693 """Logical unit for querying node groups.
13698 def CheckArguments(self):
13699 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13700 self.op.output_fields, False)
13702 def ExpandNames(self):
13703 self.gq.ExpandNames(self)
13705 def DeclareLocks(self, level):
13706 self.gq.DeclareLocks(self, level)
13708 def Exec(self, feedback_fn):
13709 return self.gq.OldStyleQuery(self)
13712 class LUGroupSetParams(LogicalUnit):
13713 """Modifies the parameters of a node group.
13716 HPATH = "group-modify"
13717 HTYPE = constants.HTYPE_GROUP
13720 def CheckArguments(self):
13723 self.op.diskparams,
13724 self.op.alloc_policy,
13726 self.op.disk_state,
13730 if all_changes.count(None) == len(all_changes):
13731 raise errors.OpPrereqError("Please pass at least one modification",
13732 errors.ECODE_INVAL)
13734 def ExpandNames(self):
13735 # This raises errors.OpPrereqError on its own:
13736 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13738 self.needed_locks = {
13739 locking.LEVEL_INSTANCE: [],
13740 locking.LEVEL_NODEGROUP: [self.group_uuid],
13743 self.share_locks[locking.LEVEL_INSTANCE] = 1
13745 def DeclareLocks(self, level):
13746 if level == locking.LEVEL_INSTANCE:
13747 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13749 # Lock instances optimistically, needs verification once group lock has
13751 self.needed_locks[locking.LEVEL_INSTANCE] = \
13752 self.cfg.GetNodeGroupInstances(self.group_uuid)
13754 def CheckPrereq(self):
13755 """Check prerequisites.
13758 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13760 # Check if locked instances are still correct
13761 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13763 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13764 cluster = self.cfg.GetClusterInfo()
13766 if self.group is None:
13767 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13768 (self.op.group_name, self.group_uuid))
13770 if self.op.ndparams:
13771 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13772 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13773 self.new_ndparams = new_ndparams
13775 if self.op.diskparams:
13776 self.new_diskparams = dict()
13777 for templ in constants.DISK_TEMPLATES:
13778 if templ not in self.op.diskparams:
13779 self.op.diskparams[templ] = {}
13780 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13781 self.op.diskparams[templ])
13782 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13783 self.new_diskparams[templ] = new_templ_params
13785 if self.op.hv_state:
13786 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13787 self.group.hv_state_static)
13789 if self.op.disk_state:
13790 self.new_disk_state = \
13791 _MergeAndVerifyDiskState(self.op.disk_state,
13792 self.group.disk_state_static)
13794 if self.op.ipolicy:
13795 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13799 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13800 inst_filter = lambda inst: inst.name in owned_instances
13801 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13803 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13805 new_ipolicy, instances)
13808 self.LogWarning("After the ipolicy change the following instances"
13809 " violate them: %s",
13810 utils.CommaJoin(violations))
13812 def BuildHooksEnv(self):
13813 """Build hooks env.
13817 "GROUP_NAME": self.op.group_name,
13818 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13821 def BuildHooksNodes(self):
13822 """Build hooks nodes.
13825 mn = self.cfg.GetMasterNode()
13826 return ([mn], [mn])
13828 def Exec(self, feedback_fn):
13829 """Modifies the node group.
13833 result = []
13834 if self.op.ndparams:
13835 self.group.ndparams = self.new_ndparams
13836 result.append(("ndparams", str(self.group.ndparams)))
13838 if self.op.diskparams:
13839 self.group.diskparams = self.new_diskparams
13840 result.append(("diskparams", str(self.group.diskparams)))
13842 if self.op.alloc_policy:
13843 self.group.alloc_policy = self.op.alloc_policy
13845 if self.op.hv_state:
13846 self.group.hv_state_static = self.new_hv_state
13848 if self.op.disk_state:
13849 self.group.disk_state_static = self.new_disk_state
13851 if self.op.ipolicy:
13852 self.group.ipolicy = self.new_ipolicy
13854 self.cfg.Update(self.group, feedback_fn)
13856 return result
13858 class LUGroupRemove(LogicalUnit):
13859 HPATH = "group-remove"
13860 HTYPE = constants.HTYPE_GROUP
13863 def ExpandNames(self):
13864 # This will raise errors.OpPrereqError on its own:
13865 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13866 self.needed_locks = {
13867 locking.LEVEL_NODEGROUP: [self.group_uuid],
13870 def CheckPrereq(self):
13871 """Check prerequisites.
13873 This checks that the given group name exists as a node group, that it is
13874 empty (i.e., contains no nodes), and that it is not the last group of the
13875 cluster.
13878 # Verify that the group is empty.
13879 group_nodes = [node.name
13880 for node in self.cfg.GetAllNodesInfo().values()
13881 if node.group == self.group_uuid]
13883 if group_nodes:
13884 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13885 " nodes: %s" %
13886 (self.op.group_name,
13887 utils.CommaJoin(utils.NiceSort(group_nodes))),
13888 errors.ECODE_STATE)
13890 # Verify the cluster would not be left group-less.
13891 if len(self.cfg.GetNodeGroupList()) == 1:
13892 raise errors.OpPrereqError("Group '%s' is the only group,"
13893 " cannot be removed" %
13894 self.op.group_name,
13895 errors.ECODE_STATE)
13897 def BuildHooksEnv(self):
13898 """Build hooks env.
13902 "GROUP_NAME": self.op.group_name,
13905 def BuildHooksNodes(self):
13906 """Build hooks nodes.
13909 mn = self.cfg.GetMasterNode()
13910 return ([mn], [mn])
13912 def Exec(self, feedback_fn):
13913 """Remove the node group.
13917 self.cfg.RemoveNodeGroup(self.group_uuid)
13918 except errors.ConfigurationError:
13919 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13920 (self.op.group_name, self.group_uuid))
13922 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13925 class LUGroupRename(LogicalUnit):
13926 HPATH = "group-rename"
13927 HTYPE = constants.HTYPE_GROUP
13930 def ExpandNames(self):
13931 # This raises errors.OpPrereqError on its own:
13932 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13934 self.needed_locks = {
13935 locking.LEVEL_NODEGROUP: [self.group_uuid],
13938 def CheckPrereq(self):
13939 """Check prerequisites.
13941 Ensures requested new name is not yet used.
13944 try:
13945 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13946 except errors.OpPrereqError:
13947 pass
13948 else:
13949 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13950 " node group (UUID: %s)" %
13951 (self.op.new_name, new_name_uuid),
13952 errors.ECODE_EXISTS)
13954 def BuildHooksEnv(self):
13955 """Build hooks env.
13959 "OLD_NAME": self.op.group_name,
13960 "NEW_NAME": self.op.new_name,
13963 def BuildHooksNodes(self):
13964 """Build hooks nodes.
13967 mn = self.cfg.GetMasterNode()
13969 all_nodes = self.cfg.GetAllNodesInfo()
13970 all_nodes.pop(mn, None)
13972 run_nodes = [mn]
13973 run_nodes.extend(node.name for node in all_nodes.values()
13974 if node.group == self.group_uuid)
13976 return (run_nodes, run_nodes)
13978 def Exec(self, feedback_fn):
13979 """Rename the node group.
13982 group = self.cfg.GetNodeGroup(self.group_uuid)
13984 if group is None:
13985 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13986 (self.op.group_name, self.group_uuid))
13988 group.name = self.op.new_name
13989 self.cfg.Update(group, feedback_fn)
13991 return self.op.new_name
13994 class LUGroupEvacuate(LogicalUnit):
13995 HPATH = "group-evacuate"
13996 HTYPE = constants.HTYPE_GROUP
13999 def ExpandNames(self):
14000 # This raises errors.OpPrereqError on its own:
14001 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14003 if self.op.target_groups:
14004 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14005 self.op.target_groups)
14007 self.req_target_uuids = []
14009 if self.group_uuid in self.req_target_uuids:
14010 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14011 " as a target group (targets are %s)" %
14013 utils.CommaJoin(self.req_target_uuids)),
14014 errors.ECODE_INVAL)
14016 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14018 self.share_locks = _ShareAll()
14019 self.needed_locks = {
14020 locking.LEVEL_INSTANCE: [],
14021 locking.LEVEL_NODEGROUP: [],
14022 locking.LEVEL_NODE: [],
14025 def DeclareLocks(self, level):
14026 if level == locking.LEVEL_INSTANCE:
14027 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14029 # Lock instances optimistically, needs verification once node and group
14030 # locks have been acquired
14031 self.needed_locks[locking.LEVEL_INSTANCE] = \
14032 self.cfg.GetNodeGroupInstances(self.group_uuid)
14034 elif level == locking.LEVEL_NODEGROUP:
14035 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14037 if self.req_target_uuids:
14038 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14040 # Lock all groups used by instances optimistically; this requires going
14041 # via the node before it's locked, requiring verification later on
14042 lock_groups.update(group_uuid
14043 for instance_name in
14044 self.owned_locks(locking.LEVEL_INSTANCE)
14046 self.cfg.GetInstanceNodeGroups(instance_name))
14048 # No target groups, need to lock all of them
14049 lock_groups = locking.ALL_SET
14051 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14053 elif level == locking.LEVEL_NODE:
14054 # This will only lock the nodes in the group to be evacuated which
14055 # contain actual instances
14056 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14057 self._LockInstancesNodes()
14059 # Lock all nodes in group to be evacuated and target groups
14060 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14061 assert self.group_uuid in owned_groups
14062 member_nodes = [node_name
14063 for group in owned_groups
14064 for node_name in self.cfg.GetNodeGroup(group).members]
14065 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14067 def CheckPrereq(self):
14068 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14069 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14070 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14072 assert owned_groups.issuperset(self.req_target_uuids)
14073 assert self.group_uuid in owned_groups
14075 # Check if locked instances are still correct
14076 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14078 # Get instance information
14079 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14081 # Check if node groups for locked instances are still correct
14082 for instance_name in owned_instances:
14083 inst = self.instances[instance_name]
14084 assert owned_nodes.issuperset(inst.all_nodes), \
14085 "Instance %s's nodes changed while we kept the lock" % instance_name
14087 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
14088 owned_groups)
14090 assert self.group_uuid in inst_groups, \
14091 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
14093 if self.req_target_uuids:
14094 # User requested specific target groups
14095 self.target_uuids = self.req_target_uuids
14097 # All groups except the one to be evacuated are potential targets
14098 self.target_uuids = [group_uuid for group_uuid in owned_groups
14099 if group_uuid != self.group_uuid]
14101 if not self.target_uuids:
14102 raise errors.OpPrereqError("There are no possible target groups",
14103 errors.ECODE_INVAL)
14105 def BuildHooksEnv(self):
14106 """Build hooks env.
14110 "GROUP_NAME": self.op.group_name,
14111 "TARGET_GROUPS": " ".join(self.target_uuids),
14114 def BuildHooksNodes(self):
14115 """Build hooks nodes.
14118 mn = self.cfg.GetMasterNode()
14120 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14122 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14124 return (run_nodes, run_nodes)
14126 def Exec(self, feedback_fn):
14127 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14129 assert self.group_uuid not in self.target_uuids
14131 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14132 instances=instances, target_groups=self.target_uuids)
14134 ial.Run(self.op.iallocator)
14136 if not ial.success:
14137 raise errors.OpPrereqError("Can't compute group evacuation using"
14138 " iallocator '%s': %s" %
14139 (self.op.iallocator, ial.info),
14140 errors.ECODE_NORES)
14142 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14144 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14145 len(jobs), self.op.group_name)
14147 return ResultWithJobs(jobs)
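# Illustrative sketch (not from the original code): the "jobs" value computed
# by _LoadNodeEvacResult and wrapped above is a list of per-job opcode lists;
# each inner list becomes one submitted job. The opcodes shown are only an
# example of what an evacuation plan might contain:
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1")],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2",
#                                     mode=constants.REPLACE_DISK_CHG,
#                                     remote_node="node9")],
#   ]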
14150 class TagsLU(NoHooksLU): # pylint: disable=W0223
14151 """Generic tags LU.
14153 This is an abstract class which is the parent of all the other tags LUs.
14156 def ExpandNames(self):
14157 self.group_uuid = None
14158 self.needed_locks = {}
14159 if self.op.kind == constants.TAG_NODE:
14160 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14161 self.needed_locks[locking.LEVEL_NODE] = self.op.name
14162 elif self.op.kind == constants.TAG_INSTANCE:
14163 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14164 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14165 elif self.op.kind == constants.TAG_NODEGROUP:
14166 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14168 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14169 # not possible to acquire the BGL based on opcode parameters)
14171 def CheckPrereq(self):
14172 """Check prerequisites.
14175 if self.op.kind == constants.TAG_CLUSTER:
14176 self.target = self.cfg.GetClusterInfo()
14177 elif self.op.kind == constants.TAG_NODE:
14178 self.target = self.cfg.GetNodeInfo(self.op.name)
14179 elif self.op.kind == constants.TAG_INSTANCE:
14180 self.target = self.cfg.GetInstanceInfo(self.op.name)
14181 elif self.op.kind == constants.TAG_NODEGROUP:
14182 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14183 else:
14184 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14185 str(self.op.kind), errors.ECODE_INVAL)
14188 class LUTagsGet(TagsLU):
14189 """Returns the tags of a given object.
14194 def ExpandNames(self):
14195 TagsLU.ExpandNames(self)
14197 # Share locks as this is only a read operation
14198 self.share_locks = _ShareAll()
14200 def Exec(self, feedback_fn):
14201 """Returns the tag list.
14204 return list(self.target.GetTags())
14207 class LUTagsSearch(NoHooksLU):
14208 """Searches the tags for a given pattern.
14213 def ExpandNames(self):
14214 self.needed_locks = {}
14216 def CheckPrereq(self):
14217 """Check prerequisites.
14219 This checks the pattern passed for validity by compiling it.
14222 try:
14223 self.re = re.compile(self.op.pattern)
14224 except re.error, err:
14225 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14226 (self.op.pattern, err), errors.ECODE_INVAL)
14228 def Exec(self, feedback_fn):
14229 """Returns the tag list.
14233 tgts = [("/cluster", cfg.GetClusterInfo())]
14234 ilist = cfg.GetAllInstancesInfo().values()
14235 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14236 nlist = cfg.GetAllNodesInfo().values()
14237 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14238 tgts.extend(("/nodegroup/%s" % n.name, n)
14239 for n in cfg.GetAllNodeGroupsInfo().values())
14241 for path, target in tgts:
14242 for tag in target.GetTags():
14243 if self.re.search(tag):
14244 results.append((path, tag))
14248 class LUTagsSet(TagsLU):
14249 """Sets a tag on a given object.
14254 def CheckPrereq(self):
14255 """Check prerequisites.
14257 This checks the type and length of the tag name and value.
14260 TagsLU.CheckPrereq(self)
14261 for tag in self.op.tags:
14262 objects.TaggableObject.ValidateTag(tag)
14264 def Exec(self, feedback_fn):
14269 for tag in self.op.tags:
14270 self.target.AddTag(tag)
14271 except errors.TagError, err:
14272 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14273 self.cfg.Update(self.target, feedback_fn)
14276 class LUTagsDel(TagsLU):
14277 """Delete a list of tags from a given object.
14282 def CheckPrereq(self):
14283 """Check prerequisites.
14285 This checks that we have the given tag.
14288 TagsLU.CheckPrereq(self)
14289 for tag in self.op.tags:
14290 objects.TaggableObject.ValidateTag(tag)
14291 del_tags = frozenset(self.op.tags)
14292 cur_tags = self.target.GetTags()
14294 diff_tags = del_tags - cur_tags
14295 if diff_tags:
14296 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14297 raise errors.OpPrereqError("Tag(s) %s not found" %
14298 (utils.CommaJoin(diff_names), ),
14299 errors.ECODE_NOENT)
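# Small worked example of the check above (tags invented): deleting tags that
# are not all present fails before anything is modified.
#
#   cur_tags = frozenset(["db", "web"])
#   del_tags = frozenset(["web", "staging"])
#   diff_tags = del_tags - cur_tags    # frozenset(["staging"])
#   # -> OpPrereqError("Tag(s) 'staging' not found")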
14301 def Exec(self, feedback_fn):
14302 """Remove the tag from the object.
14305 for tag in self.op.tags:
14306 self.target.RemoveTag(tag)
14307 self.cfg.Update(self.target, feedback_fn)
14310 class LUTestDelay(NoHooksLU):
14311 """Sleep for a specified amount of time.
14313 This LU sleeps on the master and/or nodes for a specified amount of
14319 def ExpandNames(self):
14320 """Expand names and set required locks.
14322 This expands the node list, if any.
14325 self.needed_locks = {}
14326 if self.op.on_nodes:
14327 # _GetWantedNodes can be used here, but is not always appropriate to use
14328 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14329 # more information.
14330 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14331 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14333 def _TestDelay(self):
14334 """Do the actual sleep.
14337 if self.op.on_master:
14338 if not utils.TestDelay(self.op.duration):
14339 raise errors.OpExecError("Error during master delay test")
14340 if self.op.on_nodes:
14341 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14342 for node, node_result in result.items():
14343 node_result.Raise("Failure during rpc call to node %s" % node)
14345 def Exec(self, feedback_fn):
14346 """Execute the test delay opcode, with the wanted repetitions.
14349 if self.op.repeat == 0:
14350 self._TestDelay()
14351 else:
14352 top_value = self.op.repeat - 1
14353 for i in range(self.op.repeat):
14354 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14355 self._TestDelay()
14358 class LUTestJqueue(NoHooksLU):
14359 """Utility LU to test some aspects of the job queue.
14364 # Must be lower than default timeout for WaitForJobChange to see whether it
14365 # notices changed jobs
14366 _CLIENT_CONNECT_TIMEOUT = 20.0
14367 _CLIENT_CONFIRM_TIMEOUT = 60.0
14370 def _NotifyUsingSocket(cls, cb, errcls):
14371 """Opens a Unix socket and waits for another program to connect.
14374 @param cb: Callback to send socket name to client
14375 @type errcls: class
14376 @param errcls: Exception class to use for errors
14379 # Using a temporary directory as there's no easy way to create temporary
14380 # sockets without writing a custom loop around tempfile.mktemp and
14382 tmpdir = tempfile.mkdtemp()
14384 tmpsock = utils.PathJoin(tmpdir, "sock")
14386 logging.debug("Creating temporary socket at %s", tmpsock)
14387 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14392 # Send details to client
14395 # Wait for client to connect before continuing
14396 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14398 (conn, _) = sock.accept()
14399 except socket.error, err:
14400 raise errcls("Client didn't connect in time (%s)" % err)
14404 # Remove as soon as client is connected
14405 shutil.rmtree(tmpdir)
14407 # Wait for client to close
14410 # pylint: disable=E1101
14411 # Instance of '_socketobject' has no ... member
14412 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14413 conn.recv(1)
14414 except socket.error, err:
14415 raise errcls("Client failed to confirm notification (%s)" % err)
14416 finally:
14417 conn.close()
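# Hedged sketch of the client side of this handshake (not part of the module):
# the test client is expected to connect to the advertised socket path and
# keep the connection open until it has processed the notification. The helper
# name below is hypothetical.
#
#   import socket
#
#   def _AckJqueueNotification(sockname):
#     sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#     sock.connect(sockname)      # unblocks the sock.accept() call above
#     # ... perform whatever verification the test needs ...
#     sock.close()                # lets the server-side wait-for-close finish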
14419 def _SendNotification(self, test, arg, sockname):
14420 """Sends a notification to the client.
14423 @param test: Test name
14424 @param arg: Test argument (depends on test)
14425 @type sockname: string
14426 @param sockname: Socket path
14429 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14431 def _Notify(self, prereq, test, arg):
14432 """Notifies the client of a test.
14435 @param prereq: Whether this is a prereq-phase test
14437 @param test: Test name
14438 @param arg: Test argument (depends on test)
14442 errcls = errors.OpPrereqError
14444 errcls = errors.OpExecError
14446 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14450 def CheckArguments(self):
14451 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14452 self.expandnames_calls = 0
14454 def ExpandNames(self):
14455 checkargs_calls = getattr(self, "checkargs_calls", 0)
14456 if checkargs_calls < 1:
14457 raise errors.ProgrammerError("CheckArguments was not called")
14459 self.expandnames_calls += 1
14461 if self.op.notify_waitlock:
14462 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14464 self.LogInfo("Expanding names")
14466 # Get lock on master node (just to get a lock, not for a particular reason)
14467 self.needed_locks = {
14468 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14471 def Exec(self, feedback_fn):
14472 if self.expandnames_calls < 1:
14473 raise errors.ProgrammerError("ExpandNames was not called")
14475 if self.op.notify_exec:
14476 self._Notify(False, constants.JQT_EXEC, None)
14478 self.LogInfo("Executing")
14480 if self.op.log_messages:
14481 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14482 for idx, msg in enumerate(self.op.log_messages):
14483 self.LogInfo("Sending log message %s", idx + 1)
14484 feedback_fn(constants.JQT_MSGPREFIX + msg)
14485 # Report how many test messages have been sent
14486 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14488 if self.op.fail:
14489 raise errors.OpExecError("Opcode failure was requested")
14491 return True
14494 class IAllocator(object):
14495 """IAllocator framework.
14497 An IAllocator instance has three sets of attributes:
14498 - cfg that is needed to query the cluster
14499 - input data (all members of the _KEYS class attribute are required)
14500 - four buffer attributes (in|out_data|text), that represent the
14501 input (to the external script) in text and data structure format,
14502 and the output from it, again in two formats
14503 - the result variables from the script (success, info, nodes) for
14507 # pylint: disable=R0902
14508 # lots of instance attributes
14510 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14512 self.rpc = rpc_runner
14513 # init buffer variables
14514 self.in_text = self.out_text = self.in_data = self.out_data = None
14515 # init all input fields so that pylint is happy
14517 self.memory = self.disks = self.disk_template = self.spindle_use = None
14518 self.os = self.tags = self.nics = self.vcpus = None
14519 self.hypervisor = None
14520 self.relocate_from = None
14522 self.instances = None
14523 self.evac_mode = None
14524 self.target_groups = []
14526 self.required_nodes = None
14527 # init result fields
14528 self.success = self.info = self.result = None
14530 try:
14531 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14532 except KeyError:
14533 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14534 " IAllocator" % self.mode)
14536 keyset = [n for (n, _) in keydata]
14538 for key in kwargs:
14539 if key not in keyset:
14540 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14541 " IAllocator" % key)
14542 setattr(self, key, kwargs[key])
14544 for key in keyset:
14545 if key not in kwargs:
14546 raise errors.ProgrammerError("Missing input parameter '%s' to"
14547 " IAllocator" % key)
14548 self._BuildInputData(compat.partial(fn, self), keydata)
14550 def _ComputeClusterData(self):
14551 """Compute the generic allocator input data.
14553 This is the data that is independent of the actual operation.
14557 cluster_info = cfg.GetClusterInfo()
14560 "version": constants.IALLOCATOR_VERSION,
14561 "cluster_name": cfg.GetClusterName(),
14562 "cluster_tags": list(cluster_info.GetTags()),
14563 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14564 "ipolicy": cluster_info.ipolicy,
14566 ninfo = cfg.GetAllNodesInfo()
14567 iinfo = cfg.GetAllInstancesInfo().values()
14568 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14571 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14573 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14574 hypervisor_name = self.hypervisor
14575 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14576 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14578 hypervisor_name = cluster_info.primary_hypervisor
14580 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14583 self.rpc.call_all_instances_info(node_list,
14584 cluster_info.enabled_hypervisors)
14586 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14588 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14589 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14590 i_list, config_ndata)
14591 assert len(data["nodes"]) == len(ninfo), \
14592 "Incomplete node data computed"
14594 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14596 self.in_data = data
14599 def _ComputeNodeGroupData(cfg):
14600 """Compute node groups data.
14603 cluster = cfg.GetClusterInfo()
14604 ng = dict((guuid, {
14605 "name": gdata.name,
14606 "alloc_policy": gdata.alloc_policy,
14607 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14609 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14611 return ng
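# Hedged example (UUID and values invented) of the "nodegroups" section this
# helper contributes to the iallocator input:
#
#   {
#     "uuid-of-group1": {
#       "name": "default",
#       "alloc_policy": constants.ALLOC_POLICY_PREFERRED,
#       "ipolicy": {...},   # group ipolicy filled from the cluster defaults
#     },
#   }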

  @staticmethod
  def _ComputeBasicNodeData(cfg, node_cfg):
    """Compute global node data.

    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      "ndparams": cfg.GetNdParams(ninfo),
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
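  # Editorial summary (not in the original source): "free_memory" as reported
  # by the node is further reduced by the gap between each primary instance's
  # configured maximum memory and the memory it currently uses, so the
  # allocator sees the free memory that would remain if every primary
  # instance grew back to its maximum.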

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "spindle_use": beinfo[constants.BE_SPINDLE_USE],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
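  # Editorial note (assumption): "disk_space_total" is derived by
  # _ComputeDiskSize and, depending on the disk template, may include
  # per-disk overhead (e.g. DRBD metadata) on top of the plain sum of the
  # IDISK_SIZE values, so it can exceed the sum of the disk sizes.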

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "spindle_use": self.spindle_use,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }

    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
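  # Editorial note: self.in_text is the serialized (JSON) form of the whole
  # input document; it is what Run() hands to the iallocator backend through
  # the call_iallocator_runner RPC.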

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance, [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
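  # Editorial note: for relocation requests the extra check above ensures
  # that the nodes proposed by the iallocator stay within the node groups of
  # the instance's current primary and of the nodes it is being relocated
  # from; results that cross into other groups are rejected.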

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
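  # Illustrative only: _NodesToGroups({"node1": "uuid-a", "node2": "uuid-a"},
  # {"uuid-a": {"name": "default"}}, ["node1", "node2", "unknown"]) returns
  # ["default"]; unknown nodes are skipped and groups missing from the map
  # fall back to their UUID.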


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       spindle_use=self.op.spindle_use)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)