4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode result.
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
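For example, an LU's Exec method could return (illustrative sketch; the extra
keyword argument is hypothetical)::

  return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
                        other_result="value")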
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op validity.
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these checks separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods need not worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
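  # Acquire all node locks, but in shared mode (illustrative sketch; by
  # default locks are acquired exclusively, see the share_locks note above)
  self.share_locks[locking.LEVEL_NODE] = 1
  self.needed_locks = {
    locking.LEVEL_NODE: locking.ALL_SET,
    }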
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same time.
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are allowed.
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. No nodes should be returned as an
318 empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the "unused argument" and "could
345 # be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done before.
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called from DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
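A typical counterpart in ExpandNames would be (illustrative sketch)::

  self._ExpandAndLockInstance()
  self.needed_locks[locking.LEVEL_NODE] = []
  self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE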
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check whether we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
496 def __init__(self, qfilter, fields, use_locking):
497 """Initializes this class.
500 self.use_locking = use_locking
502 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
504 self.requested_data = self.query.RequestedData()
505 self.names = self.query.RequestedNames()
507 # Sort only if no names were requested
508 self.sort_by_name = not self.names
510 self.do_locking = None
513 def _GetNames(self, lu, all_names, lock_level):
514 """Helper function to determine names asked for in the query.
518 names = lu.owned_locks(lock_level)
522 if self.wanted == locking.ALL_SET:
523 assert not self.names
524 # caller didn't specify names, so ordering is not important
525 return utils.NiceSort(names)
527 # caller specified names and we must keep the same order
529 assert not self.do_locking or lu.glm.is_owned(lock_level)
531 missing = set(self.wanted).difference(names)
533 raise errors.OpExecError("Some items were removed before retrieving"
534 " their data: %s" % missing)
536 # Return expanded names
539 def ExpandNames(self, lu):
540 """Expand names for this query.
542 See L{LogicalUnit.ExpandNames}.
545 raise NotImplementedError()
547 def DeclareLocks(self, lu, level):
548 """Declare locks for this query.
550 See L{LogicalUnit.DeclareLocks}.
553 raise NotImplementedError()
555 def _GetQueryData(self, lu):
556 """Collects all data for this query.
558 @return: Query data object
561 raise NotImplementedError()
563 def NewStyleQuery(self, lu):
564 """Collect data and execute query.
567 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
568 sort_by_name=self.sort_by_name)
570 def OldStyleQuery(self, lu):
571 """Collect data and execute query.
574 return self.query.OldStyleQuery(self._GetQueryData(lu),
575 sort_by_name=self.sort_by_name)
579 """Returns a dict declaring all lock levels shared.
582 return dict.fromkeys(locking.LEVELS, 1)
585 def _MakeLegacyNodeInfo(data):
586 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
588 Converts the data into a single dictionary. This is fine for most use cases,
589 but some require information from more than one volume group or hypervisor.
592 (bootid, (vg_info, ), (hv_info, )) = data
594 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
599 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
600 """Checks if the owned node groups are still correct for an instance.
602 @type cfg: L{config.ConfigWriter}
603 @param cfg: The cluster configuration
604 @type instance_name: string
605 @param instance_name: Instance name
606 @type owned_groups: set or frozenset
607 @param owned_groups: List of currently owned node groups
610 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
612 if not owned_groups.issuperset(inst_groups):
613 raise errors.OpPrereqError("Instance %s's node groups changed since"
614 " locks were acquired, current groups are"
615 " are '%s', owning groups '%s'; retry the"
618 utils.CommaJoin(inst_groups),
619 utils.CommaJoin(owned_groups)),
625 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
626 """Checks if the instances in a node group are still correct.
628 @type cfg: L{config.ConfigWriter}
629 @param cfg: The cluster configuration
630 @type group_uuid: string
631 @param group_uuid: Node group UUID
632 @type owned_instances: set or frozenset
633 @param owned_instances: List of currently owned instances
636 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
637 if owned_instances != wanted_instances:
638 raise errors.OpPrereqError("Instances in node group '%s' changed since"
639 " locks were acquired, wanted '%s', have '%s';"
640 " retry the operation" %
642 utils.CommaJoin(wanted_instances),
643 utils.CommaJoin(owned_instances)),
646 return wanted_instances
649 def _SupportsOob(cfg, node):
650 """Tells if node supports OOB.
652 @type cfg: L{config.ConfigWriter}
653 @param cfg: The cluster configuration
654 @type node: L{objects.Node}
655 @param node: The node
656 @return: The OOB script if supported or an empty string otherwise
659 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
662 def _GetWantedNodes(lu, nodes):
663 """Returns list of checked and expanded node names.
665 @type lu: L{LogicalUnit}
666 @param lu: the logical unit on whose behalf we execute
668 @param nodes: list of node names or None for all nodes
670 @return: the list of nodes, sorted
671 @raise errors.ProgrammerError: if the nodes parameter is wrong type
675 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
677 return utils.NiceSort(lu.cfg.GetNodeList())
680 def _GetWantedInstances(lu, instances):
681 """Returns list of checked and expanded instance names.
683 @type lu: L{LogicalUnit}
684 @param lu: the logical unit on whose behalf we execute
685 @type instances: list
686 @param instances: list of instance names or None for all instances
688 @return: the list of instances, sorted
689 @raise errors.OpPrereqError: if the instances parameter is wrong type
690 @raise errors.OpPrereqError: if any of the passed instances is not found
694 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
696 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
700 def _GetUpdatedParams(old_params, update_dict,
701 use_default=True, use_none=False):
702 """Return the new version of a parameter dictionary.
704 @type old_params: dict
705 @param old_params: old parameters
706 @type update_dict: dict
707 @param update_dict: dict containing new parameter values, or
708 constants.VALUE_DEFAULT to reset the parameter to its default
710 @type use_default: boolean
711 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
712 values as 'to be deleted' values
713 @type use_none: boolean
714 @param use_none: whether to recognise C{None} values as 'to be
717 @return: the new parameter dictionary
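For example (illustrative)::

  _GetUpdatedParams({"a": 1, "b": 2},
                    {"b": constants.VALUE_DEFAULT, "c": 3})
  # -> {"a": 1, "c": 3}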
720 params_copy = copy.deepcopy(old_params)
721 for key, val in update_dict.iteritems():
722 if ((use_default and val == constants.VALUE_DEFAULT) or
723 (use_none and val is None)):
729 params_copy[key] = val
733 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
734 """Return the new version of a instance policy.
736 @param group_policy: whether this policy applies to a group and thus
737 we should support removal of policy entries
740 use_none = use_default = group_policy
741 ipolicy = copy.deepcopy(old_ipolicy)
742 for key, value in new_ipolicy.items():
743 if key not in constants.IPOLICY_ALL_KEYS:
744 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
746 if key in constants.IPOLICY_ISPECS:
747 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
748 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
750 use_default=use_default)
752 if not value or value == [constants.VALUE_DEFAULT]:
756 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
757 " on the cluster'" % key,
760 if key in constants.IPOLICY_PARAMETERS:
761 # FIXME: we assume all such values are float
763 ipolicy[key] = float(value)
764 except (TypeError, ValueError), err:
765 raise errors.OpPrereqError("Invalid value for attribute"
766 " '%s': '%s', error: %s" %
767 (key, value, err), errors.ECODE_INVAL)
769 # FIXME: we assume all others are lists; this should be redone
771 ipolicy[key] = list(value)
773 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
774 except errors.ConfigurationError, err:
775 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
780 def _UpdateAndVerifySubDict(base, updates, type_check):
781 """Updates and verifies a dict with sub dicts of the same type.
783 @param base: The dict with the old data
784 @param updates: The dict with the new data
785 @param type_check: Dict suitable to ForceDictType to verify correct types
786 @return: A new dict with updated and verified values
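For example (illustrative, assuming C{type_check} accepts the keys used)::

  _UpdateAndVerifySubDict({"a": {"x": 1}}, {"a": {"y": 2}}, type_check)
  # -> {"a": {"x": 1, "y": 2}}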
790 new = _GetUpdatedParams(old, value)
791 utils.ForceDictType(new, type_check)
794 ret = copy.deepcopy(base)
795 ret.update(dict((key, fn(base.get(key, {}), value))
796 for key, value in updates.items()))
800 def _MergeAndVerifyHvState(op_input, obj_input):
801 """Combines the hv state from an opcode with the one of the object
803 @param op_input: The input dict from the opcode
804 @param obj_input: The input dict from the objects
805 @return: The verified and updated dict
809 invalid_hvs = set(op_input) - constants.HYPER_TYPES
811 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
812 " %s" % utils.CommaJoin(invalid_hvs),
814 if obj_input is None:
816 type_check = constants.HVSTS_PARAMETER_TYPES
817 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
822 def _MergeAndVerifyDiskState(op_input, obj_input):
823 """Combines the disk state from an opcode with the one of the object
825 @param op_input: The input dict from the opcode
826 @param obj_input: The input dict from the objects
827 @return: The verified and updated dict
830 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
832 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
833 utils.CommaJoin(invalid_dst),
835 type_check = constants.DSS_PARAMETER_TYPES
836 if obj_input is None:
838 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
840 for key, value in op_input.items())
845 def _ReleaseLocks(lu, level, names=None, keep=None):
846 """Releases locks owned by an LU.
848 @type lu: L{LogicalUnit}
849 @param level: Lock level
850 @type names: list or None
851 @param names: Names of locks to release
852 @type keep: list or None
853 @param keep: Names of locks to retain
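For example, to keep only the lock on an instance's primary node
(illustrative sketch; C{instance} is assumed to be in scope)::

  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])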
856 assert not (keep is not None and names is not None), \
857 "Only one of the 'names' and the 'keep' parameters can be given"
859 if names is not None:
860 should_release = names.__contains__
862 should_release = lambda name: name not in keep
864 should_release = None
866 owned = lu.owned_locks(level)
868 # Not owning any lock at this level, do nothing
875 # Determine which locks to release
877 if should_release(name):
882 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
884 # Release just some locks
885 lu.glm.release(level, names=release)
887 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
890 lu.glm.release(level)
892 assert not lu.glm.is_owned(level), "No locks should be owned"
895 def _MapInstanceDisksToNodes(instances):
896 """Creates a map from (node, volume) to instance name.
898 @type instances: list of L{objects.Instance}
899 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
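For example (illustrative node and volume names)::

  _MapInstanceDisksToNodes([inst])
  # -> {("node1.example.com", "xenvg/lv-name"): "inst1.example.com"}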
902 return dict(((node, vol), inst.name)
903 for inst in instances
904 for (node, vols) in inst.MapLVsByNode().items()
908 def _RunPostHook(lu, node_name):
909 """Runs the post-hook for an opcode on a single node.
912 hm = lu.proc.BuildHooksManager(lu)
914 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
916 # pylint: disable=W0702
917 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
920 def _CheckOutputFields(static, dynamic, selected):
921 """Checks whether all selected fields are valid.
923 @type static: L{utils.FieldSet}
924 @param static: static fields set
925 @type dynamic: L{utils.FieldSet}
926 @param dynamic: dynamic fields set
933 delta = f.NonMatching(selected)
935 raise errors.OpPrereqError("Unknown output fields selected: %s"
936 % ",".join(delta), errors.ECODE_INVAL)
939 def _CheckGlobalHvParams(params):
940 """Validates that given hypervisor params are not global ones.
942 This will ensure that instances don't get customised versions of global parameters.
946 used_globals = constants.HVC_GLOBALS.intersection(params)
948 msg = ("The following hypervisor parameters are global and cannot"
949 " be customized at instance level, please modify them at"
950 " cluster level: %s" % utils.CommaJoin(used_globals))
951 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
954 def _CheckNodeOnline(lu, node, msg=None):
955 """Ensure that a given node is online.
957 @param lu: the LU on behalf of which we make the check
958 @param node: the node to check
959 @param msg: if passed, should be a message to replace the default one
960 @raise errors.OpPrereqError: if the node is offline
964 msg = "Can't use offline node"
965 if lu.cfg.GetNodeInfo(node).offline:
966 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
969 def _CheckNodeNotDrained(lu, node):
970 """Ensure that a given node is not drained.
972 @param lu: the LU on behalf of which we make the check
973 @param node: the node to check
974 @raise errors.OpPrereqError: if the node is drained
977 if lu.cfg.GetNodeInfo(node).drained:
978 raise errors.OpPrereqError("Can't use drained node %s" % node,
982 def _CheckNodeVmCapable(lu, node):
983 """Ensure that a given node is vm capable.
985 @param lu: the LU on behalf of which we make the check
986 @param node: the node to check
987 @raise errors.OpPrereqError: if the node is not vm capable
990 if not lu.cfg.GetNodeInfo(node).vm_capable:
991 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
995 def _CheckNodeHasOS(lu, node, os_name, force_variant):
996 """Ensure that a node supports a given OS.
998 @param lu: the LU on behalf of which we make the check
999 @param node: the node to check
1000 @param os_name: the OS to query about
1001 @param force_variant: whether to ignore variant errors
1002 @raise errors.OpPrereqError: if the node does not support the OS
1005 result = lu.rpc.call_os_get(node, os_name)
1006 result.Raise("OS '%s' not in supported OS list for node %s" %
1008 prereq=True, ecode=errors.ECODE_INVAL)
1009 if not force_variant:
1010 _CheckOSVariant(result.payload, os_name)
1013 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1014 """Ensure that a node has the given secondary ip.
1016 @type lu: L{LogicalUnit}
1017 @param lu: the LU on behalf of which we make the check
1019 @param node: the node to check
1020 @type secondary_ip: string
1021 @param secondary_ip: the ip to check
1022 @type prereq: boolean
1023 @param prereq: whether to throw a prerequisite or an execute error
1024 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1025 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1028 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1029 result.Raise("Failure checking secondary ip on node %s" % node,
1030 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1031 if not result.payload:
1032 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1033 " please fix and re-run this command" % secondary_ip)
1035 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1037 raise errors.OpExecError(msg)
1040 def _GetClusterDomainSecret():
1041 """Reads the cluster domain secret.
1044 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1048 def _CheckInstanceState(lu, instance, req_states, msg=None):
1049 """Ensure that an instance is in one of the required states.
1051 @param lu: the LU on behalf of which we make the check
1052 @param instance: the instance to check
1053 @param msg: if passed, should be a message to replace the default one
1054 @raise errors.OpPrereqError: if the instance is not in the required state
1058 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1059 if instance.admin_state not in req_states:
1060 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1061 (instance.name, instance.admin_state, msg),
1064 if constants.ADMINST_UP not in req_states:
1065 pnode = instance.primary_node
1066 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1067 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1068 prereq=True, ecode=errors.ECODE_ENVIRON)
1070 if instance.name in ins_l.payload:
1071 raise errors.OpPrereqError("Instance %s is running, %s" %
1072 (instance.name, msg), errors.ECODE_STATE)
1075 def _ComputeMinMaxSpec(name, ipolicy, value):
1076 """Computes if value is in the desired range.
1078 @param name: name of the parameter for which we perform the check
1079 @param ipolicy: dictionary containing min, max and std values
1080 @param value: actual value that we want to use
1081 @return: None or element not meeting the criteria
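For example (illustrative)::

  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 512},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
    }
  _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 8192)
  # -> an error string (8192 is outside [512, 4096])
  _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 1024)
  # -> None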
1085 if value in [None, constants.VALUE_AUTO]:
1087 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1088 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1089 if value > max_v or min_v > value:
1090 return ("%s value %s is not in range [%s, %s]" %
1091 (name, value, min_v, max_v))
1095 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1096 nic_count, disk_sizes, spindle_use,
1097 _compute_fn=_ComputeMinMaxSpec):
1098 """Verifies ipolicy against provided specs.
1101 @param ipolicy: The ipolicy
1103 @param mem_size: The memory size
1104 @type cpu_count: int
1105 @param cpu_count: Used cpu cores
1106 @type disk_count: int
1107 @param disk_count: Number of disks used
1108 @type nic_count: int
1109 @param nic_count: Number of nics used
1110 @type disk_sizes: list of ints
1111 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1112 @type spindle_use: int
1113 @param spindle_use: The number of spindles this instance uses
1114 @param _compute_fn: The compute function (unittest only)
1115 @return: A list of violations, or an empty list if no violations are found
1118 assert disk_count == len(disk_sizes)
1121 (constants.ISPEC_MEM_SIZE, mem_size),
1122 (constants.ISPEC_CPU_COUNT, cpu_count),
1123 (constants.ISPEC_DISK_COUNT, disk_count),
1124 (constants.ISPEC_NIC_COUNT, nic_count),
1125 (constants.ISPEC_SPINDLE_USE, spindle_use),
1126 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1129 (_compute_fn(name, ipolicy, value)
1130 for (name, value) in test_settings))
1133 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1134 _compute_fn=_ComputeIPolicySpecViolation):
1135 """Compute if instance meets the specs of ipolicy.
1138 @param ipolicy: The ipolicy to verify against
1139 @type instance: L{objects.Instance}
1140 @param instance: The instance to verify
1141 @param _compute_fn: The function to verify ipolicy (unittest only)
1142 @see: L{_ComputeIPolicySpecViolation}
1145 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1146 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1147 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1148 disk_count = len(instance.disks)
1149 disk_sizes = [disk.size for disk in instance.disks]
1150 nic_count = len(instance.nics)
1152 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1153 disk_sizes, spindle_use)
1156 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1157 _compute_fn=_ComputeIPolicySpecViolation):
1158 """Compute if instance specs meets the specs of ipolicy.
1161 @param ipolicy: The ipolicy to verify against
1162 @type instance_spec: dict
1163 @param instance_spec: The instance spec to verify
1164 @param _compute_fn: The function to verify ipolicy (unittest only)
1165 @see: L{_ComputeIPolicySpecViolation}
1168 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1169 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1170 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1171 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1172 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1173 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1175 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1176 disk_sizes, spindle_use)
1179 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1181 _compute_fn=_ComputeIPolicyInstanceViolation):
1182 """Compute if instance meets the specs of the new target group.
1184 @param ipolicy: The ipolicy to verify
1185 @param instance: The instance object to verify
1186 @param current_group: The current group of the instance
1187 @param target_group: The new group of the instance
1188 @param _compute_fn: The function to verify ipolicy (unittest only)
1189 @see: L{_ComputeIPolicySpecViolation}
1192 if current_group == target_group:
1195 return _compute_fn(ipolicy, instance)
1198 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1199 _compute_fn=_ComputeIPolicyNodeViolation):
1200 """Checks that the target node is correct in terms of instance policy.
1202 @param ipolicy: The ipolicy to verify
1203 @param instance: The instance object to verify
1204 @param node: The new node to relocate
1205 @param ignore: Ignore violations of the ipolicy
1206 @param _compute_fn: The function to verify ipolicy (unittest only)
1207 @see: L{_ComputeIPolicySpecViolation}
1210 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1211 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1214 msg = ("Instance does not meet target node group's (%s) instance"
1215 " policy: %s") % (node.group, utils.CommaJoin(res))
1219 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1222 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1223 """Computes a set of any instances that would violate the new ipolicy.
1225 @param old_ipolicy: The current (still in-place) ipolicy
1226 @param new_ipolicy: The new (to become) ipolicy
1227 @param instances: List of instances to verify
1228 @return: A set of instances which violate the new ipolicy but did not before
1231 return (_ComputeViolatingInstances(old_ipolicy, instances) -
1232 _ComputeViolatingInstances(new_ipolicy, instances))
1235 def _ExpandItemName(fn, name, kind):
1236 """Expand an item name.
1238 @param fn: the function to use for expansion
1239 @param name: requested item name
1240 @param kind: text description ('Node' or 'Instance')
1241 @return: the resolved (full) name
1242 @raise errors.OpPrereqError: if the item is not found
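For example (illustrative)::

  _ExpandItemName(cfg.ExpandNodeName, "node1", "Node")
  # -> "node1.example.com" if known, otherwise errors.OpPrereqError is raised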
1245 full_name = fn(name)
1246 if full_name is None:
1247 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1252 def _ExpandNodeName(cfg, name):
1253 """Wrapper over L{_ExpandItemName} for nodes."""
1254 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1257 def _ExpandInstanceName(cfg, name):
1258 """Wrapper over L{_ExpandItemName} for instance."""
1259 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1262 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1263 minmem, maxmem, vcpus, nics, disk_template, disks,
1264 bep, hvp, hypervisor_name, tags):
1265 """Builds instance related env variables for hooks
1267 This builds the hook environment from individual variables.
1270 @param name: the name of the instance
1271 @type primary_node: string
1272 @param primary_node: the name of the instance's primary node
1273 @type secondary_nodes: list
1274 @param secondary_nodes: list of secondary nodes as strings
1275 @type os_type: string
1276 @param os_type: the name of the instance's OS
1277 @type status: string
1278 @param status: the desired status of the instance
1279 @type minmem: string
1280 @param minmem: the minimum memory size of the instance
1281 @type maxmem: string
1282 @param maxmem: the maximum memory size of the instance
1284 @param vcpus: the count of VCPUs the instance has
1286 @param nics: list of tuples (ip, mac, mode, link) representing
1287 the NICs the instance has
1288 @type disk_template: string
1289 @param disk_template: the disk template of the instance
1291 @param disks: the list of (size, mode) pairs
1293 @param bep: the backend parameters for the instance
1295 @param hvp: the hypervisor parameters for the instance
1296 @type hypervisor_name: string
1297 @param hypervisor_name: the hypervisor for the instance
1299 @param tags: list of instance tags as strings
1301 @return: the hook environment for this instance
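The resulting dictionary contains, among others, entries of the form
(illustrative values)::

  {
    "INSTANCE_NAME": "inst1.example.com",
    "INSTANCE_PRIMARY": "node1.example.com",
    "INSTANCE_NIC_COUNT": 1,
    "INSTANCE_NIC0_MAC": "aa:00:00:00:00:01",
    "INSTANCE_DISK_COUNT": 1,
    "INSTANCE_DISK0_SIZE": 10240,
  }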
1306 "INSTANCE_NAME": name,
1307 "INSTANCE_PRIMARY": primary_node,
1308 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1309 "INSTANCE_OS_TYPE": os_type,
1310 "INSTANCE_STATUS": status,
1311 "INSTANCE_MINMEM": minmem,
1312 "INSTANCE_MAXMEM": maxmem,
1313 # TODO(2.7) remove deprecated "memory" value
1314 "INSTANCE_MEMORY": maxmem,
1315 "INSTANCE_VCPUS": vcpus,
1316 "INSTANCE_DISK_TEMPLATE": disk_template,
1317 "INSTANCE_HYPERVISOR": hypervisor_name,
1320 nic_count = len(nics)
1321 for idx, (ip, mac, mode, link) in enumerate(nics):
1324 env["INSTANCE_NIC%d_IP" % idx] = ip
1325 env["INSTANCE_NIC%d_MAC" % idx] = mac
1326 env["INSTANCE_NIC%d_MODE" % idx] = mode
1327 env["INSTANCE_NIC%d_LINK" % idx] = link
1328 if mode == constants.NIC_MODE_BRIDGED:
1329 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1333 env["INSTANCE_NIC_COUNT"] = nic_count
1336 disk_count = len(disks)
1337 for idx, (size, mode) in enumerate(disks):
1338 env["INSTANCE_DISK%d_SIZE" % idx] = size
1339 env["INSTANCE_DISK%d_MODE" % idx] = mode
1343 env["INSTANCE_DISK_COUNT"] = disk_count
1348 env["INSTANCE_TAGS"] = " ".join(tags)
1350 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1351 for key, value in source.items():
1352 env["INSTANCE_%s_%s" % (kind, key)] = value
1357 def _NICListToTuple(lu, nics):
1358 """Build a list of nic information tuples.
1360 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1361 value in LUInstanceQueryData.
1363 @type lu: L{LogicalUnit}
1364 @param lu: the logical unit on whose behalf we execute
1365 @type nics: list of L{objects.NIC}
1366 @param nics: list of nics to convert to hooks tuples
1370 cluster = lu.cfg.GetClusterInfo()
1374 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1375 mode = filled_params[constants.NIC_MODE]
1376 link = filled_params[constants.NIC_LINK]
1377 hooks_nics.append((ip, mac, mode, link))
1381 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1382 """Builds instance related env variables for hooks from an object.
1384 @type lu: L{LogicalUnit}
1385 @param lu: the logical unit on whose behalf we execute
1386 @type instance: L{objects.Instance}
1387 @param instance: the instance for which we should build the environment
1389 @type override: dict
1390 @param override: dictionary with key/values that will override
1393 @return: the hook environment dictionary
1396 cluster = lu.cfg.GetClusterInfo()
1397 bep = cluster.FillBE(instance)
1398 hvp = cluster.FillHV(instance)
1400 "name": instance.name,
1401 "primary_node": instance.primary_node,
1402 "secondary_nodes": instance.secondary_nodes,
1403 "os_type": instance.os,
1404 "status": instance.admin_state,
1405 "maxmem": bep[constants.BE_MAXMEM],
1406 "minmem": bep[constants.BE_MINMEM],
1407 "vcpus": bep[constants.BE_VCPUS],
1408 "nics": _NICListToTuple(lu, instance.nics),
1409 "disk_template": instance.disk_template,
1410 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1413 "hypervisor_name": instance.hypervisor,
1414 "tags": instance.tags,
1417 args.update(override)
1418 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1421 def _AdjustCandidatePool(lu, exceptions):
1422 """Adjust the candidate pool after node operations.
1425 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1427 lu.LogInfo("Promoted nodes to master candidate role: %s",
1428 utils.CommaJoin(node.name for node in mod_list))
1429 for name in mod_list:
1430 lu.context.ReaddNode(name)
1431 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1433 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1437 def _DecideSelfPromotion(lu, exceptions=None):
1438 """Decide whether I should promote myself as a master candidate.
1441 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1442 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1443 # the new node will increase mc_max with one, so:
1444 mc_should = min(mc_should + 1, cp_size)
1445 return mc_now < mc_should
1448 def _CalculateGroupIPolicy(cluster, group):
1449 """Calculate instance policy for group.
1452 return cluster.SimpleFillIPolicy(group.ipolicy)
1455 def _ComputeViolatingInstances(ipolicy, instances):
1456 """Computes a set of instances who violates given ipolicy.
1458 @param ipolicy: The ipolicy to verify
1459 @type instances: list of L{objects.Instance}
1460 @param instances: List of instances to verify
1461 @return: A frozenset of instance names violating the ipolicy
1464 return frozenset([inst.name for inst in instances
1465 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1468 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1469 """Check that the brigdes needed by a list of nics exist.
1472 cluster = lu.cfg.GetClusterInfo()
1473 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1474 brlist = [params[constants.NIC_LINK] for params in paramslist
1475 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1477 result = lu.rpc.call_bridges_exist(target_node, brlist)
1478 result.Raise("Error checking bridges on destination node '%s'" %
1479 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1482 def _CheckInstanceBridgesExist(lu, instance, node=None):
1483 """Check that the brigdes needed by an instance exist.
1487 node = instance.primary_node
1488 _CheckNicsBridgesExist(lu, instance.nics, node)
1491 def _CheckOSVariant(os_obj, name):
1492 """Check whether an OS name conforms to the os variants specification.
1494 @type os_obj: L{objects.OS}
1495 @param os_obj: OS object to check
1497 @param name: OS name passed by the user, to check for validity
1500 variant = objects.OS.GetVariant(name)
1501 if not os_obj.supported_variants:
1503 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1504 " passed)" % (os_obj.name, variant),
1508 raise errors.OpPrereqError("OS name must include a variant",
1511 if variant not in os_obj.supported_variants:
1512 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1515 def _GetNodeInstancesInner(cfg, fn):
1516 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1519 def _GetNodeInstances(cfg, node_name):
1520 """Returns a list of all primary and secondary instances on a node.
1524 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1527 def _GetNodePrimaryInstances(cfg, node_name):
1528 """Returns primary instances on a node.
1531 return _GetNodeInstancesInner(cfg,
1532 lambda inst: node_name == inst.primary_node)
1535 def _GetNodeSecondaryInstances(cfg, node_name):
1536 """Returns secondary instances on a node.
1539 return _GetNodeInstancesInner(cfg,
1540 lambda inst: node_name in inst.secondary_nodes)
1543 def _GetStorageTypeArgs(cfg, storage_type):
1544 """Returns the arguments for a storage type.
1547 # Special case for file storage
1548 if storage_type == constants.ST_FILE:
1549 # storage.FileStorage wants a list of storage directories
1550 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1555 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1558 for dev in instance.disks:
1559 cfg.SetDiskID(dev, node_name)
1561 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1562 result.Raise("Failed to get disk status from node %s" % node_name,
1563 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1565 for idx, bdev_status in enumerate(result.payload):
1566 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1572 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1573 """Check the sanity of iallocator and node arguments and use the
1574 cluster-wide iallocator if appropriate.
1576 Check that at most one of (iallocator, node) is specified. If none is
1577 specified, then the LU's opcode's iallocator slot is filled with the
1578 cluster-wide default iallocator.
1580 @type iallocator_slot: string
1581 @param iallocator_slot: the name of the opcode iallocator slot
1582 @type node_slot: string
1583 @param node_slot: the name of the opcode target node slot
1586 node = getattr(lu.op, node_slot, None)
1587 iallocator = getattr(lu.op, iallocator_slot, None)
1589 if node is not None and iallocator is not None:
1590 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1592 elif node is None and iallocator is None:
1593 default_iallocator = lu.cfg.GetDefaultIAllocator()
1594 if default_iallocator:
1595 setattr(lu.op, iallocator_slot, default_iallocator)
1597 raise errors.OpPrereqError("No iallocator or node given and no"
1598 " cluster-wide default iallocator found;"
1599 " please specify either an iallocator or a"
1600 " node, or set a cluster-wide default"
1604 def _GetDefaultIAllocator(cfg, iallocator):
1605 """Decides on which iallocator to use.
1607 @type cfg: L{config.ConfigWriter}
1608 @param cfg: Cluster configuration object
1609 @type iallocator: string or None
1610 @param iallocator: Iallocator specified in opcode
1612 @return: Iallocator name
1616 # Use default iallocator
1617 iallocator = cfg.GetDefaultIAllocator()
1620 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1621 " opcode nor as a cluster-wide default",
1627 class LUClusterPostInit(LogicalUnit):
1628 """Logical unit for running hooks after cluster initialization.
1631 HPATH = "cluster-init"
1632 HTYPE = constants.HTYPE_CLUSTER
1634 def BuildHooksEnv(self):
1639 "OP_TARGET": self.cfg.GetClusterName(),
1642 def BuildHooksNodes(self):
1643 """Build hooks nodes.
1646 return ([], [self.cfg.GetMasterNode()])
1648 def Exec(self, feedback_fn):
1655 class LUClusterDestroy(LogicalUnit):
1656 """Logical unit for destroying the cluster.
1659 HPATH = "cluster-destroy"
1660 HTYPE = constants.HTYPE_CLUSTER
1662 def BuildHooksEnv(self):
1667 "OP_TARGET": self.cfg.GetClusterName(),
1670 def BuildHooksNodes(self):
1671 """Build hooks nodes.
1676 def CheckPrereq(self):
1677 """Check prerequisites.
1679 This checks whether the cluster is empty.
1681 Any errors are signaled by raising errors.OpPrereqError.
1684 master = self.cfg.GetMasterNode()
1686 nodelist = self.cfg.GetNodeList()
1687 if len(nodelist) != 1 or nodelist[0] != master:
1688 raise errors.OpPrereqError("There are still %d node(s) in"
1689 " this cluster." % (len(nodelist) - 1),
1691 instancelist = self.cfg.GetInstanceList()
1693 raise errors.OpPrereqError("There are still %d instance(s) in"
1694 " this cluster." % len(instancelist),
1697 def Exec(self, feedback_fn):
1698 """Destroys the cluster.
1701 master_params = self.cfg.GetMasterNetworkParameters()
1703 # Run post hooks on master node before it's removed
1704 _RunPostHook(self, master_params.name)
1706 ems = self.cfg.GetUseExternalMipScript()
1707 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1710 self.LogWarning("Error disabling the master IP address: %s",
1713 return master_params.name
1716 def _VerifyCertificate(filename):
1717 """Verifies a certificate for L{LUClusterVerifyConfig}.
1719 @type filename: string
1720 @param filename: Path to PEM file
1724 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1725 utils.ReadFile(filename))
1726 except Exception, err: # pylint: disable=W0703
1727 return (LUClusterVerifyConfig.ETYPE_ERROR,
1728 "Failed to load X509 certificate %s: %s" % (filename, err))
1731 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1732 constants.SSL_CERT_EXPIRATION_ERROR)
1735 fnamemsg = "While verifying %s: %s" % (filename, msg)
1740 return (None, fnamemsg)
1741 elif errcode == utils.CERT_WARNING:
1742 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1743 elif errcode == utils.CERT_ERROR:
1744 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1746 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1749 def _GetAllHypervisorParameters(cluster, instances):
1750 """Compute the set of all hypervisor parameters.
1752 @type cluster: L{objects.Cluster}
1753 @param cluster: the cluster object
1754 @param instances: list of L{objects.Instance}
1755 @param instances: additional instances from which to obtain parameters
1756 @rtype: list of (origin, hypervisor, parameters)
1757 @return: a list with all parameters found, indicating the hypervisor they
1758 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1763 for hv_name in cluster.enabled_hypervisors:
1764 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1766 for os_name, os_hvp in cluster.os_hvp.items():
1767 for hv_name, hv_params in os_hvp.items():
1769 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1770 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1772 # TODO: collapse identical parameter values in a single one
1773 for instance in instances:
1774 if instance.hvparams:
1775 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1776 cluster.FillHV(instance)))
1781 class _VerifyErrors(object):
1782 """Mix-in for cluster/group verify LUs.
1784 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1785 self.op and self._feedback_fn to be available.)
1789 ETYPE_FIELD = "code"
1790 ETYPE_ERROR = "ERROR"
1791 ETYPE_WARNING = "WARNING"
1793 def _Error(self, ecode, item, msg, *args, **kwargs):
1794 """Format an error message.
1796 Based on the opcode's error_codes parameter, either format a
1797 parseable error code, or a simpler error string.
1799 This must be called only from Exec and functions called from Exec.
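Depending on the opcode's error_codes parameter, the reported line looks
roughly like (illustrative message)::

  ERROR:ECLUSTERCFG:cluster::configuration verification failed

or, in the simpler format::

  ERROR: cluster: configuration verification failed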
1802 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1803 itype, etxt, _ = ecode
1804 # first complete the msg
1807 # then format the whole message
1808 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1809 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1815 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1816 # and finally report it via the feedback_fn
1817 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1819 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1820 """Log an error message if the passed condition is True.
1824 or self.op.debug_simulate_errors) # pylint: disable=E1101
1826 # If the error code is in the list of ignored errors, demote the error to a warning
1828 (_, etxt, _) = ecode
1829 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1830 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1833 self._Error(ecode, *args, **kwargs)
1835 # do not mark the operation as failed for WARN cases only
1836 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1837 self.bad = self.bad or cond
1840 class LUClusterVerify(NoHooksLU):
1841 """Submits all jobs necessary to verify the cluster.
1846 def ExpandNames(self):
1847 self.needed_locks = {}
1849 def Exec(self, feedback_fn):
1852 if self.op.group_name:
1853 groups = [self.op.group_name]
1854 depends_fn = lambda: None
1856 groups = self.cfg.GetNodeGroupList()
1858 # Verify global configuration
1860 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1863 # Always depend on global verification
1864 depends_fn = lambda: [(-len(jobs), [])]
1866 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1867 ignore_errors=self.op.ignore_errors,
1868 depends=depends_fn())]
1869 for group in groups)
1871 # Fix up all parameters
1872 for op in itertools.chain(*jobs): # pylint: disable=W0142
1873 op.debug_simulate_errors = self.op.debug_simulate_errors
1874 op.verbose = self.op.verbose
1875 op.error_codes = self.op.error_codes
1877 op.skip_checks = self.op.skip_checks
1878 except AttributeError:
1879 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1881 return ResultWithJobs(jobs)
1884 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1885 """Verifies the cluster config.
1890 def _VerifyHVP(self, hvp_data):
1891 """Verifies locally the syntax of the hypervisor parameters.
1894 for item, hv_name, hv_params in hvp_data:
1895 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1898 hv_class = hypervisor.GetHypervisor(hv_name)
1899 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1900 hv_class.CheckParameterSyntax(hv_params)
1901 except errors.GenericError, err:
1902 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1904 def ExpandNames(self):
1905 # Information can be safely retrieved as the BGL is acquired in exclusive mode
1907 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1908 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1909 self.all_node_info = self.cfg.GetAllNodesInfo()
1910 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1911 self.needed_locks = {}
1913 def Exec(self, feedback_fn):
1914 """Verify integrity of cluster, performing various test on nodes.
1918 self._feedback_fn = feedback_fn
1920 feedback_fn("* Verifying cluster config")
1922 for msg in self.cfg.VerifyConfig():
1923 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1925 feedback_fn("* Verifying cluster certificate files")
1927 for cert_filename in constants.ALL_CERT_FILES:
1928 (errcode, msg) = _VerifyCertificate(cert_filename)
1929 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1931 feedback_fn("* Verifying hypervisor parameters")
1933 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1934 self.all_inst_info.values()))
1936 feedback_fn("* Verifying all nodes belong to an existing group")
1938 # We do this verification here because, should this bogus circumstance
1939 # occur, it would never be caught by VerifyGroup, which only acts on
1940 # nodes/instances reachable from existing node groups.
1942 dangling_nodes = set(node.name for node in self.all_node_info.values()
1943 if node.group not in self.all_group_info)
1945 dangling_instances = {}
1946 no_node_instances = []
1948 for inst in self.all_inst_info.values():
1949 if inst.primary_node in dangling_nodes:
1950 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1951 elif inst.primary_node not in self.all_node_info:
1952 no_node_instances.append(inst.name)
1957 utils.CommaJoin(dangling_instances.get(node.name,
1959 for node in dangling_nodes]
1961 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1963 "the following nodes (and their instances) belong to a non"
1964 " existing group: %s", utils.CommaJoin(pretty_dangling))
1966 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1968 "the following instances have a non-existing primary-node:"
1969 " %s", utils.CommaJoin(no_node_instances))
1974 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1975 """Verifies the status of a node group.
1978 HPATH = "cluster-verify"
1979 HTYPE = constants.HTYPE_CLUSTER
1982 _HOOKS_INDENT_RE = re.compile("^", re.M)
1984 class NodeImage(object):
1985 """A class representing the logical and physical status of a node.
1988 @ivar name: the node name to which this object refers
1989 @ivar volumes: a structure as returned from
1990 L{ganeti.backend.GetVolumeList} (runtime)
1991 @ivar instances: a list of running instances (runtime)
1992 @ivar pinst: list of configured primary instances (config)
1993 @ivar sinst: list of configured secondary instances (config)
1994 @ivar sbp: dictionary of {primary-node: list of instances} for all
1995 instances for which this node is secondary (config)
1996 @ivar mfree: free memory, as reported by hypervisor (runtime)
1997 @ivar dfree: free disk, as reported by the node (runtime)
1998 @ivar offline: the offline status (config)
1999 @type rpc_fail: boolean
2000 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2001 not whether the individual keys were correct) (runtime)
2002 @type lvm_fail: boolean
2003 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2004 @type hyp_fail: boolean
2005 @ivar hyp_fail: whether the RPC call didn't return the instance list
2006 @type ghost: boolean
2007 @ivar ghost: whether this is a known node or not (config)
2008 @type os_fail: boolean
2009 @ivar os_fail: whether the RPC call didn't return valid OS data
2011 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2012 @type vm_capable: boolean
2013 @ivar vm_capable: whether the node can host instances
2016 def __init__(self, offline=False, name=None, vm_capable=True):
2025 self.offline = offline
2026 self.vm_capable = vm_capable
2027 self.rpc_fail = False
2028 self.lvm_fail = False
2029 self.hyp_fail = False
2031 self.os_fail = False
2034 def ExpandNames(self):
2035 # This raises errors.OpPrereqError on its own:
2036 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2038 # Get instances in node group; this is unsafe and needs verification later
2040 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2042 self.needed_locks = {
2043 locking.LEVEL_INSTANCE: inst_names,
2044 locking.LEVEL_NODEGROUP: [self.group_uuid],
2045 locking.LEVEL_NODE: [],
2048 self.share_locks = _ShareAll()
2050 def DeclareLocks(self, level):
2051 if level == locking.LEVEL_NODE:
2052 # Get members of node group; this is unsafe and needs verification later
2053 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2055 all_inst_info = self.cfg.GetAllInstancesInfo()
2057 # In Exec(), we warn about mirrored instances that have primary and
2058 # secondary living in separate node groups. To fully verify that
2059 # volumes for these instances are healthy, we will need to do an
2060 # extra call to their secondaries. We ensure here those nodes will
2062 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2063 # Important: access only the instances whose lock is owned
2064 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2065 nodes.update(all_inst_info[inst].secondary_nodes)
2067 self.needed_locks[locking.LEVEL_NODE] = nodes
2069 def CheckPrereq(self):
2070 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2071 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2073 group_nodes = set(self.group_info.members)
2075 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2078 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2080 unlocked_instances = \
2081 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2084 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2085 utils.CommaJoin(unlocked_nodes),
2088 if unlocked_instances:
2089 raise errors.OpPrereqError("Missing lock for instances: %s" %
2090 utils.CommaJoin(unlocked_instances),
2093 self.all_node_info = self.cfg.GetAllNodesInfo()
2094 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2096 self.my_node_names = utils.NiceSort(group_nodes)
2097 self.my_inst_names = utils.NiceSort(group_instances)
2099 self.my_node_info = dict((name, self.all_node_info[name])
2100 for name in self.my_node_names)
2102 self.my_inst_info = dict((name, self.all_inst_info[name])
2103 for name in self.my_inst_names)
2105 # We detect here the nodes that will need the extra RPC calls for verifying
2106 # split LV volumes; they should be locked.
2107 extra_lv_nodes = set()
2109 for inst in self.my_inst_info.values():
2110 if inst.disk_template in constants.DTS_INT_MIRROR:
2111 for nname in inst.all_nodes:
2112 if self.all_node_info[nname].group != self.group_uuid:
2113 extra_lv_nodes.add(nname)
2115 unlocked_lv_nodes = \
2116 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2118 if unlocked_lv_nodes:
2119 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2120 utils.CommaJoin(unlocked_lv_nodes),
2122 self.extra_lv_nodes = list(extra_lv_nodes)
2124 def _VerifyNode(self, ninfo, nresult):
2125 """Perform some basic validation on data returned from a node.
2127 - check the result data structure is well formed and has all the
2129 - check ganeti version
2131 @type ninfo: L{objects.Node}
2132 @param ninfo: the node to check
2133 @param nresult: the results from the node
2135 @return: whether overall this call was successful (and we can expect
2136 reasonable values in the response)
2140 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2142 # main result, nresult should be a non-empty dict
2143 test = not nresult or not isinstance(nresult, dict)
2144 _ErrorIf(test, constants.CV_ENODERPC, node,
2145 "unable to verify node: no data returned")
2149 # compares ganeti version
2150 local_version = constants.PROTOCOL_VERSION
2151 remote_version = nresult.get("version", None)
2152 test = not (remote_version and
2153 isinstance(remote_version, (list, tuple)) and
2154 len(remote_version) == 2)
2155 _ErrorIf(test, constants.CV_ENODERPC, node,
2156 "connection to node returned invalid data")
2160 test = local_version != remote_version[0]
2161 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2162 "incompatible protocol versions: master %s,"
2163 " node %s", local_version, remote_version[0])
2167 # node seems compatible, we can actually try to look into its results
2169 # full package version
2170 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2171 constants.CV_ENODEVERSION, node,
2172 "software version mismatch: master %s, node %s",
2173 constants.RELEASE_VERSION, remote_version[1],
2174 code=self.ETYPE_WARNING)
2176 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2177 if ninfo.vm_capable and isinstance(hyp_result, dict):
2178 for hv_name, hv_result in hyp_result.iteritems():
2179 test = hv_result is not None
2180 _ErrorIf(test, constants.CV_ENODEHV, node,
2181 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2183 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2184 if ninfo.vm_capable and isinstance(hvp_result, list):
2185 for item, hv_name, hv_result in hvp_result:
2186 _ErrorIf(True, constants.CV_ENODEHV, node,
2187 "hypervisor %s parameter verify failure (source %s): %s",
2188 hv_name, item, hv_result)
2190 test = nresult.get(constants.NV_NODESETUP,
2191 ["Missing NODESETUP results"])
2192 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2197 def _VerifyNodeTime(self, ninfo, nresult,
2198 nvinfo_starttime, nvinfo_endtime):
2199 """Check the node time.
2201 @type ninfo: L{objects.Node}
2202 @param ninfo: the node to check
2203 @param nresult: the remote results for the node
2204 @param nvinfo_starttime: the start time of the RPC call
2205 @param nvinfo_endtime: the end time of the RPC call
2209 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2211 ntime = nresult.get(constants.NV_TIME, None)
2213 ntime_merged = utils.MergeTime(ntime)
2214 except (ValueError, TypeError):
2215 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2218 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2219 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2220 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2221 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2225 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2226 "Node time diverges by at least %s from master node time",
2229 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2230 """Check the node LVM results.
2232 @type ninfo: L{objects.Node}
2233 @param ninfo: the node to check
2234 @param nresult: the remote results for the node
2235 @param vg_name: the configured VG name
2242 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2244 # checks vg existence and size > 20G
2245 vglist = nresult.get(constants.NV_VGLIST, None)
2247 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2249 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2250 constants.MIN_VG_SIZE)
2251 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2254 pvlist = nresult.get(constants.NV_PVLIST, None)
2255 test = pvlist is None
2256 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2258 # check that ':' is not present in PV names, since it's a
2259 # special character for lvcreate (denotes the range of PEs to
2261 for _, pvname, owner_vg in pvlist:
2262 test = ":" in pvname
2263 _ErrorIf(test, constants.CV_ENODELVM, node,
2264 "Invalid character ':' in PV '%s' of VG '%s'",
2267 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2268 """Check the node bridges.
2270 @type ninfo: L{objects.Node}
2271 @param ninfo: the node to check
2272 @param nresult: the remote results for the node
2273 @param bridges: the expected list of bridges
2280 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2282 missing = nresult.get(constants.NV_BRIDGES, None)
2283 test = not isinstance(missing, list)
2284 _ErrorIf(test, constants.CV_ENODENET, node,
2285 "did not return valid bridge information")
2287 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2288 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2290 def _VerifyNodeUserScripts(self, ninfo, nresult):
2291 """Check the results of user scripts presence and executability on the node
2293 @type ninfo: L{objects.Node}
2294 @param ninfo: the node to check
2295 @param nresult: the remote results for the node
2300 test = not constants.NV_USERSCRIPTS in nresult
2301 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2302 "did not return user scripts information")
2304 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2306 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2307 "user scripts not present or not executable: %s" %
2308 utils.CommaJoin(sorted(broken_scripts)))
2310 def _VerifyNodeNetwork(self, ninfo, nresult):
2311 """Check the node network connectivity results.
2313 @type ninfo: L{objects.Node}
2314 @param ninfo: the node to check
2315 @param nresult: the remote results for the node
2319 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2321 test = constants.NV_NODELIST not in nresult
2322 _ErrorIf(test, constants.CV_ENODESSH, node,
2323 "node hasn't returned node ssh connectivity data")
2325 if nresult[constants.NV_NODELIST]:
2326 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2327 _ErrorIf(True, constants.CV_ENODESSH, node,
2328 "ssh communication with node '%s': %s", a_node, a_msg)
2330 test = constants.NV_NODENETTEST not in nresult
2331 _ErrorIf(test, constants.CV_ENODENET, node,
2332 "node hasn't returned node tcp connectivity data")
2334 if nresult[constants.NV_NODENETTEST]:
2335 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2337 _ErrorIf(True, constants.CV_ENODENET, node,
2338 "tcp communication with node '%s': %s",
2339 anode, nresult[constants.NV_NODENETTEST][anode])
2341 test = constants.NV_MASTERIP not in nresult
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "node hasn't returned node master IP reachability data")
2345 if not nresult[constants.NV_MASTERIP]:
2346 if node == self.master_node:
2347 msg = "the master node cannot reach the master IP (not configured?)"
2349 msg = "cannot reach the master IP"
2350 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2352 def _VerifyInstance(self, instance, instanceconfig, node_image,
2354 """Verify an instance.
2356 This function checks to see if the required block devices are
2357 available on the instance's node.
2360 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2361 node_current = instanceconfig.primary_node
2363 node_vol_should = {}
2364 instanceconfig.MapLVsByNode(node_vol_should)
2366 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2367 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2368 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2370 for node in node_vol_should:
2371 n_img = node_image[node]
2372 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2373 # ignore missing volumes on offline or broken nodes
2375 for volume in node_vol_should[node]:
2376 test = volume not in n_img.volumes
2377 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2378 "volume %s missing on node %s", volume, node)
2380 if instanceconfig.admin_state == constants.ADMINST_UP:
2381 pri_img = node_image[node_current]
2382 test = instance not in pri_img.instances and not pri_img.offline
2383 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2384 "instance not running on its primary node %s",
2387 diskdata = [(nname, success, status, idx)
2388 for (nname, disks) in diskstatus.items()
2389 for idx, (success, status) in enumerate(disks)]
2391 for nname, success, bdev_status, idx in diskdata:
2392 # the 'ghost node' construction in Exec() ensures that we have a
2394 snode = node_image[nname]
2395 bad_snode = snode.ghost or snode.offline
2396 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2397 not success and not bad_snode,
2398 constants.CV_EINSTANCEFAULTYDISK, instance,
2399 "couldn't retrieve status for disk/%s on %s: %s",
2400 idx, nname, bdev_status)
2401 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2402 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2403 constants.CV_EINSTANCEFAULTYDISK, instance,
2404 "disk/%s on %s is faulty", idx, nname)
2406 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2407 """Verify if there are any unknown volumes in the cluster.
2409 The .os, .swap and backup volumes are ignored. All other volumes are
2410 reported as unknown.
2412 @type reserved: L{ganeti.utils.FieldSet}
2413 @param reserved: a FieldSet of reserved volume names
2416 for node, n_img in node_image.items():
2417 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2418 self.all_node_info[node].group != self.group_uuid):
2419 # skip non-healthy nodes
2421 for volume in n_img.volumes:
2422 test = ((node not in node_vol_should or
2423 volume not in node_vol_should[node]) and
2424 not reserved.Matches(volume))
2425 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2426 "volume %s is unknown", volume)
2428 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2429 """Verify N+1 Memory Resilience.
2431 Check that if one single node dies we can still start all the
2432 instances it was primary for.
2435 cluster_info = self.cfg.GetClusterInfo()
2436 for node, n_img in node_image.items():
2437 # This code checks that every node which is now listed as
2438 # secondary has enough memory to host all instances it is
2439 # supposed to, should a single other node in the cluster fail.
2440 # FIXME: not ready for failover to an arbitrary node
2441 # FIXME: does not support file-backed instances
2442 # WARNING: we currently take into account down instances as well
2443 # as up ones, considering that even if they're down someone
2444 # might want to start them even in the event of a node failure.
2445 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2446 # we're skipping nodes marked offline and nodes in other groups from
2447 # the N+1 warning, since most likely we don't have good memory
2448 # information from them; we already list instances living on such
2449 # nodes, and that's enough warning
2451 #TODO(dynmem): also consider ballooning out other instances
2452 for prinode, instances in n_img.sbp.items():
2454 for instance in instances:
2455 bep = cluster_info.FillBE(instance_cfg[instance])
2456 if bep[constants.BE_AUTO_BALANCE]:
2457 needed_mem += bep[constants.BE_MINMEM]
2458 test = n_img.mfree < needed_mem
2459 self._ErrorIf(test, constants.CV_ENODEN1, node,
2460 "not enough memory to accomodate instance failovers"
2461 " should node %s fail (%dMiB needed, %dMiB available)",
2462 prinode, needed_mem, n_img.mfree)
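# Worked example (illustrative): if node2 is secondary for inst1 and inst2,
# both with primary node1, both auto-balanced and with BE_MINMEM of
# 1024 MiB each, then needed_mem for prinode node1 is 2048 MiB; an error
# is flagged once node2's reported mfree drops below that, since a
# failover from node1 could then not be accommodated.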
2465 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2466 (files_all, files_opt, files_mc, files_vm)):
2467 """Verifies file checksums collected from all nodes.
2469 @param errorif: Callback for reporting errors
2470 @param nodeinfo: List of L{objects.Node} objects
2471 @param master_node: Name of master node
2472 @param all_nvinfo: RPC results
2475 # Define functions determining which nodes to consider for a file
2478 (files_mc, lambda node: (node.master_candidate or
2479 node.name == master_node)),
2480 (files_vm, lambda node: node.vm_capable),
2483 # Build mapping from filename to list of nodes which should have the file
2485 for (files, fn) in files2nodefn:
2487 filenodes = nodeinfo
2489 filenodes = filter(fn, nodeinfo)
2490 nodefiles.update((filename,
2491 frozenset(map(operator.attrgetter("name"), filenodes)))
2492 for filename in files)
2494 assert set(nodefiles) == (files_all | files_mc | files_vm)
2496 fileinfo = dict((filename, {}) for filename in nodefiles)
2497 ignore_nodes = set()
2499 for node in nodeinfo:
2501 ignore_nodes.add(node.name)
2504 nresult = all_nvinfo[node.name]
2506 if nresult.fail_msg or not nresult.payload:
2509 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2511 test = not (node_files and isinstance(node_files, dict))
2512 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2513 "Node did not return file checksum data")
2515 ignore_nodes.add(node.name)
2518 # Build per-checksum mapping from filename to nodes having it
2519 for (filename, checksum) in node_files.items():
2520 assert filename in nodefiles
2521 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2523 for (filename, checksums) in fileinfo.items():
2524 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2526 # Nodes having the file
2527 with_file = frozenset(node_name
2528 for nodes in fileinfo[filename].values()
2529 for node_name in nodes) - ignore_nodes
2531 expected_nodes = nodefiles[filename] - ignore_nodes
2533 # Nodes missing file
2534 missing_file = expected_nodes - with_file
2536 if filename in files_opt:
2538 errorif(missing_file and missing_file != expected_nodes,
2539 constants.CV_ECLUSTERFILECHECK, None,
2540 "File %s is optional, but it must exist on all or no"
2541 " nodes (not found on %s)",
2542 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2544 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2545 "File %s is missing from node(s) %s", filename,
2546 utils.CommaJoin(utils.NiceSort(missing_file)))
2548 # Warn if a node has a file it shouldn't
2549 unexpected = with_file - expected_nodes
2551 constants.CV_ECLUSTERFILECHECK, None,
2552 "File %s should not exist on node(s) %s",
2553 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2555 # See if there are multiple versions of the file
2556 test = len(checksums) > 1
2558 variants = ["variant %s on %s" %
2559 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2560 for (idx, (checksum, nodes)) in
2561 enumerate(sorted(checksums.items()))]
2565 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2566 "File %s found with %s different checksums (%s)",
2567 filename, len(checksums), "; ".join(variants))
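# For illustration, after collection fileinfo maps each tracked filename to
# the checksums seen and the nodes reporting them, e.g.
#   {"/var/lib/ganeti/ssconf_cluster_name":
#      {"3da5...": set(["node1", "node2"]), "9f0c...": set(["node3"])}}
# which would be reported as two variants of the same file.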
2569 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2571 """Verifies and the node DRBD status.
2573 @type ninfo: L{objects.Node}
2574 @param ninfo: the node to check
2575 @param nresult: the remote results for the node
2576 @param instanceinfo: the dict of instances
2577 @param drbd_helper: the configured DRBD usermode helper
2578 @param drbd_map: the DRBD map as returned by
2579 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2583 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2586 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2587 test = (helper_result is None)
2588 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2589 "no drbd usermode helper returned")
2591 status, payload = helper_result
2593 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2594 "drbd usermode helper check unsuccessful: %s", payload)
2595 test = status and (payload != drbd_helper)
2596 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2597 "wrong drbd usermode helper: %s", payload)
2599 # compute the DRBD minors
2601 for minor, instance in drbd_map[node].items():
2602 test = instance not in instanceinfo
2603 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2604 "ghost instance '%s' in temporary DRBD map", instance)
2605 # ghost instance should not be running, but otherwise we
2606 # don't give double warnings (both ghost instance and
2607 # unallocated minor in use)
2609 node_drbd[minor] = (instance, False)
2611 instance = instanceinfo[instance]
2612 node_drbd[minor] = (instance.name,
2613 instance.admin_state == constants.ADMINST_UP)
2615 # and now check them
2616 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2617 test = not isinstance(used_minors, (tuple, list))
2618 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2619 "cannot parse drbd status file: %s", str(used_minors))
2621 # we cannot check drbd status
2624 for minor, (iname, must_exist) in node_drbd.items():
2625 test = minor not in used_minors and must_exist
2626 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2627 "drbd minor %d of instance %s is not active", minor, iname)
2628 for minor in used_minors:
2629 test = minor not in node_drbd
2630 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2631 "unallocated drbd minor %d is in use", minor)
2633 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2634 """Builds the node OS structures.
2636 @type ninfo: L{objects.Node}
2637 @param ninfo: the node to check
2638 @param nresult: the remote results for the node
2639 @param nimg: the node image object
2643 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2645 remote_os = nresult.get(constants.NV_OSLIST, None)
2646 test = (not isinstance(remote_os, list) or
2647 not compat.all(isinstance(v, list) and len(v) == 7
2648 for v in remote_os))
2650 _ErrorIf(test, constants.CV_ENODEOS, node,
2651 "node hasn't returned valid OS data")
2660 for (name, os_path, status, diagnose,
2661 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2663 if name not in os_dict:
2666 # parameters is a list of lists instead of list of tuples due to
2667 # JSON lacking a real tuple type, fix it:
2668 parameters = [tuple(v) for v in parameters]
2669 os_dict[name].append((os_path, status, diagnose,
2670 set(variants), set(parameters), set(api_ver)))
2672 nimg.oslist = os_dict
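# Sketch of the resulting structure: a well-formed NV_OSLIST entry is a
# 7-element list, and nimg.oslist is keyed by OS name, e.g.
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# with one tuple per path the OS was found under (duplicates are flagged
# later in _VerifyNodeOS).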
2674 def _VerifyNodeOS(self, ninfo, nimg, base):
2675 """Verifies the node OS list.
2677 @type ninfo: L{objects.Node}
2678 @param ninfo: the node to check
2679 @param nimg: the node image object
2680 @param base: the 'template' node we match against (e.g. from the master)
2684 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2686 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2688 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2689 for os_name, os_data in nimg.oslist.items():
2690 assert os_data, "Empty OS status for OS %s?!" % os_name
2691 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2692 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2693 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2694 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2695 "OS '%s' has multiple entries (first one shadows the rest): %s",
2696 os_name, utils.CommaJoin([v[0] for v in os_data]))
2697 # comparisons with the 'base' image
2698 test = os_name not in base.oslist
2699 _ErrorIf(test, constants.CV_ENODEOS, node,
2700 "Extra OS %s not present on reference node (%s)",
2704 assert base.oslist[os_name], "Base node has empty OS status?"
2705 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2707 # base OS is invalid, skipping
2709 for kind, a, b in [("API version", f_api, b_api),
2710 ("variants list", f_var, b_var),
2711 ("parameters", beautify_params(f_param),
2712 beautify_params(b_param))]:
2713 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2714 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2715 kind, os_name, base.name,
2716 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2718 # check any missing OSes
2719 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2720 _ErrorIf(missing, constants.CV_ENODEOS, node,
2721 "OSes present on reference node %s but missing on this node: %s",
2722 base.name, utils.CommaJoin(missing))
2724 def _VerifyOob(self, ninfo, nresult):
2725 """Verifies out of band functionality of a node.
2727 @type ninfo: L{objects.Node}
2728 @param ninfo: the node to check
2729 @param nresult: the remote results for the node
2733 # We just have to verify the paths on master and/or master candidates
2734 # as the oob helper is invoked on the master
2735 if ((ninfo.master_candidate or ninfo.master_capable) and
2736 constants.NV_OOB_PATHS in nresult):
2737 for path_result in nresult[constants.NV_OOB_PATHS]:
2738 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2740 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2741 """Verifies and updates the node volume data.
2743 This function will update a L{NodeImage}'s internal structures
2744 with data from the remote call.
2746 @type ninfo: L{objects.Node}
2747 @param ninfo: the node to check
2748 @param nresult: the remote results for the node
2749 @param nimg: the node image object
2750 @param vg_name: the configured VG name
2754 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2756 nimg.lvm_fail = True
2757 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2760 elif isinstance(lvdata, basestring):
2761 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2762 utils.SafeEncode(lvdata))
2763 elif not isinstance(lvdata, dict):
2764 _ErrorIf(True, constants.CV_ENODELVM, node,
2765 "rpc call to node failed (lvlist)")
2767 nimg.volumes = lvdata
2768 nimg.lvm_fail = False
2770 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2771 """Verifies and updates the node instance list.
2773 If the listing was successful, then updates this node's instance
2774 list. Otherwise, it marks the RPC call as failed for the instance
2777 @type ninfo: L{objects.Node}
2778 @param ninfo: the node to check
2779 @param nresult: the remote results for the node
2780 @param nimg: the node image object
2783 idata = nresult.get(constants.NV_INSTANCELIST, None)
2784 test = not isinstance(idata, list)
2785 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2786 "rpc call to node failed (instancelist): %s",
2787 utils.SafeEncode(str(idata)))
2789 nimg.hyp_fail = True
2791 nimg.instances = idata
2793 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2794 """Verifies and computes a node information map
2796 @type ninfo: L{objects.Node}
2797 @param ninfo: the node to check
2798 @param nresult: the remote results for the node
2799 @param nimg: the node image object
2800 @param vg_name: the configured VG name
2804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2806 # try to read free memory (from the hypervisor)
2807 hv_info = nresult.get(constants.NV_HVINFO, None)
2808 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2809 _ErrorIf(test, constants.CV_ENODEHV, node,
2810 "rpc call to node failed (hvinfo)")
2813 nimg.mfree = int(hv_info["memory_free"])
2814 except (ValueError, TypeError):
2815 _ErrorIf(True, constants.CV_ENODERPC, node,
2816 "node returned invalid nodeinfo, check hypervisor")
2818 # FIXME: devise a free space model for file based instances as well
2819 if vg_name is not None:
2820 test = (constants.NV_VGLIST not in nresult or
2821 vg_name not in nresult[constants.NV_VGLIST])
2822 _ErrorIf(test, constants.CV_ENODELVM, node,
2823 "node didn't return data for the volume group '%s'"
2824 " - it is either missing or broken", vg_name)
2827 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2828 except (ValueError, TypeError):
2829 _ErrorIf(True, constants.CV_ENODERPC, node,
2830 "node returned invalid LVM info, check LVM status")
2832 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2833 """Gets per-disk status information for all instances.
2835 @type nodelist: list of strings
2836 @param nodelist: Node names
2837 @type node_image: dict of (name, L{objects.Node})
2838 @param node_image: Node objects
2839 @type instanceinfo: dict of (name, L{objects.Instance})
2840 @param instanceinfo: Instance objects
2841 @rtype: {instance: {node: [(success, payload)]}}
2842 @return: a dictionary of per-instance dictionaries with nodes as
2843 keys and disk information as values; the disk information is a
2844 list of tuples (success, payload)
2847 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2850 node_disks_devonly = {}
2851 diskless_instances = set()
2852 diskless = constants.DT_DISKLESS
2854 for nname in nodelist:
2855 node_instances = list(itertools.chain(node_image[nname].pinst,
2856 node_image[nname].sinst))
2857 diskless_instances.update(inst for inst in node_instances
2858 if instanceinfo[inst].disk_template == diskless)
2859 disks = [(inst, disk)
2860 for inst in node_instances
2861 for disk in instanceinfo[inst].disks]
2864 # No need to collect data
2867 node_disks[nname] = disks
2869 # Creating copies as SetDiskID below will modify the objects and that can
2870 # lead to incorrect data returned from nodes
2871 devonly = [dev.Copy() for (_, dev) in disks]
2874 self.cfg.SetDiskID(dev, nname)
2876 node_disks_devonly[nname] = devonly
2878 assert len(node_disks) == len(node_disks_devonly)
2880 # Collect data from all nodes with disks
2881 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2884 assert len(result) == len(node_disks)
2888 for (nname, nres) in result.items():
2889 disks = node_disks[nname]
2892 # No data from this node
2893 data = len(disks) * [(False, "node offline")]
2896 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2897 "while getting disk information: %s", msg)
2899 # No data from this node
2900 data = len(disks) * [(False, msg)]
2903 for idx, i in enumerate(nres.payload):
2904 if isinstance(i, (tuple, list)) and len(i) == 2:
2907 logging.warning("Invalid result from node %s, entry %d: %s",
2909 data.append((False, "Invalid result from the remote node"))
2911 for ((inst, _), status) in zip(disks, data):
2912 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2914 # Add empty entries for diskless instances.
2915 for inst in diskless_instances:
2916 assert inst not in instdisk
2919 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2920 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2921 compat.all(isinstance(s, (tuple, list)) and
2922 len(s) == 2 for s in statuses)
2923 for inst, nnames in instdisk.items()
2924 for nname, statuses in nnames.items())
2925 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
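# For illustration, a DRBD instance "inst1" on nodes node1/node2 with one
# disk would end up as
#   {"inst1": {"node1": [(True, <status>)], "node2": [(True, <status>)]}}
# while a diskless instance gets an empty dict entry.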
2930 def _SshNodeSelector(group_uuid, all_nodes):
2931 """Create endless iterators for all potential SSH check hosts.
2934 nodes = [node for node in all_nodes
2935 if (node.group != group_uuid and
2937 keyfunc = operator.attrgetter("group")
2939 return map(itertools.cycle,
2940 [sorted(map(operator.attrgetter("name"), names))
2941 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2945 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2946 """Choose which nodes should talk to which other nodes.
2948 We will make nodes contact all nodes in their group, and one node from
2951 @warning: This algorithm has a known issue if one node group is much
2952 smaller than others (e.g. just one node). In such a case all other
2953 nodes will talk to the single node.
2956 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2957 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2959 return (online_nodes,
2960 dict((name, sorted([i.next() for i in sel]))
2961 for name in online_nodes))
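# Illustrative example: when verifying group "g1" with online nodes node1
# and node2, and another group "g2" containing node3 and node4, the result
# would be roughly
#   (["node1", "node2"], {"node1": ["node3"], "node2": ["node4"]})
# i.e. every node also contacts all nodes in its own group, plus one node
# from each other group, cycling through that group's members.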
2963 def BuildHooksEnv(self):
2966 Cluster-Verify hooks just ran in the post phase and their failure makes
2967 the output be logged in the verify output and the verification to fail.
2971 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2974 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2975 for node in self.my_node_info.values())
2979 def BuildHooksNodes(self):
2980 """Build hooks nodes.
2983 return ([], self.my_node_names)
2985 def Exec(self, feedback_fn):
2986 """Verify integrity of the node group, performing various test on nodes.
2989 # This method has too many local variables. pylint: disable=R0914
2990 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2992 if not self.my_node_names:
2994 feedback_fn("* Empty node group, skipping verification")
2998 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2999 verbose = self.op.verbose
3000 self._feedback_fn = feedback_fn
3002 vg_name = self.cfg.GetVGName()
3003 drbd_helper = self.cfg.GetDRBDHelper()
3004 cluster = self.cfg.GetClusterInfo()
3005 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3006 hypervisors = cluster.enabled_hypervisors
3007 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3009 i_non_redundant = [] # Non redundant instances
3010 i_non_a_balanced = [] # Non auto-balanced instances
3011 i_offline = 0 # Count of offline instances
3012 n_offline = 0 # Count of offline nodes
3013 n_drained = 0 # Count of nodes being drained
3014 node_vol_should = {}
3016 # FIXME: verify OS list
3019 filemap = _ComputeAncillaryFiles(cluster, False)
3021 # do local checksums
3022 master_node = self.master_node = self.cfg.GetMasterNode()
3023 master_ip = self.cfg.GetMasterIP()
3025 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3028 if self.cfg.GetUseExternalMipScript():
3029 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3031 node_verify_param = {
3032 constants.NV_FILELIST:
3033 utils.UniqueSequence(filename
3034 for files in filemap
3035 for filename in files),
3036 constants.NV_NODELIST:
3037 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3038 self.all_node_info.values()),
3039 constants.NV_HYPERVISOR: hypervisors,
3040 constants.NV_HVPARAMS:
3041 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3042 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3043 for node in node_data_list
3044 if not node.offline],
3045 constants.NV_INSTANCELIST: hypervisors,
3046 constants.NV_VERSION: None,
3047 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3048 constants.NV_NODESETUP: None,
3049 constants.NV_TIME: None,
3050 constants.NV_MASTERIP: (master_node, master_ip),
3051 constants.NV_OSLIST: None,
3052 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3053 constants.NV_USERSCRIPTS: user_scripts,
3056 if vg_name is not None:
3057 node_verify_param[constants.NV_VGLIST] = None
3058 node_verify_param[constants.NV_LVLIST] = vg_name
3059 node_verify_param[constants.NV_PVLIST] = [vg_name]
3060 node_verify_param[constants.NV_DRBDLIST] = None
3063 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3066 # FIXME: this needs to be changed per node-group, not cluster-wide
3068 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3069 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3070 bridges.add(default_nicpp[constants.NIC_LINK])
3071 for instance in self.my_inst_info.values():
3072 for nic in instance.nics:
3073 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3074 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3075 bridges.add(full_nic[constants.NIC_LINK])
3078 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3080 # Build our expected cluster state
3081 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3083 vm_capable=node.vm_capable))
3084 for node in node_data_list)
3088 for node in self.all_node_info.values():
3089 path = _SupportsOob(self.cfg, node)
3090 if path and path not in oob_paths:
3091 oob_paths.append(path)
3094 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3096 for instance in self.my_inst_names:
3097 inst_config = self.my_inst_info[instance]
3099 for nname in inst_config.all_nodes:
3100 if nname not in node_image:
3101 gnode = self.NodeImage(name=nname)
3102 gnode.ghost = (nname not in self.all_node_info)
3103 node_image[nname] = gnode
3105 inst_config.MapLVsByNode(node_vol_should)
3107 pnode = inst_config.primary_node
3108 node_image[pnode].pinst.append(instance)
3110 for snode in inst_config.secondary_nodes:
3111 nimg = node_image[snode]
3112 nimg.sinst.append(instance)
3113 if pnode not in nimg.sbp:
3114 nimg.sbp[pnode] = []
3115 nimg.sbp[pnode].append(instance)
3117 # At this point, we have the in-memory data structures complete,
3118 # except for the runtime information, which we'll gather next
3120 # Due to the way our RPC system works, exact response times cannot be
3121 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3122 # time before and after executing the request, we can at least have a time
3124 nvinfo_starttime = time.time()
3125 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3127 self.cfg.GetClusterName())
3128 nvinfo_endtime = time.time()
3130 if self.extra_lv_nodes and vg_name is not None:
3132 self.rpc.call_node_verify(self.extra_lv_nodes,
3133 {constants.NV_LVLIST: vg_name},
3134 self.cfg.GetClusterName())
3136 extra_lv_nvinfo = {}
3138 all_drbd_map = self.cfg.ComputeDRBDMap()
3140 feedback_fn("* Gathering disk information (%s nodes)" %
3141 len(self.my_node_names))
3142 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3145 feedback_fn("* Verifying configuration file consistency")
3147 # If not all nodes are being checked, we need to make sure the master node
3148 # and a non-checked vm_capable node are in the list.
3149 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3151 vf_nvinfo = all_nvinfo.copy()
3152 vf_node_info = list(self.my_node_info.values())
3153 additional_nodes = []
3154 if master_node not in self.my_node_info:
3155 additional_nodes.append(master_node)
3156 vf_node_info.append(self.all_node_info[master_node])
3157 # Add the first vm_capable node we find which is not included
3158 for node in absent_nodes:
3159 nodeinfo = self.all_node_info[node]
3160 if nodeinfo.vm_capable and not nodeinfo.offline:
3161 additional_nodes.append(node)
3162 vf_node_info.append(self.all_node_info[node])
3164 key = constants.NV_FILELIST
3165 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3166 {key: node_verify_param[key]},
3167 self.cfg.GetClusterName()))
3169 vf_nvinfo = all_nvinfo
3170 vf_node_info = self.my_node_info.values()
3172 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3174 feedback_fn("* Verifying node status")
3178 for node_i in node_data_list:
3180 nimg = node_image[node]
3184 feedback_fn("* Skipping offline node %s" % (node,))
3188 if node == master_node:
3190 elif node_i.master_candidate:
3191 ntype = "master candidate"
3192 elif node_i.drained:
3198 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3200 msg = all_nvinfo[node].fail_msg
3201 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3204 nimg.rpc_fail = True
3207 nresult = all_nvinfo[node].payload
3209 nimg.call_ok = self._VerifyNode(node_i, nresult)
3210 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3211 self._VerifyNodeNetwork(node_i, nresult)
3212 self._VerifyNodeUserScripts(node_i, nresult)
3213 self._VerifyOob(node_i, nresult)
3216 self._VerifyNodeLVM(node_i, nresult, vg_name)
3217 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3220 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3221 self._UpdateNodeInstances(node_i, nresult, nimg)
3222 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3223 self._UpdateNodeOS(node_i, nresult, nimg)
3225 if not nimg.os_fail:
3226 if refos_img is None:
3228 self._VerifyNodeOS(node_i, nimg, refos_img)
3229 self._VerifyNodeBridges(node_i, nresult, bridges)
3231 # Check whether all running instances are primary for the node. (This
3232 # can no longer be done from _VerifyInstance below, since some of the
3233 # wrong instances could be from other node groups.)
3234 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3236 for inst in non_primary_inst:
3237 # FIXME: investigate best way to handle offline insts
3238 if inst.admin_state == constants.ADMINST_OFFLINE:
3240 feedback_fn("* Skipping offline instance %s" % inst.name)
3243 test = inst in self.all_inst_info
3244 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3245 "instance should not run on node %s", node_i.name)
3246 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3247 "node is running unknown instance %s", inst)
3249 for node, result in extra_lv_nvinfo.items():
3250 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3251 node_image[node], vg_name)
3253 feedback_fn("* Verifying instance status")
3254 for instance in self.my_inst_names:
3256 feedback_fn("* Verifying instance %s" % instance)
3257 inst_config = self.my_inst_info[instance]
3258 self._VerifyInstance(instance, inst_config, node_image,
3260 inst_nodes_offline = []
3262 pnode = inst_config.primary_node
3263 pnode_img = node_image[pnode]
3264 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3265 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3266 " primary node failed", instance)
3268 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3270 constants.CV_EINSTANCEBADNODE, instance,
3271 "instance is marked as running and lives on offline node %s",
3272 inst_config.primary_node)
3274 # If the instance is non-redundant we cannot survive losing its primary
3275 # node, so we are not N+1 compliant. On the other hand we have no disk
3276 # templates with more than one secondary so that situation is not well
3278 # FIXME: does not support file-backed instances
3279 if not inst_config.secondary_nodes:
3280 i_non_redundant.append(instance)
3282 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3283 constants.CV_EINSTANCELAYOUT,
3284 instance, "instance has multiple secondary nodes: %s",
3285 utils.CommaJoin(inst_config.secondary_nodes),
3286 code=self.ETYPE_WARNING)
3288 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3289 pnode = inst_config.primary_node
3290 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3291 instance_groups = {}
3293 for node in instance_nodes:
3294 instance_groups.setdefault(self.all_node_info[node].group,
3298 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3299 # Sort so that we always list the primary node first.
3300 for group, nodes in sorted(instance_groups.items(),
3301 key=lambda (_, nodes): pnode in nodes,
3304 self._ErrorIf(len(instance_groups) > 1,
3305 constants.CV_EINSTANCESPLITGROUPS,
3306 instance, "instance has primary and secondary nodes in"
3307 " different groups: %s", utils.CommaJoin(pretty_list),
3308 code=self.ETYPE_WARNING)
3310 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3311 i_non_a_balanced.append(instance)
3313 for snode in inst_config.secondary_nodes:
3314 s_img = node_image[snode]
3315 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3316 snode, "instance %s, connection to secondary node failed",
3320 inst_nodes_offline.append(snode)
3322 # warn that the instance lives on offline nodes
3323 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3324 "instance has offline secondary node(s) %s",
3325 utils.CommaJoin(inst_nodes_offline))
3326 # ... or ghost/non-vm_capable nodes
3327 for node in inst_config.all_nodes:
3328 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3329 instance, "instance lives on ghost node %s", node)
3330 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3331 instance, "instance lives on non-vm_capable node %s", node)
3333 feedback_fn("* Verifying orphan volumes")
3334 reserved = utils.FieldSet(*cluster.reserved_lvs)
3336 # We will get spurious "unknown volume" warnings if any node of this group
3337 # is secondary for an instance whose primary is in another group. To avoid
3338 # them, we find these instances and add their volumes to node_vol_should.
3339 for inst in self.all_inst_info.values():
3340 for secondary in inst.secondary_nodes:
3341 if (secondary in self.my_node_info
3342 and inst.name not in self.my_inst_info):
3343 inst.MapLVsByNode(node_vol_should)
3346 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3348 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3349 feedback_fn("* Verifying N+1 Memory redundancy")
3350 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3352 feedback_fn("* Other Notes")
3354 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3355 % len(i_non_redundant))
3357 if i_non_a_balanced:
3358 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3359 % len(i_non_a_balanced))
3362 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3365 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3368 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3372 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3373 """Analyze the post-hooks' result
3375 This method analyses the hook result, handles it, and sends some
3376 nicely-formatted feedback back to the user.
3378 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3379 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3380 @param hooks_results: the results of the multi-node hooks rpc call
3381 @param feedback_fn: function used to send feedback back to the caller
3382 @param lu_result: previous Exec result
3383 @return: the new Exec result, based on the previous result
3387 # We only really run POST phase hooks, only for non-empty groups,
3388 # and are only interested in their results
3389 if not self.my_node_names:
3392 elif phase == constants.HOOKS_PHASE_POST:
3393 # Used to change hooks' output to proper indentation
3394 feedback_fn("* Hooks Results")
3395 assert hooks_results, "invalid result from hooks"
3397 for node_name in hooks_results:
3398 res = hooks_results[node_name]
3400 test = msg and not res.offline
3401 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3402 "Communication failure in hooks execution: %s", msg)
3403 if res.offline or msg:
3404 # No need to investigate payload if node is offline or gave
3407 for script, hkr, output in res.payload:
3408 test = hkr == constants.HKR_FAIL
3409 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3410 "Script %s failed, output:", script)
3412 output = self._HOOKS_INDENT_RE.sub(" ", output)
3413 feedback_fn("%s" % output)
3419 class LUClusterVerifyDisks(NoHooksLU):
3420 """Verifies the cluster disks status.
3425 def ExpandNames(self):
3426 self.share_locks = _ShareAll()
3427 self.needed_locks = {
3428 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3431 def Exec(self, feedback_fn):
3432 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3434 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3435 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3436 for group in group_names])
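# Sketch of the result: with two owned node groups this returns
#   ResultWithJobs([[OpGroupVerifyDisks(group_name=<group1>)],
#                   [OpGroupVerifyDisks(group_name=<group2>)]])
# so each group's disks are verified by an independent job.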
3439 class LUGroupVerifyDisks(NoHooksLU):
3440 """Verifies the status of all disks in a node group.
3445 def ExpandNames(self):
3446 # Raises errors.OpPrereqError on its own if group can't be found
3447 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3449 self.share_locks = _ShareAll()
3450 self.needed_locks = {
3451 locking.LEVEL_INSTANCE: [],
3452 locking.LEVEL_NODEGROUP: [],
3453 locking.LEVEL_NODE: [],
3456 def DeclareLocks(self, level):
3457 if level == locking.LEVEL_INSTANCE:
3458 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3460 # Lock instances optimistically, needs verification once node and group
3461 # locks have been acquired
3462 self.needed_locks[locking.LEVEL_INSTANCE] = \
3463 self.cfg.GetNodeGroupInstances(self.group_uuid)
3465 elif level == locking.LEVEL_NODEGROUP:
3466 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3468 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3469 set([self.group_uuid] +
3470 # Lock all groups used by instances optimistically; this requires
3471 # going via the node before it's locked, requiring verification
3474 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3475 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3477 elif level == locking.LEVEL_NODE:
3478 # This will only lock the nodes in the group to be verified which contain
3480 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3481 self._LockInstancesNodes()
3483 # Lock all nodes in group to be verified
3484 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3485 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3486 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3488 def CheckPrereq(self):
3489 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3490 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3491 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3493 assert self.group_uuid in owned_groups
3495 # Check if locked instances are still correct
3496 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3498 # Get instance information
3499 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3501 # Check if node groups for locked instances are still correct
3502 for (instance_name, inst) in self.instances.items():
3503 assert owned_nodes.issuperset(inst.all_nodes), \
3504 "Instance %s's nodes changed while we kept the lock" % instance_name
3506 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3509 assert self.group_uuid in inst_groups, \
3510 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3512 def Exec(self, feedback_fn):
3513 """Verify integrity of cluster disks.
3515 @rtype: tuple of three items
3516 @return: a tuple of (dict of node-to-node_error, list of instances
3517 which need activate-disks, dict of instance: (node, volume) for
3522 res_instances = set()
3525 nv_dict = _MapInstanceDisksToNodes([inst
3526 for inst in self.instances.values()
3527 if inst.admin_state == constants.ADMINST_UP])
3530 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3531 set(self.cfg.GetVmCapableNodeList()))
3533 node_lvs = self.rpc.call_lv_list(nodes, [])
3535 for (node, node_res) in node_lvs.items():
3536 if node_res.offline:
3539 msg = node_res.fail_msg
3541 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3542 res_nodes[node] = msg
3545 for lv_name, (_, _, lv_online) in node_res.payload.items():
3546 inst = nv_dict.pop((node, lv_name), None)
3547 if not (lv_online or inst is None):
3548 res_instances.add(inst)
3550 # any leftover items in nv_dict are missing LVs, let's arrange the data
3552 for key, inst in nv_dict.iteritems():
3553 res_missing.setdefault(inst, []).append(list(key))
3555 return (res_nodes, list(res_instances), res_missing)
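# Illustrative return value, matching the (res_nodes, res_instances,
# res_missing) tuple above:
#   ({"node3": "rpc error ..."},            # nodes that could not be queried
#    ["inst2"],                             # instances needing activate-disks
#    {"inst5": [["node1", "xenvg/disk0"]]}) # instances with missing LVs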
3558 class LUClusterRepairDiskSizes(NoHooksLU):
3559 """Verifies the cluster disks sizes.
3564 def ExpandNames(self):
3565 if self.op.instances:
3566 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3567 self.needed_locks = {
3568 locking.LEVEL_NODE_RES: [],
3569 locking.LEVEL_INSTANCE: self.wanted_names,
3571 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3573 self.wanted_names = None
3574 self.needed_locks = {
3575 locking.LEVEL_NODE_RES: locking.ALL_SET,
3576 locking.LEVEL_INSTANCE: locking.ALL_SET,
3578 self.share_locks = {
3579 locking.LEVEL_NODE_RES: 1,
3580 locking.LEVEL_INSTANCE: 0,
3583 def DeclareLocks(self, level):
3584 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3585 self._LockInstancesNodes(primary_only=True, level=level)
3587 def CheckPrereq(self):
3588 """Check prerequisites.
3590 This only checks the optional instance list against the existing names.
3593 if self.wanted_names is None:
3594 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3596 self.wanted_instances = \
3597 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3599 def _EnsureChildSizes(self, disk):
3600 """Ensure children of the disk have the needed disk size.
3602 This is valid mainly for DRBD8 and fixes an issue where the
3603 children have a smaller disk size.
3605 @param disk: an L{ganeti.objects.Disk} object
3608 if disk.dev_type == constants.LD_DRBD8:
3609 assert disk.children, "Empty children for DRBD8?"
3610 fchild = disk.children[0]
3611 mismatch = fchild.size < disk.size
3613 self.LogInfo("Child disk has size %d, parent %d, fixing",
3614 fchild.size, disk.size)
3615 fchild.size = disk.size
3617 # and we recurse on this child only, not on the metadev
3618 return self._EnsureChildSizes(fchild) or mismatch
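# For illustration: a DRBD8 disk of 10240 MiB whose data child (an LV) is
# recorded as 10176 MiB has that child bumped to 10240, and the method
# returns True so the caller knows the configuration must be written back;
# the metadata child is deliberately left untouched.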
3622 def Exec(self, feedback_fn):
3623 """Verify the size of cluster disks.
3626 # TODO: check child disks too
3627 # TODO: check differences in size between primary/secondary nodes
3629 for instance in self.wanted_instances:
3630 pnode = instance.primary_node
3631 if pnode not in per_node_disks:
3632 per_node_disks[pnode] = []
3633 for idx, disk in enumerate(instance.disks):
3634 per_node_disks[pnode].append((instance, idx, disk))
3636 assert not (frozenset(per_node_disks.keys()) -
3637 self.owned_locks(locking.LEVEL_NODE_RES)), \
3638 "Not owning correct locks"
3639 assert not self.owned_locks(locking.LEVEL_NODE)
3642 for node, dskl in per_node_disks.items():
3643 newl = [v[2].Copy() for v in dskl]
3645 self.cfg.SetDiskID(dsk, node)
3646 result = self.rpc.call_blockdev_getsize(node, newl)
3648 self.LogWarning("Failure in blockdev_getsize call to node"
3649 " %s, ignoring", node)
3651 if len(result.payload) != len(dskl):
3652 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3653 " result.payload=%s", node, len(dskl), result.payload)
3654 self.LogWarning("Invalid result from node %s, ignoring node results",
3657 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3659 self.LogWarning("Disk %d of instance %s did not return size"
3660 " information, ignoring", idx, instance.name)
3662 if not isinstance(size, (int, long)):
3663 self.LogWarning("Disk %d of instance %s did not return valid"
3664 " size information, ignoring", idx, instance.name)
3667 if size != disk.size:
3668 self.LogInfo("Disk %d of instance %s has mismatched size,"
3669 " correcting: recorded %d, actual %d", idx,
3670 instance.name, disk.size, size)
3672 self.cfg.Update(instance, feedback_fn)
3673 changed.append((instance.name, idx, size))
3674 if self._EnsureChildSizes(disk):
3675 self.cfg.Update(instance, feedback_fn)
3676 changed.append((instance.name, idx, disk.size))
3680 class LUClusterRename(LogicalUnit):
3681 """Rename the cluster.
3684 HPATH = "cluster-rename"
3685 HTYPE = constants.HTYPE_CLUSTER
3687 def BuildHooksEnv(self):
3692 "OP_TARGET": self.cfg.GetClusterName(),
3693 "NEW_NAME": self.op.name,
3696 def BuildHooksNodes(self):
3697 """Build hooks nodes.
3700 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3702 def CheckPrereq(self):
3703 """Verify that the passed name is a valid one.
3706 hostname = netutils.GetHostname(name=self.op.name,
3707 family=self.cfg.GetPrimaryIPFamily())
3709 new_name = hostname.name
3710 self.ip = new_ip = hostname.ip
3711 old_name = self.cfg.GetClusterName()
3712 old_ip = self.cfg.GetMasterIP()
3713 if new_name == old_name and new_ip == old_ip:
3714 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3715 " cluster has changed",
3717 if new_ip != old_ip:
3718 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3719 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3720 " reachable on the network" %
3721 new_ip, errors.ECODE_NOTUNIQUE)
3723 self.op.name = new_name
3725 def Exec(self, feedback_fn):
3726 """Rename the cluster.
3729 clustername = self.op.name
3732 # shutdown the master IP
3733 master_params = self.cfg.GetMasterNetworkParameters()
3734 ems = self.cfg.GetUseExternalMipScript()
3735 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3737 result.Raise("Could not disable the master role")
3740 cluster = self.cfg.GetClusterInfo()
3741 cluster.cluster_name = clustername
3742 cluster.master_ip = new_ip
3743 self.cfg.Update(cluster, feedback_fn)
3745 # update the known hosts file
3746 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3747 node_list = self.cfg.GetOnlineNodeList()
3749 node_list.remove(master_params.name)
3752 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3754 master_params.ip = new_ip
3755 result = self.rpc.call_node_activate_master_ip(master_params.name,
3757 msg = result.fail_msg
3759 self.LogWarning("Could not re-enable the master role on"
3760 " the master, please restart manually: %s", msg)
3765 def _ValidateNetmask(cfg, netmask):
3766 """Checks if a netmask is valid.
3768 @type cfg: L{config.ConfigWriter}
3769 @param cfg: The cluster configuration
3770 @type netmask: int
3771 @param netmask: the netmask to be verified
3772 @raise errors.OpPrereqError: if the validation fails
3775 ip_family = cfg.GetPrimaryIPFamily()
3777 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3778 except errors.ProgrammerError:
3779 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3781 if not ipcls.ValidateNetmask(netmask):
3782 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
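
# Illustrative sketch (not part of the original module): the real check in
# _ValidateNetmask is delegated to the IP-family specific class.  As a
# generic illustration of netmask validity, a dotted-quad IPv4 netmask is
# acceptable exactly when its set bits are contiguous; the helper below is
# hypothetical and not used by any LU.
def _ExampleDottedNetmaskIsContiguous(netmask):
  """Return True if an IPv4 dotted-quad netmask has contiguous set bits."""
  parts = netmask.split(".")
  if len(parts) != 4:
    return False
  bits = 0
  for part in parts:
    try:
      octet = int(part)
    except ValueError:
      return False
    if octet < 0 or octet > 255:
      return False
    bits = (bits << 8) | octet
  # a valid mask, inverted within 32 bits, must be of the form 2^k - 1
  inverted = (~bits) & 0xFFFFFFFF
  return (inverted & (inverted + 1)) == 0

# e.g. _ExampleDottedNetmaskIsContiguous("255.255.255.0") is True, while
# _ExampleDottedNetmaskIsContiguous("255.0.255.0") is False.
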
3786 class LUClusterSetParams(LogicalUnit):
3787 """Change the parameters of the cluster.
3790 HPATH = "cluster-modify"
3791 HTYPE = constants.HTYPE_CLUSTER
3794 def CheckArguments(self):
3798 if self.op.uid_pool:
3799 uidpool.CheckUidPool(self.op.uid_pool)
3801 if self.op.add_uids:
3802 uidpool.CheckUidPool(self.op.add_uids)
3804 if self.op.remove_uids:
3805 uidpool.CheckUidPool(self.op.remove_uids)
3807 if self.op.master_netmask is not None:
3808 _ValidateNetmask(self.cfg, self.op.master_netmask)
3810 if self.op.diskparams:
3811 for dt_params in self.op.diskparams.values():
3812 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3814 def ExpandNames(self):
3815 # FIXME: in the future maybe other cluster params won't require checking on
3816 # all nodes to be modified.
3817 self.needed_locks = {
3818 locking.LEVEL_NODE: locking.ALL_SET,
3819 locking.LEVEL_INSTANCE: locking.ALL_SET,
3820 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3822 self.share_locks = {
3823 locking.LEVEL_NODE: 1,
3824 locking.LEVEL_INSTANCE: 1,
3825 locking.LEVEL_NODEGROUP: 1,
3828 def BuildHooksEnv(self):
3833 "OP_TARGET": self.cfg.GetClusterName(),
3834 "NEW_VG_NAME": self.op.vg_name,
3837 def BuildHooksNodes(self):
3838 """Build hooks nodes.
3841 mn = self.cfg.GetMasterNode()
3844 def CheckPrereq(self):
3845 """Check prerequisites.
3847 This checks that the given parameters do not conflict and
3848 that the given volume group is valid.
3851 if self.op.vg_name is not None and not self.op.vg_name:
3852 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3853 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3854 " instances exist", errors.ECODE_INVAL)
3856 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3857 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3858 raise errors.OpPrereqError("Cannot disable drbd helper while"
3859 " drbd-based instances exist",
3862 node_list = self.owned_locks(locking.LEVEL_NODE)
3864 # if vg_name is not None, check the given volume group on all nodes
3866 vglist = self.rpc.call_vg_list(node_list)
3867 for node in node_list:
3868 msg = vglist[node].fail_msg
3870 # ignoring down node
3871 self.LogWarning("Error while gathering data on node %s"
3872 " (ignoring node): %s", node, msg)
3874 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3876 constants.MIN_VG_SIZE)
3878 raise errors.OpPrereqError("Error on node '%s': %s" %
3879 (node, vgstatus), errors.ECODE_ENVIRON)
3881 if self.op.drbd_helper:
3882 # checks given drbd helper on all nodes
3883 helpers = self.rpc.call_drbd_helper(node_list)
3884 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3886 self.LogInfo("Not checking drbd helper on offline node %s", node)
3888 msg = helpers[node].fail_msg
3890 raise errors.OpPrereqError("Error checking drbd helper on node"
3891 " '%s': %s" % (node, msg),
3892 errors.ECODE_ENVIRON)
3893 node_helper = helpers[node].payload
3894 if node_helper != self.op.drbd_helper:
3895 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3896 (node, node_helper), errors.ECODE_ENVIRON)
3898 self.cluster = cluster = self.cfg.GetClusterInfo()
3899 # validate params changes
3900 if self.op.beparams:
3901 objects.UpgradeBeParams(self.op.beparams)
3902 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3903 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3905 if self.op.ndparams:
3906 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3907 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3909 # TODO: we need a more general way to handle resetting
3910 # cluster-level parameters to default values
3911 if self.new_ndparams["oob_program"] == "":
3912 self.new_ndparams["oob_program"] = \
3913 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3915 if self.op.hv_state:
3916 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3917 self.cluster.hv_state_static)
3918 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3919 for hv, values in new_hv_state.items())
3921 if self.op.disk_state:
3922 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3923 self.cluster.disk_state_static)
3924 self.new_disk_state = \
3925 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3926 for name, values in svalues.items()))
3927 for storage, svalues in new_disk_state.items())
3930 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3933 all_instances = self.cfg.GetAllInstancesInfo().values()
3935 for group in self.cfg.GetAllNodeGroupsInfo().values():
3936 instances = frozenset([inst for inst in all_instances
3937 if compat.any(node in group.members
3938 for node in inst.all_nodes)])
3939 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3940 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3942 new_ipolicy, instances)
3944 violations.update(new)
3947 self.LogWarning("After the ipolicy change the following instances"
3948 " violate them: %s",
3949 utils.CommaJoin(violations))
3951 if self.op.nicparams:
3952 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3953 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3954 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3957 # check all instances for consistency
3958 for instance in self.cfg.GetAllInstancesInfo().values():
3959 for nic_idx, nic in enumerate(instance.nics):
3960 params_copy = copy.deepcopy(nic.nicparams)
3961 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3963 # check parameter syntax
3965 objects.NIC.CheckParameterSyntax(params_filled)
3966 except errors.ConfigurationError, err:
3967 nic_errors.append("Instance %s, nic/%d: %s" %
3968 (instance.name, nic_idx, err))
3970 # if we're moving instances to routed, check that they have an ip
3971 target_mode = params_filled[constants.NIC_MODE]
3972 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3973 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3974 " address" % (instance.name, nic_idx))
3976 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3977 "\n".join(nic_errors))
3979 # hypervisor list/parameters
3980 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3981 if self.op.hvparams:
3982 for hv_name, hv_dict in self.op.hvparams.items():
3983 if hv_name not in self.new_hvparams:
3984 self.new_hvparams[hv_name] = hv_dict
3986 self.new_hvparams[hv_name].update(hv_dict)
3988 # disk template parameters
3989 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3990 if self.op.diskparams:
3991 for dt_name, dt_params in self.op.diskparams.items():
3992 if dt_name not in self.new_diskparams:
3993 self.new_diskparams[dt_name] = dt_params
3995 self.new_diskparams[dt_name].update(dt_params)
3997 # os hypervisor parameters
3998 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4000 for os_name, hvs in self.op.os_hvp.items():
4001 if os_name not in self.new_os_hvp:
4002 self.new_os_hvp[os_name] = hvs
4004 for hv_name, hv_dict in hvs.items():
4005 if hv_name not in self.new_os_hvp[os_name]:
4006 self.new_os_hvp[os_name][hv_name] = hv_dict
4008 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4011 self.new_osp = objects.FillDict(cluster.osparams, {})
4012 if self.op.osparams:
4013 for os_name, osp in self.op.osparams.items():
4014 if os_name not in self.new_osp:
4015 self.new_osp[os_name] = {}
4017 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4020 if not self.new_osp[os_name]:
4021 # we removed all parameters
4022 del self.new_osp[os_name]
4024 # check the parameter validity (remote check)
4025 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4026 os_name, self.new_osp[os_name])
4028 # changes to the hypervisor list
4029 if self.op.enabled_hypervisors is not None:
4030 self.hv_list = self.op.enabled_hypervisors
4031 for hv in self.hv_list:
4032 # if the hypervisor doesn't already exist in the cluster
4033 # hvparams, we initialize it to empty, and then (in both
4034 # cases) we make sure to fill the defaults, as we might not
4035 # have a complete defaults list if the hypervisor wasn't
4036 # enabled before
4037 if hv not in new_hvp:
4039 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4040 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4042 self.hv_list = cluster.enabled_hypervisors
4044 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4045 # either the enabled list has changed, or the parameters have, validate
4046 for hv_name, hv_params in self.new_hvparams.items():
4047 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4048 (self.op.enabled_hypervisors and
4049 hv_name in self.op.enabled_hypervisors)):
4050 # either this is a new hypervisor, or its parameters have changed
4051 hv_class = hypervisor.GetHypervisor(hv_name)
4052 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4053 hv_class.CheckParameterSyntax(hv_params)
4054 _CheckHVParams(self, node_list, hv_name, hv_params)
4057 # no need to check any newly-enabled hypervisors, since the
4058 # defaults have already been checked in the above code-block
4059 for os_name, os_hvp in self.new_os_hvp.items():
4060 for hv_name, hv_params in os_hvp.items():
4061 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4062 # we need to fill in the new os_hvp on top of the actual hv_p
4063 cluster_defaults = self.new_hvparams.get(hv_name, {})
4064 new_osp = objects.FillDict(cluster_defaults, hv_params)
4065 hv_class = hypervisor.GetHypervisor(hv_name)
4066 hv_class.CheckParameterSyntax(new_osp)
4067 _CheckHVParams(self, node_list, hv_name, new_osp)
4069 if self.op.default_iallocator:
4070 alloc_script = utils.FindFile(self.op.default_iallocator,
4071 constants.IALLOCATOR_SEARCH_PATH,
4073 if alloc_script is None:
4074 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4075 " specified" % self.op.default_iallocator,
4078 def Exec(self, feedback_fn):
4079 """Change the parameters of the cluster.
4082 if self.op.vg_name is not None:
4083 new_volume = self.op.vg_name
4086 if new_volume != self.cfg.GetVGName():
4087 self.cfg.SetVGName(new_volume)
4089 feedback_fn("Cluster LVM configuration already in desired"
4090 " state, not changing")
4091 if self.op.drbd_helper is not None:
4092 new_helper = self.op.drbd_helper
4095 if new_helper != self.cfg.GetDRBDHelper():
4096 self.cfg.SetDRBDHelper(new_helper)
4098 feedback_fn("Cluster DRBD helper already in desired state,"
4100 if self.op.hvparams:
4101 self.cluster.hvparams = self.new_hvparams
4103 self.cluster.os_hvp = self.new_os_hvp
4104 if self.op.enabled_hypervisors is not None:
4105 self.cluster.hvparams = self.new_hvparams
4106 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4107 if self.op.beparams:
4108 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4109 if self.op.nicparams:
4110 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4112 self.cluster.ipolicy = self.new_ipolicy
4113 if self.op.osparams:
4114 self.cluster.osparams = self.new_osp
4115 if self.op.ndparams:
4116 self.cluster.ndparams = self.new_ndparams
4117 if self.op.diskparams:
4118 self.cluster.diskparams = self.new_diskparams
4119 if self.op.hv_state:
4120 self.cluster.hv_state_static = self.new_hv_state
4121 if self.op.disk_state:
4122 self.cluster.disk_state_static = self.new_disk_state
4124 if self.op.candidate_pool_size is not None:
4125 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4126 # we need to update the pool size here, otherwise the save will fail
4127 _AdjustCandidatePool(self, [])
4129 if self.op.maintain_node_health is not None:
4130 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4131 feedback_fn("Note: CONFD was disabled at build time, node health"
4132 " maintenance is not useful (still enabling it)")
4133 self.cluster.maintain_node_health = self.op.maintain_node_health
4135 if self.op.prealloc_wipe_disks is not None:
4136 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4138 if self.op.add_uids is not None:
4139 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4141 if self.op.remove_uids is not None:
4142 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4144 if self.op.uid_pool is not None:
4145 self.cluster.uid_pool = self.op.uid_pool
4147 if self.op.default_iallocator is not None:
4148 self.cluster.default_iallocator = self.op.default_iallocator
4150 if self.op.reserved_lvs is not None:
4151 self.cluster.reserved_lvs = self.op.reserved_lvs
4153 if self.op.use_external_mip_script is not None:
4154 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4156 def helper_os(aname, mods, desc):
4158 lst = getattr(self.cluster, aname)
4159 for key, val in mods:
4160 if key == constants.DDM_ADD:
4162 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4165 elif key == constants.DDM_REMOVE:
4169 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4171 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4173 if self.op.hidden_os:
4174 helper_os("hidden_os", self.op.hidden_os, "hidden")
4176 if self.op.blacklisted_os:
4177 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4179 if self.op.master_netdev:
4180 master_params = self.cfg.GetMasterNetworkParameters()
4181 ems = self.cfg.GetUseExternalMipScript()
4182 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4183 self.cluster.master_netdev)
4184 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4186 result.Raise("Could not disable the master ip")
4187 feedback_fn("Changing master_netdev from %s to %s" %
4188 (master_params.netdev, self.op.master_netdev))
4189 self.cluster.master_netdev = self.op.master_netdev
4191 if self.op.master_netmask:
4192 master_params = self.cfg.GetMasterNetworkParameters()
4193 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4194 result = self.rpc.call_node_change_master_netmask(master_params.name,
4195 master_params.netmask,
4196 self.op.master_netmask,
4198 master_params.netdev)
4200 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4203 self.cluster.master_netmask = self.op.master_netmask
4205 self.cfg.Update(self.cluster, feedback_fn)
4207 if self.op.master_netdev:
4208 master_params = self.cfg.GetMasterNetworkParameters()
4209 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4210 self.op.master_netdev)
4211 ems = self.cfg.GetUseExternalMipScript()
4212 result = self.rpc.call_node_activate_master_ip(master_params.name,
4215 self.LogWarning("Could not re-enable the master ip on"
4216 " the master, please restart manually: %s",
4220 def _UploadHelper(lu, nodes, fname):
4221 """Helper for uploading a file and showing warnings.
4224 if os.path.exists(fname):
4225 result = lu.rpc.call_upload_file(nodes, fname)
4226 for to_node, to_result in result.items():
4227 msg = to_result.fail_msg
4229 msg = ("Copy of file %s to node %s failed: %s" %
4230 (fname, to_node, msg))
4231 lu.proc.LogWarning(msg)
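
# Illustrative sketch (not part of the original module): _UploadHelper above
# pushes one file to many nodes and only warns on per-node failures.  The
# same pattern decoupled from the RPC layer; upload_fn and warn_fn are
# injected callables and purely hypothetical.
def _ExampleUploadToNodes(nodes, fname, upload_fn, warn_fn):
  """Upload fname to each node and return the list of nodes that failed.

  upload_fn(node, fname) is expected to return None on success or an error
  message on failure; warn_fn(msg) reports the problem without aborting.

  """
  failed = []
  for node in nodes:
    error = upload_fn(node, fname)
    if error:
      warn_fn("Copy of file %s to node %s failed: %s" % (fname, node, error))
      failed.append(node)
  return failed
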
4234 def _ComputeAncillaryFiles(cluster, redist):
4235 """Compute files external to Ganeti which need to be consistent.
4237 @type redist: boolean
4238 @param redist: Whether to include files which need to be redistributed
4241 # Compute files for all nodes
4243 constants.SSH_KNOWN_HOSTS_FILE,
4244 constants.CONFD_HMAC_KEY,
4245 constants.CLUSTER_DOMAIN_SECRET_FILE,
4246 constants.SPICE_CERT_FILE,
4247 constants.SPICE_CACERT_FILE,
4248 constants.RAPI_USERS_FILE,
4252 files_all.update(constants.ALL_CERT_FILES)
4253 files_all.update(ssconf.SimpleStore().GetFileList())
4255 # we need to ship at least the RAPI certificate
4256 files_all.add(constants.RAPI_CERT_FILE)
4258 if cluster.modify_etc_hosts:
4259 files_all.add(constants.ETC_HOSTS)
4261 # Files which are optional, these must:
4262 # - be present in one other category as well
4263 # - either exist or not exist on all nodes of that category (mc, vm all)
4265 constants.RAPI_USERS_FILE,
4268 # Files which should only be on master candidates
4272 files_mc.add(constants.CLUSTER_CONF_FILE)
4274 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4275 # replication
4276 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4278 # Files which should only be on VM-capable nodes
4279 files_vm = set(filename
4280 for hv_name in cluster.enabled_hypervisors
4281 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4283 files_opt |= set(filename
4284 for hv_name in cluster.enabled_hypervisors
4285 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4287 # Filenames in each category must be unique
4288 all_files_set = files_all | files_mc | files_vm
4289 assert (len(all_files_set) ==
4290 sum(map(len, [files_all, files_mc, files_vm]))), \
4291 "Found file listed in more than one file list"
4293 # Optional files must be present in one other category
4294 assert all_files_set.issuperset(files_opt), \
4295 "Optional file not in a different required list"
4297 return (files_all, files_opt, files_mc, files_vm)
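
# Illustrative sketch (not part of the original module): the asserts in
# _ComputeAncillaryFiles enforce two invariants on the file categories.
# The same checks on plain sets, returning messages instead of asserting
# (all names hypothetical):
def _ExampleCheckFileCategories(files_all, files_mc, files_vm, files_opt):
  """Return a list of invariant violations between file categories."""
  problems = []
  union = files_all | files_mc | files_vm
  if len(union) != len(files_all) + len(files_mc) + len(files_vm):
    problems.append("a file is listed in more than one category")
  if not union.issuperset(files_opt):
    problems.append("an optional file is not in any required category")
  return problems
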
4300 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4301 """Distribute additional files which are part of the cluster configuration.
4303 ConfigWriter takes care of distributing the config and ssconf files, but
4304 there are more files which should be distributed to all nodes. This function
4305 makes sure those are copied.
4307 @param lu: calling logical unit
4308 @param additional_nodes: list of nodes not in the config to distribute to
4309 @type additional_vm: boolean
4310 @param additional_vm: whether the additional nodes are vm-capable or not
4313 # Gather target nodes
4314 cluster = lu.cfg.GetClusterInfo()
4315 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4317 online_nodes = lu.cfg.GetOnlineNodeList()
4318 vm_nodes = lu.cfg.GetVmCapableNodeList()
4320 if additional_nodes is not None:
4321 online_nodes.extend(additional_nodes)
4323 vm_nodes.extend(additional_nodes)
4325 # Never distribute to master node
4326 for nodelist in [online_nodes, vm_nodes]:
4327 if master_info.name in nodelist:
4328 nodelist.remove(master_info.name)
4331 (files_all, _, files_mc, files_vm) = \
4332 _ComputeAncillaryFiles(cluster, True)
4334 # Never re-distribute configuration file from here
4335 assert not (constants.CLUSTER_CONF_FILE in files_all or
4336 constants.CLUSTER_CONF_FILE in files_vm)
4337 assert not files_mc, "Master candidates not handled in this function"
4340 (online_nodes, files_all),
4341 (vm_nodes, files_vm),
4345 for (node_list, files) in filemap:
4347 _UploadHelper(lu, node_list, fname)
4350 class LUClusterRedistConf(NoHooksLU):
4351 """Force the redistribution of cluster configuration.
4353 This is a very simple LU.
4358 def ExpandNames(self):
4359 self.needed_locks = {
4360 locking.LEVEL_NODE: locking.ALL_SET,
4362 self.share_locks[locking.LEVEL_NODE] = 1
4364 def Exec(self, feedback_fn):
4365 """Redistribute the configuration.
4368 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4369 _RedistributeAncillaryFiles(self)
4372 class LUClusterActivateMasterIp(NoHooksLU):
4373 """Activate the master IP on the master node.
4376 def Exec(self, feedback_fn):
4377 """Activate the master IP.
4380 master_params = self.cfg.GetMasterNetworkParameters()
4381 ems = self.cfg.GetUseExternalMipScript()
4382 result = self.rpc.call_node_activate_master_ip(master_params.name,
4384 result.Raise("Could not activate the master IP")
4387 class LUClusterDeactivateMasterIp(NoHooksLU):
4388 """Deactivate the master IP on the master node.
4391 def Exec(self, feedback_fn):
4392 """Deactivate the master IP.
4395 master_params = self.cfg.GetMasterNetworkParameters()
4396 ems = self.cfg.GetUseExternalMipScript()
4397 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4399 result.Raise("Could not deactivate the master IP")
4402 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4403 """Sleep and poll for an instance's disk to sync.
4406 if not instance.disks or disks is not None and not disks:
4409 disks = _ExpandCheckDisks(instance, disks)
4412 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4414 node = instance.primary_node
4417 lu.cfg.SetDiskID(dev, node)
4419 # TODO: Convert to utils.Retry
4422 degr_retries = 10 # in seconds, as we sleep 1 second each time
4426 cumul_degraded = False
4427 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4428 msg = rstats.fail_msg
4430 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4433 raise errors.RemoteError("Can't contact node %s for mirror data,"
4434 " aborting." % node)
4437 rstats = rstats.payload
4439 for i, mstat in enumerate(rstats):
4441 lu.LogWarning("Can't compute data for node %s/%s",
4442 node, disks[i].iv_name)
4445 cumul_degraded = (cumul_degraded or
4446 (mstat.is_degraded and mstat.sync_percent is None))
4447 if mstat.sync_percent is not None:
4449 if mstat.estimated_time is not None:
4450 rem_time = ("%s remaining (estimated)" %
4451 utils.FormatSeconds(mstat.estimated_time))
4452 max_time = mstat.estimated_time
4454 rem_time = "no time estimate"
4455 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4456 (disks[i].iv_name, mstat.sync_percent, rem_time))
4458 # if we're done but degraded, let's do a few small retries, to
4459 # make sure we see a stable and not transient situation; therefore
4460 # we force restart of the loop
4461 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4462 logging.info("Degraded disks found, %d retries left", degr_retries)
4470 time.sleep(min(60, max_time))
4473 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4474 return not cumul_degraded
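
# Illustrative sketch (not part of the original module): _WaitForSync polls
# the primary node for mirror status and, when the mirrors report "done but
# degraded", retries a few more times to skip transient states.  A
# stripped-down version of that loop against an injected status callable;
# all names and the retry policy are hypothetical simplifications.
def _ExampleWaitForSync(get_status, max_wait=60, degraded_retries=10):
  """Poll get_status() until it reports done and not degraded.

  get_status is a callable returning (percent_done, degraded); the function
  returns True if the mirror ended up clean within max_wait seconds.

  """
  waited = 0
  while waited < max_wait:
    percent, degraded = get_status()
    if percent >= 100:
      if degraded and degraded_retries > 0:
        # done but degraded: retry to make sure this is not transient
        degraded_retries -= 1
      else:
        return not degraded
    time.sleep(1)
    waited += 1
  return False
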
4477 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4478 """Check that mirrors are not degraded.
4480 The ldisk parameter, if True, will change the test from the
4481 is_degraded attribute (which represents overall non-ok status for
4482 the device(s)) to the ldisk (representing the local storage status).
4485 lu.cfg.SetDiskID(dev, node)
4489 if on_primary or dev.AssembleOnSecondary():
4490 rstats = lu.rpc.call_blockdev_find(node, dev)
4491 msg = rstats.fail_msg
4493 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4495 elif not rstats.payload:
4496 lu.LogWarning("Can't find disk on node %s", node)
4500 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4502 result = result and not rstats.payload.is_degraded
4505 for child in dev.children:
4506 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4511 class LUOobCommand(NoHooksLU):
4512 """Logical unit for OOB handling.
4516 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4518 def ExpandNames(self):
4519 """Gather locks we need.
4522 if self.op.node_names:
4523 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4524 lock_names = self.op.node_names
4526 lock_names = locking.ALL_SET
4528 self.needed_locks = {
4529 locking.LEVEL_NODE: lock_names,
4532 def CheckPrereq(self):
4533 """Check prerequisites.
4536 - the node exists in the configuration
4539 Any errors are signaled by raising errors.OpPrereqError.
4543 self.master_node = self.cfg.GetMasterNode()
4545 assert self.op.power_delay >= 0.0
4547 if self.op.node_names:
4548 if (self.op.command in self._SKIP_MASTER and
4549 self.master_node in self.op.node_names):
4550 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4551 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4553 if master_oob_handler:
4554 additional_text = ("run '%s %s %s' if you want to operate on the"
4555 " master regardless") % (master_oob_handler,
4559 additional_text = "it does not support out-of-band operations"
4561 raise errors.OpPrereqError(("Operating on the master node %s is not"
4562 " allowed for %s; %s") %
4563 (self.master_node, self.op.command,
4564 additional_text), errors.ECODE_INVAL)
4566 self.op.node_names = self.cfg.GetNodeList()
4567 if self.op.command in self._SKIP_MASTER:
4568 self.op.node_names.remove(self.master_node)
4570 if self.op.command in self._SKIP_MASTER:
4571 assert self.master_node not in self.op.node_names
4573 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4575 raise errors.OpPrereqError("Node %s not found" % node_name,
4578 self.nodes.append(node)
4580 if (not self.op.ignore_status and
4581 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4582 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4583 " not marked offline") % node_name,
4586 def Exec(self, feedback_fn):
4587 """Execute OOB and return result if we expect any.
4590 master_node = self.master_node
4593 for idx, node in enumerate(utils.NiceSort(self.nodes,
4594 key=lambda node: node.name)):
4595 node_entry = [(constants.RS_NORMAL, node.name)]
4596 ret.append(node_entry)
4598 oob_program = _SupportsOob(self.cfg, node)
4601 node_entry.append((constants.RS_UNAVAIL, None))
4604 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4605 self.op.command, oob_program, node.name)
4606 result = self.rpc.call_run_oob(master_node, oob_program,
4607 self.op.command, node.name,
4611 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4612 node.name, result.fail_msg)
4613 node_entry.append((constants.RS_NODATA, None))
4616 self._CheckPayload(result)
4617 except errors.OpExecError, err:
4618 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4620 node_entry.append((constants.RS_NODATA, None))
4622 if self.op.command == constants.OOB_HEALTH:
4623 # For health we should log important events
4624 for item, status in result.payload:
4625 if status in [constants.OOB_STATUS_WARNING,
4626 constants.OOB_STATUS_CRITICAL]:
4627 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4628 item, node.name, status)
4630 if self.op.command == constants.OOB_POWER_ON:
4632 elif self.op.command == constants.OOB_POWER_OFF:
4633 node.powered = False
4634 elif self.op.command == constants.OOB_POWER_STATUS:
4635 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4636 if powered != node.powered:
4637 logging.warning(("Recorded power state (%s) of node '%s' does not"
4638 " match actual power state (%s)"), node.powered,
4641 # For configuration changing commands we should update the node
4642 if self.op.command in (constants.OOB_POWER_ON,
4643 constants.OOB_POWER_OFF):
4644 self.cfg.Update(node, feedback_fn)
4646 node_entry.append((constants.RS_NORMAL, result.payload))
4648 if (self.op.command == constants.OOB_POWER_ON and
4649 idx < len(self.nodes) - 1):
4650 time.sleep(self.op.power_delay)
4654 def _CheckPayload(self, result):
4655 """Checks if the payload is valid.
4657 @param result: RPC result
4658 @raises errors.OpExecError: If payload is not valid
4662 if self.op.command == constants.OOB_HEALTH:
4663 if not isinstance(result.payload, list):
4664 errs.append("command 'health' is expected to return a list but got %s" %
4665 type(result.payload))
4667 for item, status in result.payload:
4668 if status not in constants.OOB_STATUSES:
4669 errs.append("health item '%s' has invalid status '%s'" %
4672 if self.op.command == constants.OOB_POWER_STATUS:
4673 if not isinstance(result.payload, dict):
4674 errs.append("power-status is expected to return a dict but got %s" %
4675 type(result.payload))
4677 if self.op.command in [
4678 constants.OOB_POWER_ON,
4679 constants.OOB_POWER_OFF,
4680 constants.OOB_POWER_CYCLE,
4682 if result.payload is not None:
4683 errs.append("%s is expected to not return payload but got '%s'" %
4684 (self.op.command, result.payload))
4687 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4688 utils.CommaJoin(errs))
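
# Illustrative sketch (not part of the original module): _CheckPayload above
# validates the shape of the out-of-band result per command.  The same
# expectations expressed as a standalone function; the command names used
# here are hypothetical stand-ins for the OOB_* constants.
def _ExampleCheckOobShape(command, payload):
  """Return an error string if payload has the wrong shape, else None."""
  if command == "health":
    if not isinstance(payload, list):
      return "'health' is expected to return a list, got %s" % type(payload)
  elif command == "power-status":
    if not isinstance(payload, dict):
      return "'power-status' is expected to return a dict, got %s" % \
             type(payload)
  elif command in ("power-on", "power-off", "power-cycle"):
    if payload is not None:
      return "'%s' is expected to return no payload, got %r" % \
             (command, payload)
  return None
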
4691 class _OsQuery(_QueryBase):
4692 FIELDS = query.OS_FIELDS
4694 def ExpandNames(self, lu):
4695 # Lock all nodes in shared mode
4696 # Temporary removal of locks, should be reverted later
4697 # TODO: reintroduce locks when they are lighter-weight
4698 lu.needed_locks = {}
4699 #self.share_locks[locking.LEVEL_NODE] = 1
4700 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4702 # The following variables interact with _QueryBase._GetNames
4704 self.wanted = self.names
4706 self.wanted = locking.ALL_SET
4708 self.do_locking = self.use_locking
4710 def DeclareLocks(self, lu, level):
4714 def _DiagnoseByOS(rlist):
4715 """Remaps a per-node return list into an a per-os per-node dictionary
4717 @param rlist: a map with node names as keys and OS objects as values
4720 @return: a dictionary with osnames as keys and as value another
4721 map, with nodes as keys and tuples of (path, status, diagnose,
4722 variants, parameters, api_versions) as values, eg::
4724 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4725 (/srv/..., False, "invalid api")],
4726 "node2": [(/srv/..., True, "", [], [])]}
4731 # we build here the list of nodes that didn't fail the RPC (at RPC
4732 # level), so that nodes with a non-responding node daemon don't
4733 # make all OSes invalid
4734 good_nodes = [node_name for node_name in rlist
4735 if not rlist[node_name].fail_msg]
4736 for node_name, nr in rlist.items():
4737 if nr.fail_msg or not nr.payload:
4739 for (name, path, status, diagnose, variants,
4740 params, api_versions) in nr.payload:
4741 if name not in all_os:
4742 # build a list of nodes for this os containing empty lists
4743 # for each node in node_list
4745 for nname in good_nodes:
4746 all_os[name][nname] = []
4747 # convert params from [name, help] to (name, help)
4748 params = [tuple(v) for v in params]
4749 all_os[name][node_name].append((path, status, diagnose,
4750 variants, params, api_versions))
4753 def _GetQueryData(self, lu):
4754 """Computes the list of nodes and their attributes.
4757 # Locking is not used
4758 assert not (compat.any(lu.glm.is_owned(level)
4759 for level in locking.LEVELS
4760 if level != locking.LEVEL_CLUSTER) or
4761 self.do_locking or self.use_locking)
4763 valid_nodes = [node.name
4764 for node in lu.cfg.GetAllNodesInfo().values()
4765 if not node.offline and node.vm_capable]
4766 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4767 cluster = lu.cfg.GetClusterInfo()
4771 for (os_name, os_data) in pol.items():
4772 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4773 hidden=(os_name in cluster.hidden_os),
4774 blacklisted=(os_name in cluster.blacklisted_os))
4778 api_versions = set()
4780 for idx, osl in enumerate(os_data.values()):
4781 info.valid = bool(info.valid and osl and osl[0][1])
4785 (node_variants, node_params, node_api) = osl[0][3:6]
4788 variants.update(node_variants)
4789 parameters.update(node_params)
4790 api_versions.update(node_api)
4792 # Filter out inconsistent values
4793 variants.intersection_update(node_variants)
4794 parameters.intersection_update(node_params)
4795 api_versions.intersection_update(node_api)
4797 info.variants = list(variants)
4798 info.parameters = list(parameters)
4799 info.api_versions = list(api_versions)
4801 data[os_name] = info
4803 # Prepare data in requested order
4804 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4808 class LUOsDiagnose(NoHooksLU):
4809 """Logical unit for OS diagnose/query.
4815 def _BuildFilter(fields, names):
4816 """Builds a filter for querying OSes.
4819 name_filter = qlang.MakeSimpleFilter("name", names)
4821 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4822 # respective field is not requested
4823 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4824 for fname in ["hidden", "blacklisted"]
4825 if fname not in fields]
4826 if "valid" not in fields:
4827 status_filter.append([qlang.OP_TRUE, "valid"])
4830 status_filter.insert(0, qlang.OP_AND)
4832 status_filter = None
4834 if name_filter and status_filter:
4835 return [qlang.OP_AND, name_filter, status_filter]
4839 return status_filter
4841 def CheckArguments(self):
4842 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4843 self.op.output_fields, False)
4845 def ExpandNames(self):
4846 self.oq.ExpandNames(self)
4848 def Exec(self, feedback_fn):
4849 return self.oq.OldStyleQuery(self)
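
# Illustrative sketch (not part of the original module): _OsQuery above
# pivots the per-node RPC answer ("node -> list of OS entries") into a
# per-OS view, pre-seeding every healthy node so that an OS missing from a
# node stays visible.  The same pivot on plain data; names are hypothetical.
def _ExamplePivotOsesByName(per_node_oses, good_nodes):
  """Turn {node: [os_name, ...]} into {os_name: {node: present}}."""
  result = {}
  for node, os_names in per_node_oses.items():
    for name in os_names:
      if name not in result:
        # seed all healthy nodes with "not present" for this OS
        result[name] = dict((gnode, False) for gnode in good_nodes)
      result[name][node] = True
  return result
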
4852 class LUNodeRemove(LogicalUnit):
4853 """Logical unit for removing a node.
4856 HPATH = "node-remove"
4857 HTYPE = constants.HTYPE_NODE
4859 def BuildHooksEnv(self):
4864 "OP_TARGET": self.op.node_name,
4865 "NODE_NAME": self.op.node_name,
4868 def BuildHooksNodes(self):
4869 """Build hooks nodes.
4871 This doesn't run on the target node in the pre phase as a failed
4872 node would then be impossible to remove.
4875 all_nodes = self.cfg.GetNodeList()
4877 all_nodes.remove(self.op.node_name)
4880 return (all_nodes, all_nodes)
4882 def CheckPrereq(self):
4883 """Check prerequisites.
4886 - the node exists in the configuration
4887 - it does not have primary or secondary instances
4888 - it's not the master
4890 Any errors are signaled by raising errors.OpPrereqError.
4893 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4894 node = self.cfg.GetNodeInfo(self.op.node_name)
4895 assert node is not None
4897 masternode = self.cfg.GetMasterNode()
4898 if node.name == masternode:
4899 raise errors.OpPrereqError("Node is the master node, failover to another"
4900 " node is required", errors.ECODE_INVAL)
4902 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4903 if node.name in instance.all_nodes:
4904 raise errors.OpPrereqError("Instance %s is still running on the node,"
4905 " please remove first" % instance_name,
4907 self.op.node_name = node.name
4910 def Exec(self, feedback_fn):
4911 """Removes the node from the cluster.
4915 logging.info("Stopping the node daemon and removing configs from node %s",
4918 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4920 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4923 # Promote nodes to master candidate as needed
4924 _AdjustCandidatePool(self, exceptions=[node.name])
4925 self.context.RemoveNode(node.name)
4927 # Run post hooks on the node before it's removed
4928 _RunPostHook(self, node.name)
4930 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4931 msg = result.fail_msg
4933 self.LogWarning("Errors encountered on the remote node while leaving"
4934 " the cluster: %s", msg)
4936 # Remove node from our /etc/hosts
4937 if self.cfg.GetClusterInfo().modify_etc_hosts:
4938 master_node = self.cfg.GetMasterNode()
4939 result = self.rpc.call_etc_hosts_modify(master_node,
4940 constants.ETC_HOSTS_REMOVE,
4942 result.Raise("Can't update hosts file with new host data")
4943 _RedistributeAncillaryFiles(self)
4946 class _NodeQuery(_QueryBase):
4947 FIELDS = query.NODE_FIELDS
4949 def ExpandNames(self, lu):
4950 lu.needed_locks = {}
4951 lu.share_locks = _ShareAll()
4954 self.wanted = _GetWantedNodes(lu, self.names)
4956 self.wanted = locking.ALL_SET
4958 self.do_locking = (self.use_locking and
4959 query.NQ_LIVE in self.requested_data)
4962 # If any non-static field is requested we need to lock the nodes
4963 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4965 def DeclareLocks(self, lu, level):
4968 def _GetQueryData(self, lu):
4969 """Computes the list of nodes and their attributes.
4972 all_info = lu.cfg.GetAllNodesInfo()
4974 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4976 # Gather data as requested
4977 if query.NQ_LIVE in self.requested_data:
4978 # filter out non-vm_capable nodes
4979 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4981 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4982 [lu.cfg.GetHypervisorType()])
4983 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4984 for (name, nresult) in node_data.items()
4985 if not nresult.fail_msg and nresult.payload)
4989 if query.NQ_INST in self.requested_data:
4990 node_to_primary = dict([(name, set()) for name in nodenames])
4991 node_to_secondary = dict([(name, set()) for name in nodenames])
4993 inst_data = lu.cfg.GetAllInstancesInfo()
4995 for inst in inst_data.values():
4996 if inst.primary_node in node_to_primary:
4997 node_to_primary[inst.primary_node].add(inst.name)
4998 for secnode in inst.secondary_nodes:
4999 if secnode in node_to_secondary:
5000 node_to_secondary[secnode].add(inst.name)
5002 node_to_primary = None
5003 node_to_secondary = None
5005 if query.NQ_OOB in self.requested_data:
5006 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5007 for name, node in all_info.iteritems())
5011 if query.NQ_GROUP in self.requested_data:
5012 groups = lu.cfg.GetAllNodeGroupsInfo()
5016 return query.NodeQueryData([all_info[name] for name in nodenames],
5017 live_data, lu.cfg.GetMasterNode(),
5018 node_to_primary, node_to_secondary, groups,
5019 oob_support, lu.cfg.GetClusterInfo())
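
# Illustrative sketch (not part of the original module): the node query
# above builds reverse maps from node names to the instances they host.
# On plain objects carrying primary_node/secondary_nodes/name attributes
# (hypothetical here), the mapping step is:
def _ExampleMapNodesToInstances(node_names, instances):
  """Return (node -> primary instances, node -> secondary instances)."""
  to_primary = dict((name, set()) for name in node_names)
  to_secondary = dict((name, set()) for name in node_names)
  for inst in instances:
    if inst.primary_node in to_primary:
      to_primary[inst.primary_node].add(inst.name)
    for snode in inst.secondary_nodes:
      if snode in to_secondary:
        to_secondary[snode].add(inst.name)
  return (to_primary, to_secondary)
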
5022 class LUNodeQuery(NoHooksLU):
5023 """Logical unit for querying nodes.
5026 # pylint: disable=W0142
5029 def CheckArguments(self):
5030 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5031 self.op.output_fields, self.op.use_locking)
5033 def ExpandNames(self):
5034 self.nq.ExpandNames(self)
5036 def DeclareLocks(self, level):
5037 self.nq.DeclareLocks(self, level)
5039 def Exec(self, feedback_fn):
5040 return self.nq.OldStyleQuery(self)
5043 class LUNodeQueryvols(NoHooksLU):
5044 """Logical unit for getting volumes on node(s).
5048 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5049 _FIELDS_STATIC = utils.FieldSet("node")
5051 def CheckArguments(self):
5052 _CheckOutputFields(static=self._FIELDS_STATIC,
5053 dynamic=self._FIELDS_DYNAMIC,
5054 selected=self.op.output_fields)
5056 def ExpandNames(self):
5057 self.share_locks = _ShareAll()
5058 self.needed_locks = {}
5060 if not self.op.nodes:
5061 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5063 self.needed_locks[locking.LEVEL_NODE] = \
5064 _GetWantedNodes(self, self.op.nodes)
5066 def Exec(self, feedback_fn):
5067 """Computes the list of nodes and their attributes.
5070 nodenames = self.owned_locks(locking.LEVEL_NODE)
5071 volumes = self.rpc.call_node_volumes(nodenames)
5073 ilist = self.cfg.GetAllInstancesInfo()
5074 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5077 for node in nodenames:
5078 nresult = volumes[node]
5081 msg = nresult.fail_msg
5083 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5086 node_vols = sorted(nresult.payload,
5087 key=operator.itemgetter("dev"))
5089 for vol in node_vols:
5091 for field in self.op.output_fields:
5094 elif field == "phys":
5098 elif field == "name":
5100 elif field == "size":
5101 val = int(float(vol["size"]))
5102 elif field == "instance":
5103 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5105 raise errors.ParameterError(field)
5106 node_output.append(str(val))
5108 output.append(node_output)
5113 class LUNodeQueryStorage(NoHooksLU):
5114 """Logical unit for getting information on storage units on node(s).
5117 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5120 def CheckArguments(self):
5121 _CheckOutputFields(static=self._FIELDS_STATIC,
5122 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5123 selected=self.op.output_fields)
5125 def ExpandNames(self):
5126 self.share_locks = _ShareAll()
5127 self.needed_locks = {}
5130 self.needed_locks[locking.LEVEL_NODE] = \
5131 _GetWantedNodes(self, self.op.nodes)
5133 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5135 def Exec(self, feedback_fn):
5136 """Computes the list of nodes and their attributes.
5139 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5141 # Always get name to sort by
5142 if constants.SF_NAME in self.op.output_fields:
5143 fields = self.op.output_fields[:]
5145 fields = [constants.SF_NAME] + self.op.output_fields
5147 # Never ask for node or type as it's only known to the LU
5148 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5149 while extra in fields:
5150 fields.remove(extra)
5152 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5153 name_idx = field_idx[constants.SF_NAME]
5155 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5156 data = self.rpc.call_storage_list(self.nodes,
5157 self.op.storage_type, st_args,
5158 self.op.name, fields)
5162 for node in utils.NiceSort(self.nodes):
5163 nresult = data[node]
5167 msg = nresult.fail_msg
5169 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5172 rows = dict([(row[name_idx], row) for row in nresult.payload])
5174 for name in utils.NiceSort(rows.keys()):
5179 for field in self.op.output_fields:
5180 if field == constants.SF_NODE:
5182 elif field == constants.SF_TYPE:
5183 val = self.op.storage_type
5184 elif field in field_idx:
5185 val = row[field_idx[field]]
5187 raise errors.ParameterError(field)
5196 class _InstanceQuery(_QueryBase):
5197 FIELDS = query.INSTANCE_FIELDS
5199 def ExpandNames(self, lu):
5200 lu.needed_locks = {}
5201 lu.share_locks = _ShareAll()
5204 self.wanted = _GetWantedInstances(lu, self.names)
5206 self.wanted = locking.ALL_SET
5208 self.do_locking = (self.use_locking and
5209 query.IQ_LIVE in self.requested_data)
5211 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5212 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5213 lu.needed_locks[locking.LEVEL_NODE] = []
5214 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5216 self.do_grouplocks = (self.do_locking and
5217 query.IQ_NODES in self.requested_data)
5219 def DeclareLocks(self, lu, level):
5221 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5222 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5224 # Lock all groups used by instances optimistically; this requires going
5225 # via the node before it's locked, requiring verification later on
5226 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5228 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5229 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5230 elif level == locking.LEVEL_NODE:
5231 lu._LockInstancesNodes() # pylint: disable=W0212
5234 def _CheckGroupLocks(lu):
5235 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5236 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5238 # Check if node groups for locked instances are still correct
5239 for instance_name in owned_instances:
5240 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5242 def _GetQueryData(self, lu):
5243 """Computes the list of instances and their attributes.
5246 if self.do_grouplocks:
5247 self._CheckGroupLocks(lu)
5249 cluster = lu.cfg.GetClusterInfo()
5250 all_info = lu.cfg.GetAllInstancesInfo()
5252 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5254 instance_list = [all_info[name] for name in instance_names]
5255 nodes = frozenset(itertools.chain(*(inst.all_nodes
5256 for inst in instance_list)))
5257 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5260 wrongnode_inst = set()
5262 # Gather data as requested
5263 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5265 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5267 result = node_data[name]
5269 # offline nodes will be in both lists
5270 assert result.fail_msg
5271 offline_nodes.append(name)
5273 bad_nodes.append(name)
5274 elif result.payload:
5275 for inst in result.payload:
5276 if inst in all_info:
5277 if all_info[inst].primary_node == name:
5278 live_data.update(result.payload)
5280 wrongnode_inst.add(inst)
5282 # orphan instance; we don't list it here as we don't
5283 # handle this case yet in the output of instance listing
5284 logging.warning("Orphan instance '%s' found on node %s",
5286 # else no instance is alive
5290 if query.IQ_DISKUSAGE in self.requested_data:
5291 disk_usage = dict((inst.name,
5292 _ComputeDiskSize(inst.disk_template,
5293 [{constants.IDISK_SIZE: disk.size}
5294 for disk in inst.disks]))
5295 for inst in instance_list)
5299 if query.IQ_CONSOLE in self.requested_data:
5301 for inst in instance_list:
5302 if inst.name in live_data:
5303 # Instance is running
5304 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5306 consinfo[inst.name] = None
5307 assert set(consinfo.keys()) == set(instance_names)
5311 if query.IQ_NODES in self.requested_data:
5312 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5314 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5315 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5316 for uuid in set(map(operator.attrgetter("group"),
5322 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5323 disk_usage, offline_nodes, bad_nodes,
5324 live_data, wrongnode_inst, consinfo,
5328 class LUQuery(NoHooksLU):
5329 """Query for resources/items of a certain kind.
5332 # pylint: disable=W0142
5335 def CheckArguments(self):
5336 qcls = _GetQueryImplementation(self.op.what)
5338 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5340 def ExpandNames(self):
5341 self.impl.ExpandNames(self)
5343 def DeclareLocks(self, level):
5344 self.impl.DeclareLocks(self, level)
5346 def Exec(self, feedback_fn):
5347 return self.impl.NewStyleQuery(self)
5350 class LUQueryFields(NoHooksLU):
5351 """Query for resources/items of a certain kind.
5354 # pylint: disable=W0142
5357 def CheckArguments(self):
5358 self.qcls = _GetQueryImplementation(self.op.what)
5360 def ExpandNames(self):
5361 self.needed_locks = {}
5363 def Exec(self, feedback_fn):
5364 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5367 class LUNodeModifyStorage(NoHooksLU):
5368 """Logical unit for modifying a storage volume on a node.
5373 def CheckArguments(self):
5374 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5376 storage_type = self.op.storage_type
5379 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5381 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5382 " modified" % storage_type,
5385 diff = set(self.op.changes.keys()) - modifiable
5387 raise errors.OpPrereqError("The following fields can not be modified for"
5388 " storage units of type '%s': %r" %
5389 (storage_type, list(diff)),
5392 def ExpandNames(self):
5393 self.needed_locks = {
5394 locking.LEVEL_NODE: self.op.node_name,
5397 def Exec(self, feedback_fn):
5398 """Computes the list of nodes and their attributes.
5401 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5402 result = self.rpc.call_storage_modify(self.op.node_name,
5403 self.op.storage_type, st_args,
5404 self.op.name, self.op.changes)
5405 result.Raise("Failed to modify storage unit '%s' on %s" %
5406 (self.op.name, self.op.node_name))
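
# Illustrative sketch (not part of the original module): CheckArguments
# above rejects changes touching fields that are not modifiable for the
# given storage type.  The core of that check on plain collections:
def _ExampleNonModifiableFields(requested_changes, modifiable_fields):
  """Return the set of requested fields that may not be modified."""
  return set(requested_changes) - set(modifiable_fields)

# e.g. an empty result means the whole change set is acceptable.
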
5409 class LUNodeAdd(LogicalUnit):
5410 """Logical unit for adding node to the cluster.
5414 HTYPE = constants.HTYPE_NODE
5415 _NFLAGS = ["master_capable", "vm_capable"]
5417 def CheckArguments(self):
5418 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5419 # validate/normalize the node name
5420 self.hostname = netutils.GetHostname(name=self.op.node_name,
5421 family=self.primary_ip_family)
5422 self.op.node_name = self.hostname.name
5424 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5425 raise errors.OpPrereqError("Cannot readd the master node",
5428 if self.op.readd and self.op.group:
5429 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5430 " being readded", errors.ECODE_INVAL)
5432 def BuildHooksEnv(self):
5435 This will run on all nodes before, and on all nodes + the new node after.
5439 "OP_TARGET": self.op.node_name,
5440 "NODE_NAME": self.op.node_name,
5441 "NODE_PIP": self.op.primary_ip,
5442 "NODE_SIP": self.op.secondary_ip,
5443 "MASTER_CAPABLE": str(self.op.master_capable),
5444 "VM_CAPABLE": str(self.op.vm_capable),
5447 def BuildHooksNodes(self):
5448 """Build hooks nodes.
5451 # Exclude added node
5452 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5453 post_nodes = pre_nodes + [self.op.node_name, ]
5455 return (pre_nodes, post_nodes)
5457 def CheckPrereq(self):
5458 """Check prerequisites.
5461 - the new node is not already in the config
5463 - its parameters (single/dual homed) matches the cluster
5465 Any errors are signaled by raising errors.OpPrereqError.
5469 hostname = self.hostname
5470 node = hostname.name
5471 primary_ip = self.op.primary_ip = hostname.ip
5472 if self.op.secondary_ip is None:
5473 if self.primary_ip_family == netutils.IP6Address.family:
5474 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5475 " IPv4 address must be given as secondary",
5477 self.op.secondary_ip = primary_ip
5479 secondary_ip = self.op.secondary_ip
5480 if not netutils.IP4Address.IsValid(secondary_ip):
5481 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5482 " address" % secondary_ip, errors.ECODE_INVAL)
5484 node_list = cfg.GetNodeList()
5485 if not self.op.readd and node in node_list:
5486 raise errors.OpPrereqError("Node %s is already in the configuration" %
5487 node, errors.ECODE_EXISTS)
5488 elif self.op.readd and node not in node_list:
5489 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5492 self.changed_primary_ip = False
5494 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5495 if self.op.readd and node == existing_node_name:
5496 if existing_node.secondary_ip != secondary_ip:
5497 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5498 " address configuration as before",
5500 if existing_node.primary_ip != primary_ip:
5501 self.changed_primary_ip = True
5505 if (existing_node.primary_ip == primary_ip or
5506 existing_node.secondary_ip == primary_ip or
5507 existing_node.primary_ip == secondary_ip or
5508 existing_node.secondary_ip == secondary_ip):
5509 raise errors.OpPrereqError("New node ip address(es) conflict with"
5510 " existing node %s" % existing_node.name,
5511 errors.ECODE_NOTUNIQUE)
5513 # After this 'if' block, None is no longer a valid value for the
5514 # _capable op attributes
5516 old_node = self.cfg.GetNodeInfo(node)
5517 assert old_node is not None, "Can't retrieve locked node %s" % node
5518 for attr in self._NFLAGS:
5519 if getattr(self.op, attr) is None:
5520 setattr(self.op, attr, getattr(old_node, attr))
5522 for attr in self._NFLAGS:
5523 if getattr(self.op, attr) is None:
5524 setattr(self.op, attr, True)
5526 if self.op.readd and not self.op.vm_capable:
5527 pri, sec = cfg.GetNodeInstances(node)
5529 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5530 " flag set to false, but it already holds"
5531 " instances" % node,
5534 # check that the type of the node (single versus dual homed) is the
5535 # same as for the master
5536 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5537 master_singlehomed = myself.secondary_ip == myself.primary_ip
5538 newbie_singlehomed = secondary_ip == primary_ip
5539 if master_singlehomed != newbie_singlehomed:
5540 if master_singlehomed:
5541 raise errors.OpPrereqError("The master has no secondary ip but the"
5542 " new node has one",
5545 raise errors.OpPrereqError("The master has a secondary ip but the"
5546 " new node doesn't have one",
5549 # checks reachability
5550 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5551 raise errors.OpPrereqError("Node not reachable by ping",
5552 errors.ECODE_ENVIRON)
5554 if not newbie_singlehomed:
5555 # check reachability from my secondary ip to newbie's secondary ip
5556 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5557 source=myself.secondary_ip):
5558 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5559 " based ping to node daemon port",
5560 errors.ECODE_ENVIRON)
5567 if self.op.master_capable:
5568 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5570 self.master_candidate = False
5573 self.new_node = old_node
5575 node_group = cfg.LookupNodeGroup(self.op.group)
5576 self.new_node = objects.Node(name=node,
5577 primary_ip=primary_ip,
5578 secondary_ip=secondary_ip,
5579 master_candidate=self.master_candidate,
5580 offline=False, drained=False,
5583 if self.op.ndparams:
5584 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5586 if self.op.hv_state:
5587 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5589 if self.op.disk_state:
5590 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5592 def Exec(self, feedback_fn):
5593 """Adds the new node to the cluster.
5596 new_node = self.new_node
5597 node = new_node.name
5599 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5602 # We are adding a new node, so we assume it's powered
5603 new_node.powered = True
5605 # for re-adds, reset the offline/drained/master-candidate flags;
5606 # we need to reset here, otherwise offline would prevent RPC calls
5607 # later in the procedure; this also means that if the re-add
5608 # fails, we are left with a non-offlined, broken node
5610 new_node.drained = new_node.offline = False # pylint: disable=W0201
5611 self.LogInfo("Readding a node, the offline/drained flags were reset")
5612 # if we demote the node, we do cleanup later in the procedure
5613 new_node.master_candidate = self.master_candidate
5614 if self.changed_primary_ip:
5615 new_node.primary_ip = self.op.primary_ip
5617 # copy the master/vm_capable flags
5618 for attr in self._NFLAGS:
5619 setattr(new_node, attr, getattr(self.op, attr))
5621 # notify the user about any possible mc promotion
5622 if new_node.master_candidate:
5623 self.LogInfo("Node will be a master candidate")
5625 if self.op.ndparams:
5626 new_node.ndparams = self.op.ndparams
5628 new_node.ndparams = {}
5630 if self.op.hv_state:
5631 new_node.hv_state_static = self.new_hv_state
5633 if self.op.disk_state:
5634 new_node.disk_state_static = self.new_disk_state
5636 # check connectivity
5637 result = self.rpc.call_version([node])[node]
5638 result.Raise("Can't get version information from node %s" % node)
5639 if constants.PROTOCOL_VERSION == result.payload:
5640 logging.info("Communication to node %s fine, sw version %s match",
5641 node, result.payload)
5643 raise errors.OpExecError("Version mismatch master version %s,"
5644 " node version %s" %
5645 (constants.PROTOCOL_VERSION, result.payload))
5647 # Add node to our /etc/hosts, and add key to known_hosts
5648 if self.cfg.GetClusterInfo().modify_etc_hosts:
5649 master_node = self.cfg.GetMasterNode()
5650 result = self.rpc.call_etc_hosts_modify(master_node,
5651 constants.ETC_HOSTS_ADD,
5654 result.Raise("Can't update hosts file with new host data")
5656 if new_node.secondary_ip != new_node.primary_ip:
5657 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5660 node_verify_list = [self.cfg.GetMasterNode()]
5661 node_verify_param = {
5662 constants.NV_NODELIST: ([node], {}),
5663 # TODO: do a node-net-test as well?
5666 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5667 self.cfg.GetClusterName())
5668 for verifier in node_verify_list:
5669 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5670 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5672 for failed in nl_payload:
5673 feedback_fn("ssh/hostname verification failed"
5674 " (checking from %s): %s" %
5675 (verifier, nl_payload[failed]))
5676 raise errors.OpExecError("ssh/hostname verification failed")
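# What follows differs between re-adds and fresh adds: a re-added node only
# gets the ancillary files and the updated configuration redistributed (and is
# demoted from master candidate if it should no longer be one), while a
# brand-new node also receives the ancillary files itself (including the
# VM-related ones when it is vm_capable) and is registered as a new cluster
# member.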
5679 _RedistributeAncillaryFiles(self)
5680 self.context.ReaddNode(new_node)
5681 # make sure we redistribute the config
5682 self.cfg.Update(new_node, feedback_fn)
5683 # and make sure the new node will not have old files around
5684 if not new_node.master_candidate:
5685 result = self.rpc.call_node_demote_from_mc(new_node.name)
5686 msg = result.fail_msg
5688 self.LogWarning("Node failed to demote itself from master"
5689 " candidate status: %s" % msg)
5691 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5692 additional_vm=self.op.vm_capable)
5693 self.context.AddNode(new_node, self.proc.GetECId())
5696 class LUNodeSetParams(LogicalUnit):
5697 """Modifies the parameters of a node.
5699 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5700 to the node role (as _ROLE_*)
5701 @cvar _R2F: a dictionary from node role to tuples of flags
5702 @cvar _FLAGS: a list of attribute names corresponding to the flags
5705 HPATH = "node-modify"
5706 HTYPE = constants.HTYPE_NODE
5708 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5710 (True, False, False): _ROLE_CANDIDATE,
5711 (False, True, False): _ROLE_DRAINED,
5712 (False, False, True): _ROLE_OFFLINE,
5713 (False, False, False): _ROLE_REGULAR,
5715 _R2F = dict((v, k) for k, v in _F2R.items())
5716 _FLAGS = ["master_candidate", "drained", "offline"]
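# Example of the mapping above: a node whose (master_candidate, drained,
# offline) flags are (True, False, False) has role _ROLE_CANDIDATE, while
# (False, False, False) is a regular node; _R2F is simply the inverse lookup.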
5718 def CheckArguments(self):
5719 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5720 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5721 self.op.master_capable, self.op.vm_capable,
5722 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5724 if all_mods.count(None) == len(all_mods):
5725 raise errors.OpPrereqError("Please pass at least one modification",
5727 if all_mods.count(True) > 1:
5728 raise errors.OpPrereqError("Can't set the node into more than one"
5729 " state at the same time",
5732 # Boolean value that tells us whether we might be demoting from MC
5733 self.might_demote = (self.op.master_candidate == False or
5734 self.op.offline == True or
5735 self.op.drained == True or
5736 self.op.master_capable == False)
5738 if self.op.secondary_ip:
5739 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5740 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5741 " address" % self.op.secondary_ip,
5744 self.lock_all = self.op.auto_promote and self.might_demote
5745 self.lock_instances = self.op.secondary_ip is not None
5747 def _InstanceFilter(self, instance):
5748 """Filter for getting affected instances.
5751 return (instance.disk_template in constants.DTS_INT_MIRROR and
5752 self.op.node_name in instance.all_nodes)
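# The filter above selects instances with an internally mirrored disk
# template (e.g. DRBD) that have this node among their nodes; only those are
# affected when the node's secondary IP changes.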
5754 def ExpandNames(self):
5756 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5758 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5760 # Since modifying a node can have severe effects on currently running
5761 # operations, the resource lock is acquired at least in shared mode
5762 self.needed_locks[locking.LEVEL_NODE_RES] = \
5763 self.needed_locks[locking.LEVEL_NODE]
5765 # Get node resource and instance locks in shared mode; they are not used
5766 # for anything but read-only access
5767 self.share_locks[locking.LEVEL_NODE_RES] = 1
5768 self.share_locks[locking.LEVEL_INSTANCE] = 1
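# Instance locks are only needed when the secondary IP is being changed (see
# lock_instances in CheckArguments); the affected instances are computed with
# _InstanceFilter below.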
5770 if self.lock_instances:
5771 self.needed_locks[locking.LEVEL_INSTANCE] = \
5772 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5774 def BuildHooksEnv(self):
5777 This runs on the master node.
5781 "OP_TARGET": self.op.node_name,
5782 "MASTER_CANDIDATE": str(self.op.master_candidate),
5783 "OFFLINE": str(self.op.offline),
5784 "DRAINED": str(self.op.drained),
5785 "MASTER_CAPABLE": str(self.op.master_capable),
5786 "VM_CAPABLE": str(self.op.vm_capable),
5789 def BuildHooksNodes(self):
5790 """Build hooks nodes.
5793 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5796 def CheckPrereq(self):
5797 """Check prerequisites.
5799 This only checks the instance list against the existing names.
5802 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5804 if self.lock_instances:
5805 affected_instances = \
5806 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5808 # Verify instance locks
5809 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5810 wanted_instances = frozenset(affected_instances.keys())
5811 if wanted_instances - owned_instances:
5812 raise errors.OpPrereqError("Instances affected by changing node %s's"
5813 " secondary IP address have changed since"
5814 " locks were acquired, wanted '%s', have"
5815 " '%s'; retry the operation" %
5817 utils.CommaJoin(wanted_instances),
5818 utils.CommaJoin(owned_instances)),
5821 affected_instances = None
5823 if (self.op.master_candidate is not None or
5824 self.op.drained is not None or
5825 self.op.offline is not None):
5826 # we can't change the master's node flags
5827 if self.op.node_name == self.cfg.GetMasterNode():
5828 raise errors.OpPrereqError("The master role can be changed"
5829 " only via master-failover",
5832 if self.op.master_candidate and not node.master_capable:
5833 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5834 " it a master candidate" % node.name,
5837 if self.op.vm_capable == False:
5838 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5840 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5841 " the vm_capable flag" % node.name,
5844 if node.master_candidate and self.might_demote and not self.lock_all:
5845 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5846 # check if after removing the current node, we're missing master candidates
5848 (mc_remaining, mc_should, _) = \
5849 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5850 if mc_remaining < mc_should:
5851 raise errors.OpPrereqError("Not enough master candidates, please"
5852 " pass auto promote option to allow"
5853 " promotion", errors.ECODE_STATE)
5855 self.old_flags = old_flags = (node.master_candidate,
5856 node.drained, node.offline)
5857 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5858 self.old_role = old_role = self._F2R[old_flags]
5860 # Check for ineffective changes
5861 for attr in self._FLAGS:
5862 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5863 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5864 setattr(self.op, attr, None)
5866 # Past this point, any flag change to False means a transition
5867 # away from the respective state, as only real changes are kept
5869 # TODO: We might query the real power state if it supports OOB
5870 if _SupportsOob(self.cfg, node):
5871 if self.op.offline is False and not (node.powered or
5872 self.op.powered == True):
5873 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5874 " offline status can be reset") %
5876 elif self.op.powered is not None:
5877 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5878 " as it does not support out-of-band"
5879 " handling") % self.op.node_name)
5881 # If we're being de-offlined or un-drained, we'll promote ourselves to master candidate if needed
5882 if (self.op.drained == False or self.op.offline == False or
5883 (self.op.master_capable and not node.master_capable)):
5884 if _DecideSelfPromotion(self):
5885 self.op.master_candidate = True
5886 self.LogInfo("Auto-promoting node to master candidate")
5888 # If we're no longer master capable, we'll demote ourselves from MC
5889 if self.op.master_capable == False and node.master_candidate:
5890 self.LogInfo("Demoting from master candidate")
5891 self.op.master_candidate = False
5894 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5895 if self.op.master_candidate:
5896 new_role = self._ROLE_CANDIDATE
5897 elif self.op.drained:
5898 new_role = self._ROLE_DRAINED
5899 elif self.op.offline:
5900 new_role = self._ROLE_OFFLINE
5901 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5902 # False is still in new flags, which means we're un-setting (the offline, drained or master-candidate flag)
5904 new_role = self._ROLE_REGULAR
5905 else: # no new flags, nothing, keep old role
5908 self.new_role = new_role
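# The decision above gives master_candidate precedence over drained, and
# drained over offline; if no flag was explicitly set to True but at least one
# was set to False, the node falls back to the regular role, otherwise the old
# role is kept.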
5910 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5911 # Trying to transition out of offline status
5912 # TODO: Use standard RPC runner, but make sure it works when the node is
5913 # still marked offline
5914 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5916 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5917 " to report its version: %s" %
5918 (node.name, result.fail_msg),
5921 self.LogWarning("Transitioning node from offline to online state"
5922 " without using re-add. Please make sure the node"
5925 if self.op.secondary_ip:
5926 # Ok even without locking, because this can't be changed by any LU
5927 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5928 master_singlehomed = master.secondary_ip == master.primary_ip
5929 if master_singlehomed and self.op.secondary_ip:
5930 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5931 " homed cluster", errors.ECODE_INVAL)
5933 assert not (frozenset(affected_instances) -
5934 self.owned_locks(locking.LEVEL_INSTANCE))
5937 if affected_instances:
5938 raise errors.OpPrereqError("Cannot change secondary IP address:"
5939 " offline node has instances (%s)"
5940 " configured to use it" %
5941 utils.CommaJoin(affected_instances.keys()))
5943 # On online nodes, check that no instances are running, and that
5944 # the node has the new ip and we can reach it.
5945 for instance in affected_instances.values():
5946 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5947 msg="cannot change secondary ip")
5949 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5950 if master.name != node.name:
5951 # check reachability from master secondary ip to new secondary ip
5952 if not netutils.TcpPing(self.op.secondary_ip,
5953 constants.DEFAULT_NODED_PORT,
5954 source=master.secondary_ip):
5955 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5956 " based ping to node daemon port",
5957 errors.ECODE_ENVIRON)
5959 if self.op.ndparams:
5960 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5961 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5962 self.new_ndparams = new_ndparams
5964 if self.op.hv_state:
5965 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5966 self.node.hv_state_static)
5968 if self.op.disk_state:
5969 self.new_disk_state = \
5970 _MergeAndVerifyDiskState(self.op.disk_state,
5971 self.node.disk_state_static)
5973 def Exec(self, feedback_fn):
5978 old_role = self.old_role
5979 new_role = self.new_role
5983 if self.op.ndparams:
5984 node.ndparams = self.new_ndparams
5986 if self.op.powered is not None:
5987 node.powered = self.op.powered
5989 if self.op.hv_state:
5990 node.hv_state_static = self.new_hv_state
5992 if self.op.disk_state:
5993 node.disk_state_static = self.new_disk_state
5995 for attr in ["master_capable", "vm_capable"]:
5996 val = getattr(self.op, attr)
5998 setattr(node, attr, val)
5999 result.append((attr, str(val)))
6001 if new_role != old_role:
6002 # Tell the node to demote itself, if no longer MC and not offline
6003 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6004 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6006 self.LogWarning("Node failed to demote itself: %s", msg)
6008 new_flags = self._R2F[new_role]
6009 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6011 result.append((desc, str(nf)))
6012 (node.master_candidate, node.drained, node.offline) = new_flags
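# new_flags is ordered like _FLAGS, i.e. (master_candidate, drained, offline),
# so the tuple assignment above updates all three node flags at once.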
6014 # we locked all nodes, we adjust the CP before updating this node
6016 _AdjustCandidatePool(self, [node.name])
6018 if self.op.secondary_ip:
6019 node.secondary_ip = self.op.secondary_ip
6020 result.append(("secondary_ip", self.op.secondary_ip))
6022 # this will trigger configuration file update, if needed
6023 self.cfg.Update(node, feedback_fn)
6025 # this will trigger job queue propagation or cleanup if the master-candidate flag changed
6027 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6028 self.context.ReaddNode(node)
6033 class LUNodePowercycle(NoHooksLU):
6034 """Powercycles a node.
6039 def CheckArguments(self):
6040 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6041 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6042 raise errors.OpPrereqError("The node is the master and the force"
6043 " parameter was not set",
6046 def ExpandNames(self):
6047 """Locking for PowercycleNode.
6049 This is a last-resort option and shouldn't block on other
6050 jobs. Therefore, we grab no locks.
6053 self.needed_locks = {}
6055 def Exec(self, feedback_fn):
6059 result = self.rpc.call_node_powercycle(self.op.node_name,
6060 self.cfg.GetHypervisorType())
6061 result.Raise("Failed to schedule the reboot")
6062 return result.payload
6065 class LUClusterQuery(NoHooksLU):
6066 """Query cluster configuration.
6071 def ExpandNames(self):
6072 self.needed_locks = {}
6074 def Exec(self, feedback_fn):
6075 """Return cluster config.
6078 cluster = self.cfg.GetClusterInfo()
6081 # Filter just for enabled hypervisors
6082 for os_name, hv_dict in cluster.os_hvp.items():
6083 os_hvp[os_name] = {}
6084 for hv_name, hv_params in hv_dict.items():
6085 if hv_name in cluster.enabled_hypervisors:
6086 os_hvp[os_name][hv_name] = hv_params
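# Illustrative example (OS and hypervisor names are made up): with
# enabled_hypervisors == ["kvm"], an os_hvp entry
# {"debian-image": {"kvm": {...}, "xen-pvm": {...}}} is reduced to
# {"debian-image": {"kvm": {...}}}.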
6088 # Convert ip_family to ip_version
6089 primary_ip_version = constants.IP4_VERSION
6090 if cluster.primary_ip_family == netutils.IP6Address.family:
6091 primary_ip_version = constants.IP6_VERSION
6094 "software_version": constants.RELEASE_VERSION,
6095 "protocol_version": constants.PROTOCOL_VERSION,
6096 "config_version": constants.CONFIG_VERSION,
6097 "os_api_version": max(constants.OS_API_VERSIONS),
6098 "export_version": constants.EXPORT_VERSION,
6099 "architecture": (platform.architecture()[0], platform.machine()),
6100 "name": cluster.cluster_name,
6101 "master": cluster.master_node,
6102 "default_hypervisor": cluster.primary_hypervisor,
6103 "enabled_hypervisors": cluster.enabled_hypervisors,
6104 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6105 for hypervisor_name in cluster.enabled_hypervisors]),
6107 "beparams": cluster.beparams,
6108 "osparams": cluster.osparams,
6109 "ipolicy": cluster.ipolicy,
6110 "nicparams": cluster.nicparams,
6111 "ndparams": cluster.ndparams,
6112 "candidate_pool_size": cluster.candidate_pool_size,
6113 "master_netdev": cluster.master_netdev,
6114 "master_netmask": cluster.master_netmask,
6115 "use_external_mip_script": cluster.use_external_mip_script,
6116 "volume_group_name": cluster.volume_group_name,
6117 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6118 "file_storage_dir": cluster.file_storage_dir,
6119 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6120 "maintain_node_health": cluster.maintain_node_health,
6121 "ctime": cluster.ctime,
6122 "mtime": cluster.mtime,
6123 "uuid": cluster.uuid,
6124 "tags": list(cluster.GetTags()),
6125 "uid_pool": cluster.uid_pool,
6126 "default_iallocator": cluster.default_iallocator,
6127 "reserved_lvs": cluster.reserved_lvs,
6128 "primary_ip_version": primary_ip_version,
6129 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6130 "hidden_os": cluster.hidden_os,
6131 "blacklisted_os": cluster.blacklisted_os,
6137 class LUClusterConfigQuery(NoHooksLU):
6138 """Return configuration values.
6142 _FIELDS_DYNAMIC = utils.FieldSet()
6143 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6144 "watcher_pause", "volume_group_name")
6146 def CheckArguments(self):
6147 _CheckOutputFields(static=self._FIELDS_STATIC,
6148 dynamic=self._FIELDS_DYNAMIC,
6149 selected=self.op.output_fields)
6151 def ExpandNames(self):
6152 self.needed_locks = {}
6154 def Exec(self, feedback_fn):
6155 """Dump a representation of the cluster config to the standard output.
6159 for field in self.op.output_fields:
6160 if field == "cluster_name":
6161 entry = self.cfg.GetClusterName()
6162 elif field == "master_node":
6163 entry = self.cfg.GetMasterNode()
6164 elif field == "drain_flag":
6165 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6166 elif field == "watcher_pause":
6167 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6168 elif field == "volume_group_name":
6169 entry = self.cfg.GetVGName()
6171 raise errors.ParameterError(field)
6172 values.append(entry)
6176 class LUInstanceActivateDisks(NoHooksLU):
6177 """Bring up an instance's disks.
6182 def ExpandNames(self):
6183 self._ExpandAndLockInstance()
6184 self.needed_locks[locking.LEVEL_NODE] = []
6185 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6187 def DeclareLocks(self, level):
6188 if level == locking.LEVEL_NODE:
6189 self._LockInstancesNodes()
6191 def CheckPrereq(self):
6192 """Check prerequisites.
6194 This checks that the instance is in the cluster.
6197 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6198 assert self.instance is not None, \
6199 "Cannot retrieve locked instance %s" % self.op.instance_name
6200 _CheckNodeOnline(self, self.instance.primary_node)
6202 def Exec(self, feedback_fn):
6203 """Activate the disks.
6206 disks_ok, disks_info = \
6207 _AssembleInstanceDisks(self, self.instance,
6208 ignore_size=self.op.ignore_size)
6210 raise errors.OpExecError("Cannot activate block devices")
6215 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6217 """Prepare the block devices for an instance.
6219 This sets up the block devices on all nodes.
6221 @type lu: L{LogicalUnit}
6222 @param lu: the logical unit on whose behalf we execute
6223 @type instance: L{objects.Instance}
6224 @param instance: the instance for whose disks we assemble
6225 @type disks: list of L{objects.Disk} or None
6226 @param disks: which disks to assemble (or all, if None)
6227 @type ignore_secondaries: boolean
6228 @param ignore_secondaries: if true, errors on secondary nodes
6229 won't result in an error return from the function
6230 @type ignore_size: boolean
6231 @param ignore_size: if true, the current known size of the disk
6232 will not be used during the disk activation, useful for cases
6233 when the size is wrong
6234 @return: False if the operation failed, otherwise a list of
6235 (host, instance_visible_name, node_visible_name)
6236 with the mapping from node devices to instance devices
6241 iname = instance.name
6242 disks = _ExpandCheckDisks(instance, disks)
6244 # With the two-pass mechanism we try to reduce the window of
6245 # opportunity for the race condition of switching DRBD to primary
6246 # before handshaking occurred, but we do not eliminate it
6248 # The proper fix would be to wait (with some limits) until the
6249 # connection has been made and drbd transitions from WFConnection
6250 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6253 # 1st pass, assemble on all nodes in secondary mode
6254 for idx, inst_disk in enumerate(disks):
6255 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6257 node_disk = node_disk.Copy()
6258 node_disk.UnsetSize()
6259 lu.cfg.SetDiskID(node_disk, node)
6260 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6261 msg = result.fail_msg
6263 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6264 " (is_primary=False, pass=1): %s",
6265 inst_disk.iv_name, node, msg)
6266 if not ignore_secondaries:
6269 # FIXME: race condition on drbd migration to primary
6271 # 2nd pass, do only the primary node
6272 for idx, inst_disk in enumerate(disks):
6275 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6276 if node != instance.primary_node:
6279 node_disk = node_disk.Copy()
6280 node_disk.UnsetSize()
6281 lu.cfg.SetDiskID(node_disk, node)
6282 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6283 msg = result.fail_msg
6285 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6286 " (is_primary=True, pass=2): %s",
6287 inst_disk.iv_name, node, msg)
6290 dev_path = result.payload
6292 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6294 # leave the disks configured for the primary node
6295 # this is a workaround that would be fixed better by
6296 # improving the logical/physical id handling
6298 lu.cfg.SetDiskID(disk, instance.primary_node)
6300 return disks_ok, device_info
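# Minimal usage sketch (mirrors LUInstanceActivateDisks.Exec above; "lu" and
# "instance" stand for whatever logical unit and instance objects the caller
# already holds):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")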
6303 def _StartInstanceDisks(lu, instance, force):
6304 """Start the disks of an instance.
6307 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6308 ignore_secondaries=force)
6310 _ShutdownInstanceDisks(lu, instance)
6311 if force is not None and not force:
6312 lu.proc.LogWarning("", hint="If the message above refers to a"
6314 " you can retry the operation using '--force'.")
6315 raise errors.OpExecError("Disk consistency error")
6318 class LUInstanceDeactivateDisks(NoHooksLU):
6319 """Shutdown an instance's disks.
6324 def ExpandNames(self):
6325 self._ExpandAndLockInstance()
6326 self.needed_locks[locking.LEVEL_NODE] = []
6327 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6329 def DeclareLocks(self, level):
6330 if level == locking.LEVEL_NODE:
6331 self._LockInstancesNodes()
6333 def CheckPrereq(self):
6334 """Check prerequisites.
6336 This checks that the instance is in the cluster.
6339 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6340 assert self.instance is not None, \
6341 "Cannot retrieve locked instance %s" % self.op.instance_name
6343 def Exec(self, feedback_fn):
6344 """Deactivate the disks
6347 instance = self.instance
6349 _ShutdownInstanceDisks(self, instance)
6351 _SafeShutdownInstanceDisks(self, instance)
6354 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6355 """Shutdown block devices of an instance.
6357 This function checks if an instance is running, before calling
6358 _ShutdownInstanceDisks.
6361 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6362 _ShutdownInstanceDisks(lu, instance, disks=disks)
6365 def _ExpandCheckDisks(instance, disks):
6366 """Return the instance disks selected by the disks list
6368 @type disks: list of L{objects.Disk} or None
6369 @param disks: selected disks
6370 @rtype: list of L{objects.Disk}
6371 @return: selected instance disks to act on
6375 return instance.disks
6377 if not set(disks).issubset(instance.disks):
6378 raise errors.ProgrammerError("Can only act on disks belonging to the"
6383 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6384 """Shutdown block devices of an instance.
6386 This does the shutdown on all nodes of the instance.
6388 If the ignore_primary is false, errors on the primary node are treated as failures of the whole operation.
6393 disks = _ExpandCheckDisks(instance, disks)
6396 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6397 lu.cfg.SetDiskID(top_disk, node)
6398 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6399 msg = result.fail_msg
6401 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6402 disk.iv_name, node, msg)
6403 if ((node == instance.primary_node and not ignore_primary) or
6404 (node != instance.primary_node and not result.offline)):
6409 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6410 """Checks if a node has enough free memory.
6412 This function checks if a given node has the needed amount of free
6413 memory. In case the node has less memory or we cannot get the
6414 information from the node, this function raises an OpPrereqError
6417 @type lu: C{LogicalUnit}
6418 @param lu: a logical unit from which we get configuration data
6420 @param node: the node to check
6421 @type reason: C{str}
6422 @param reason: string to use in the error message
6423 @type requested: C{int}
6424 @param requested: the amount of memory in MiB to check for
6425 @type hypervisor_name: C{str}
6426 @param hypervisor_name: the hypervisor to ask for memory stats
6428 @return: node current free memory
6429 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6430 we cannot check the node
6433 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6434 nodeinfo[node].Raise("Can't get data from node %s" % node,
6435 prereq=True, ecode=errors.ECODE_ENVIRON)
6436 (_, _, (hv_info, )) = nodeinfo[node].payload
6438 free_mem = hv_info.get("memory_free", None)
6439 if not isinstance(free_mem, int):
6440 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6441 " was '%s'" % (node, free_mem),
6442 errors.ECODE_ENVIRON)
6443 if requested > free_mem:
6444 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6445 " needed %s MiB, available %s MiB" %
6446 (node, reason, requested, free_mem),
6451 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6452 """Checks if nodes have enough free disk space in the all VGs.
6454 This function check if all given nodes have the needed amount of
6455 free disk. In case any node has less disk or we cannot get the
6456 information from the node, this function raise an OpPrereqError
6459 @type lu: C{LogicalUnit}
6460 @param lu: a logical unit from which we get configuration data
6461 @type nodenames: C{list}
6462 @param nodenames: the list of node names to check
6463 @type req_sizes: C{dict}
6464 @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for
6466 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6467 or we cannot check the node
6470 for vg, req_size in req_sizes.items():
6471 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
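# Illustrative call (node names and sizes are made up):
#   _CheckNodesFreeDiskPerVG(lu, ["node1", "node2"], {"xenvg": 10240})
# requires 10 GiB of free space in volume group "xenvg" on both nodes.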
6474 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6475 """Checks if nodes have enough free disk space in the specified VG.
6477 This function checks if all given nodes have the needed amount of
6478 free disk. In case any node has less disk or we cannot get the
6479 information from the node, this function raises an OpPrereqError
6482 @type lu: C{LogicalUnit}
6483 @param lu: a logical unit from which we get configuration data
6484 @type nodenames: C{list}
6485 @param nodenames: the list of node names to check
6487 @param vg: the volume group to check
6488 @type requested: C{int}
6489 @param requested: the amount of disk in MiB to check for
6490 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6491 or we cannot check the node
6494 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6495 for node in nodenames:
6496 info = nodeinfo[node]
6497 info.Raise("Cannot get current information from node %s" % node,
6498 prereq=True, ecode=errors.ECODE_ENVIRON)
6499 (_, (vg_info, ), _) = info.payload
6500 vg_free = vg_info.get("vg_free", None)
6501 if not isinstance(vg_free, int):
6502 raise errors.OpPrereqError("Can't compute free disk space on node"
6503 " %s for vg %s, result was '%s'" %
6504 (node, vg, vg_free), errors.ECODE_ENVIRON)
6505 if requested > vg_free:
6506 raise errors.OpPrereqError("Not enough disk space on target node %s"
6507 " vg %s: required %d MiB, available %d MiB" %
6508 (node, vg, requested, vg_free),
6512 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6513 """Checks if nodes have enough physical CPUs
6515 This function checks if all given nodes have the needed number of
6516 physical CPUs. In case any node has fewer CPUs or we cannot get the
6517 information from the node, this function raises an OpPrereqError
6520 @type lu: C{LogicalUnit}
6521 @param lu: a logical unit from which we get configuration data
6522 @type nodenames: C{list}
6523 @param nodenames: the list of node names to check
6524 @type requested: C{int}
6525 @param requested: the minimum acceptable number of physical CPUs
6526 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6527 or we cannot check the node
6530 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6531 for node in nodenames:
6532 info = nodeinfo[node]
6533 info.Raise("Cannot get current information from node %s" % node,
6534 prereq=True, ecode=errors.ECODE_ENVIRON)
6535 (_, _, (hv_info, )) = info.payload
6536 num_cpus = hv_info.get("cpu_total", None)
6537 if not isinstance(num_cpus, int):
6538 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6539 " on node %s, result was '%s'" %
6540 (node, num_cpus), errors.ECODE_ENVIRON)
6541 if requested > num_cpus:
6542 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6543 "required" % (node, num_cpus, requested),
6547 class LUInstanceStartup(LogicalUnit):
6548 """Starts an instance.
6551 HPATH = "instance-start"
6552 HTYPE = constants.HTYPE_INSTANCE
6555 def CheckArguments(self):
6557 if self.op.beparams:
6558 # fill the beparams dict
6559 objects.UpgradeBeParams(self.op.beparams)
6560 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6562 def ExpandNames(self):
6563 self._ExpandAndLockInstance()
6564 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6566 def DeclareLocks(self, level):
6567 if level == locking.LEVEL_NODE_RES:
6568 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6570 def BuildHooksEnv(self):
6573 This runs on master, primary and secondary nodes of the instance.
6577 "FORCE": self.op.force,
6580 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6584 def BuildHooksNodes(self):
6585 """Build hooks nodes.
6588 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6591 def CheckPrereq(self):
6592 """Check prerequisites.
6594 This checks that the instance is in the cluster.
6597 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6598 assert self.instance is not None, \
6599 "Cannot retrieve locked instance %s" % self.op.instance_name
6602 if self.op.hvparams:
6603 # check hypervisor parameter syntax (locally)
6604 cluster = self.cfg.GetClusterInfo()
6605 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6606 filled_hvp = cluster.FillHV(instance)
6607 filled_hvp.update(self.op.hvparams)
6608 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6609 hv_type.CheckParameterSyntax(filled_hvp)
6610 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6612 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6614 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6616 if self.primary_offline and self.op.ignore_offline_nodes:
6617 self.proc.LogWarning("Ignoring offline primary node")
6619 if self.op.hvparams or self.op.beparams:
6620 self.proc.LogWarning("Overridden parameters are ignored")
6622 _CheckNodeOnline(self, instance.primary_node)
6624 bep = self.cfg.GetClusterInfo().FillBE(instance)
6625 bep.update(self.op.beparams)
6627 # check bridges existence
6628 _CheckInstanceBridgesExist(self, instance)
6630 remote_info = self.rpc.call_instance_info(instance.primary_node,
6632 instance.hypervisor)
6633 remote_info.Raise("Error checking node %s" % instance.primary_node,
6634 prereq=True, ecode=errors.ECODE_ENVIRON)
6635 if not remote_info.payload: # not running already
6636 _CheckNodeFreeMemory(self, instance.primary_node,
6637 "starting instance %s" % instance.name,
6638 bep[constants.BE_MINMEM], instance.hypervisor)
6640 def Exec(self, feedback_fn):
6641 """Start the instance.
6644 instance = self.instance
6645 force = self.op.force
6647 if not self.op.no_remember:
6648 self.cfg.MarkInstanceUp(instance.name)
6650 if self.primary_offline:
6651 assert self.op.ignore_offline_nodes
6652 self.proc.LogInfo("Primary node offline, marked instance as started")
6654 node_current = instance.primary_node
6656 _StartInstanceDisks(self, instance, force)
6659 self.rpc.call_instance_start(node_current,
6660 (instance, self.op.hvparams,
6662 self.op.startup_paused)
6663 msg = result.fail_msg
6665 _ShutdownInstanceDisks(self, instance)
6666 raise errors.OpExecError("Could not start instance: %s" % msg)
6669 class LUInstanceReboot(LogicalUnit):
6670 """Reboot an instance.
6673 HPATH = "instance-reboot"
6674 HTYPE = constants.HTYPE_INSTANCE
6677 def ExpandNames(self):
6678 self._ExpandAndLockInstance()
6680 def BuildHooksEnv(self):
6683 This runs on master, primary and secondary nodes of the instance.
6687 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6688 "REBOOT_TYPE": self.op.reboot_type,
6689 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6692 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6696 def BuildHooksNodes(self):
6697 """Build hooks nodes.
6700 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6703 def CheckPrereq(self):
6704 """Check prerequisites.
6706 This checks that the instance is in the cluster.
6709 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6710 assert self.instance is not None, \
6711 "Cannot retrieve locked instance %s" % self.op.instance_name
6712 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6713 _CheckNodeOnline(self, instance.primary_node)
6715 # check bridges existence
6716 _CheckInstanceBridgesExist(self, instance)
6718 def Exec(self, feedback_fn):
6719 """Reboot the instance.
6722 instance = self.instance
6723 ignore_secondaries = self.op.ignore_secondaries
6724 reboot_type = self.op.reboot_type
6726 remote_info = self.rpc.call_instance_info(instance.primary_node,
6728 instance.hypervisor)
6729 remote_info.Raise("Error checking node %s" % instance.primary_node)
6730 instance_running = bool(remote_info.payload)
6732 node_current = instance.primary_node
6734 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6735 constants.INSTANCE_REBOOT_HARD]:
6736 for disk in instance.disks:
6737 self.cfg.SetDiskID(disk, node_current)
6738 result = self.rpc.call_instance_reboot(node_current, instance,
6740 self.op.shutdown_timeout)
6741 result.Raise("Could not reboot instance")
6743 if instance_running:
6744 result = self.rpc.call_instance_shutdown(node_current, instance,
6745 self.op.shutdown_timeout)
6746 result.Raise("Could not shutdown instance for full reboot")
6747 _ShutdownInstanceDisks(self, instance)
6749 self.LogInfo("Instance %s was already stopped, starting now",
6751 _StartInstanceDisks(self, instance, ignore_secondaries)
6752 result = self.rpc.call_instance_start(node_current,
6753 (instance, None, None), False)
6754 msg = result.fail_msg
6756 _ShutdownInstanceDisks(self, instance)
6757 raise errors.OpExecError("Could not start instance for"
6758 " full reboot: %s" % msg)
6760 self.cfg.MarkInstanceUp(instance.name)
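# Summary of the paths above: soft and hard reboots of a running instance are
# delegated to the node daemon via call_instance_reboot; a full reboot (or a
# reboot of an instance that is not currently running) is implemented as
# shutdown, disk deactivation, disk activation and a fresh start.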
6763 class LUInstanceShutdown(LogicalUnit):
6764 """Shutdown an instance.
6767 HPATH = "instance-stop"
6768 HTYPE = constants.HTYPE_INSTANCE
6771 def ExpandNames(self):
6772 self._ExpandAndLockInstance()
6774 def BuildHooksEnv(self):
6777 This runs on master, primary and secondary nodes of the instance.
6780 env = _BuildInstanceHookEnvByObject(self, self.instance)
6781 env["TIMEOUT"] = self.op.timeout
6784 def BuildHooksNodes(self):
6785 """Build hooks nodes.
6788 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6791 def CheckPrereq(self):
6792 """Check prerequisites.
6794 This checks that the instance is in the cluster.
6797 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6798 assert self.instance is not None, \
6799 "Cannot retrieve locked instance %s" % self.op.instance_name
6801 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6803 self.primary_offline = \
6804 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6806 if self.primary_offline and self.op.ignore_offline_nodes:
6807 self.proc.LogWarning("Ignoring offline primary node")
6809 _CheckNodeOnline(self, self.instance.primary_node)
6811 def Exec(self, feedback_fn):
6812 """Shutdown the instance.
6815 instance = self.instance
6816 node_current = instance.primary_node
6817 timeout = self.op.timeout
6819 if not self.op.no_remember:
6820 self.cfg.MarkInstanceDown(instance.name)
6822 if self.primary_offline:
6823 assert self.op.ignore_offline_nodes
6824 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6826 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6827 msg = result.fail_msg
6829 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6831 _ShutdownInstanceDisks(self, instance)
6834 class LUInstanceReinstall(LogicalUnit):
6835 """Reinstall an instance.
6838 HPATH = "instance-reinstall"
6839 HTYPE = constants.HTYPE_INSTANCE
6842 def ExpandNames(self):
6843 self._ExpandAndLockInstance()
6845 def BuildHooksEnv(self):
6848 This runs on master, primary and secondary nodes of the instance.
6851 return _BuildInstanceHookEnvByObject(self, self.instance)
6853 def BuildHooksNodes(self):
6854 """Build hooks nodes.
6857 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6860 def CheckPrereq(self):
6861 """Check prerequisites.
6863 This checks that the instance is in the cluster and is not running.
6866 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6867 assert instance is not None, \
6868 "Cannot retrieve locked instance %s" % self.op.instance_name
6869 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6870 " offline, cannot reinstall")
6871 for node in instance.secondary_nodes:
6872 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6873 " cannot reinstall")
6875 if instance.disk_template == constants.DT_DISKLESS:
6876 raise errors.OpPrereqError("Instance '%s' has no disks" %
6877 self.op.instance_name,
6879 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6881 if self.op.os_type is not None:
6883 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6884 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6885 instance_os = self.op.os_type
6887 instance_os = instance.os
6889 nodelist = list(instance.all_nodes)
6891 if self.op.osparams:
6892 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6893 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6894 self.os_inst = i_osdict # the new dict (without defaults)
6898 self.instance = instance
6900 def Exec(self, feedback_fn):
6901 """Reinstall the instance.
6904 inst = self.instance
6906 if self.op.os_type is not None:
6907 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6908 inst.os = self.op.os_type
6909 # Write to configuration
6910 self.cfg.Update(inst, feedback_fn)
6912 _StartInstanceDisks(self, inst, None)
6914 feedback_fn("Running the instance OS create scripts...")
6915 # FIXME: pass debug option from opcode to backend
6916 result = self.rpc.call_instance_os_add(inst.primary_node,
6917 (inst, self.os_inst), True,
6918 self.op.debug_level)
6919 result.Raise("Could not install OS for instance %s on node %s" %
6920 (inst.name, inst.primary_node))
6922 _ShutdownInstanceDisks(self, inst)
6925 class LUInstanceRecreateDisks(LogicalUnit):
6926 """Recreate an instance's missing disks.
6929 HPATH = "instance-recreate-disks"
6930 HTYPE = constants.HTYPE_INSTANCE
6933 _MODIFYABLE = frozenset([
6934 constants.IDISK_SIZE,
6935 constants.IDISK_MODE,
6938 # New or changed disk parameters may have different semantics
6939 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6940 constants.IDISK_ADOPT,
6942 # TODO: Implement support changing VG while recreating
6944 constants.IDISK_METAVG,
6947 def CheckArguments(self):
6948 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6949 # Normalize and convert deprecated list of disk indices
6950 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
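# Illustrative example: a legacy request with disks=[2, 0, 2] is normalized
# to [(0, {}), (2, {})], i.e. sorted, de-duplicated indices paired with empty
# parameter dictionaries.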
6952 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6954 raise errors.OpPrereqError("Some disks have been specified more than"
6955 " once: %s" % utils.CommaJoin(duplicates),
6958 for (idx, params) in self.op.disks:
6959 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6960 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6962 raise errors.OpPrereqError("Parameters for disk %s try to change"
6963 " unmodifyable parameter(s): %s" %
6964 (idx, utils.CommaJoin(unsupported)),
6967 def ExpandNames(self):
6968 self._ExpandAndLockInstance()
6969 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6971 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6972 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6974 self.needed_locks[locking.LEVEL_NODE] = []
6975 self.needed_locks[locking.LEVEL_NODE_RES] = []
6977 def DeclareLocks(self, level):
6978 if level == locking.LEVEL_NODE:
6979 # if we replace the nodes, we only need to lock the old primary,
6980 # otherwise we need to lock all nodes for disk re-creation
6981 primary_only = bool(self.op.nodes)
6982 self._LockInstancesNodes(primary_only=primary_only)
6983 elif level == locking.LEVEL_NODE_RES:
6985 self.needed_locks[locking.LEVEL_NODE_RES] = \
6986 self.needed_locks[locking.LEVEL_NODE][:]
6988 def BuildHooksEnv(self):
6991 This runs on master, primary and secondary nodes of the instance.
6994 return _BuildInstanceHookEnvByObject(self, self.instance)
6996 def BuildHooksNodes(self):
6997 """Build hooks nodes.
7000 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7003 def CheckPrereq(self):
7004 """Check prerequisites.
7006 This checks that the instance is in the cluster and is not running.
7009 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7010 assert instance is not None, \
7011 "Cannot retrieve locked instance %s" % self.op.instance_name
7013 if len(self.op.nodes) != len(instance.all_nodes):
7014 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7015 " %d replacement nodes were specified" %
7016 (instance.name, len(instance.all_nodes),
7017 len(self.op.nodes)),
7019 assert instance.disk_template != constants.DT_DRBD8 or \
7020 len(self.op.nodes) == 2
7021 assert instance.disk_template != constants.DT_PLAIN or \
7022 len(self.op.nodes) == 1
7023 primary_node = self.op.nodes[0]
7025 primary_node = instance.primary_node
7026 _CheckNodeOnline(self, primary_node)
7028 if instance.disk_template == constants.DT_DISKLESS:
7029 raise errors.OpPrereqError("Instance '%s' has no disks" %
7030 self.op.instance_name, errors.ECODE_INVAL)
7032 # if we replace nodes *and* the old primary is offline, we don't
7034 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7035 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7036 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7037 if not (self.op.nodes and old_pnode.offline):
7038 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7039 msg="cannot recreate disks")
7042 self.disks = dict(self.op.disks)
7044 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7046 maxidx = max(self.disks.keys())
7047 if maxidx >= len(instance.disks):
7048 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7051 if (self.op.nodes and
7052 sorted(self.disks.keys()) != range(len(instance.disks))):
7053 raise errors.OpPrereqError("Can't recreate disks partially and"
7054 " change the nodes at the same time",
7057 self.instance = instance
7059 def Exec(self, feedback_fn):
7060 """Recreate the disks.
7063 instance = self.instance
7065 assert (self.owned_locks(locking.LEVEL_NODE) ==
7066 self.owned_locks(locking.LEVEL_NODE_RES))
7069 mods = [] # keeps track of needed changes
7071 for idx, disk in enumerate(instance.disks):
7073 changes = self.disks[idx]
7075 # Disk should not be recreated
7079 # update secondaries for disks, if needed
7080 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7081 # need to update the nodes and minors
7082 assert len(self.op.nodes) == 2
7083 assert len(disk.logical_id) == 6 # otherwise disk internals have changed
7085 (_, _, old_port, _, _, old_secret) = disk.logical_id
7086 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7087 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7088 new_minors[0], new_minors[1], old_secret)
7089 assert len(disk.logical_id) == len(new_id)
7093 mods.append((idx, new_id, changes))
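# For DRBD disks, new_id keeps the shape of logical_id:
# (nodeA, nodeB, port, minorA, minorB, secret); the nodes and minors are
# replaced while the old port and shared secret are reused.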
7095 # now that we have passed all asserts above, we can apply the mods
7096 # in a single run (to avoid partial changes)
7097 for idx, new_id, changes in mods:
7098 disk = instance.disks[idx]
7099 if new_id is not None:
7100 assert disk.dev_type == constants.LD_DRBD8
7101 disk.logical_id = new_id
7103 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7104 mode=changes.get(constants.IDISK_MODE, None))
7106 # change primary node, if needed
7108 instance.primary_node = self.op.nodes[0]
7109 self.LogWarning("Changing the instance's nodes, you will have to"
7110 " remove any disks left on the older nodes manually")
7113 self.cfg.Update(instance, feedback_fn)
7115 _CreateDisks(self, instance, to_skip=to_skip)
7118 class LUInstanceRename(LogicalUnit):
7119 """Rename an instance.
7122 HPATH = "instance-rename"
7123 HTYPE = constants.HTYPE_INSTANCE
7125 def CheckArguments(self):
7129 if self.op.ip_check and not self.op.name_check:
7130 # TODO: make the ip check more flexible and not depend on the name check
7131 raise errors.OpPrereqError("IP address check requires a name check",
7134 def BuildHooksEnv(self):
7137 This runs on master, primary and secondary nodes of the instance.
7140 env = _BuildInstanceHookEnvByObject(self, self.instance)
7141 env["INSTANCE_NEW_NAME"] = self.op.new_name
7144 def BuildHooksNodes(self):
7145 """Build hooks nodes.
7148 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7151 def CheckPrereq(self):
7152 """Check prerequisites.
7154 This checks that the instance is in the cluster and is not running.
7157 self.op.instance_name = _ExpandInstanceName(self.cfg,
7158 self.op.instance_name)
7159 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7160 assert instance is not None
7161 _CheckNodeOnline(self, instance.primary_node)
7162 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7163 msg="cannot rename")
7164 self.instance = instance
7166 new_name = self.op.new_name
7167 if self.op.name_check:
7168 hostname = netutils.GetHostname(name=new_name)
7169 if hostname.name != new_name:
7170 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7172 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7173 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7174 " same as given hostname '%s'") %
7175 (hostname.name, self.op.new_name),
7177 new_name = self.op.new_name = hostname.name
7178 if (self.op.ip_check and
7179 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7180 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7181 (hostname.ip, new_name),
7182 errors.ECODE_NOTUNIQUE)
7184 instance_list = self.cfg.GetInstanceList()
7185 if new_name in instance_list and new_name != instance.name:
7186 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7187 new_name, errors.ECODE_EXISTS)
7189 def Exec(self, feedback_fn):
7190 """Rename the instance.
7193 inst = self.instance
7194 old_name = inst.name
7196 rename_file_storage = False
7197 if (inst.disk_template in constants.DTS_FILEBASED and
7198 self.op.new_name != inst.name):
7199 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7200 rename_file_storage = True
7202 self.cfg.RenameInstance(inst.name, self.op.new_name)
7203 # Change the instance lock. This is definitely safe while we hold the BGL.
7204 # Otherwise the new lock would have to be added in acquired mode.
7206 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7207 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7209 # re-read the instance from the configuration after rename
7210 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7212 if rename_file_storage:
7213 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7214 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7215 old_file_storage_dir,
7216 new_file_storage_dir)
7217 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7218 " (but the instance has been renamed in Ganeti)" %
7219 (inst.primary_node, old_file_storage_dir,
7220 new_file_storage_dir))
7222 _StartInstanceDisks(self, inst, None)
7224 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7225 old_name, self.op.debug_level)
7226 msg = result.fail_msg
7228 msg = ("Could not run OS rename script for instance %s on node %s"
7229 " (but the instance has been renamed in Ganeti): %s" %
7230 (inst.name, inst.primary_node, msg))
7231 self.proc.LogWarning(msg)
7233 _ShutdownInstanceDisks(self, inst)
7238 class LUInstanceRemove(LogicalUnit):
7239 """Remove an instance.
7242 HPATH = "instance-remove"
7243 HTYPE = constants.HTYPE_INSTANCE
7246 def ExpandNames(self):
7247 self._ExpandAndLockInstance()
7248 self.needed_locks[locking.LEVEL_NODE] = []
7249 self.needed_locks[locking.LEVEL_NODE_RES] = []
7250 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7252 def DeclareLocks(self, level):
7253 if level == locking.LEVEL_NODE:
7254 self._LockInstancesNodes()
7255 elif level == locking.LEVEL_NODE_RES:
7257 self.needed_locks[locking.LEVEL_NODE_RES] = \
7258 self.needed_locks[locking.LEVEL_NODE][:]
7260 def BuildHooksEnv(self):
7263 This runs on master, primary and secondary nodes of the instance.
7266 env = _BuildInstanceHookEnvByObject(self, self.instance)
7267 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7270 def BuildHooksNodes(self):
7271 """Build hooks nodes.
7274 nl = [self.cfg.GetMasterNode()]
7275 nl_post = list(self.instance.all_nodes) + nl
7276 return (nl, nl_post)
7278 def CheckPrereq(self):
7279 """Check prerequisites.
7281 This checks that the instance is in the cluster.
7284 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7285 assert self.instance is not None, \
7286 "Cannot retrieve locked instance %s" % self.op.instance_name
7288 def Exec(self, feedback_fn):
7289 """Remove the instance.
7292 instance = self.instance
7293 logging.info("Shutting down instance %s on node %s",
7294 instance.name, instance.primary_node)
7296 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7297 self.op.shutdown_timeout)
7298 msg = result.fail_msg
7300 if self.op.ignore_failures:
7301 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7303 raise errors.OpExecError("Could not shutdown instance %s on"
7305 (instance.name, instance.primary_node, msg))
7307 assert (self.owned_locks(locking.LEVEL_NODE) ==
7308 self.owned_locks(locking.LEVEL_NODE_RES))
7309 assert not (set(instance.all_nodes) -
7310 self.owned_locks(locking.LEVEL_NODE)), \
7311 "Not owning correct locks"
7313 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7316 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7317 """Utility function to remove an instance.
7320 logging.info("Removing block devices for instance %s", instance.name)
7322 if not _RemoveDisks(lu, instance):
7323 if not ignore_failures:
7324 raise errors.OpExecError("Can't remove instance's disks")
7325 feedback_fn("Warning: can't remove instance's disks")
7327 logging.info("Removing instance %s out of cluster config", instance.name)
7329 lu.cfg.RemoveInstance(instance.name)
7331 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7332 "Instance lock removal conflict"
7334 # Remove lock for the instance
7335 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7338 class LUInstanceQuery(NoHooksLU):
7339 """Logical unit for querying instances.
7342 # pylint: disable=W0142
7345 def CheckArguments(self):
7346 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7347 self.op.output_fields, self.op.use_locking)
7349 def ExpandNames(self):
7350 self.iq.ExpandNames(self)
7352 def DeclareLocks(self, level):
7353 self.iq.DeclareLocks(self, level)
7355 def Exec(self, feedback_fn):
7356 return self.iq.OldStyleQuery(self)
7359 class LUInstanceFailover(LogicalUnit):
7360 """Failover an instance.
7363 HPATH = "instance-failover"
7364 HTYPE = constants.HTYPE_INSTANCE
7367 def CheckArguments(self):
7368 """Check the arguments.
7371 self.iallocator = getattr(self.op, "iallocator", None)
7372 self.target_node = getattr(self.op, "target_node", None)
7374 def ExpandNames(self):
7375 self._ExpandAndLockInstance()
7377 if self.op.target_node is not None:
7378 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7380 self.needed_locks[locking.LEVEL_NODE] = []
7381 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7383 self.needed_locks[locking.LEVEL_NODE_RES] = []
7384 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7386 ignore_consistency = self.op.ignore_consistency
7387 shutdown_timeout = self.op.shutdown_timeout
7388 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7391 ignore_consistency=ignore_consistency,
7392 shutdown_timeout=shutdown_timeout,
7393 ignore_ipolicy=self.op.ignore_ipolicy)
7394 self.tasklets = [self._migrater]
7396 def DeclareLocks(self, level):
7397 if level == locking.LEVEL_NODE:
7398 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7399 if instance.disk_template in constants.DTS_EXT_MIRROR:
7400 if self.op.target_node is None:
7401 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7403 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7404 self.op.target_node]
7405 del self.recalculate_locks[locking.LEVEL_NODE]
7407 self._LockInstancesNodes()
7408 elif level == locking.LEVEL_NODE_RES:
7410 self.needed_locks[locking.LEVEL_NODE_RES] = \
7411 self.needed_locks[locking.LEVEL_NODE][:]
7413 def BuildHooksEnv(self):
7416 This runs on master, primary and secondary nodes of the instance.
7419 instance = self._migrater.instance
7420 source_node = instance.primary_node
7421 target_node = self.op.target_node
7423 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7424 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7425 "OLD_PRIMARY": source_node,
7426 "NEW_PRIMARY": target_node,
7429 if instance.disk_template in constants.DTS_INT_MIRROR:
7430 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7431 env["NEW_SECONDARY"] = source_node
7433 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7435 env.update(_BuildInstanceHookEnvByObject(self, instance))
7439 def BuildHooksNodes(self):
7440 """Build hooks nodes.
7443 instance = self._migrater.instance
7444 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7445 return (nl, nl + [instance.primary_node])
7448 class LUInstanceMigrate(LogicalUnit):
7449 """Migrate an instance.
7451 This is migration without shutting down, compared to the failover,
7452 which is done with shutdown.
7455 HPATH = "instance-migrate"
7456 HTYPE = constants.HTYPE_INSTANCE
7459 def ExpandNames(self):
7460 self._ExpandAndLockInstance()
7462 if self.op.target_node is not None:
7463 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7465 self.needed_locks[locking.LEVEL_NODE] = []
7466 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7468 self.needed_locks[locking.LEVEL_NODE] = []
7469 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7472 TLMigrateInstance(self, self.op.instance_name,
7473 cleanup=self.op.cleanup,
7475 fallback=self.op.allow_failover,
7476 allow_runtime_changes=self.op.allow_runtime_changes,
7477 ignore_ipolicy=self.op.ignore_ipolicy)
7478 self.tasklets = [self._migrater]
7480 def DeclareLocks(self, level):
7481 if level == locking.LEVEL_NODE:
7482 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7483 if instance.disk_template in constants.DTS_EXT_MIRROR:
7484 if self.op.target_node is None:
7485 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7487 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7488 self.op.target_node]
7489 del self.recalculate_locks[locking.LEVEL_NODE]
7491 self._LockInstancesNodes()
7492 elif level == locking.LEVEL_NODE_RES:
7494 self.needed_locks[locking.LEVEL_NODE_RES] = \
7495 self.needed_locks[locking.LEVEL_NODE][:]
7497 def BuildHooksEnv(self):
7500 This runs on master, primary and secondary nodes of the instance.
7503 instance = self._migrater.instance
7504 source_node = instance.primary_node
7505 target_node = self.op.target_node
7506 env = _BuildInstanceHookEnvByObject(self, instance)
7508 "MIGRATE_LIVE": self._migrater.live,
7509 "MIGRATE_CLEANUP": self.op.cleanup,
7510 "OLD_PRIMARY": source_node,
7511 "NEW_PRIMARY": target_node,
7512 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7515 if instance.disk_template in constants.DTS_INT_MIRROR:
7516 env["OLD_SECONDARY"] = target_node
7517 env["NEW_SECONDARY"] = source_node
7519 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7523 def BuildHooksNodes(self):
7524 """Build hooks nodes.
7527 instance = self._migrater.instance
7528 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7529 return (nl, nl + [instance.primary_node])
7532 class LUInstanceMove(LogicalUnit):
7533 """Move an instance by data-copying.
7536 HPATH = "instance-move"
7537 HTYPE = constants.HTYPE_INSTANCE
7540 def ExpandNames(self):
7541 self._ExpandAndLockInstance()
7542 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7543 self.op.target_node = target_node
7544 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7545 self.needed_locks[locking.LEVEL_NODE_RES] = []
7546 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7548 def DeclareLocks(self, level):
7549 if level == locking.LEVEL_NODE:
7550 self._LockInstancesNodes(primary_only=True)
7551 elif level == locking.LEVEL_NODE_RES:
7553 self.needed_locks[locking.LEVEL_NODE_RES] = \
7554 self.needed_locks[locking.LEVEL_NODE][:]
7556 def BuildHooksEnv(self):
7559 This runs on master, primary and secondary nodes of the instance.
7563 "TARGET_NODE": self.op.target_node,
7564 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7566 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7569 def BuildHooksNodes(self):
7570 """Build hooks nodes.
7574 self.cfg.GetMasterNode(),
7575 self.instance.primary_node,
7576 self.op.target_node,
7580 def CheckPrereq(self):
7581 """Check prerequisites.
7583 This checks that the instance is in the cluster.
7586 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7587 assert self.instance is not None, \
7588 "Cannot retrieve locked instance %s" % self.op.instance_name
7590 node = self.cfg.GetNodeInfo(self.op.target_node)
7591 assert node is not None, \
7592 "Cannot retrieve locked node %s" % self.op.target_node
7594 self.target_node = target_node = node.name
7596 if target_node == instance.primary_node:
7597 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7598 (instance.name, target_node),
7601 bep = self.cfg.GetClusterInfo().FillBE(instance)
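# FillBE merges the cluster-wide backend parameter defaults with the
# instance's own overrides, so bep[constants.BE_MAXMEM] below is the
# effective maximum memory to check for on the target node.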
7603 for idx, dsk in enumerate(instance.disks):
7604 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7605 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7606 " cannot copy" % idx, errors.ECODE_STATE)
7608 _CheckNodeOnline(self, target_node)
7609 _CheckNodeNotDrained(self, target_node)
7610 _CheckNodeVmCapable(self, target_node)
7611 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7612 self.cfg.GetNodeGroup(node.group))
7613 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7614 ignore=self.op.ignore_ipolicy)
7616 if instance.admin_state == constants.ADMINST_UP:
7617 # check memory requirements on the target node
7618 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7619 instance.name, bep[constants.BE_MAXMEM],
7620 instance.hypervisor)
7622 self.LogInfo("Not checking memory on the target node as"
7623 " instance will not be started")
7625 # check bridge existence
7626 _CheckInstanceBridgesExist(self, instance, node=target_node)
7628 def Exec(self, feedback_fn):
7629 """Move an instance.
7631 The move is done by shutting it down on its present node, copying
7632 the data over (slow) and starting it on the new node.
7635 instance = self.instance
7637 source_node = instance.primary_node
7638 target_node = self.target_node
7640 self.LogInfo("Shutting down instance %s on source node %s",
7641 instance.name, source_node)
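# ExpandNames/DeclareLocks acquired identical node and node-resource lock
# sets; the assert below documents and guards that assumption before we
# start creating disks on the target node.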
7643 assert (self.owned_locks(locking.LEVEL_NODE) ==
7644 self.owned_locks(locking.LEVEL_NODE_RES))
7646 result = self.rpc.call_instance_shutdown(source_node, instance,
7647 self.op.shutdown_timeout)
7648 msg = result.fail_msg
7650 if self.op.ignore_consistency:
7651 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7652 " Proceeding anyway. Please make sure node"
7653 " %s is down. Error details: %s",
7654 instance.name, source_node, source_node, msg)
7656 raise errors.OpExecError("Could not shutdown instance %s on"
7658 (instance.name, source_node, msg))
7660 # create the target disks
7662 _CreateDisks(self, instance, target_node=target_node)
7663 except errors.OpExecError:
7664 self.LogWarning("Device creation failed, reverting...")
7666 _RemoveDisks(self, instance, target_node=target_node)
7668 self.cfg.ReleaseDRBDMinors(instance.name)
7671 cluster_name = self.cfg.GetClusterInfo().cluster_name
7674 # activate, get path, copy the data over
7675 for idx, disk in enumerate(instance.disks):
7676 self.LogInfo("Copying data for disk %d", idx)
7677 result = self.rpc.call_blockdev_assemble(target_node, disk,
7678 instance.name, True, idx)
7680 self.LogWarning("Can't assemble newly created disk %d: %s",
7681 idx, result.fail_msg)
7682 errs.append(result.fail_msg)
7684 dev_path = result.payload
7685 result = self.rpc.call_blockdev_export(source_node, disk,
7686 target_node, dev_path,
7689 self.LogWarning("Can't copy data over for disk %d: %s",
7690 idx, result.fail_msg)
7691 errs.append(result.fail_msg)
7695 self.LogWarning("Some disks failed to copy, aborting")
7697 _RemoveDisks(self, instance, target_node=target_node)
7699 self.cfg.ReleaseDRBDMinors(instance.name)
7700 raise errors.OpExecError("Errors during disk copy: %s" %
7703 instance.primary_node = target_node
7704 self.cfg.Update(instance, feedback_fn)
7706 self.LogInfo("Removing the disks on the original node")
7707 _RemoveDisks(self, instance, target_node=source_node)
7709 # Only start the instance if it's marked as up
7710 if instance.admin_state == constants.ADMINST_UP:
7711 self.LogInfo("Starting instance %s on node %s",
7712 instance.name, target_node)
7714 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7715 ignore_secondaries=True)
7717 _ShutdownInstanceDisks(self, instance)
7718 raise errors.OpExecError("Can't activate the instance's disks")
7720 result = self.rpc.call_instance_start(target_node,
7721 (instance, None, None), False)
7722 msg = result.fail_msg
7724 _ShutdownInstanceDisks(self, instance)
7725 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7726 (instance.name, target_node, msg))
7729 class LUNodeMigrate(LogicalUnit):
7730 """Migrate all instances from a node.
7733 HPATH = "node-migrate"
7734 HTYPE = constants.HTYPE_NODE
7737 def CheckArguments(self):
7740 def ExpandNames(self):
7741 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7743 self.share_locks = _ShareAll()
7744 self.needed_locks = {
7745 locking.LEVEL_NODE: [self.op.node_name],
7748 def BuildHooksEnv(self):
7751 This runs on the master, the primary and all the secondaries.
7755 "NODE_NAME": self.op.node_name,
7756 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7759 def BuildHooksNodes(self):
7760 """Build hooks nodes.
7763 nl = [self.cfg.GetMasterNode()]
7766 def CheckPrereq(self):
7769 def Exec(self, feedback_fn):
7770 # Prepare jobs for migration instances
7771 allow_runtime_changes = self.op.allow_runtime_changes
7773 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7776 iallocator=self.op.iallocator,
7777 target_node=self.op.target_node,
7778 allow_runtime_changes=allow_runtime_changes,
7779 ignore_ipolicy=self.op.ignore_ipolicy)]
7780 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7783 # TODO: Run iallocator in this opcode and pass correct placement options to
7784 # OpInstanceMigrate. Since other jobs can modify the cluster between
7785 # running the iallocator and the actual migration, a good consistency model
7786 # will have to be found.
7788 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7789 frozenset([self.op.node_name]))
7791 return ResultWithJobs(jobs)
7794 class TLMigrateInstance(Tasklet):
7795 """Tasklet class for instance migration.
7798 @ivar live: whether the migration will be done live or non-live;
7799 this variable is initialized only after CheckPrereq has run
7800 @type cleanup: boolean
7801 @ivar cleanup: Whether we clean up after a failed migration
7802 @type iallocator: string
7803 @ivar iallocator: The iallocator used to determine target_node
7804 @type target_node: string
7805 @ivar target_node: If given, the target_node to reallocate the instance to
7806 @type failover: boolean
7807 @ivar failover: Whether operation results in failover or migration
7808 @type fallback: boolean
7809 @ivar fallback: Whether fallback to failover is allowed if migration not
7811 @type ignore_consistency: boolean
7812 @ivar ignore_consistency: Whether we should ignore consistency between source
7814 @type shutdown_timeout: int
7815 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7816 @type ignore_ipolicy: bool
7817 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7822 _MIGRATION_POLL_INTERVAL = 1 # seconds
7823 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
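# These constants drive the polling loop in _ExecMigration: the migration
# status is queried roughly every second, while progress feedback is
# emitted at most every ten seconds.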
7825 def __init__(self, lu, instance_name, cleanup=False,
7826 failover=False, fallback=False,
7827 ignore_consistency=False,
7828 allow_runtime_changes=True,
7829 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7830 ignore_ipolicy=False):
7831 """Initializes this class.
7834 Tasklet.__init__(self, lu)
7837 self.instance_name = instance_name
7838 self.cleanup = cleanup
7839 self.live = False # will be overridden later
7840 self.failover = failover
7841 self.fallback = fallback
7842 self.ignore_consistency = ignore_consistency
7843 self.shutdown_timeout = shutdown_timeout
7844 self.ignore_ipolicy = ignore_ipolicy
7845 self.allow_runtime_changes = allow_runtime_changes
7847 def CheckPrereq(self):
7848 """Check prerequisites.
7850 This checks that the instance is in the cluster.
7853 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7854 instance = self.cfg.GetInstanceInfo(instance_name)
7855 assert instance is not None
7856 self.instance = instance
7857 cluster = self.cfg.GetClusterInfo()
7859 if (not self.cleanup and
7860 instance.admin_state != constants.ADMINST_UP and
7861 not self.failover and self.fallback):
7862 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7863 " switching to failover")
7864 self.failover = True
7866 if instance.disk_template not in constants.DTS_MIRRORED:
7871 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7872 " %s" % (instance.disk_template, text),
7875 if instance.disk_template in constants.DTS_EXT_MIRROR:
7876 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7878 if self.lu.op.iallocator:
7879 self._RunAllocator()
7881 # We set self.target_node as it is required by
7883 self.target_node = self.lu.op.target_node
7885 # Check that the target node is correct in terms of instance policy
7886 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7887 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7888 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7889 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7890 ignore=self.ignore_ipolicy)
7892 # self.target_node is already populated, either directly or by the
7894 target_node = self.target_node
7895 if self.target_node == instance.primary_node:
7896 raise errors.OpPrereqError("Cannot migrate instance %s"
7897 " to its primary (%s)" %
7898 (instance.name, instance.primary_node))
7900 if len(self.lu.tasklets) == 1:
7901 # It is safe to release locks only when we're the only tasklet
7903 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7904 keep=[instance.primary_node, self.target_node])
7907 secondary_nodes = instance.secondary_nodes
7908 if not secondary_nodes:
7909 raise errors.ConfigurationError("No secondary node but using"
7910 " %s disk template" %
7911 instance.disk_template)
7912 target_node = secondary_nodes[0]
7913 if self.lu.op.iallocator or (self.lu.op.target_node and
7914 self.lu.op.target_node != target_node):
7916 text = "failed over"
7919 raise errors.OpPrereqError("Instances with disk template %s cannot"
7920 " be %s to arbitrary nodes"
7921 " (neither an iallocator nor a target"
7922 " node can be passed)" %
7923 (instance.disk_template, text),
7925 nodeinfo = self.cfg.GetNodeInfo(target_node)
7926 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7927 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7928 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7929 ignore=self.ignore_ipolicy)
7931 i_be = cluster.FillBE(instance)
7933 # check memory requirements on the secondary node
7934 if (not self.cleanup and
7935 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7936 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7937 "migrating instance %s" %
7939 i_be[constants.BE_MINMEM],
7940 instance.hypervisor)
7942 self.lu.LogInfo("Not checking memory on the secondary node as"
7943 " instance will not be started")
7945 # check if failover must be forced instead of migration
7946 if (not self.cleanup and not self.failover and
7947 i_be[constants.BE_ALWAYS_FAILOVER]):
7949 self.lu.LogInfo("Instance configured to always failover; fallback"
7951 self.failover = True
7953 raise errors.OpPrereqError("This instance has been configured to"
7954 " always failover, please allow failover",
7957 # check bridge existence
7958 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7960 if not self.cleanup:
7961 _CheckNodeNotDrained(self.lu, target_node)
7962 if not self.failover:
7963 result = self.rpc.call_instance_migratable(instance.primary_node,
7965 if result.fail_msg and self.fallback:
7966 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7968 self.failover = True
7970 result.Raise("Can't migrate, please use failover",
7971 prereq=True, ecode=errors.ECODE_STATE)
7973 assert not (self.failover and self.cleanup)
7975 if not self.failover:
7976 if self.lu.op.live is not None and self.lu.op.mode is not None:
7977 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7978 " parameters are accepted",
7980 if self.lu.op.live is not None:
7982 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7984 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7985 # reset the 'live' parameter to None so that repeated
7986 # invocations of CheckPrereq do not raise an exception
7987 self.lu.op.live = None
7988 elif self.lu.op.mode is None:
7989 # read the default value from the hypervisor
7990 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7991 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7993 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7995 # Failover is never live
7998 if not (self.failover or self.cleanup):
7999 remote_info = self.rpc.call_instance_info(instance.primary_node,
8001 instance.hypervisor)
8002 remote_info.Raise("Error checking instance on node %s" %
8003 instance.primary_node)
8004 instance_running = bool(remote_info.payload)
8005 if instance_running:
8006 self.current_mem = int(remote_info.payload["memory"])
8008 def _RunAllocator(self):
8009 """Run the allocator based on input opcode.
8012 # FIXME: add a self.ignore_ipolicy option
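# IALLOCATOR_MODE_RELOC asks the allocator plugin for a replacement node
# for an existing instance; relocate_from lists the nodes to move away
# from, here the current primary node.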
8013 ial = IAllocator(self.cfg, self.rpc,
8014 mode=constants.IALLOCATOR_MODE_RELOC,
8015 name=self.instance_name,
8016 relocate_from=[self.instance.primary_node],
8019 ial.Run(self.lu.op.iallocator)
8022 raise errors.OpPrereqError("Can't compute nodes using"
8023 " iallocator '%s': %s" %
8024 (self.lu.op.iallocator, ial.info),
8026 if len(ial.result) != ial.required_nodes:
8027 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8028 " of nodes (%s), required %s" %
8029 (self.lu.op.iallocator, len(ial.result),
8030 ial.required_nodes), errors.ECODE_FAULT)
8031 self.target_node = ial.result[0]
8032 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8033 self.instance_name, self.lu.op.iallocator,
8034 utils.CommaJoin(ial.result))
8036 def _WaitUntilSync(self):
8037 """Poll with custom rpc for disk sync.
8039 This uses our own step-based rpc call.
8042 self.feedback_fn("* wait until resync is done")
8046 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8048 self.instance.disks)
8050 for node, nres in result.items():
8051 nres.Raise("Cannot resync disks on node %s" % node)
8052 node_done, node_percent = nres.payload
8053 all_done = all_done and node_done
8054 if node_percent is not None:
8055 min_percent = min(min_percent, node_percent)
8057 if min_percent < 100:
8058 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8061 def _EnsureSecondary(self, node):
8062 """Demote a node to secondary.
8065 self.feedback_fn("* switching node %s to secondary mode" % node)
8067 for dev in self.instance.disks:
8068 self.cfg.SetDiskID(dev, node)
8070 result = self.rpc.call_blockdev_close(node, self.instance.name,
8071 self.instance.disks)
8072 result.Raise("Cannot change disk to secondary on node %s" % node)
8074 def _GoStandalone(self):
8075 """Disconnect from the network.
8078 self.feedback_fn("* changing into standalone mode")
8079 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8080 self.instance.disks)
8081 for node, nres in result.items():
8082 nres.Raise("Cannot disconnect disks on node %s" % node)
8084 def _GoReconnect(self, multimaster):
8085 """Reconnect to the network.
8091 msg = "single-master"
8092 self.feedback_fn("* changing disks into %s mode" % msg)
8093 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8094 self.instance.disks,
8095 self.instance.name, multimaster)
8096 for node, nres in result.items():
8097 nres.Raise("Cannot change disks config on node %s" % node)
8099 def _ExecCleanup(self):
8100 """Try to cleanup after a failed migration.
8102 The cleanup is done by:
8103 - check that the instance is running only on one node
8104 (and update the config if needed)
8105 - change disks on its secondary node to secondary
8106 - wait until disks are fully synchronized
8107 - disconnect from the network
8108 - change disks into single-master mode
8109 - wait again until disks are fully synchronized
8112 instance = self.instance
8113 target_node = self.target_node
8114 source_node = self.source_node
8116 # check running on only one node
8117 self.feedback_fn("* checking where the instance actually runs"
8118 " (if this hangs, the hypervisor might be in"
8120 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8121 for node, result in ins_l.items():
8122 result.Raise("Can't contact node %s" % node)
8124 runningon_source = instance.name in ins_l[source_node].payload
8125 runningon_target = instance.name in ins_l[target_node].payload
8127 if runningon_source and runningon_target:
8128 raise errors.OpExecError("Instance seems to be running on two nodes,"
8129 " or the hypervisor is confused; you will have"
8130 " to ensure manually that it runs only on one"
8131 " and restart this operation")
8133 if not (runningon_source or runningon_target):
8134 raise errors.OpExecError("Instance does not seem to be running at all;"
8135 " in this case it's safer to repair by"
8136 " running 'gnt-instance stop' to ensure disk"
8137 " shutdown, and then restarting it")
8139 if runningon_target:
8140 # the migration has actually succeeded, we need to update the config
8141 self.feedback_fn("* instance running on secondary node (%s),"
8142 " updating config" % target_node)
8143 instance.primary_node = target_node
8144 self.cfg.Update(instance, self.feedback_fn)
8145 demoted_node = source_node
8147 self.feedback_fn("* instance confirmed to be running on its"
8148 " primary node (%s)" % source_node)
8149 demoted_node = target_node
8151 if instance.disk_template in constants.DTS_INT_MIRROR:
8152 self._EnsureSecondary(demoted_node)
8154 self._WaitUntilSync()
8155 except errors.OpExecError:
8156 # we ignore here errors, since if the device is standalone, it
8157 # won't be able to sync
8159 self._GoStandalone()
8160 self._GoReconnect(False)
8161 self._WaitUntilSync()
8163 self.feedback_fn("* done")
8165 def _RevertDiskStatus(self):
8166 """Try to revert the disk status after a failed migration.
8169 target_node = self.target_node
8170 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8174 self._EnsureSecondary(target_node)
8175 self._GoStandalone()
8176 self._GoReconnect(False)
8177 self._WaitUntilSync()
8178 except errors.OpExecError, err:
8179 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8180 " please try to recover the instance manually;"
8181 " error '%s'" % str(err))
8183 def _AbortMigration(self):
8184 """Call the hypervisor code to abort a started migration.
8187 instance = self.instance
8188 target_node = self.target_node
8189 source_node = self.source_node
8190 migration_info = self.migration_info
8192 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8196 abort_msg = abort_result.fail_msg
8198 logging.error("Aborting migration failed on target node %s: %s",
8199 target_node, abort_msg)
8200 # Don't raise an exception here, as we still have to try to revert the
8201 # disk status, even if this step failed.
8203 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8204 instance, False, self.live)
8205 abort_msg = abort_result.fail_msg
8207 logging.error("Aborting migration failed on source node %s: %s",
8208 source_node, abort_msg)
8210 def _ExecMigration(self):
8211 """Migrate an instance.
8213 The migrate is done by:
8214 - change the disks into dual-master mode
8215 - wait until disks are fully synchronized again
8216 - migrate the instance
8217 - change disks on the new secondary node (the old primary) to secondary
8218 - wait until disks are fully synchronized
8219 - change disks into single-master mode
8222 instance = self.instance
8223 target_node = self.target_node
8224 source_node = self.source_node
8226 # Check for hypervisor version mismatch and warn the user.
8227 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8228 None, [self.instance.hypervisor])
8229 for ninfo in nodeinfo.values():
8230 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8232 (_, _, (src_info, )) = nodeinfo[source_node].payload
8233 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8235 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8236 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8237 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8238 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8239 if src_version != dst_version:
8240 self.feedback_fn("* warning: hypervisor version mismatch between"
8241 " source (%s) and target (%s) node" %
8242 (src_version, dst_version))
8244 self.feedback_fn("* checking disk consistency between source and target")
8245 for (idx, dev) in enumerate(instance.disks):
8246 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8247 raise errors.OpExecError("Disk %s is degraded or not fully"
8248 " synchronized on target node,"
8249 " aborting migration" % idx)
8251 if self.current_mem > self.tgt_free_mem:
8252 if not self.allow_runtime_changes:
8253 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8254 " free memory to fit instance %s on target"
8255 " node %s (have %dMB, need %dMB)" %
8256 (instance.name, target_node,
8257 self.tgt_free_mem, self.current_mem))
8258 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8259 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8262 rpcres.Raise("Cannot modify instance runtime memory")
8264 # First get the migration information from the remote node
8265 result = self.rpc.call_migration_info(source_node, instance)
8266 msg = result.fail_msg
8268 log_err = ("Failed fetching source migration information from %s: %s" %
8270 logging.error(log_err)
8271 raise errors.OpExecError(log_err)
8273 self.migration_info = migration_info = result.payload
8275 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8276 # Then switch the disks to master/master mode
8277 self._EnsureSecondary(target_node)
8278 self._GoStandalone()
8279 self._GoReconnect(True)
8280 self._WaitUntilSync()
8282 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8283 result = self.rpc.call_accept_instance(target_node,
8286 self.nodes_ip[target_node])
8288 msg = result.fail_msg
8290 logging.error("Instance pre-migration failed, trying to revert"
8291 " disk status: %s", msg)
8292 self.feedback_fn("Pre-migration failed, aborting")
8293 self._AbortMigration()
8294 self._RevertDiskStatus()
8295 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8296 (instance.name, msg))
8298 self.feedback_fn("* migrating instance to %s" % target_node)
8299 result = self.rpc.call_instance_migrate(source_node, instance,
8300 self.nodes_ip[target_node],
8302 msg = result.fail_msg
8304 logging.error("Instance migration failed, trying to revert"
8305 " disk status: %s", msg)
8306 self.feedback_fn("Migration failed, aborting")
8307 self._AbortMigration()
8308 self._RevertDiskStatus()
8309 raise errors.OpExecError("Could not migrate instance %s: %s" %
8310 (instance.name, msg))
8312 self.feedback_fn("* starting memory transfer")
8313 last_feedback = time.time()
8315 result = self.rpc.call_instance_get_migration_status(source_node,
8317 msg = result.fail_msg
8318 ms = result.payload # MigrationStatus instance
8319 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8320 logging.error("Instance migration failed, trying to revert"
8321 " disk status: %s", msg)
8322 self.feedback_fn("Migration failed, aborting")
8323 self._AbortMigration()
8324 self._RevertDiskStatus()
8325 raise errors.OpExecError("Could not migrate instance %s: %s" %
8326 (instance.name, msg))
8328 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8329 self.feedback_fn("* memory transfer complete")
8332 if (utils.TimeoutExpired(last_feedback,
8333 self._MIGRATION_FEEDBACK_INTERVAL) and
8334 ms.transferred_ram is not None):
8335 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8336 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8337 last_feedback = time.time()
8339 time.sleep(self._MIGRATION_POLL_INTERVAL)
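# The hypervisor reports the migration as finished; finalize it on the
# source node, record the new primary node in the configuration, and then
# finalize on the target node.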
8341 result = self.rpc.call_instance_finalize_migration_src(source_node,
8345 msg = result.fail_msg
8347 logging.error("Instance migration succeeded, but finalization failed"
8348 " on the source node: %s", msg)
8349 raise errors.OpExecError("Could not finalize instance migration: %s" %
8352 instance.primary_node = target_node
8354 # distribute new instance config to the other nodes
8355 self.cfg.Update(instance, self.feedback_fn)
8357 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8361 msg = result.fail_msg
8363 logging.error("Instance migration succeeded, but finalization failed"
8364 " on the target node: %s", msg)
8365 raise errors.OpExecError("Could not finalize instance migration: %s" %
8368 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8369 self._EnsureSecondary(source_node)
8370 self._WaitUntilSync()
8371 self._GoStandalone()
8372 self._GoReconnect(False)
8373 self._WaitUntilSync()
8375 # If the instance's disk template is `rbd' and there was a successful
8376 # migration, unmap the device from the source node.
8377 if self.instance.disk_template == constants.DT_RBD:
8378 disks = _ExpandCheckDisks(instance, instance.disks)
8379 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8381 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8382 msg = result.fail_msg
8384 logging.error("Migration was successful, but couldn't unmap the"
8385 " block device %s on source node %s: %s",
8386 disk.iv_name, source_node, msg)
8387 logging.error("You need to unmap the device %s manually on %s",
8388 disk.iv_name, source_node)
8390 self.feedback_fn("* done")
8392 def _ExecFailover(self):
8393 """Failover an instance.
8395 The failover is done by shutting it down on its present node and
8396 starting it on the secondary.
8399 instance = self.instance
8400 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8402 source_node = instance.primary_node
8403 target_node = self.target_node
8405 if instance.admin_state == constants.ADMINST_UP:
8406 self.feedback_fn("* checking disk consistency between source and target")
8407 for (idx, dev) in enumerate(instance.disks):
8408 # for drbd, these are drbd over lvm
8409 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8410 if primary_node.offline:
8411 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8413 (primary_node.name, idx, target_node))
8414 elif not self.ignore_consistency:
8415 raise errors.OpExecError("Disk %s is degraded on target node,"
8416 " aborting failover" % idx)
8418 self.feedback_fn("* not checking disk consistency as instance is not"
8421 self.feedback_fn("* shutting down instance on source node")
8422 logging.info("Shutting down instance %s on node %s",
8423 instance.name, source_node)
8425 result = self.rpc.call_instance_shutdown(source_node, instance,
8426 self.shutdown_timeout)
8427 msg = result.fail_msg
8429 if self.ignore_consistency or primary_node.offline:
8430 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8431 " proceeding anyway; please make sure node"
8432 " %s is down; error details: %s",
8433 instance.name, source_node, source_node, msg)
8435 raise errors.OpExecError("Could not shutdown instance %s on"
8437 (instance.name, source_node, msg))
8439 self.feedback_fn("* deactivating the instance's disks on source node")
8440 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8441 raise errors.OpExecError("Can't shut down the instance's disks")
8443 instance.primary_node = target_node
8444 # distribute new instance config to the other nodes
8445 self.cfg.Update(instance, self.feedback_fn)
8447 # Only start the instance if it's marked as up
8448 if instance.admin_state == constants.ADMINST_UP:
8449 self.feedback_fn("* activating the instance's disks on target node %s" %
8451 logging.info("Starting instance %s on node %s",
8452 instance.name, target_node)
8454 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8455 ignore_secondaries=True)
8457 _ShutdownInstanceDisks(self.lu, instance)
8458 raise errors.OpExecError("Can't activate the instance's disks")
8460 self.feedback_fn("* starting the instance on the target node %s" %
8462 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8464 msg = result.fail_msg
8466 _ShutdownInstanceDisks(self.lu, instance)
8467 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8468 (instance.name, target_node, msg))
8470 def Exec(self, feedback_fn):
8471 """Perform the migration.
8474 self.feedback_fn = feedback_fn
8475 self.source_node = self.instance.primary_node
8477 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8478 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8479 self.target_node = self.instance.secondary_nodes[0]
8480 # Otherwise self.target_node has been populated either
8481 # directly, or through an iallocator.
8483 self.all_nodes = [self.source_node, self.target_node]
8484 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8485 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8488 feedback_fn("Failover instance %s" % self.instance.name)
8489 self._ExecFailover()
8491 feedback_fn("Migrating instance %s" % self.instance.name)
8494 return self._ExecCleanup()
8496 return self._ExecMigration()
8499 def _CreateBlockDev(lu, node, instance, device, force_create,
8501 """Create a tree of block devices on a given node.
8503 If this device type has to be created on secondaries, create it and
8506 If not, just recurse to children keeping the same 'force' value.
8508 @param lu: the lu on whose behalf we execute
8509 @param node: the node on which to create the device
8510 @type instance: L{objects.Instance}
8511 @param instance: the instance which owns the device
8512 @type device: L{objects.Disk}
8513 @param device: the device to create
8514 @type force_create: boolean
8515 @param force_create: whether to force creation of this device; this
8516 will be changed to True whenever we find a device which has
8517 CreateOnSecondary() attribute
8518 @param info: the extra 'metadata' we should attach to the device
8519 (this will be represented as a LVM tag)
8520 @type force_open: boolean
8521 @param force_open: this parameter will be passed to the
8522 L{backend.BlockdevCreate} function where it specifies
8523 whether we run on primary or not, and it affects both
8524 the child assembly and the device's own Open() execution
8527 if device.CreateOnSecondary():
8531 for child in device.children:
8532 _CreateBlockDev(lu, node, instance, child, force_create,
8535 if not force_create:
8538 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8541 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8542 """Create a single block device on a given node.
8544 This will not recurse over children of the device, so they must be
8547 @param lu: the lu on whose behalf we execute
8548 @param node: the node on which to create the device
8549 @type instance: L{objects.Instance}
8550 @param instance: the instance which owns the device
8551 @type device: L{objects.Disk}
8552 @param device: the device to create
8553 @param info: the extra 'metadata' we should attach to the device
8554 (this will be represented as a LVM tag)
8555 @type force_open: boolean
8556 @param force_open: this parameter will be passed to the
8557 L{backend.BlockdevCreate} function where it specifies
8558 whether we run on primary or not, and it affects both
8559 the child assembly and the device's own Open() execution
8562 lu.cfg.SetDiskID(device, node)
8563 result = lu.rpc.call_blockdev_create(node, device, device.size,
8564 instance.name, force_open, info)
8565 result.Raise("Can't create block device %s on"
8566 " node %s for instance %s" % (device, node, instance.name))
8567 if device.physical_id is None:
8568 device.physical_id = result.payload
8571 def _GenerateUniqueNames(lu, exts):
8572 """Generate a suitable LV name.
8574 This will generate unique logical volume names, one for each of the
8575 given suffixes, based on a newly generated unique ID.
8579 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8580 results.append("%s%s" % (new_id, val))
8584 def _ComputeLDParams(disk_template, disk_params):
8585 """Computes Logical Disk parameters from Disk Template parameters.
8587 @type disk_template: string
8588 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8589 @type disk_params: dict
8590 @param disk_params: disk template parameters; dict(template_name -> parameters)
8592 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8593 contains the LD parameters of the node. The tree is flattened in-order.
8596 if disk_template not in constants.DISK_TEMPLATES:
8597 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8600 dt_params = disk_params[disk_template]
8601 if disk_template == constants.DT_DRBD8:
8603 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8604 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8605 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8606 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8607 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8608 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8609 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8610 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8611 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8612 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8613 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8614 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8618 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8621 result.append(drbd_params)
8625 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8628 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8630 result.append(data_params)
8634 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8637 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8639 result.append(meta_params)
8641 elif disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
8643 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8645 elif disk_template == constants.DT_PLAIN:
8647 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8650 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8652 result.append(params)
8654 elif disk_template == constants.DT_BLOCK:
8655 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8657 elif disk_template == constants.DT_RBD:
8659 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8662 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8664 result.append(params)
8669 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8670 iv_name, p_minor, s_minor, drbd_params, data_params,
8672 """Generate a drbd8 device complete with its children.
8675 assert len(vgnames) == len(names) == 2
8676 port = lu.cfg.AllocatePort()
8677 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8679 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8680 logical_id=(vgnames[0], names[0]),
8682 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8683 logical_id=(vgnames[1], names[1]),
8685 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8686 logical_id=(primary, secondary, port,
8689 children=[dev_data, dev_meta],
8690 iv_name=iv_name, params=drbd_params)
8694 _DISK_TEMPLATE_NAME_PREFIX = {
8695 constants.DT_PLAIN: "",
8696 constants.DT_RBD: ".rbd",
8700 _DISK_TEMPLATE_DEVICE_TYPE = {
8701 constants.DT_PLAIN: constants.LD_LV,
8702 constants.DT_FILE: constants.LD_FILE,
8703 constants.DT_SHARED_FILE: constants.LD_FILE,
8704 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8705 constants.DT_RBD: constants.LD_RBD,
8709 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8710 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8711 feedback_fn, disk_params,
8712 _req_file_storage=opcodes.RequireFileStorage,
8713 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8714 """Generate the entire disk layout for a given template type.
8717 #TODO: compute space requirements
8719 vgname = lu.cfg.GetVGName()
8720 disk_count = len(disk_info)
8722 ld_params = _ComputeLDParams(template_name, disk_params)
8724 if template_name == constants.DT_DISKLESS:
8726 elif template_name == constants.DT_DRBD8:
8727 drbd_params, data_params, meta_params = ld_params
8728 if len(secondary_nodes) != 1:
8729 raise errors.ProgrammerError("Wrong template configuration")
8730 remote_node = secondary_nodes[0]
8731 minors = lu.cfg.AllocateDRBDMinor(
8732 [primary_node, remote_node] * len(disk_info), instance_name)
8735 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8736 for i in range(disk_count)]):
8737 names.append(lv_prefix + "_data")
8738 names.append(lv_prefix + "_meta")
8739 for idx, disk in enumerate(disk_info):
8740 disk_index = idx + base_index
8741 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8742 data_vg = disk.get(constants.IDISK_VG, vgname)
8743 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8744 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8745 disk[constants.IDISK_SIZE],
8747 names[idx * 2:idx * 2 + 2],
8748 "disk/%d" % disk_index,
8749 minors[idx * 2], minors[idx * 2 + 1],
8750 drbd_params, data_params, meta_params)
8751 disk_dev.mode = disk[constants.IDISK_MODE]
8752 disks.append(disk_dev)
8755 raise errors.ProgrammerError("Wrong template configuration")
8757 if template_name == constants.DT_FILE:
8759 elif template_name == constants.DT_SHARED_FILE:
8760 _req_shr_file_storage()
8762 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8763 if name_prefix is None:
8766 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8767 (name_prefix, base_index + i)
8768 for i in range(disk_count)])
8770 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8772 if template_name == constants.DT_PLAIN:
8773 def logical_id_fn(idx, _, disk):
8774 vg = disk.get(constants.IDISK_VG, vgname)
8775 return (vg, names[idx])
8776 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8778 lambda _, disk_index, disk: (file_driver,
8779 "%s/disk%d" % (file_storage_dir,
8781 elif template_name == constants.DT_BLOCK:
8783 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8784 disk[constants.IDISK_ADOPT])
8785 elif template_name == constants.DT_RBD:
8786 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8788 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8790 for idx, disk in enumerate(disk_info):
8791 disk_index = idx + base_index
8792 size = disk[constants.IDISK_SIZE]
8793 feedback_fn("* disk %s, size %s" %
8794 (disk_index, utils.FormatUnit(size, "h")))
8795 disks.append(objects.Disk(dev_type=dev_type, size=size,
8796 logical_id=logical_id_fn(idx, disk_index, disk),
8797 iv_name="disk/%d" % disk_index,
8798 mode=disk[constants.IDISK_MODE],
8799 params=ld_params[0]))
8804 def _GetInstanceInfoText(instance):
8805 """Compute that text that should be added to the disk's metadata.
8808 return "originstname+%s" % instance.name
8811 def _CalcEta(time_taken, written, total_size):
8812 """Calculates the ETA based on size written and total size.
8814 @param time_taken: The time taken so far
8815 @param written: amount written so far
8816 @param total_size: The total size of data to be written
8817 @return: The remaining time in seconds
8820 avg_time = time_taken / float(written)
8821 return (total_size - written) * avg_time
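# Worked example: if 1000 MiB have been written in 300 seconds
# (0.3 s/MiB) out of 4000 MiB total, the estimated remaining time is
# (4000 - 1000) * 0.3 = 900 seconds.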
8824 def _WipeDisks(lu, instance):
8825 """Wipes instance disks.
8827 @type lu: L{LogicalUnit}
8828 @param lu: the logical unit on whose behalf we execute
8829 @type instance: L{objects.Instance}
8830 @param instance: the instance whose disks we should wipe
8831 @return: the success of the wipe
8834 node = instance.primary_node
8836 for device in instance.disks:
8837 lu.cfg.SetDiskID(device, node)
8839 logging.info("Pause sync of instance %s disks", instance.name)
8840 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8842 for idx, success in enumerate(result.payload):
8844 logging.warn("pause-sync of instance %s for disk %d failed",
8848 for idx, device in enumerate(instance.disks):
8849 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8850 # MAX_WIPE_CHUNK at max
8851 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8852 constants.MIN_WIPE_CHUNK_PERCENT)
8853 # we _must_ make this an int, otherwise rounding errors will
8855 wipe_chunk_size = int(wipe_chunk_size)
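# Illustrative numbers (assuming the usual values MAX_WIPE_CHUNK = 1024 MiB
# and MIN_WIPE_CHUNK_PERCENT = 10): a 100 GiB disk is wiped in 1024 MiB
# chunks, while a 4 GiB disk uses chunks of about 409 MiB.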
8857 lu.LogInfo("* Wiping disk %d", idx)
8858 logging.info("Wiping disk %d for instance %s, node %s using"
8859 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8864 start_time = time.time()
8866 while offset < size:
8867 wipe_size = min(wipe_chunk_size, size - offset)
8868 logging.debug("Wiping disk %d, offset %s, chunk %s",
8869 idx, offset, wipe_size)
8870 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8871 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8872 (idx, offset, wipe_size))
8875 if now - last_output >= 60:
8876 eta = _CalcEta(now - start_time, offset, size)
8877 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8878 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8881 logging.info("Resume sync of instance %s disks", instance.name)
8883 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8885 for idx, success in enumerate(result.payload):
8887 lu.LogWarning("Resume sync of disk %d failed, please have a"
8888 " look at the status and troubleshoot the issue", idx)
8889 logging.warn("resume-sync of instance %s for disk %d failed",
8893 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8894 """Create all disks for an instance.
8896 This abstracts away some work from AddInstance.
8898 @type lu: L{LogicalUnit}
8899 @param lu: the logical unit on whose behalf we execute
8900 @type instance: L{objects.Instance}
8901 @param instance: the instance whose disks we should create
8903 @param to_skip: list of indices to skip
8904 @type target_node: string
8905 @param target_node: if passed, overrides the target node for creation
8907 @return: the success of the creation
8910 info = _GetInstanceInfoText(instance)
8911 if target_node is None:
8912 pnode = instance.primary_node
8913 all_nodes = instance.all_nodes
8918 if instance.disk_template in constants.DTS_FILEBASED:
8919 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8920 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8922 result.Raise("Failed to create directory '%s' on"
8923 " node %s" % (file_storage_dir, pnode))
8925 # Note: this needs to be kept in sync with adding of disks in
8926 # LUInstanceSetParams
8927 for idx, device in enumerate(instance.disks):
8928 if to_skip and idx in to_skip:
8930 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8932 for node in all_nodes:
8933 f_create = node == pnode
8934 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8937 def _RemoveDisks(lu, instance, target_node=None):
8938 """Remove all disks for an instance.
8940 This abstracts away some work from `AddInstance()` and
8941 `RemoveInstance()`. Note that in case some of the devices couldn't
8942 be removed, the removal will continue with the other ones (compare
8943 with `_CreateDisks()`).
8945 @type lu: L{LogicalUnit}
8946 @param lu: the logical unit on whose behalf we execute
8947 @type instance: L{objects.Instance}
8948 @param instance: the instance whose disks we should remove
8949 @type target_node: string
8950 @param target_node: used to override the node on which to remove the disks
8952 @return: the success of the removal
8955 logging.info("Removing block devices for instance %s", instance.name)
8958 for (idx, device) in enumerate(instance.disks):
8960 edata = [(target_node, device)]
8962 edata = device.ComputeNodeTree(instance.primary_node)
8963 for node, disk in edata:
8964 lu.cfg.SetDiskID(disk, node)
8965 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8967 lu.LogWarning("Could not remove disk %s on node %s,"
8968 " continuing anyway: %s", idx, node, msg)
8971 # if this is a DRBD disk, return its port to the pool
8972 if device.dev_type in constants.LDS_DRBD:
8973 tcp_port = device.logical_id[2]
8974 lu.cfg.AddTcpUdpPort(tcp_port)
8976 if instance.disk_template == constants.DT_FILE:
8977 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8981 tgt = instance.primary_node
8982 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8984 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8985 file_storage_dir, instance.primary_node, result.fail_msg)
8991 def _ComputeDiskSizePerVG(disk_template, disks):
8992 """Compute disk size requirements in the volume group
8995 def _compute(disks, payload):
8996 """Universal algorithm.
9001 vgs[disk[constants.IDISK_VG]] = \
9002 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
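# Example: two 10240 MiB disks in volume group "xenvg" with
# payload=DRBD_META_SIZE (128 MiB) accumulate to {"xenvg": 20736}.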
9006 # Required free disk space as a function of disk and swap space
9008 constants.DT_DISKLESS: {},
9009 constants.DT_PLAIN: _compute(disks, 0),
9010 # 128 MB are added for drbd metadata for each disk
9011 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9012 constants.DT_FILE: {},
9013 constants.DT_SHARED_FILE: {},
9016 if disk_template not in req_size_dict:
9017 raise errors.ProgrammerError("Disk template '%s' size requirement"
9018 " is unknown" % disk_template)
9020 return req_size_dict[disk_template]
9023 def _ComputeDiskSize(disk_template, disks):
9024 """Compute disk size requirements in the volume group
9027 # Required free disk space as a function of disk and swap space
9029 constants.DT_DISKLESS: None,
9030 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9031 # 128 MB are added for drbd metadata for each disk
9033 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9034 constants.DT_FILE: None,
9035 constants.DT_SHARED_FILE: 0,
9036 constants.DT_BLOCK: 0,
9037 constants.DT_RBD: 0,
9040 if disk_template not in req_size_dict:
9041 raise errors.ProgrammerError("Disk template '%s' size requirement"
9042 " is unknown" % disk_template)
9044 return req_size_dict[disk_template]
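# Example: two disks of 5120 and 10240 MiB require 15360 MiB with the
# plain (LVM) template and 15360 + 2 * 128 = 15616 MiB with DRBD8, which
# adds one metadata volume per disk.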
9047 def _FilterVmNodes(lu, nodenames):
9048 """Filters out non-vm_capable nodes from a list.
9050 @type lu: L{LogicalUnit}
9051 @param lu: the logical unit for which we check
9052 @type nodenames: list
9053 @param nodenames: the list of nodes on which we should check
9055 @return: the list of vm-capable nodes
9058 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9059 return [name for name in nodenames if name not in vm_nodes]
9062 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9063 """Hypervisor parameter validation.
9065 This function abstracts the hypervisor parameter validation to be
9066 used in both instance create and instance modify.
9068 @type lu: L{LogicalUnit}
9069 @param lu: the logical unit for which we check
9070 @type nodenames: list
9071 @param nodenames: the list of nodes on which we should check
9072 @type hvname: string
9073 @param hvname: the name of the hypervisor we should use
9074 @type hvparams: dict
9075 @param hvparams: the parameters which we need to check
9076 @raise errors.OpPrereqError: if the parameters are not valid
9079 nodenames = _FilterVmNodes(lu, nodenames)
9081 cluster = lu.cfg.GetClusterInfo()
9082 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9084 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9085 for node in nodenames:
9089 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9092 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9093 """OS parameters validation.
9095 @type lu: L{LogicalUnit}
9096 @param lu: the logical unit for which we check
9097 @type required: boolean
9098 @param required: whether the validation should fail if the OS is not
9100 @type nodenames: list
9101 @param nodenames: the list of nodes on which we should check
9102 @type osname: string
9103 @param osname: the name of the OS we should use
9104 @type osparams: dict
9105 @param osparams: the parameters which we need to check
9106 @raise errors.OpPrereqError: if the parameters are not valid
9109 nodenames = _FilterVmNodes(lu, nodenames)
9110 result = lu.rpc.call_os_validate(nodenames, required, osname,
9111 [constants.OS_VALIDATE_PARAMETERS],
9113 for node, nres in result.items():
9114 # we don't check for offline cases since this should be run only
9115 # against the master node and/or an instance's nodes
9116 nres.Raise("OS Parameters validation failed on node %s" % node)
9117 if not nres.payload:
9118 lu.LogInfo("OS %s not found on node %s, validation skipped",
9122 class LUInstanceCreate(LogicalUnit):
9123 """Create an instance.
9126 HPATH = "instance-add"
9127 HTYPE = constants.HTYPE_INSTANCE
9130 def CheckArguments(self):
9134 # do not require name_check to ease forward/backward compatibility
9136 if self.op.no_install and self.op.start:
9137 self.LogInfo("No-installation mode selected, disabling startup")
9138 self.op.start = False
9139 # validate/normalize the instance name
9140 self.op.instance_name = \
9141 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9143 if self.op.ip_check and not self.op.name_check:
9144 # TODO: make the ip check more flexible and not depend on the name check
9145 raise errors.OpPrereqError("Cannot do IP address check without a name"
9146 " check", errors.ECODE_INVAL)
9148 # check nics' parameter names
9149 for nic in self.op.nics:
9150 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9152 # check disks. parameter names and consistent adopt/no-adopt strategy
9153 has_adopt = has_no_adopt = False
9154 for disk in self.op.disks:
9155 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9156 if constants.IDISK_ADOPT in disk:
9160 if has_adopt and has_no_adopt:
9161 raise errors.OpPrereqError("Either all disks are adopted or none is",
9164 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9165 raise errors.OpPrereqError("Disk adoption is not supported for the"
9166 " '%s' disk template" %
9167 self.op.disk_template,
9169 if self.op.iallocator is not None:
9170 raise errors.OpPrereqError("Disk adoption not allowed with an"
9171 " iallocator script", errors.ECODE_INVAL)
9172 if self.op.mode == constants.INSTANCE_IMPORT:
9173 raise errors.OpPrereqError("Disk adoption not allowed for"
9174 " instance import", errors.ECODE_INVAL)
9176 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9177 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9178 " but no 'adopt' parameter given" %
9179 self.op.disk_template,
9182 self.adopt_disks = has_adopt
9184 # instance name verification
9185 if self.op.name_check:
9186 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9187 self.op.instance_name = self.hostname1.name
9188 # used in CheckPrereq for ip ping check
9189 self.check_ip = self.hostname1.ip
9191 self.check_ip = None
9193 # file storage checks
9194 if (self.op.file_driver and
9195 self.op.file_driver not in constants.FILE_DRIVER):
9196 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9197 self.op.file_driver, errors.ECODE_INVAL)
9199 if self.op.disk_template == constants.DT_FILE:
9200 opcodes.RequireFileStorage()
9201 elif self.op.disk_template == constants.DT_SHARED_FILE:
9202 opcodes.RequireSharedFileStorage()
9204 ### Node/iallocator related checks
9205 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9207 if self.op.pnode is not None:
9208 if self.op.disk_template in constants.DTS_INT_MIRROR:
9209 if self.op.snode is None:
9210 raise errors.OpPrereqError("The networked disk templates need"
9211 " a mirror node", errors.ECODE_INVAL)
9213 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9215 self.op.snode = None
9217 self._cds = _GetClusterDomainSecret()
9219 if self.op.mode == constants.INSTANCE_IMPORT:
9220 # On import force_variant must be True, because if we forced it at
9221 # initial install, our only chance when importing it back is that it
9223 self.op.force_variant = True
9225 if self.op.no_install:
9226 self.LogInfo("No-installation mode has no effect during import")
9228 elif self.op.mode == constants.INSTANCE_CREATE:
9229 if self.op.os_type is None:
9230 raise errors.OpPrereqError("No guest OS specified",
9232 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9233 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9234 " installation" % self.op.os_type,
9236 if self.op.disk_template is None:
9237 raise errors.OpPrereqError("No disk template specified",
9240 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9241 # Check handshake to ensure both clusters have the same domain secret
9242 src_handshake = self.op.source_handshake
9243 if not src_handshake:
9244 raise errors.OpPrereqError("Missing source handshake",
9247 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9250 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9253 # Load and check source CA
9254 self.source_x509_ca_pem = self.op.source_x509_ca
9255 if not self.source_x509_ca_pem:
9256 raise errors.OpPrereqError("Missing source X509 CA",
9260 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9262 except OpenSSL.crypto.Error, err:
9263 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9264 (err, ), errors.ECODE_INVAL)
9266 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9267 if errcode is not None:
9268 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9271 self.source_x509_ca = cert
9273 src_instance_name = self.op.source_instance_name
9274 if not src_instance_name:
9275 raise errors.OpPrereqError("Missing source instance name",
9278 self.source_instance_name = \
9279 netutils.GetHostname(name=src_instance_name).name
9282 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9283 self.op.mode, errors.ECODE_INVAL)
9285 def ExpandNames(self):
9286 """ExpandNames for CreateInstance.
9288 Figure out the right locks for instance creation.
9291 self.needed_locks = {}
9293 instance_name = self.op.instance_name
9294 # this is just a preventive check, but someone might still add this
9295 # instance in the meantime, and creation will fail at lock-add time
9296 if instance_name in self.cfg.GetInstanceList():
9297 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9298 instance_name, errors.ECODE_EXISTS)
9300 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9302 if self.op.iallocator:
9303 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9304 # specifying a group on instance creation and then selecting nodes from
9306 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9307 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9309 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9310 nodelist = [self.op.pnode]
9311 if self.op.snode is not None:
9312 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9313 nodelist.append(self.op.snode)
9314 self.needed_locks[locking.LEVEL_NODE] = nodelist
9315 # Lock resources of instance's primary and secondary nodes (copy to
9316 # prevent accidental modification)
9317 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9319 # in case of import lock the source node too
9320 if self.op.mode == constants.INSTANCE_IMPORT:
9321 src_node = self.op.src_node
9322 src_path = self.op.src_path
9324 if src_path is None:
9325 self.op.src_path = src_path = self.op.instance_name
9327 if src_node is None:
9328 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9329 self.op.src_node = None
9330 if os.path.isabs(src_path):
9331 raise errors.OpPrereqError("Importing an instance from a path"
9332 " requires a source node option",
9333 errors.ECODE_INVAL)
9334 else:
9335 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9336 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9337 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9338 if not os.path.isabs(src_path):
9339 self.op.src_path = src_path = \
9340 utils.PathJoin(constants.EXPORT_DIR, src_path)
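# A rough sketch of the lock structure built above (node names illustrative
# only): without an iallocator a mirrored creation ends up with
#   self.needed_locks = {
#     locking.LEVEL_NODE: ["node1.example.com", "node2.example.com"],
#     locking.LEVEL_NODE_RES: ["node1.example.com", "node2.example.com"],
#   }
# whereas with an iallocator (or an import without a source node) the node
# level is widened to locking.ALL_SET.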
9342 def _RunAllocator(self):
9343 """Run the allocator based on input opcode.
9346 nics = [n.ToDict() for n in self.nics]
9347 ial = IAllocator(self.cfg, self.rpc,
9348 mode=constants.IALLOCATOR_MODE_ALLOC,
9349 name=self.op.instance_name,
9350 disk_template=self.op.disk_template,
9353 vcpus=self.be_full[constants.BE_VCPUS],
9354 memory=self.be_full[constants.BE_MAXMEM],
9355 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9356 disks=self.disks,
9357 nics=nics,
9358 hypervisor=self.op.hypervisor,
9359 )
9361 ial.Run(self.op.iallocator)
9363 if not ial.success:
9364 raise errors.OpPrereqError("Can't compute nodes using"
9365 " iallocator '%s': %s" %
9366 (self.op.iallocator, ial.info),
9367 errors.ECODE_NORES)
9368 if len(ial.result) != ial.required_nodes:
9369 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9370 " of nodes (%s), required %s" %
9371 (self.op.iallocator, len(ial.result),
9372 ial.required_nodes), errors.ECODE_FAULT)
9373 self.op.pnode = ial.result[0]
9374 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9375 self.op.instance_name, self.op.iallocator,
9376 utils.CommaJoin(ial.result))
9377 if ial.required_nodes == 2:
9378 self.op.snode = ial.result[1]
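# Illustrative outcome: for a mirrored template the allocator has to return
# exactly two names, e.g. ["node1.example.com", "node2.example.com"]; the
# first becomes self.op.pnode and the second self.op.snode, while single-node
# templates only consume ial.result[0].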
9380 def BuildHooksEnv(self):
9381 """Build hooks env.
9383 This runs on master, primary and secondary nodes of the instance.
9385 """
9386 env = {
9387 "ADD_MODE": self.op.mode,
9388 }
9389 if self.op.mode == constants.INSTANCE_IMPORT:
9390 env["SRC_NODE"] = self.op.src_node
9391 env["SRC_PATH"] = self.op.src_path
9392 env["SRC_IMAGES"] = self.src_images
9394 env.update(_BuildInstanceHookEnv(
9395 name=self.op.instance_name,
9396 primary_node=self.op.pnode,
9397 secondary_nodes=self.secondaries,
9398 status=self.op.start,
9399 os_type=self.op.os_type,
9400 minmem=self.be_full[constants.BE_MINMEM],
9401 maxmem=self.be_full[constants.BE_MAXMEM],
9402 vcpus=self.be_full[constants.BE_VCPUS],
9403 nics=_NICListToTuple(self, self.nics),
9404 disk_template=self.op.disk_template,
9405 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9406 for d in self.disks],
9407 bep=self.be_full,
9408 hvp=self.hv_full,
9409 hypervisor_name=self.op.hypervisor,
9410 tags=self.op.tags,
9411 ))
9413 return env
9415 def BuildHooksNodes(self):
9416 """Build hooks nodes.
9418 """
9419 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9420 return (nl, nl)
9422 def _ReadExportInfo(self):
9423 """Reads the export information from disk.
9425 It will override the opcode source node and path with the actual
9426 information, if these two were not specified before.
9428 @return: the export information
9431 assert self.op.mode == constants.INSTANCE_IMPORT
9433 src_node = self.op.src_node
9434 src_path = self.op.src_path
9436 if src_node is None:
9437 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9438 exp_list = self.rpc.call_export_list(locked_nodes)
9439 found = False
9440 for node in exp_list:
9441 if exp_list[node].fail_msg:
9442 continue
9443 if src_path in exp_list[node].payload:
9444 found = True
9445 self.op.src_node = src_node = node
9446 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9447 src_path)
9448 break
9449 if not found:
9450 raise errors.OpPrereqError("No export found for relative path %s" %
9451 src_path, errors.ECODE_INVAL)
9453 _CheckNodeOnline(self, src_node)
9454 result = self.rpc.call_export_info(src_node, src_path)
9455 result.Raise("No export or invalid export found in dir %s" % src_path)
9457 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9458 if not export_info.has_section(constants.INISECT_EXP):
9459 raise errors.ProgrammerError("Corrupted export config",
9460 errors.ECODE_ENVIRON)
9462 ei_version = export_info.get(constants.INISECT_EXP, "version")
9463 if (int(ei_version) != constants.EXPORT_VERSION):
9464 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9465 (ei_version, constants.EXPORT_VERSION),
9466 errors.ECODE_ENVIRON)
9468 return export_info
9469 def _ReadExportParams(self, einfo):
9470 """Use export parameters as defaults.
9472 In case the opcode doesn't specify (as in override) some instance
9473 parameters, then try to use them from the export information, if
9474 that declares them.
9476 """
9477 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9479 if self.op.disk_template is None:
9480 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9481 self.op.disk_template = einfo.get(constants.INISECT_INS,
9482 "disk_template")
9483 if self.op.disk_template not in constants.DISK_TEMPLATES:
9484 raise errors.OpPrereqError("Disk template specified in configuration"
9485 " file is not one of the allowed values:"
9486 " %s" % " ".join(constants.DISK_TEMPLATES))
9487 else:
9488 raise errors.OpPrereqError("No disk template specified and the export"
9489 " is missing the disk_template information",
9490 errors.ECODE_INVAL)
9492 if not self.op.disks:
9493 disks = []
9494 # TODO: import the disk iv_name too
9495 for idx in range(constants.MAX_DISKS):
9496 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9497 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9498 disks.append({constants.IDISK_SIZE: disk_sz})
9499 self.op.disks = disks
9500 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9501 raise errors.OpPrereqError("No disk info specified and the export"
9502 " is missing the disk information",
9505 if not self.op.nics:
9506 nics = []
9507 for idx in range(constants.MAX_NICS):
9508 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9509 ndict = {}
9510 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9511 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9512 ndict[name] = v
9513 nics.append(ndict)
9514 else:
9515 break
9516 self.op.nics = nics
9518 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9519 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9521 if (self.op.hypervisor is None and
9522 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9523 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9525 if einfo.has_section(constants.INISECT_HYP):
9526 # use the export parameters but do not override the ones
9527 # specified by the user
9528 for name, value in einfo.items(constants.INISECT_HYP):
9529 if name not in self.op.hvparams:
9530 self.op.hvparams[name] = value
9532 if einfo.has_section(constants.INISECT_BEP):
9533 # use the parameters, without overriding
9534 for name, value in einfo.items(constants.INISECT_BEP):
9535 if name not in self.op.beparams:
9536 self.op.beparams[name] = value
9537 # Compatibility for the old "memory" be param
9538 if name == constants.BE_MEMORY:
9539 if constants.BE_MAXMEM not in self.op.beparams:
9540 self.op.beparams[constants.BE_MAXMEM] = value
9541 if constants.BE_MINMEM not in self.op.beparams:
9542 self.op.beparams[constants.BE_MINMEM] = value
9544 # try to read the parameters old style, from the main section
9545 for name in constants.BES_PARAMETERS:
9546 if (name not in self.op.beparams and
9547 einfo.has_option(constants.INISECT_INS, name)):
9548 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9550 if einfo.has_section(constants.INISECT_OSP):
9551 # use the parameters, without overriding
9552 for name, value in einfo.items(constants.INISECT_OSP):
9553 if name not in self.op.osparams:
9554 self.op.osparams[name] = value
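# The export data read above is a simple INI-style document; a minimal sketch
# (section/option names as referenced by the constants, values illustrative):
# the constants.INISECT_EXP section carries "version" and "os", while the
# constants.INISECT_INS section carries e.g. "disk_template", "disk0_size",
# "nic0_mac", "hypervisor" and "tags"; optional hypervisor/backend/OS
# parameter sections are merged in without overriding explicit opcode values.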
9556 def _RevertToDefaults(self, cluster):
9557 """Revert the instance parameters to the default values.
9561 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9562 for name in self.op.hvparams.keys():
9563 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9564 del self.op.hvparams[name]
9566 be_defs = cluster.SimpleFillBE({})
9567 for name in self.op.beparams.keys():
9568 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9569 del self.op.beparams[name]
9571 nic_defs = cluster.SimpleFillNIC({})
9572 for nic in self.op.nics:
9573 for name in constants.NICS_PARAMETERS:
9574 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9575 del nic[name]
9577 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9578 for name in self.op.osparams.keys():
9579 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9580 del self.op.osparams[name]
9582 def _CalculateFileStorageDir(self):
9583 """Calculate final instance file storage dir.
9586 # file storage dir calculation/check
9587 self.instance_file_storage_dir = None
9588 if self.op.disk_template in constants.DTS_FILEBASED:
9589 # build the full file storage dir path
9590 joinargs = []
9592 if self.op.disk_template == constants.DT_SHARED_FILE:
9593 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9594 else:
9595 get_fsd_fn = self.cfg.GetFileStorageDir
9597 cfg_storagedir = get_fsd_fn()
9598 if not cfg_storagedir:
9599 raise errors.OpPrereqError("Cluster file storage dir not defined")
9600 joinargs.append(cfg_storagedir)
9602 if self.op.file_storage_dir is not None:
9603 joinargs.append(self.op.file_storage_dir)
9605 joinargs.append(self.op.instance_name)
9607 # pylint: disable=W0142
9608 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
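# Example (illustrative paths; the base directory comes from the cluster
# configuration): with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir "web" and instance name
# "inst1.example.com", the computed directory is
# "/srv/ganeti/file-storage/web/inst1.example.com".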
9610 def CheckPrereq(self): # pylint: disable=R0914
9611 """Check prerequisites.
9614 self._CalculateFileStorageDir()
9616 if self.op.mode == constants.INSTANCE_IMPORT:
9617 export_info = self._ReadExportInfo()
9618 self._ReadExportParams(export_info)
9620 if (not self.cfg.GetVGName() and
9621 self.op.disk_template not in constants.DTS_NOT_LVM):
9622 raise errors.OpPrereqError("Cluster does not support lvm-based"
9623 " instances", errors.ECODE_STATE)
9625 if (self.op.hypervisor is None or
9626 self.op.hypervisor == constants.VALUE_AUTO):
9627 self.op.hypervisor = self.cfg.GetHypervisorType()
9629 cluster = self.cfg.GetClusterInfo()
9630 enabled_hvs = cluster.enabled_hypervisors
9631 if self.op.hypervisor not in enabled_hvs:
9632 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9633 " cluster (%s)" % (self.op.hypervisor,
9634 ",".join(enabled_hvs)),
9637 # Check tag validity
9638 for tag in self.op.tags:
9639 objects.TaggableObject.ValidateTag(tag)
9641 # check hypervisor parameter syntax (locally)
9642 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9643 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9644 self.op.hvparams)
9645 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9646 hv_type.CheckParameterSyntax(filled_hvp)
9647 self.hv_full = filled_hvp
9648 # check that we don't specify global parameters on an instance
9649 _CheckGlobalHvParams(self.op.hvparams)
9651 # fill and remember the beparams dict
9652 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9653 for param, value in self.op.beparams.iteritems():
9654 if value == constants.VALUE_AUTO:
9655 self.op.beparams[param] = default_beparams[param]
9656 objects.UpgradeBeParams(self.op.beparams)
9657 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9658 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9660 # build os parameters
9661 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9663 # now that hvp/bep are in final format, let's reset to defaults,
9664 # if told to do so
9665 if self.op.identify_defaults:
9666 self._RevertToDefaults(cluster)
9668 # NIC buildup
9669 self.nics = []
9670 for idx, nic in enumerate(self.op.nics):
9671 nic_mode_req = nic.get(constants.INIC_MODE, None)
9672 nic_mode = nic_mode_req
9673 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9674 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9676 # in routed mode, for the first nic, the default ip is 'auto'
9677 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9678 default_ip_mode = constants.VALUE_AUTO
9680 default_ip_mode = constants.VALUE_NONE
9682 # ip validity checks
9683 ip = nic.get(constants.INIC_IP, default_ip_mode)
9684 if ip is None or ip.lower() == constants.VALUE_NONE:
9685 nic_ip = None
9686 elif ip.lower() == constants.VALUE_AUTO:
9687 if not self.op.name_check:
9688 raise errors.OpPrereqError("IP address set to auto but name checks"
9689 " have been skipped",
9691 nic_ip = self.hostname1.ip
9693 if not netutils.IPAddress.IsValid(ip):
9694 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9698 # TODO: check the ip address for uniqueness
9699 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9700 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9703 # MAC address verification
9704 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9705 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9706 mac = utils.NormalizeAndValidateMac(mac)
9708 try:
9709 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9710 except errors.ReservationError:
9711 raise errors.OpPrereqError("MAC address %s already in use"
9712 " in cluster" % mac,
9713 errors.ECODE_NOTUNIQUE)
9715 # Build nic parameters
9716 link = nic.get(constants.INIC_LINK, None)
9717 if link == constants.VALUE_AUTO:
9718 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9719 nicparams = {}
9720 if nic_mode_req:
9721 nicparams[constants.NIC_MODE] = nic_mode
9722 if link:
9723 nicparams[constants.NIC_LINK] = link
9725 check_params = cluster.SimpleFillNIC(nicparams)
9726 objects.NIC.CheckParameterSyntax(check_params)
9727 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
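# At this point every requested NIC has been turned into an objects.NIC with
# a validated MAC/IP (the literal "auto"/"generate" MAC markers are resolved
# further below, just before the allocator runs) and with nicparams holding
# only the explicitly requested mode/link overrides.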
9729 # disk checks/pre-build
9730 default_vg = self.cfg.GetVGName()
9731 self.disks = []
9732 for disk in self.op.disks:
9733 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9734 if mode not in constants.DISK_ACCESS_SET:
9735 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9736 mode, errors.ECODE_INVAL)
9737 size = disk.get(constants.IDISK_SIZE, None)
9738 if size is None:
9739 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9740 try:
9741 size = int(size)
9742 except (TypeError, ValueError):
9743 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9744 errors.ECODE_INVAL)
9746 data_vg = disk.get(constants.IDISK_VG, default_vg)
9747 new_disk = {
9748 constants.IDISK_SIZE: size,
9749 constants.IDISK_MODE: mode,
9750 constants.IDISK_VG: data_vg,
9751 }
9752 if constants.IDISK_METAVG in disk:
9753 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9754 if constants.IDISK_ADOPT in disk:
9755 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9756 self.disks.append(new_disk)
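# Example of the normalization above (illustrative values): a request such as
#   {constants.IDISK_SIZE: "10240", constants.IDISK_MODE: constants.DISK_RDWR}
# becomes
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: default_vg}
# with IDISK_METAVG and IDISK_ADOPT copied over only when given explicitly.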
9758 if self.op.mode == constants.INSTANCE_IMPORT:
9759 disk_images = []
9760 for idx in range(len(self.disks)):
9761 option = "disk%d_dump" % idx
9762 if export_info.has_option(constants.INISECT_INS, option):
9763 # FIXME: are the old os-es, disk sizes, etc. useful?
9764 export_name = export_info.get(constants.INISECT_INS, option)
9765 image = utils.PathJoin(self.op.src_path, export_name)
9766 disk_images.append(image)
9767 else:
9768 disk_images.append(False)
9770 self.src_images = disk_images
9772 old_name = export_info.get(constants.INISECT_INS, "name")
9773 if self.op.instance_name == old_name:
9774 for idx, nic in enumerate(self.nics):
9775 if nic.mac == constants.VALUE_AUTO:
9776 nic_mac_ini = "nic%d_mac" % idx
9777 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9779 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9781 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9782 if self.op.ip_check:
9783 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9784 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9785 (self.check_ip, self.op.instance_name),
9786 errors.ECODE_NOTUNIQUE)
9788 #### mac address generation
9789 # By generating here the mac address both the allocator and the hooks get
9790 # the real final mac address rather than the 'auto' or 'generate' value.
9791 # There is a race condition between the generation and the instance object
9792 # creation, which means that we know the mac is valid now, but we're not
9793 # sure it will be when we actually add the instance. If things go bad
9794 # adding the instance will abort because of a duplicate mac, and the
9795 # creation job will fail.
9796 for nic in self.nics:
9797 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9798 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9802 if self.op.iallocator is not None:
9803 self._RunAllocator()
9805 # Release all unneeded node locks
9806 _ReleaseLocks(self, locking.LEVEL_NODE,
9807 keep=filter(None, [self.op.pnode, self.op.snode,
9808 self.op.src_node]))
9809 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9810 keep=filter(None, [self.op.pnode, self.op.snode,
9811 self.op.src_node]))
9813 #### node related checks
9815 # check primary node
9816 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9817 assert self.pnode is not None, \
9818 "Cannot retrieve locked node %s" % self.op.pnode
9819 if pnode.offline:
9820 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9821 pnode.name, errors.ECODE_STATE)
9822 if pnode.drained:
9823 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9824 pnode.name, errors.ECODE_STATE)
9825 if not pnode.vm_capable:
9826 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9827 " '%s'" % pnode.name, errors.ECODE_STATE)
9829 self.secondaries = []
9831 # mirror node verification
9832 if self.op.disk_template in constants.DTS_INT_MIRROR:
9833 if self.op.snode == pnode.name:
9834 raise errors.OpPrereqError("The secondary node cannot be the"
9835 " primary node", errors.ECODE_INVAL)
9836 _CheckNodeOnline(self, self.op.snode)
9837 _CheckNodeNotDrained(self, self.op.snode)
9838 _CheckNodeVmCapable(self, self.op.snode)
9839 self.secondaries.append(self.op.snode)
9841 snode = self.cfg.GetNodeInfo(self.op.snode)
9842 if pnode.group != snode.group:
9843 self.LogWarning("The primary and secondary nodes are in two"
9844 " different node groups; the disk parameters"
9845 " from the first disk's node group will be"
9848 nodenames = [pnode.name] + self.secondaries
9850 # Verify instance specs
9851 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9852 ispec = {
9853 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9854 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9855 constants.ISPEC_DISK_COUNT: len(self.disks),
9856 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9857 constants.ISPEC_NIC_COUNT: len(self.nics),
9858 constants.ISPEC_SPINDLE_USE: spindle_use,
9859 }
9861 group_info = self.cfg.GetNodeGroup(pnode.group)
9862 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9863 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9864 if not self.op.ignore_ipolicy and res:
9865 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9866 " policy: %s") % (pnode.group,
9867 utils.CommaJoin(res)),
9870 # disk parameters (not customizable at instance or node level)
9871 # just use the primary node parameters, ignoring the secondary.
9872 self.diskparams = group_info.diskparams
9874 if not self.adopt_disks:
9875 if self.op.disk_template == constants.DT_RBD:
9876 # _CheckRADOSFreeSpace() is just a placeholder.
9877 # Any function that checks prerequisites can be placed here.
9878 # Check if there is enough space on the RADOS cluster.
9879 _CheckRADOSFreeSpace()
9880 else:
9881 # Check lv size requirements, if not adopting
9882 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9883 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9885 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9886 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9887 disk[constants.IDISK_ADOPT])
9888 for disk in self.disks])
9889 if len(all_lvs) != len(self.disks):
9890 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9891 errors.ECODE_INVAL)
9892 for lv_name in all_lvs:
9893 try:
9894 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9895 # to ReserveLV uses the same syntax
9896 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9897 except errors.ReservationError:
9898 raise errors.OpPrereqError("LV named %s used by another instance" %
9899 lv_name, errors.ECODE_NOTUNIQUE)
9901 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9902 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9904 node_lvs = self.rpc.call_lv_list([pnode.name],
9905 vg_names.payload.keys())[pnode.name]
9906 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9907 node_lvs = node_lvs.payload
9909 delta = all_lvs.difference(node_lvs.keys())
9910 if delta:
9911 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9912 utils.CommaJoin(delta),
9913 errors.ECODE_INVAL)
9914 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9915 if online_lvs:
9916 raise errors.OpPrereqError("Online logical volumes found, cannot"
9917 " adopt: %s" % utils.CommaJoin(online_lvs),
9918 errors.ECODE_STATE)
9919 # update the size of disk based on what is found
9920 for dsk in self.disks:
9921 dsk[constants.IDISK_SIZE] = \
9922 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9923 dsk[constants.IDISK_ADOPT])][0]))
9925 elif self.op.disk_template == constants.DT_BLOCK:
9926 # Normalize and de-duplicate device paths
9927 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9928 for disk in self.disks])
9929 if len(all_disks) != len(self.disks):
9930 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9932 baddisks = [d for d in all_disks
9933 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9935 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9936 " cannot be adopted" %
9937 (", ".join(baddisks),
9938 constants.ADOPTABLE_BLOCKDEV_ROOT),
9941 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9942 list(all_disks))[pnode.name]
9943 node_disks.Raise("Cannot get block device information from node %s" %
9945 node_disks = node_disks.payload
9946 delta = all_disks.difference(node_disks.keys())
9948 raise errors.OpPrereqError("Missing block device(s): %s" %
9949 utils.CommaJoin(delta),
9951 for dsk in self.disks:
9952 dsk[constants.IDISK_SIZE] = \
9953 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9955 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9957 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9958 # check OS parameters (remotely)
9959 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9961 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9963 # memory check on primary node
9964 #TODO(dynmem): use MINMEM for checking
9966 _CheckNodeFreeMemory(self, self.pnode.name,
9967 "creating instance %s" % self.op.instance_name,
9968 self.be_full[constants.BE_MAXMEM],
9971 self.dry_run_result = list(nodenames)
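# nodenames is [primary] + secondaries, so a dry-run of this opcode simply
# reports the nodes that would host the new instance instead of creating it.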
9973 def Exec(self, feedback_fn):
9974 """Create and add the instance to the cluster.
9977 instance = self.op.instance_name
9978 pnode_name = self.pnode.name
9980 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9981 self.owned_locks(locking.LEVEL_NODE)), \
9982 "Node locks differ from node resource locks"
9984 ht_kind = self.op.hypervisor
9985 if ht_kind in constants.HTS_REQ_PORT:
9986 network_port = self.cfg.AllocatePort()
9987 else:
9988 network_port = None
9990 disks = _GenerateDiskTemplate(self,
9991 self.op.disk_template,
9992 instance, pnode_name,
9995 self.instance_file_storage_dir,
9996 self.op.file_driver,
10001 iobj = objects.Instance(name=instance, os=self.op.os_type,
10002 primary_node=pnode_name,
10003 nics=self.nics, disks=disks,
10004 disk_template=self.op.disk_template,
10005 admin_state=constants.ADMINST_DOWN,
10006 network_port=network_port,
10007 beparams=self.op.beparams,
10008 hvparams=self.op.hvparams,
10009 hypervisor=self.op.hypervisor,
10010 osparams=self.op.osparams,
10011 )
10013 if self.op.tags:
10014 for tag in self.op.tags:
10015 iobj.AddTag(tag)
10017 if self.adopt_disks:
10018 if self.op.disk_template == constants.DT_PLAIN:
10019 # rename LVs to the newly-generated names; we need to construct
10020 # 'fake' LV disks with the old data, plus the new unique_id
10021 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10022 rename_to = []
10023 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10024 rename_to.append(t_dsk.logical_id)
10025 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10026 self.cfg.SetDiskID(t_dsk, pnode_name)
10027 result = self.rpc.call_blockdev_rename(pnode_name,
10028 zip(tmp_disks, rename_to))
10029 result.Raise("Failed to rename adopted LVs")
10030 else:
10031 feedback_fn("* creating instance disks...")
10032 try:
10033 _CreateDisks(self, iobj)
10034 except errors.OpExecError:
10035 self.LogWarning("Device creation failed, reverting...")
10036 try:
10037 _RemoveDisks(self, iobj)
10038 finally:
10039 self.cfg.ReleaseDRBDMinors(instance)
10040 raise
10042 feedback_fn("adding instance %s to cluster config" % instance)
10044 self.cfg.AddInstance(iobj, self.proc.GetECId())
10046 # Declare that we don't want to remove the instance lock anymore, as we've
10047 # added the instance to the config
10048 del self.remove_locks[locking.LEVEL_INSTANCE]
10050 if self.op.mode == constants.INSTANCE_IMPORT:
10051 # Release unused nodes
10052 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10053 else:
10054 # Release all nodes
10055 _ReleaseLocks(self, locking.LEVEL_NODE)
10057 disk_abort = False
10058 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10059 feedback_fn("* wiping instance disks...")
10060 try:
10061 _WipeDisks(self, iobj)
10062 except errors.OpExecError, err:
10063 logging.exception("Wiping disks failed")
10064 self.LogWarning("Wiping instance disks failed (%s)", err)
10065 disk_abort = True
10067 if disk_abort:
10068 # Something is already wrong with the disks, don't do anything else
10069 pass
10070 elif self.op.wait_for_sync:
10071 disk_abort = not _WaitForSync(self, iobj)
10072 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10073 # make sure the disks are not degraded (still sync-ing is ok)
10074 feedback_fn("* checking mirrors status")
10075 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10076 else:
10077 disk_abort = False
10079 if disk_abort:
10080 _RemoveDisks(self, iobj)
10081 self.cfg.RemoveInstance(iobj.name)
10082 # Make sure the instance lock gets removed
10083 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10084 raise errors.OpExecError("There are some degraded disks for"
10085 " this instance")
10087 # Release all node resource locks
10088 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10090 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10091 if self.op.mode == constants.INSTANCE_CREATE:
10092 if not self.op.no_install:
10093 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10094 not self.op.wait_for_sync)
10096 feedback_fn("* pausing disk sync to install instance OS")
10097 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10099 for idx, success in enumerate(result.payload):
10101 logging.warn("pause-sync of instance %s for disk %d failed",
10104 feedback_fn("* running the instance OS create scripts...")
10105 # FIXME: pass debug option from opcode to backend
10107 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10108 self.op.debug_level)
10110 feedback_fn("* resuming disk sync")
10111 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10113 for idx, success in enumerate(result.payload):
10115 logging.warn("resume-sync of instance %s for disk %d failed",
10118 os_add_result.Raise("Could not add os for instance %s"
10119 " on node %s" % (instance, pnode_name))
10121 elif self.op.mode == constants.INSTANCE_IMPORT:
10122 feedback_fn("* running the instance OS import scripts...")
10126 for idx, image in enumerate(self.src_images):
10130 # FIXME: pass debug option from opcode to backend
10131 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10132 constants.IEIO_FILE, (image, ),
10133 constants.IEIO_SCRIPT,
10134 (iobj.disks[idx], idx),
10136 transfers.append(dt)
10139 masterd.instance.TransferInstanceData(self, feedback_fn,
10140 self.op.src_node, pnode_name,
10141 self.pnode.secondary_ip,
10143 if not compat.all(import_result):
10144 self.LogWarning("Some disks for instance %s on node %s were not"
10145 " imported successfully" % (instance, pnode_name))
10147 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10148 feedback_fn("* preparing remote import...")
10149 # The source cluster will stop the instance before attempting to make a
10150 # connection. In some cases stopping an instance can take a long time,
10151 # hence the shutdown timeout is added to the connection timeout.
10152 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10153 self.op.source_shutdown_timeout)
10154 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10156 assert iobj.primary_node == self.pnode.name
10157 disk_results = \
10158 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10159 self.source_x509_ca,
10160 self._cds, timeouts)
10161 if not compat.all(disk_results):
10162 # TODO: Should the instance still be started, even if some disks
10163 # failed to import (valid for local imports, too)?
10164 self.LogWarning("Some disks for instance %s on node %s were not"
10165 " imported successfully" % (instance, pnode_name))
10167 # Run rename script on newly imported instance
10168 assert iobj.name == instance
10169 feedback_fn("Running rename script for %s" % instance)
10170 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10171 self.source_instance_name,
10172 self.op.debug_level)
10173 if result.fail_msg:
10174 self.LogWarning("Failed to run rename script for %s on node"
10175 " %s: %s" % (instance, pnode_name, result.fail_msg))
10177 else:
10178 # also checked in the prereq part
10179 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10180 % self.op.mode)
10182 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10184 if self.op.start:
10185 iobj.admin_state = constants.ADMINST_UP
10186 self.cfg.Update(iobj, feedback_fn)
10187 logging.info("Starting instance %s on node %s", instance, pnode_name)
10188 feedback_fn("* starting instance...")
10189 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10190 False)
10191 result.Raise("Could not start instance")
10193 return list(iobj.all_nodes)
10196 def _CheckRADOSFreeSpace():
10197 """Compute disk size requirements inside the RADOS cluster.
10200 # For the RADOS cluster we assume there is always enough space.
10201 pass
10204 class LUInstanceConsole(NoHooksLU):
10205 """Connect to an instance's console.
10207 This is somewhat special in that it returns the command line that
10208 you need to run on the master node in order to connect to the
10214 def ExpandNames(self):
10215 self.share_locks = _ShareAll()
10216 self._ExpandAndLockInstance()
10218 def CheckPrereq(self):
10219 """Check prerequisites.
10221 This checks that the instance is in the cluster.
10224 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10225 assert self.instance is not None, \
10226 "Cannot retrieve locked instance %s" % self.op.instance_name
10227 _CheckNodeOnline(self, self.instance.primary_node)
10229 def Exec(self, feedback_fn):
10230 """Connect to the console of an instance
10233 instance = self.instance
10234 node = instance.primary_node
10236 node_insts = self.rpc.call_instance_list([node],
10237 [instance.hypervisor])[node]
10238 node_insts.Raise("Can't get node information from %s" % node)
10240 if instance.name not in node_insts.payload:
10241 if instance.admin_state == constants.ADMINST_UP:
10242 state = constants.INSTST_ERRORDOWN
10243 elif instance.admin_state == constants.ADMINST_DOWN:
10244 state = constants.INSTST_ADMINDOWN
10245 else:
10246 state = constants.INSTST_ADMINOFFLINE
10247 raise errors.OpExecError("Instance %s is not running (state %s)" %
10248 (instance.name, state))
10250 logging.debug("Connecting to console of %s on %s", instance.name, node)
10252 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10255 def _GetInstanceConsole(cluster, instance):
10256 """Returns console information for an instance.
10258 @type cluster: L{objects.Cluster}
10259 @type instance: L{objects.Instance}
10263 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10264 # beparams and hvparams are passed separately, to avoid editing the
10265 # instance and then saving the defaults in the instance itself.
10266 hvparams = cluster.FillHV(instance)
10267 beparams = cluster.FillBE(instance)
10268 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10270 assert console.instance == instance.name
10271 assert console.Validate()
10273 return console.ToDict()
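# The returned dictionary is the serialized objects.InstanceConsole; roughly
# speaking (fields illustrative) it names the console kind plus whatever the
# client needs to connect, e.g. an SSH command or a host/port pair, which the
# CLI then turns into something runnable on the master node.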
10276 class LUInstanceReplaceDisks(LogicalUnit):
10277 """Replace the disks of an instance.
10280 HPATH = "mirrors-replace"
10281 HTYPE = constants.HTYPE_INSTANCE
10284 def CheckArguments(self):
10285 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10286 self.op.iallocator)
10288 def ExpandNames(self):
10289 self._ExpandAndLockInstance()
10291 assert locking.LEVEL_NODE not in self.needed_locks
10292 assert locking.LEVEL_NODE_RES not in self.needed_locks
10293 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10295 assert self.op.iallocator is None or self.op.remote_node is None, \
10296 "Conflicting options"
10298 if self.op.remote_node is not None:
10299 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10301 # Warning: do not remove the locking of the new secondary here
10302 # unless DRBD8.AddChildren is changed to work in parallel;
10303 # currently it doesn't since parallel invocations of
10304 # FindUnusedMinor will conflict
10305 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10306 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10307 else:
10308 self.needed_locks[locking.LEVEL_NODE] = []
10309 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10311 if self.op.iallocator is not None:
10312 # iallocator will select a new node in the same group
10313 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10315 self.needed_locks[locking.LEVEL_NODE_RES] = []
10317 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10318 self.op.iallocator, self.op.remote_node,
10319 self.op.disks, False, self.op.early_release,
10320 self.op.ignore_ipolicy)
10322 self.tasklets = [self.replacer]
10324 def DeclareLocks(self, level):
10325 if level == locking.LEVEL_NODEGROUP:
10326 assert self.op.remote_node is None
10327 assert self.op.iallocator is not None
10328 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10330 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10331 # Lock all groups used by instance optimistically; this requires going
10332 # via the node before it's locked, requiring verification later on
10333 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10334 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10336 elif level == locking.LEVEL_NODE:
10337 if self.op.iallocator is not None:
10338 assert self.op.remote_node is None
10339 assert not self.needed_locks[locking.LEVEL_NODE]
10341 # Lock member nodes of all locked groups
10342 self.needed_locks[locking.LEVEL_NODE] = [node_name
10343 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10344 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10345 else:
10346 self._LockInstancesNodes()
10347 elif level == locking.LEVEL_NODE_RES:
10349 self.needed_locks[locking.LEVEL_NODE_RES] = \
10350 self.needed_locks[locking.LEVEL_NODE]
10352 def BuildHooksEnv(self):
10353 """Build hooks env.
10355 This runs on the master, the primary and all the secondaries.
10357 """
10358 instance = self.replacer.instance
10359 env = {
10360 "MODE": self.op.mode,
10361 "NEW_SECONDARY": self.op.remote_node,
10362 "OLD_SECONDARY": instance.secondary_nodes[0],
10363 }
10364 env.update(_BuildInstanceHookEnvByObject(self, instance))
10365 return env
10367 def BuildHooksNodes(self):
10368 """Build hooks nodes.
10371 instance = self.replacer.instance
10373 self.cfg.GetMasterNode(),
10374 instance.primary_node,
10376 if self.op.remote_node is not None:
10377 nl.append(self.op.remote_node)
10380 def CheckPrereq(self):
10381 """Check prerequisites.
10384 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10385 self.op.iallocator is None)
10387 # Verify if node group locks are still correct
10388 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10389 if owned_groups:
10390 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10392 return LogicalUnit.CheckPrereq(self)
10395 class TLReplaceDisks(Tasklet):
10396 """Replaces disks for an instance.
10398 Note: Locking is not within the scope of this class.
10401 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10402 disks, delay_iallocator, early_release, ignore_ipolicy):
10403 """Initializes this class.
10406 Tasklet.__init__(self, lu)
10408 # Parameters
10409 self.instance_name = instance_name
10410 self.mode = mode
10411 self.iallocator_name = iallocator_name
10412 self.remote_node = remote_node
10413 self.disks = disks
10414 self.delay_iallocator = delay_iallocator
10415 self.early_release = early_release
10416 self.ignore_ipolicy = ignore_ipolicy
10419 self.instance = None
10420 self.new_node = None
10421 self.target_node = None
10422 self.other_node = None
10423 self.remote_node_info = None
10424 self.node_secondary_ip = None
10426 @staticmethod
10427 def CheckArguments(mode, remote_node, iallocator):
10428 """Helper function for users of this class.
10431 # check for valid parameter combination
10432 if mode == constants.REPLACE_DISK_CHG:
10433 if remote_node is None and iallocator is None:
10434 raise errors.OpPrereqError("When changing the secondary either an"
10435 " iallocator script must be used or the"
10436 " new node given", errors.ECODE_INVAL)
10438 if remote_node is not None and iallocator is not None:
10439 raise errors.OpPrereqError("Give either the iallocator or the new"
10440 " secondary, not both", errors.ECODE_INVAL)
10442 elif remote_node is not None or iallocator is not None:
10443 # Not replacing the secondary
10444 raise errors.OpPrereqError("The iallocator and new node options can"
10445 " only be used when changing the"
10446 " secondary node", errors.ECODE_INVAL)
10448 @staticmethod
10449 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10450 """Compute a new secondary node using an IAllocator.
10453 ial = IAllocator(lu.cfg, lu.rpc,
10454 mode=constants.IALLOCATOR_MODE_RELOC,
10455 name=instance_name,
10456 relocate_from=list(relocate_from))
10458 ial.Run(iallocator_name)
10460 if not ial.success:
10461 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10462 " %s" % (iallocator_name, ial.info),
10463 errors.ECODE_NORES)
10465 if len(ial.result) != ial.required_nodes:
10466 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10467 " of nodes (%s), required %s" %
10468 (iallocator_name,
10469 len(ial.result), ial.required_nodes),
10470 errors.ECODE_FAULT)
10472 remote_node_name = ial.result[0]
10474 lu.LogInfo("Selected new secondary for instance '%s': %s",
10475 instance_name, remote_node_name)
10477 return remote_node_name
10479 def _FindFaultyDisks(self, node_name):
10480 """Wrapper for L{_FindFaultyInstanceDisks}.
10483 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10486 def _CheckDisksActivated(self, instance):
10487 """Checks if the instance disks are activated.
10489 @param instance: The instance to check disks
10490 @return: True if they are activated, False otherwise
10492 """
10493 nodes = instance.all_nodes
10495 for idx, dev in enumerate(instance.disks):
10496 for node in nodes:
10497 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10498 self.cfg.SetDiskID(dev, node)
10500 result = self.rpc.call_blockdev_find(node, dev)
10502 if result.offline:
10503 continue
10504 elif result.fail_msg or not result.payload:
10505 return False
10507 return True
10509 def CheckPrereq(self):
10510 """Check prerequisites.
10512 This checks that the instance is in the cluster.
10515 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10516 assert instance is not None, \
10517 "Cannot retrieve locked instance %s" % self.instance_name
10519 if instance.disk_template != constants.DT_DRBD8:
10520 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10521 " instances", errors.ECODE_INVAL)
10523 if len(instance.secondary_nodes) != 1:
10524 raise errors.OpPrereqError("The instance has a strange layout,"
10525 " expected one secondary but found %d" %
10526 len(instance.secondary_nodes),
10527 errors.ECODE_FAULT)
10529 if not self.delay_iallocator:
10530 self._CheckPrereq2()
10532 def _CheckPrereq2(self):
10533 """Check prerequisites, second part.
10535 This function should always be part of CheckPrereq. It was separated and is
10536 now called from Exec because during node evacuation iallocator was only
10537 called with an unmodified cluster model, not taking planned changes into
10541 instance = self.instance
10542 secondary_node = instance.secondary_nodes[0]
10544 if self.iallocator_name is None:
10545 remote_node = self.remote_node
10546 else:
10547 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10548 instance.name, instance.secondary_nodes)
10550 if remote_node is None:
10551 self.remote_node_info = None
10552 else:
10553 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10554 "Remote node '%s' is not locked" % remote_node
10556 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10557 assert self.remote_node_info is not None, \
10558 "Cannot retrieve locked node %s" % remote_node
10560 if remote_node == self.instance.primary_node:
10561 raise errors.OpPrereqError("The specified node is the primary node of"
10562 " the instance", errors.ECODE_INVAL)
10564 if remote_node == secondary_node:
10565 raise errors.OpPrereqError("The specified node is already the"
10566 " secondary node of the instance",
10567 errors.ECODE_INVAL)
10569 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10570 constants.REPLACE_DISK_CHG):
10571 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10572 errors.ECODE_INVAL)
10574 if self.mode == constants.REPLACE_DISK_AUTO:
10575 if not self._CheckDisksActivated(instance):
10576 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10577 " first" % self.instance_name,
10578 errors.ECODE_STATE)
10579 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10580 faulty_secondary = self._FindFaultyDisks(secondary_node)
10582 if faulty_primary and faulty_secondary:
10583 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10584 " one node and can not be repaired"
10585 " automatically" % self.instance_name,
10586 errors.ECODE_STATE)
10588 if faulty_primary:
10589 self.disks = faulty_primary
10590 self.target_node = instance.primary_node
10591 self.other_node = secondary_node
10592 check_nodes = [self.target_node, self.other_node]
10593 elif faulty_secondary:
10594 self.disks = faulty_secondary
10595 self.target_node = secondary_node
10596 self.other_node = instance.primary_node
10597 check_nodes = [self.target_node, self.other_node]
10603 # Non-automatic modes
10604 if self.mode == constants.REPLACE_DISK_PRI:
10605 self.target_node = instance.primary_node
10606 self.other_node = secondary_node
10607 check_nodes = [self.target_node, self.other_node]
10609 elif self.mode == constants.REPLACE_DISK_SEC:
10610 self.target_node = secondary_node
10611 self.other_node = instance.primary_node
10612 check_nodes = [self.target_node, self.other_node]
10614 elif self.mode == constants.REPLACE_DISK_CHG:
10615 self.new_node = remote_node
10616 self.other_node = instance.primary_node
10617 self.target_node = secondary_node
10618 check_nodes = [self.new_node, self.other_node]
10620 _CheckNodeNotDrained(self.lu, remote_node)
10621 _CheckNodeVmCapable(self.lu, remote_node)
10623 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10624 assert old_node_info is not None
10625 if old_node_info.offline and not self.early_release:
10626 # doesn't make sense to delay the release
10627 self.early_release = True
10628 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10629 " early-release mode", secondary_node)
10631 else:
10632 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10633 self.mode)
10635 # If not specified all disks should be replaced
10636 if not self.disks:
10637 self.disks = range(len(self.instance.disks))
10639 # TODO: This is ugly, but right now we can't distinguish between internal
10640 # submitted opcode and external one. We should fix that.
10641 if self.remote_node_info:
10642 # We change the node, lets verify it still meets instance policy
10643 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10644 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10645 new_group_info)
10646 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10647 ignore=self.ignore_ipolicy)
10649 # TODO: compute disk parameters
10650 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10651 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10652 if primary_node_info.group != secondary_node_info.group:
10653 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10654 " different node groups; the disk parameters of the"
10655 " primary node's group will be applied.")
10657 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10659 for node in check_nodes:
10660 _CheckNodeOnline(self.lu, node)
10662 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10665 if node_name is not None)
10667 # Release unneeded node and node resource locks
10668 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10669 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10671 # Release any owned node group
10672 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10673 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10675 # Check whether disks are valid
10676 for disk_idx in self.disks:
10677 instance.FindDisk(disk_idx)
10679 # Get secondary node IP addresses
10680 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10681 in self.cfg.GetMultiNodeInfo(touched_nodes))
10683 def Exec(self, feedback_fn):
10684 """Execute disk replacement.
10686 This dispatches the disk replacement to the appropriate handler.
10689 if self.delay_iallocator:
10690 self._CheckPrereq2()
10693 # Verify owned locks before starting operation
10694 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10695 assert set(owned_nodes) == set(self.node_secondary_ip), \
10696 ("Incorrect node locks, owning %s, expected %s" %
10697 (owned_nodes, self.node_secondary_ip.keys()))
10698 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10699 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10701 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10702 assert list(owned_instances) == [self.instance_name], \
10703 "Instance '%s' not locked" % self.instance_name
10705 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10706 "Should not own any node group lock at this point"
10708 if not self.disks:
10709 feedback_fn("No disks need replacement")
10710 return
10712 feedback_fn("Replacing disk(s) %s for %s" %
10713 (utils.CommaJoin(self.disks), self.instance.name))
10715 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10717 # Activate the instance disks if we're replacing them on a down instance
10718 if activate_disks:
10719 _StartInstanceDisks(self.lu, self.instance, True)
10721 try:
10722 # Should we replace the secondary node?
10723 if self.new_node is not None:
10724 fn = self._ExecDrbd8Secondary
10725 else:
10726 fn = self._ExecDrbd8DiskOnly
10728 result = fn(feedback_fn)
10729 finally:
10730 # Deactivate the instance disks if we're replacing them on a
10731 # down instance
10732 if activate_disks:
10733 _SafeShutdownInstanceDisks(self.lu, self.instance)
10735 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10738 # Verify owned locks
10739 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10740 nodes = frozenset(self.node_secondary_ip)
10741 assert ((self.early_release and not owned_nodes) or
10742 (not self.early_release and not (set(owned_nodes) - nodes))), \
10743 ("Not owning the correct locks, early_release=%s, owned=%r,"
10744 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10748 def _CheckVolumeGroup(self, nodes):
10749 self.lu.LogInfo("Checking volume groups")
10751 vgname = self.cfg.GetVGName()
10753 # Make sure volume group exists on all involved nodes
10754 results = self.rpc.call_vg_list(nodes)
10755 if not results:
10756 raise errors.OpExecError("Can't list volume groups on the nodes")
10758 for node in nodes:
10759 res = results[node]
10760 res.Raise("Error checking node %s" % node)
10761 if vgname not in res.payload:
10762 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10765 def _CheckDisksExistence(self, nodes):
10766 # Check disk existence
10767 for idx, dev in enumerate(self.instance.disks):
10768 if idx not in self.disks:
10769 continue
10771 for node in nodes:
10772 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10773 self.cfg.SetDiskID(dev, node)
10775 result = self.rpc.call_blockdev_find(node, dev)
10777 msg = result.fail_msg
10778 if msg or not result.payload:
10780 msg = "disk not found"
10781 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10784 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10785 for idx, dev in enumerate(self.instance.disks):
10786 if idx not in self.disks:
10789 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10792 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10794 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10795 " replace disks for instance %s" %
10796 (node_name, self.instance.name))
10798 def _CreateNewStorage(self, node_name):
10799 """Create new storage on the primary or secondary node.
10801 This is only used for same-node replaces, not for changing the
10802 secondary node, hence we don't want to modify the existing disk.
10804 """
10805 iv_names = {}
10807 for idx, dev in enumerate(self.instance.disks):
10808 if idx not in self.disks:
10811 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10813 self.cfg.SetDiskID(dev, node_name)
10815 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10816 names = _GenerateUniqueNames(self.lu, lv_names)
10818 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10820 vg_data = dev.children[0].logical_id[0]
10821 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10822 logical_id=(vg_data, names[0]), params=data_p)
10823 vg_meta = dev.children[1].logical_id[0]
10824 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10825 logical_id=(vg_meta, names[1]), params=meta_p)
10827 new_lvs = [lv_data, lv_meta]
10828 old_lvs = [child.Copy() for child in dev.children]
10829 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10831 # we pass force_create=True to force the LVM creation
10832 for new_lv in new_lvs:
10833 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10834 _GetInstanceInfoText(self.instance), False)
10836 return iv_names
10838 def _CheckDevices(self, node_name, iv_names):
10839 for name, (dev, _, _) in iv_names.iteritems():
10840 self.cfg.SetDiskID(dev, node_name)
10842 result = self.rpc.call_blockdev_find(node_name, dev)
10844 msg = result.fail_msg
10845 if msg or not result.payload:
10846 if not msg:
10847 msg = "disk not found"
10848 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10849 (name, msg))
10851 if result.payload.is_degraded:
10852 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10854 def _RemoveOldStorage(self, node_name, iv_names):
10855 for name, (_, old_lvs, _) in iv_names.iteritems():
10856 self.lu.LogInfo("Remove logical volumes for %s" % name)
10858 for lv in old_lvs:
10859 self.cfg.SetDiskID(lv, node_name)
10861 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10862 if msg:
10863 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10864 hint="remove unused LVs manually")
10866 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10867 """Replace a disk on the primary or secondary for DRBD 8.
10869 The algorithm for replace is quite complicated:
10871 1. for each disk to be replaced:
10873 1. create new LVs on the target node with unique names
10874 1. detach old LVs from the drbd device
10875 1. rename old LVs to name_replaced.<time_t>
10876 1. rename new LVs to old LVs
10877 1. attach the new LVs (with the old names now) to the drbd device
10879 1. wait for sync across all devices
10881 1. for each modified disk:
10883 1. remove old LVs (which have the name name_replaced.<time_t>)
10885 Failures are not very well handled.
10887 """
10888 steps_total = 6
10890 # Step: check device activation
10891 self.lu.LogStep(1, steps_total, "Check device existence")
10892 self._CheckDisksExistence([self.other_node, self.target_node])
10893 self._CheckVolumeGroup([self.target_node, self.other_node])
10895 # Step: check other node consistency
10896 self.lu.LogStep(2, steps_total, "Check peer consistency")
10897 self._CheckDisksConsistency(self.other_node,
10898 self.other_node == self.instance.primary_node,
10901 # Step: create new storage
10902 self.lu.LogStep(3, steps_total, "Allocate new storage")
10903 iv_names = self._CreateNewStorage(self.target_node)
10905 # Step: for each lv, detach+rename*2+attach
10906 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10907 for dev, old_lvs, new_lvs in iv_names.itervalues():
10908 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10910 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10912 result.Raise("Can't detach drbd from local storage on node"
10913 " %s for device %s" % (self.target_node, dev.iv_name))
10915 #cfg.Update(instance)
10917 # ok, we created the new LVs, so now we know we have the needed
10918 # storage; as such, we proceed on the target node to rename
10919 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10920 # using the assumption that logical_id == physical_id (which in
10921 # turn is the unique_id on that node)
10923 # FIXME(iustin): use a better name for the replaced LVs
10924 temp_suffix = int(time.time())
10925 ren_fn = lambda d, suff: (d.physical_id[0],
10926 d.physical_id[1] + "_replaced-%s" % suff)
10928 # Build the rename list based on what LVs exist on the node
10929 rename_old_to_new = []
10930 for to_ren in old_lvs:
10931 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10932 if not result.fail_msg and result.payload:
10934 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10936 self.lu.LogInfo("Renaming the old LVs on the target node")
10937 result = self.rpc.call_blockdev_rename(self.target_node,
10938 rename_old_to_new)
10939 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10941 # Now we rename the new LVs to the old LVs
10942 self.lu.LogInfo("Renaming the new LVs on the target node")
10943 rename_new_to_old = [(new, old.physical_id)
10944 for old, new in zip(old_lvs, new_lvs)]
10945 result = self.rpc.call_blockdev_rename(self.target_node,
10947 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10949 # Intermediate steps of in memory modifications
10950 for old, new in zip(old_lvs, new_lvs):
10951 new.logical_id = old.logical_id
10952 self.cfg.SetDiskID(new, self.target_node)
10954 # We need to modify old_lvs so that removal later removes the
10955 # right LVs, not the newly added ones; note that old_lvs is a
10957 for disk in old_lvs:
10958 disk.logical_id = ren_fn(disk, temp_suffix)
10959 self.cfg.SetDiskID(disk, self.target_node)
10961 # Now that the new lvs have the old name, we can add them to the device
10962 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10963 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10965 msg = result.fail_msg
10967 for new_lv in new_lvs:
10968 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10971 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10972 hint=("cleanup manually the unused logical"
10974 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10976 cstep = itertools.count(5)
10978 if self.early_release:
10979 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10980 self._RemoveOldStorage(self.target_node, iv_names)
10981 # TODO: Check if releasing locks early still makes sense
10982 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10983 else:
10984 # Release all resource locks except those used by the instance
10985 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10986 keep=self.node_secondary_ip.keys())
10988 # Release all node locks while waiting for sync
10989 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10991 # TODO: Can the instance lock be downgraded here? Take the optional disk
10992 # shutdown in the caller into consideration.
10995 # This can fail as the old devices are degraded and _WaitForSync
10996 # does a combined result over all disks, so we don't check its return value
10997 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10998 _WaitForSync(self.lu, self.instance)
11000 # Check all devices manually
11001 self._CheckDevices(self.instance.primary_node, iv_names)
11003 # Step: remove old storage
11004 if not self.early_release:
11005 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11006 self._RemoveOldStorage(self.target_node, iv_names)
11008 def _ExecDrbd8Secondary(self, feedback_fn):
11009 """Replace the secondary node for DRBD 8.
11011 The algorithm for replace is quite complicated:
11012 - for all disks of the instance:
11013 - create new LVs on the new node with same names
11014 - shutdown the drbd device on the old secondary
11015 - disconnect the drbd network on the primary
11016 - create the drbd device on the new secondary
11017 - network attach the drbd on the primary, using an artifice:
11018 the drbd code for Attach() will connect to the network if it
11019 finds a device which is connected to the good local disks but
11020 not network enabled
11021 - wait for sync across all devices
11022 - remove all disks from the old secondary
11024     Failures are not very well handled.
11026     """
11027     steps_total = 6
11029 pnode = self.instance.primary_node
11031 # Step: check device activation
11032 self.lu.LogStep(1, steps_total, "Check device existence")
11033 self._CheckDisksExistence([self.instance.primary_node])
11034 self._CheckVolumeGroup([self.instance.primary_node])
11036 # Step: check other node consistency
11037 self.lu.LogStep(2, steps_total, "Check peer consistency")
11038 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11040 # Step: create new storage
11041 self.lu.LogStep(3, steps_total, "Allocate new storage")
11042 for idx, dev in enumerate(self.instance.disks):
11043 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11044 (self.new_node, idx))
11045 # we pass force_create=True to force LVM creation
11046 for new_lv in dev.children:
11047 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11048 _GetInstanceInfoText(self.instance), False)
11050     # Step 4: drbd minors and drbd setup changes
11051 # after this, we must manually remove the drbd minors on both the
11052 # error and the success paths
11053 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11054 minors = self.cfg.AllocateDRBDMinor([self.new_node
11055 for dev in self.instance.disks],
11056 self.instance.name)
11057     logging.debug("Allocated minors %r", minors)
11059     iv_names = {}
11060 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11061 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11062 (self.new_node, idx))
11063 # create new devices on new_node; note that we create two IDs:
11064 # one without port, so the drbd will be activated without
11065 # networking information on the new node at this stage, and one
11066 # with network, for the latter activation in step 4
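      # Illustrative sketch (hypothetical values, not part of the original
      # code): for a logical_id of ("node1", "node2", 11000, 0, 1, "secret"),
      # with "node1" as primary and "node3" as the new secondary,
      # new_alone_id becomes ("node1", "node3", None, 0, new_minor, "secret")
      # and new_net_id becomes ("node1", "node3", 11000, 0, new_minor,
      # "secret").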
11067 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11068       if self.instance.primary_node == o_node1:
11069         p_minor = o_minor1
11070       else:
11071         assert self.instance.primary_node == o_node2, "Three-node instance?"
11072         p_minor = o_minor2
11074 new_alone_id = (self.instance.primary_node, self.new_node, None,
11075 p_minor, new_minor, o_secret)
11076 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11077 p_minor, new_minor, o_secret)
11079 iv_names[idx] = (dev, dev.children, new_net_id)
11080       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11081                     new_net_id)
11082 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11083 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11084 logical_id=new_alone_id,
11085                               children=dev.children,
11086                               size=dev.size,
11087                               params=drbd_params)
11088       try:
11089         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11090                               _GetInstanceInfoText(self.instance), False)
11091       except errors.GenericError:
11092         self.cfg.ReleaseDRBDMinors(self.instance.name)
11093         raise
11095 # We have new devices, shutdown the drbd on the old secondary
11096 for idx, dev in enumerate(self.instance.disks):
11097 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11098 self.cfg.SetDiskID(dev, self.target_node)
11099       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11100       if msg:
11101         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11102                            " node: %s" % (idx, msg),
11103 hint=("Please cleanup this device manually as"
11104 " soon as possible"))
11106 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11107 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11108 self.instance.disks)[pnode]
11110     msg = result.fail_msg
11111     if msg:
11112       # detaches didn't succeed (unlikely)
11113 self.cfg.ReleaseDRBDMinors(self.instance.name)
11114 raise errors.OpExecError("Can't detach the disks from the network on"
11115 " old node: %s" % (msg,))
11117 # if we managed to detach at least one, we update all the disks of
11118 # the instance to point to the new secondary
11119 self.lu.LogInfo("Updating instance configuration")
11120 for dev, _, new_logical_id in iv_names.itervalues():
11121 dev.logical_id = new_logical_id
11122 self.cfg.SetDiskID(dev, self.instance.primary_node)
11124 self.cfg.Update(self.instance, feedback_fn)
11126 # Release all node locks (the configuration has been updated)
11127 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11129 # and now perform the drbd attach
11130 self.lu.LogInfo("Attaching primary drbds to new secondary"
11131 " (standalone => connected)")
11132     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11133                                             self.new_node],
11134                                            self.node_secondary_ip,
11135                                            self.instance.disks,
11136                                            self.instance.name,
11137                                            False)
11138     for to_node, to_result in result.items():
11139       msg = to_result.fail_msg
11140       if msg:
11141         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11142                            to_node, msg,
11143                            hint=("please do a gnt-instance info to see the"
11144                                  " status of disks"))
11146 cstep = itertools.count(5)
11148 if self.early_release:
11149 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11150 self._RemoveOldStorage(self.target_node, iv_names)
11151 # TODO: Check if releasing locks early still makes sense
11152       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11153     else:
11154       # Release all resource locks except those used by the instance
11155 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11156 keep=self.node_secondary_ip.keys())
11158 # TODO: Can the instance lock be downgraded here? Take the optional disk
11159 # shutdown in the caller into consideration.
11162 # This can fail as the old devices are degraded and _WaitForSync
11163 # does a combined result over all disks, so we don't check its return value
11164 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11165 _WaitForSync(self.lu, self.instance)
11167 # Check all devices manually
11168 self._CheckDevices(self.instance.primary_node, iv_names)
11170 # Step: remove old storage
11171 if not self.early_release:
11172 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11173 self._RemoveOldStorage(self.target_node, iv_names)
11176 class LURepairNodeStorage(NoHooksLU):
11177   """Repairs the volume group on a node.
11179   """
11180   REQ_BGL = False
11182   def CheckArguments(self):
11183 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11185 storage_type = self.op.storage_type
11187 if (constants.SO_FIX_CONSISTENCY not in
11188 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11189 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11190 " repaired" % storage_type,
11191 errors.ECODE_INVAL)
11193 def ExpandNames(self):
11194 self.needed_locks = {
11195       locking.LEVEL_NODE: [self.op.node_name],
11196       }
11198 def _CheckFaultyDisks(self, instance, node_name):
11199     """Ensure faulty disks abort the opcode or at least warn."""
11200     try:
11201       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11202                                   node_name, True):
11203         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11204                                    " node '%s'" % (instance.name, node_name),
11205                                    errors.ECODE_STATE)
11206     except errors.OpPrereqError, err:
11207       if self.op.ignore_consistency:
11208         self.proc.LogWarning(str(err.args[0]))
11209       else:
11210         raise
11212 def CheckPrereq(self):
11213     """Check prerequisites.
11215     """
11216 # Check whether any instance on this node has faulty disks
11217 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11218       if inst.admin_state != constants.ADMINST_UP:
11219         continue
11220 check_nodes = set(inst.all_nodes)
11221 check_nodes.discard(self.op.node_name)
11222 for inst_node_name in check_nodes:
11223 self._CheckFaultyDisks(inst, inst_node_name)
11225 def Exec(self, feedback_fn):
11226 feedback_fn("Repairing storage unit '%s' on %s ..." %
11227 (self.op.name, self.op.node_name))
11229 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11230 result = self.rpc.call_storage_execute(self.op.node_name,
11231                                            self.op.storage_type, st_args,
11232                                            self.op.name,
11233 constants.SO_FIX_CONSISTENCY)
11234 result.Raise("Failed to repair storage unit '%s' on %s" %
11235 (self.op.name, self.op.node_name))
11238 class LUNodeEvacuate(NoHooksLU):
11239   """Evacuates instances off a list of nodes.
11241   """
11242   REQ_BGL = False
11244   _MODE2IALLOCATOR = {
11245 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11246 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11247     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11248     }
11249 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11250 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11251 constants.IALLOCATOR_NEVAC_MODES)
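  # Illustrative note (not part of the original code): the mapping above just
  # translates the opcode's evacuation mode into the corresponding iallocator
  # request mode, e.g. an OpNodeEvacuate with mode NODE_EVAC_PRI is forwarded
  # to the iallocator as IALLOCATOR_NEVAC_PRI.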
11253 def CheckArguments(self):
11254 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11256 def ExpandNames(self):
11257 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11259 if self.op.remote_node is not None:
11260 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11261 assert self.op.remote_node
11263 if self.op.remote_node == self.op.node_name:
11264 raise errors.OpPrereqError("Can not use evacuated node as a new"
11265 " secondary node", errors.ECODE_INVAL)
11267 if self.op.mode != constants.NODE_EVAC_SEC:
11268 raise errors.OpPrereqError("Without the use of an iallocator only"
11269 " secondary instances can be evacuated",
11270 errors.ECODE_INVAL)
11273 self.share_locks = _ShareAll()
11274 self.needed_locks = {
11275 locking.LEVEL_INSTANCE: [],
11276 locking.LEVEL_NODEGROUP: [],
11277       locking.LEVEL_NODE: [],
11278       }
11280 # Determine nodes (via group) optimistically, needs verification once locks
11281 # have been acquired
11282 self.lock_nodes = self._DetermineNodes()
11284 def _DetermineNodes(self):
11285     """Gets the list of nodes to operate on.
11287     """
11288     if self.op.remote_node is None:
11289 # Iallocator will choose any node(s) in the same group
11290       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11291     else:
11292       group_nodes = frozenset([self.op.remote_node])
11294 # Determine nodes to be locked
11295 return set([self.op.node_name]) | group_nodes
11297 def _DetermineInstances(self):
11298     """Builds list of instances to operate on.
11300     """
11301 assert self.op.mode in constants.NODE_EVAC_MODES
11303 if self.op.mode == constants.NODE_EVAC_PRI:
11304 # Primary instances only
11305 inst_fn = _GetNodePrimaryInstances
11306 assert self.op.remote_node is None, \
11307 "Evacuating primary instances requires iallocator"
11308 elif self.op.mode == constants.NODE_EVAC_SEC:
11309 # Secondary instances only
11310       inst_fn = _GetNodeSecondaryInstances
11311     else:
11312       # All instances
11313       assert self.op.mode == constants.NODE_EVAC_ALL
11314 inst_fn = _GetNodeInstances
11315 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11317 raise errors.OpPrereqError("Due to an issue with the iallocator"
11318 " interface it is not possible to evacuate"
11319 " all instances at once; specify explicitly"
11320                                  " whether to evacuate primary or secondary"
11321                                  " instances",
11322                                  errors.ECODE_INVAL)
11324 return inst_fn(self.cfg, self.op.node_name)
11326 def DeclareLocks(self, level):
11327 if level == locking.LEVEL_INSTANCE:
11328 # Lock instances optimistically, needs verification once node and group
11329 # locks have been acquired
11330 self.needed_locks[locking.LEVEL_INSTANCE] = \
11331 set(i.name for i in self._DetermineInstances())
11333 elif level == locking.LEVEL_NODEGROUP:
11334 # Lock node groups for all potential target nodes optimistically, needs
11335 # verification once nodes have been acquired
11336 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11337 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11339 elif level == locking.LEVEL_NODE:
11340 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11342 def CheckPrereq(self):
11344 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11345 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11346 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11348 need_nodes = self._DetermineNodes()
11350 if not owned_nodes.issuperset(need_nodes):
11351 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11352                                  " locks were acquired, current nodes"
11353                                  " are '%s', used to be '%s'; retry the"
11354                                  " operation" %
11355                                  (self.op.node_name,
11356 utils.CommaJoin(need_nodes),
11357 utils.CommaJoin(owned_nodes)),
11358 errors.ECODE_STATE)
11360 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11361 if owned_groups != wanted_groups:
11362 raise errors.OpExecError("Node groups changed since locks were acquired,"
11363 " current groups are '%s', used to be '%s';"
11364 " retry the operation" %
11365 (utils.CommaJoin(wanted_groups),
11366 utils.CommaJoin(owned_groups)))
11368 # Determine affected instances
11369 self.instances = self._DetermineInstances()
11370 self.instance_names = [i.name for i in self.instances]
11372 if set(self.instance_names) != owned_instances:
11373 raise errors.OpExecError("Instances on node '%s' changed since locks"
11374 " were acquired, current instances are '%s',"
11375 " used to be '%s'; retry the operation" %
11376 (self.op.node_name,
11377 utils.CommaJoin(self.instance_names),
11378 utils.CommaJoin(owned_instances)))
11380 if self.instance_names:
11381       self.LogInfo("Evacuating instances from node '%s': %s",
11382                    self.op.node_name,
11383                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11384     else:
11385       self.LogInfo("No instances to evacuate from node '%s'",
11386                    self.op.node_name)
11388 if self.op.remote_node is not None:
11389 for i in self.instances:
11390 if i.primary_node == self.op.remote_node:
11391 raise errors.OpPrereqError("Node %s is the primary node of"
11392                                      " instance %s, cannot use it as"
11393                                      " secondary" %
11394                                      (self.op.remote_node, i.name),
11395 errors.ECODE_INVAL)
11397 def Exec(self, feedback_fn):
11398 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11400 if not self.instance_names:
11401       # No instances to evacuate
11402       jobs = []
11404 elif self.op.iallocator is not None:
11405 # TODO: Implement relocation to other group
11406 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11407 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11408 instances=list(self.instance_names))
11410 ial.Run(self.op.iallocator)
11412 if not ial.success:
11413 raise errors.OpPrereqError("Can't compute node evacuation using"
11414 " iallocator '%s': %s" %
11415 (self.op.iallocator, ial.info),
11416 errors.ECODE_NORES)
11418 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11420 elif self.op.remote_node is not None:
11421       assert self.op.mode == constants.NODE_EVAC_SEC
11422       jobs = [
11423         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11424                                         remote_node=self.op.remote_node,
11425                                         disks=[],
11426                                         mode=constants.REPLACE_DISK_CHG,
11427                                         early_release=self.op.early_release)]
11428         for instance_name in self.instance_names
11429         ]
11431     else:
11432       raise errors.ProgrammerError("No iallocator or remote node")
11434 return ResultWithJobs(jobs)
11437 def _SetOpEarlyRelease(early_release, op):
11438   """Sets C{early_release} flag on opcodes if available.
11440   """
11441   try:
11442     op.early_release = early_release
11443   except AttributeError:
11444     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11446   return op
11449 def _NodeEvacDest(use_nodes, group, nodes):
11450   """Returns group or nodes depending on caller's choice.
11452   """
11453   if use_nodes:
11454     return utils.CommaJoin(nodes)
11455   else:
11456     return group
11459 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11460 """Unpacks the result of change-group and node-evacuate iallocator requests.
11462 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11463 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11465 @type lu: L{LogicalUnit}
11466 @param lu: Logical unit instance
11467 @type alloc_result: tuple/list
11468 @param alloc_result: Result from iallocator
11469 @type early_release: bool
11470 @param early_release: Whether to release locks early if possible
11471 @type use_nodes: bool
11472 @param use_nodes: Whether to display node names instead of groups
11474   """
11475   (moved, failed, jobs) = alloc_result
11477   if failed:
11478     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11479 for (name, reason) in failed)
11480 lu.LogWarning("Unable to evacuate instances %s", failreason)
11481 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11484 lu.LogInfo("Instances to be moved: %s",
11485 utils.CommaJoin("%s (to %s)" %
11486 (name, _NodeEvacDest(use_nodes, group, nodes))
11487 for (name, group, nodes) in moved))
11489 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11490               map(opcodes.OpCode.LoadOpCode, ops))
11491           for ops in jobs]
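# Illustrative note (hypothetical values, not part of the original code):
# alloc_result is the (moved, failed, jobs) triple produced by the
# iallocator, e.g.
#   ([("inst1", "group1", ["nodeA", "nodeB"])],      # moved
#    [("inst2", "disk is degraded")],                # failed
#    [[{"OP_ID": "OP_INSTANCE_MIGRATE"}]])           # serialized opcode lists
# where each inner opcode list is turned back into OpCode objects by the
# return expression above, with the early_release flag applied to each one.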
11494 class LUInstanceGrowDisk(LogicalUnit):
11495   """Grow a disk of an instance.
11497   """
11498 HPATH = "disk-grow"
11499   HTYPE = constants.HTYPE_INSTANCE
11500   REQ_BGL = False
11502 def ExpandNames(self):
11503 self._ExpandAndLockInstance()
11504 self.needed_locks[locking.LEVEL_NODE] = []
11505 self.needed_locks[locking.LEVEL_NODE_RES] = []
11506 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11507 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11509 def DeclareLocks(self, level):
11510 if level == locking.LEVEL_NODE:
11511 self._LockInstancesNodes()
11512 elif level == locking.LEVEL_NODE_RES:
11514 self.needed_locks[locking.LEVEL_NODE_RES] = \
11515 self.needed_locks[locking.LEVEL_NODE][:]
11517 def BuildHooksEnv(self):
11518 """Build hooks env.
11520     This runs on the master, the primary and all the secondaries.
11522     """
11523     env = {
11524       "DISK": self.op.disk,
11525       "AMOUNT": self.op.amount,
11526       }
11527     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11528     return env
11530 def BuildHooksNodes(self):
11531     """Build hooks nodes.
11533     """
11534     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11535     return (nl, nl)
11537 def CheckPrereq(self):
11538 """Check prerequisites.
11540     This checks that the instance is in the cluster.
11542     """
11543 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11544 assert instance is not None, \
11545 "Cannot retrieve locked instance %s" % self.op.instance_name
11546 nodenames = list(instance.all_nodes)
11547 for node in nodenames:
11548 _CheckNodeOnline(self, node)
11550 self.instance = instance
11552 if instance.disk_template not in constants.DTS_GROWABLE:
11553 raise errors.OpPrereqError("Instance's disk layout does not support"
11554 " growing", errors.ECODE_INVAL)
11556 self.disk = instance.FindDisk(self.op.disk)
11558 if instance.disk_template not in (constants.DT_FILE,
11559                                       constants.DT_SHARED_FILE,
11560                                       constants.DT_RBD):
11561       # TODO: check the free disk space for file, when that feature will be
11562       # supported
11563 _CheckNodesFreeDiskPerVG(self, nodenames,
11564 self.disk.ComputeGrowth(self.op.amount))
11566 def Exec(self, feedback_fn):
11567     """Execute disk grow.
11569     """
11570     instance = self.instance
11571     disk = self.disk
11573 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11574 assert (self.owned_locks(locking.LEVEL_NODE) ==
11575 self.owned_locks(locking.LEVEL_NODE_RES))
11577     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11578     if not disks_ok:
11579       raise errors.OpExecError("Cannot activate block device to grow")
11581 feedback_fn("Growing disk %s of instance '%s' by %s" %
11582 (self.op.disk, instance.name,
11583 utils.FormatUnit(self.op.amount, "h")))
11585 # First run all grow ops in dry-run mode
11586 for node in instance.all_nodes:
11587 self.cfg.SetDiskID(disk, node)
11588 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11589 result.Raise("Grow request failed to node %s" % node)
11591 # We know that (as far as we can test) operations across different
11592 # nodes will succeed, time to run it for real
11593 for node in instance.all_nodes:
11594 self.cfg.SetDiskID(disk, node)
11595 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11596 result.Raise("Grow request failed to node %s" % node)
11598 # TODO: Rewrite code to work properly
11599 # DRBD goes into sync mode for a short amount of time after executing the
11600 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11601 # calling "resize" in sync mode fails. Sleeping for a short amount of
11602     # time is a work-around.
11603     time.sleep(5)
11605 disk.RecordGrow(self.op.amount)
11606 self.cfg.Update(instance, feedback_fn)
11608 # Changes have been recorded, release node lock
11609 _ReleaseLocks(self, locking.LEVEL_NODE)
11611 # Downgrade lock while waiting for sync
11612 self.glm.downgrade(locking.LEVEL_INSTANCE)
11614 if self.op.wait_for_sync:
11615       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11616       if disk_abort:
11617         self.proc.LogWarning("Disk sync-ing has not returned a good"
11618 " status; please check the instance")
11619 if instance.admin_state != constants.ADMINST_UP:
11620 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11621 elif instance.admin_state != constants.ADMINST_UP:
11622 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11623 " not supposed to be running because no wait for"
11624 " sync mode was requested")
11626 assert self.owned_locks(locking.LEVEL_NODE_RES)
11627 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11630 class LUInstanceQueryData(NoHooksLU):
11631   """Query runtime instance data.
11633   """
11634   REQ_BGL = False
11636   def ExpandNames(self):
11637 self.needed_locks = {}
11639 # Use locking if requested or when non-static information is wanted
11640 if not (self.op.static or self.op.use_locking):
11641 self.LogWarning("Non-static data requested, locks need to be acquired")
11642 self.op.use_locking = True
11644 if self.op.instances or not self.op.use_locking:
11645 # Expand instance names right here
11646 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11648 # Will use acquired locks
11649 self.wanted_names = None
11651 if self.op.use_locking:
11652 self.share_locks = _ShareAll()
11654 if self.wanted_names is None:
11655 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11657 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11659 self.needed_locks[locking.LEVEL_NODE] = []
11660 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11662 def DeclareLocks(self, level):
11663 if self.op.use_locking and level == locking.LEVEL_NODE:
11664 self._LockInstancesNodes()
11666 def CheckPrereq(self):
11667 """Check prerequisites.
11669     This only checks the optional instance list against the existing names.
11671     """
11672 if self.wanted_names is None:
11673 assert self.op.use_locking, "Locking was not used"
11674 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11676 self.wanted_instances = \
11677 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11679 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11680     """Returns the status of a block device.
11682     """
11683     if self.op.static or not node:
11684       return None
11686     self.cfg.SetDiskID(dev, node)
11688     result = self.rpc.call_blockdev_find(node, dev)
11689     if result.offline:
11690       return None
11692     result.Raise("Can't compute disk status for %s" % instance_name)
11694     status = result.payload
11695     if status is None:
11696       return None
11698 return (status.dev_path, status.major, status.minor,
11699 status.sync_percent, status.estimated_time,
11700 status.is_degraded, status.ldisk_status)
11702 def _ComputeDiskStatus(self, instance, snode, dev):
11703     """Compute block device status.
11705     """
11706     if dev.dev_type in constants.LDS_DRBD:
11707 # we change the snode then (otherwise we use the one passed in)
11708 if dev.logical_id[0] == instance.primary_node:
11709 snode = dev.logical_id[1]
11710       else:
11711         snode = dev.logical_id[0]
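    # Illustrative note (not part of the original code): for a DRBD disk whose
    # logical_id starts with ("nodeA", "nodeB", ...), an instance with primary
    # node "nodeA" gets snode = "nodeB" and vice versa, so dev_sstatus below is
    # always computed on the DRBD peer of the primary node.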
11713 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11714 instance.name, dev)
11715 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11717     if dev.children:
11718       dev_children = map(compat.partial(self._ComputeDiskStatus,
11719                                         instance, snode),
11720                          dev.children)
11721     else:
11722       dev_children = []
11724     return {
11725       "iv_name": dev.iv_name,
11726 "dev_type": dev.dev_type,
11727 "logical_id": dev.logical_id,
11728 "physical_id": dev.physical_id,
11729 "pstatus": dev_pstatus,
11730 "sstatus": dev_sstatus,
11731       "children": dev_children,
11732       "mode": dev.mode,
11733       "size": dev.size,
11734       }
11736 def Exec(self, feedback_fn):
11737     """Gather and return data"""
11738     result = {}
11740 cluster = self.cfg.GetClusterInfo()
11742 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11743 for i in self.wanted_instances)
11744 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11745 if self.op.static or pnode.offline:
11746         remote_state = None
11747         if pnode.offline:
11748           self.LogWarning("Primary node %s is marked offline, returning static"
11749 " information only for instance %s" %
11750 (pnode.name, instance.name))
11751       else:
11752         remote_info = self.rpc.call_instance_info(instance.primary_node,
11753                                                   instance.name,
11754 instance.hypervisor)
11755 remote_info.Raise("Error checking node %s" % instance.primary_node)
11756 remote_info = remote_info.payload
11757         if remote_info and "state" in remote_info:
11758           remote_state = "up"
11759         else:
11760           if instance.admin_state == constants.ADMINST_UP:
11761             remote_state = "down"
11762           else:
11763             remote_state = instance.admin_state
11765       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11766                   instance.disks)
11768 result[instance.name] = {
11769 "name": instance.name,
11770 "config_state": instance.admin_state,
11771 "run_state": remote_state,
11772 "pnode": instance.primary_node,
11773         "snodes": instance.secondary_nodes,
11774         "os": instance.os,
11775         # this happens to be the same format used for hooks
11776         "nics": _NICListToTuple(self, instance.nics),
11777         "disk_template": instance.disk_template,
11778         "disks": disks,
11779 "hypervisor": instance.hypervisor,
11780 "network_port": instance.network_port,
11781 "hv_instance": instance.hvparams,
11782 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11783 "be_instance": instance.beparams,
11784 "be_actual": cluster.FillBE(instance),
11785 "os_instance": instance.osparams,
11786 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11787 "serial_no": instance.serial_no,
11788 "mtime": instance.mtime,
11789 "ctime": instance.ctime,
11790         "uuid": instance.uuid,
11791         }
11793     return result
11796 def PrepareContainerMods(mods, private_fn):
11797 """Prepares a list of container modifications by adding a private data field.
11799 @type mods: list of tuples; (operation, index, parameters)
11800 @param mods: List of modifications
11801 @type private_fn: callable or None
11802   @param private_fn: Callable for constructing a private data field for a
11803     modification
11806   """
11807   if private_fn is None:
11808     fn = lambda: None
11809   else:
11810     fn = private_fn
11812   return [(op, idx, params, fn()) for (op, idx, params) in mods]
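# Illustrative example (hypothetical values, not part of the original code):
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})],
#                        _InstNicModPrivate)
# returns [(constants.DDM_ADD, -1, {"size": 1024}, <_InstNicModPrivate>)],
# i.e. each (op, index, params) tuple is extended with a freshly constructed
# private data object, or with None when no private_fn is given.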
11815 #: Type description for changes as returned by L{ApplyContainerMods}'s
11817 _TApplyContModsCbChanges = \
11818 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11819     ht.TNonEmptyString,
11820     ht.TAny,
11821     ])))
11824 def ApplyContainerMods(kind, container, chgdesc, mods,
11825 create_fn, modify_fn, remove_fn):
11826 """Applies descriptions in C{mods} to C{container}.
11829 @param kind: One-word item description
11830 @type container: list
11831 @param container: Container to modify
11832 @type chgdesc: None or list
11833 @param chgdesc: List of applied changes
11835 @param mods: Modifications as returned by L{PrepareContainerMods}
11836 @type create_fn: callable
11837 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11838 receives absolute item index, parameters and private data object as added
11839 by L{PrepareContainerMods}, returns tuple containing new item and changes
11841 @type modify_fn: callable
11842 @param modify_fn: Callback for modifying an existing item
11843 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11844 and private data object as added by L{PrepareContainerMods}, returns
11846 @type remove_fn: callable
11847 @param remove_fn: Callback on removing item; receives absolute item index,
11848     item and private data object as added by L{PrepareContainerMods}
11850   """
11851   for (op, idx, params, private) in mods:
11852     if idx == -1:
11853       # Append
11854       absidx = len(container) - 1
11855     elif idx < 0:
11856       raise IndexError("Not accepting negative indices other than -1")
11857     elif idx > len(container):
11858       raise IndexError("Got %s index %s, but there are only %s" %
11859                        (kind, idx, len(container)))
11860     else:
11861       absidx = idx
11863     changes = None
11865     if op == constants.DDM_ADD:
11866       # Calculate where item will be added
11867       if idx == -1:
11868         addidx = len(container)
11869       else:
11870         addidx = idx
11872       if create_fn is None:
11873         item = params
11874       else:
11875         (item, changes) = create_fn(addidx, params, private)
11877       if idx == -1:
11878         container.append(item)
11879       else:
11881         assert idx <= len(container)
11882         # list.insert does so before the specified index
11883         container.insert(idx, item)
11884     else:
11885       # Retrieve existing item
11886       try:
11887         item = container[absidx]
11888       except IndexError:
11889         raise IndexError("Invalid %s index %s" % (kind, idx))
11891       if op == constants.DDM_REMOVE:
11892         assert not params
11894         if remove_fn is not None:
11895           remove_fn(absidx, item, private)
11897         changes = [("%s/%s" % (kind, absidx), "remove")]
11899         assert container[absidx] == item
11900         del container[absidx]
11901       elif op == constants.DDM_MODIFY:
11902         if modify_fn is not None:
11903           changes = modify_fn(absidx, item, params, private)
11904       else:
11905         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11907 assert _TApplyContModsCbChanges(changes)
11909 if not (chgdesc is None or changes is None):
11910 chgdesc.extend(changes)
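# Illustrative usage sketch (hypothetical values, not part of the original
# code): given container = [nic0, nic1] and
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, 1, {})], None)
# calling
#   ApplyContainerMods("NIC", container, chgdesc, mods, None, None, None)
# drops nic1 from the container and appends ("NIC/1", "remove") to chgdesc.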
11913 def _UpdateIvNames(base_index, disks):
11914 """Updates the C{iv_name} attribute of disks.
11916   @type disks: list of L{objects.Disk}
11918   """
11919 for (idx, disk) in enumerate(disks):
11920 disk.iv_name = "disk/%s" % (base_index + idx, )
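# Illustrative note (not part of the original code): _UpdateIvNames(0, disks)
# renumbers the whole list as disk/0, disk/1, ..., while a non-zero base_index
# only renames a tail, e.g. base_index=2 maps the given disks to disk/2,
# disk/3, and so on.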
11923 class _InstNicModPrivate:
11924 """Data structure for network interface modifications.
11926   Used by L{LUInstanceSetParams}.
11928   """
11929   def __init__(self):
11930     self.params = None
11931     self.filled = None
11934 class LUInstanceSetParams(LogicalUnit):
11935   """Modifies an instance's parameters.
11937   """
11938 HPATH = "instance-modify"
11939   HTYPE = constants.HTYPE_INSTANCE
11940   REQ_BGL = False
11942   @staticmethod
11943   def _UpgradeDiskNicMods(kind, mods, verify_fn):
11944 assert ht.TList(mods)
11945 assert not mods or len(mods[0]) in (2, 3)
11947     if mods and len(mods[0]) == 2:
11948       result = []
11949       addremove = 0
11951       for op, params in mods:
11952         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11953           result.append((op, -1, params))
11954           addremove += 1
11956           if addremove > 1:
11957             raise errors.OpPrereqError("Only one %s add or remove operation is"
11958                                        " supported at a time" % kind,
11959                                        errors.ECODE_INVAL)
11960         else:
11961           result.append((constants.DDM_MODIFY, op, params))
11963       assert verify_fn(result)
11964     else:
11965       result = mods
11967     return result
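  # Illustrative note (hypothetical values, not part of the original code):
  # the legacy 2-tuple syntax is upgraded in place, e.g.
  #   [("add", {"size": 1024}), (0, {"mode": "ro"})]
  # becomes
  #   [(constants.DDM_ADD, -1, {"size": 1024}),
  #    (constants.DDM_MODIFY, 0, {"mode": "ro"})]
  # while mods already given as 3-tuples are returned unchanged.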
11969   @staticmethod
11970   def _CheckMods(kind, mods, key_types, item_fn):
11971     """Ensures requested disk/NIC modifications are valid.
11973     """
11974 for (op, _, params) in mods:
11975 assert ht.TDict(params)
11977 utils.ForceDictType(params, key_types)
11979       if op == constants.DDM_REMOVE:
11980         if params:
11981           raise errors.OpPrereqError("No settings should be passed when"
11982 " removing a %s" % kind,
11983 errors.ECODE_INVAL)
11984 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11985 item_fn(op, params)
11987 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11989   @staticmethod
11990   def _VerifyDiskModification(op, params):
11991     """Verifies a disk modification.
11993     """
11994 if op == constants.DDM_ADD:
11995 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11996 if mode not in constants.DISK_ACCESS_SET:
11997 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11998 errors.ECODE_INVAL)
12000       size = params.get(constants.IDISK_SIZE, None)
12001       if size is None:
12002         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12003                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12005       try:
12006         size = int(size)
12007       except (TypeError, ValueError), err:
12008 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12009 errors.ECODE_INVAL)
12011 params[constants.IDISK_SIZE] = size
12013 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12014 raise errors.OpPrereqError("Disk size change not possible, use"
12015 " grow-disk", errors.ECODE_INVAL)
12017   @staticmethod
12018   def _VerifyNicModification(op, params):
12019     """Verifies a network interface modification.
12021     """
12022 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12023       ip = params.get(constants.INIC_IP, None)
12024       if ip is None:
12025         pass
12026       elif ip.lower() == constants.VALUE_NONE:
12027 params[constants.INIC_IP] = None
12028 elif not netutils.IPAddress.IsValid(ip):
12029 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12030 errors.ECODE_INVAL)
12032 bridge = params.get("bridge", None)
12033 link = params.get(constants.INIC_LINK, None)
12034 if bridge and link:
12035 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12036 " at the same time", errors.ECODE_INVAL)
12037 elif bridge and bridge.lower() == constants.VALUE_NONE:
12038 params["bridge"] = None
12039 elif link and link.lower() == constants.VALUE_NONE:
12040 params[constants.INIC_LINK] = None
12042 if op == constants.DDM_ADD:
12043 macaddr = params.get(constants.INIC_MAC, None)
12044 if macaddr is None:
12045 params[constants.INIC_MAC] = constants.VALUE_AUTO
12047 if constants.INIC_MAC in params:
12048 macaddr = params[constants.INIC_MAC]
12049 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12050 macaddr = utils.NormalizeAndValidateMac(macaddr)
12052 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12053 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12054 " modifying an existing NIC",
12055 errors.ECODE_INVAL)
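  # Illustrative note (not part of the original code): a NIC addition that
  # omits the MAC address falls through to the default of
  # constants.VALUE_AUTO above, while an explicit "auto" MAC on a modify
  # operation is rejected, since MACs can only be auto-generated when a NIC
  # is added.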
12057 def CheckArguments(self):
12058 if not (self.op.nics or self.op.disks or self.op.disk_template or
12059 self.op.hvparams or self.op.beparams or self.op.os_name or
12060 self.op.offline is not None or self.op.runtime_mem):
12061 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12063 if self.op.hvparams:
12064 _CheckGlobalHvParams(self.op.hvparams)
12066     self.op.disks = \
12067       self._UpgradeDiskNicMods("disk", self.op.disks,
12068         opcodes.OpInstanceSetParams.TestDiskModifications)
12069     self.op.nics = \
12070       self._UpgradeDiskNicMods("NIC", self.op.nics,
12071         opcodes.OpInstanceSetParams.TestNicModifications)
12073 # Check disk modifications
12074 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12075 self._VerifyDiskModification)
12077 if self.op.disks and self.op.disk_template is not None:
12078 raise errors.OpPrereqError("Disk template conversion and other disk"
12079 " changes not supported at the same time",
12080 errors.ECODE_INVAL)
12082 if (self.op.disk_template and
12083 self.op.disk_template in constants.DTS_INT_MIRROR and
12084 self.op.remote_node is None):
12085 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12086 " one requires specifying a secondary node",
12087 errors.ECODE_INVAL)
12089 # Check NIC modifications
12090 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12091 self._VerifyNicModification)
12093 def ExpandNames(self):
12094 self._ExpandAndLockInstance()
12095 # Can't even acquire node locks in shared mode as upcoming changes in
12096 # Ganeti 2.6 will start to modify the node object on disk conversion
12097 self.needed_locks[locking.LEVEL_NODE] = []
12098 self.needed_locks[locking.LEVEL_NODE_RES] = []
12099 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12101 def DeclareLocks(self, level):
12102 # TODO: Acquire group lock in shared mode (disk parameters)
12103 if level == locking.LEVEL_NODE:
12104 self._LockInstancesNodes()
12105 if self.op.disk_template and self.op.remote_node:
12106 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12107 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12108 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12110 self.needed_locks[locking.LEVEL_NODE_RES] = \
12111 self.needed_locks[locking.LEVEL_NODE][:]
12113 def BuildHooksEnv(self):
12114 """Build hooks env.
12116     This runs on the master, primary and secondaries.
12118     """
12119     args = dict()
12120     if constants.BE_MINMEM in self.be_new:
12121 args["minmem"] = self.be_new[constants.BE_MINMEM]
12122 if constants.BE_MAXMEM in self.be_new:
12123 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12124 if constants.BE_VCPUS in self.be_new:
12125 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12126 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12127 # information at all.
12129     if self._new_nics is not None:
12130       nics = []
12132       for nic in self._new_nics:
12133 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12134 mode = nicparams[constants.NIC_MODE]
12135 link = nicparams[constants.NIC_LINK]
12136 nics.append((nic.ip, nic.mac, mode, link))
12138 args["nics"] = nics
12140 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12141 if self.op.disk_template:
12142 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12143 if self.op.runtime_mem:
12144       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12146     return env
12148 def BuildHooksNodes(self):
12149     """Build hooks nodes.
12151     """
12152     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12153     return (nl, nl)
12155   def _PrepareNicModification(self, params, private, old_ip, old_params,
12156                               cluster, pnode):
12157     update_params_dict = dict([(key, params[key])
12158                                for key in constants.NICS_PARAMETERS
12159                                if key in params])
12161     if "bridge" in params:
12162 update_params_dict[constants.NIC_LINK] = params["bridge"]
12164 new_params = _GetUpdatedParams(old_params, update_params_dict)
12165 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12167 new_filled_params = cluster.SimpleFillNIC(new_params)
12168 objects.NIC.CheckParameterSyntax(new_filled_params)
12170 new_mode = new_filled_params[constants.NIC_MODE]
12171 if new_mode == constants.NIC_MODE_BRIDGED:
12172 bridge = new_filled_params[constants.NIC_LINK]
12173       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12174       if msg:
12175         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12176         if self.op.force:
12177           self.warn.append(msg)
12178         else:
12179           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12181 elif new_mode == constants.NIC_MODE_ROUTED:
12182       ip = params.get(constants.INIC_IP, old_ip)
12183       if ip is None:
12184         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12185 " on a routed NIC", errors.ECODE_INVAL)
12187 if constants.INIC_MAC in params:
12188       mac = params[constants.INIC_MAC]
12189       if mac is None:
12190         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12191 errors.ECODE_INVAL)
12192 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12193 # otherwise generate the MAC address
12194 params[constants.INIC_MAC] = \
12195 self.cfg.GenerateMAC(self.proc.GetECId())
12196       else:
12197         # or validate/reserve the current one
12198         try:
12199           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12200 except errors.ReservationError:
12201 raise errors.OpPrereqError("MAC address '%s' already in use"
12202 " in cluster" % mac,
12203 errors.ECODE_NOTUNIQUE)
12205 private.params = new_params
12206 private.filled = new_filled_params
12208 return (None, None)
12210 def CheckPrereq(self):
12211 """Check prerequisites.
12213     This only checks the instance list against the existing names.
12215     """
12216 # checking the new params on the primary/secondary nodes
12218 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12219 cluster = self.cluster = self.cfg.GetClusterInfo()
12220 assert self.instance is not None, \
12221 "Cannot retrieve locked instance %s" % self.op.instance_name
12222 pnode = instance.primary_node
12223 nodelist = list(instance.all_nodes)
12224 pnode_info = self.cfg.GetNodeInfo(pnode)
12225 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12227 # Prepare disk/NIC modifications
12228 self.diskmod = PrepareContainerMods(self.op.disks, None)
12229 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12232 if self.op.os_name and not self.op.force:
12233 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12234 self.op.force_variant)
12235 instance_os = self.op.os_name
12237 instance_os = instance.os
12239 assert not (self.op.disk_template and self.op.disks), \
12240 "Can't modify disk template and apply disk changes at the same time"
12242 if self.op.disk_template:
12243 if instance.disk_template == self.op.disk_template:
12244 raise errors.OpPrereqError("Instance already has disk template %s" %
12245 instance.disk_template, errors.ECODE_INVAL)
12247 if (instance.disk_template,
12248 self.op.disk_template) not in self._DISK_CONVERSIONS:
12249 raise errors.OpPrereqError("Unsupported disk template conversion from"
12250 " %s to %s" % (instance.disk_template,
12251 self.op.disk_template),
12252 errors.ECODE_INVAL)
12253 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12254 msg="cannot change disk template")
12255 if self.op.disk_template in constants.DTS_INT_MIRROR:
12256 if self.op.remote_node == pnode:
12257 raise errors.OpPrereqError("Given new secondary node %s is the same"
12258 " as the primary node of the instance" %
12259 self.op.remote_node, errors.ECODE_STATE)
12260 _CheckNodeOnline(self, self.op.remote_node)
12261 _CheckNodeNotDrained(self, self.op.remote_node)
12262 # FIXME: here we assume that the old instance type is DT_PLAIN
12263 assert instance.disk_template == constants.DT_PLAIN
12264 disks = [{constants.IDISK_SIZE: d.size,
12265 constants.IDISK_VG: d.logical_id[0]}
12266 for d in instance.disks]
12267 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12268 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12270 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12271 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12272 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12273 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12274 ignore=self.op.ignore_ipolicy)
12275 if pnode_info.group != snode_info.group:
12276 self.LogWarning("The primary and secondary nodes are in two"
12277 " different node groups; the disk parameters"
12278                         " from the first disk's node group will be"
12279                         " used")
12281 # hvparams processing
12282 if self.op.hvparams:
12283 hv_type = instance.hypervisor
12284 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12285 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12286 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12289 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12290 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12291 self.hv_proposed = self.hv_new = hv_new # the new actual values
12292 self.hv_inst = i_hvdict # the new dict (without defaults)
12293     else:
12294       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12295                                               instance.hvparams)
12296 self.hv_new = self.hv_inst = {}
12298 # beparams processing
12299 if self.op.beparams:
12300       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12301                                    use_none=True)
12302 objects.UpgradeBeParams(i_bedict)
12303 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12304 be_new = cluster.SimpleFillBE(i_bedict)
12305 self.be_proposed = self.be_new = be_new # the new actual values
12306 self.be_inst = i_bedict # the new dict (without defaults)
12307     else:
12308       self.be_new = self.be_inst = {}
12309 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12310 be_old = cluster.FillBE(instance)
12312     # CPU param validation -- checking every time a parameter is
12313     # changed to cover all cases where either CPU mask or vcpus have
12314     # changed
12315 if (constants.BE_VCPUS in self.be_proposed and
12316 constants.HV_CPU_MASK in self.hv_proposed):
12317       cpu_list = \
12318         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12319 # Verify mask is consistent with number of vCPUs. Can skip this
12320 # test if only 1 entry in the CPU mask, which means same mask
12321 # is applied to all vCPUs.
12322 if (len(cpu_list) > 1 and
12323 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12324         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12325                                    " CPU mask [%s]" %
12326                                    (self.be_proposed[constants.BE_VCPUS],
12327 self.hv_proposed[constants.HV_CPU_MASK]),
12328 errors.ECODE_INVAL)
12330 # Only perform this test if a new CPU mask is given
12331 if constants.HV_CPU_MASK in self.hv_new:
12332 # Calculate the largest CPU number requested
12333 max_requested_cpu = max(map(max, cpu_list))
12334 # Check that all of the instance's nodes have enough physical CPUs to
12335 # satisfy the requested CPU mask
12336 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12337 max_requested_cpu + 1, instance.hypervisor)
12339 # osparams processing
12340 if self.op.osparams:
12341 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12342 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12343       self.os_inst = i_osdict # the new dict (without defaults)
12344     else:
12345       self.os_inst = {}
12347     self.warn = []
12349 #TODO(dynmem): do the appropriate check involving MINMEM
12350 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12351 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12352 mem_check_list = [pnode]
12353 if be_new[constants.BE_AUTO_BALANCE]:
12354 # either we changed auto_balance to yes or it was from before
12355 mem_check_list.extend(instance.secondary_nodes)
12356 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12357 instance.hypervisor)
12358 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12359 [instance.hypervisor])
12360 pninfo = nodeinfo[pnode]
12361 msg = pninfo.fail_msg
12363 # Assume the primary node is unreachable and go ahead
12364         self.warn.append("Can't get info from primary node %s: %s" %
12365                          (pnode, msg))
12366       else:
12367         (_, _, (pnhvinfo, )) = pninfo.payload
12368 if not isinstance(pnhvinfo.get("memory_free", None), int):
12369 self.warn.append("Node data from primary node %s doesn't contain"
12370 " free memory information" % pnode)
12371 elif instance_info.fail_msg:
12372 self.warn.append("Can't get instance runtime information: %s" %
12373 instance_info.fail_msg)
12375 if instance_info.payload:
12376           current_mem = int(instance_info.payload["memory"])
12377         else:
12378           # Assume instance not running
12379 # (there is a slight race condition here, but it's not very
12380 # probable, and we have no other way to check)
12381           # TODO: Describe race condition
12382           current_mem = 0
12383 #TODO(dynmem): do the appropriate check involving MINMEM
12384 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12385 pnhvinfo["memory_free"])
12386           if miss_mem > 0:
12387             raise errors.OpPrereqError("This change will prevent the instance"
12388                                        " from starting, due to %d MB of memory"
12389                                        " missing on its primary node" %
12390                                        miss_mem,
12391                                        errors.ECODE_NORES)
12393 if be_new[constants.BE_AUTO_BALANCE]:
12394 for node, nres in nodeinfo.items():
12395           if node not in instance.secondary_nodes:
12396             continue
12397 nres.Raise("Can't get info from secondary node %s" % node,
12398 prereq=True, ecode=errors.ECODE_STATE)
12399 (_, _, (nhvinfo, )) = nres.payload
12400 if not isinstance(nhvinfo.get("memory_free", None), int):
12401 raise errors.OpPrereqError("Secondary node %s didn't return free"
12402 " memory information" % node,
12403 errors.ECODE_STATE)
12404 #TODO(dynmem): do the appropriate check involving MINMEM
12405 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12406 raise errors.OpPrereqError("This change will prevent the instance"
12407 " from failover to its secondary node"
12408 " %s, due to not enough memory" % node,
12409 errors.ECODE_STATE)
12411 if self.op.runtime_mem:
12412       remote_info = self.rpc.call_instance_info(instance.primary_node,
12413                                                 instance.name,
12414 instance.hypervisor)
12415 remote_info.Raise("Error checking node %s" % instance.primary_node)
12416 if not remote_info.payload: # not running already
12417 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12418 errors.ECODE_STATE)
12420 current_memory = remote_info.payload["memory"]
12421 if (not self.op.force and
12422 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12423 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12424 raise errors.OpPrereqError("Instance %s must have memory between %d"
12425 " and %d MB of memory unless --force is"
12426 " given" % (instance.name,
12427 self.be_proposed[constants.BE_MINMEM],
12428 self.be_proposed[constants.BE_MAXMEM]),
12429 errors.ECODE_INVAL)
12431 if self.op.runtime_mem > current_memory:
12432 _CheckNodeFreeMemory(self, instance.primary_node,
12433                            "ballooning memory for instance %s" %
12434                            instance.name,
12435                            self.op.runtime_mem - current_memory,
12436 instance.hypervisor)
12438 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12439 raise errors.OpPrereqError("Disk operations not supported for"
12440 " diskless instances",
12441 errors.ECODE_INVAL)
12443 def _PrepareNicCreate(_, params, private):
12444       return self._PrepareNicModification(params, private, None, {},
12445                                           cluster, pnode)
12447 def _PrepareNicMod(_, nic, params, private):
12448 return self._PrepareNicModification(params, private, nic.ip,
12449 nic.nicparams, cluster, pnode)
12451 # Verify NIC changes (operating on copy)
12452 nics = instance.nics[:]
12453 ApplyContainerMods("NIC", nics, None, self.nicmod,
12454 _PrepareNicCreate, _PrepareNicMod, None)
12455 if len(nics) > constants.MAX_NICS:
12456 raise errors.OpPrereqError("Instance has too many network interfaces"
12457 " (%d), cannot add more" % constants.MAX_NICS,
12458 errors.ECODE_STATE)
12460 # Verify disk changes (operating on a copy)
12461 disks = instance.disks[:]
12462 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12463 if len(disks) > constants.MAX_DISKS:
12464 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12465 " more" % constants.MAX_DISKS,
12466 errors.ECODE_STATE)
12468 if self.op.offline is not None:
12469 if self.op.offline:
12470 msg = "can't change to offline"
12472 msg = "can't change to online"
12473 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12475 # Pre-compute NIC changes (necessary to use result in hooks)
12476 self._nic_chgdesc = []
12478 # Operate on copies as this is still in prereq
12479 nics = [nic.Copy() for nic in instance.nics]
12480 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12481 self._CreateNewNic, self._ApplyNicMods, None)
12482 self._new_nics = nics
12484 self._new_nics = None
12486 def _ConvertPlainToDrbd(self, feedback_fn):
12487     """Converts an instance from plain to drbd.
12489     """
12490     feedback_fn("Converting template to drbd")
12491 instance = self.instance
12492 pnode = instance.primary_node
12493 snode = self.op.remote_node
12495 assert instance.disk_template == constants.DT_PLAIN
12497 # create a fake disk info for _GenerateDiskTemplate
12498 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12499 constants.IDISK_VG: d.logical_id[0]}
12500 for d in instance.disks]
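    # Illustrative note (hypothetical values, not part of the original code):
    # for a plain instance with one 10 GiB read-write volume in volume group
    # "xenvg", disk_info would look like
    #   [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
    #     constants.IDISK_VG: "xenvg"}]
    # which is enough for _GenerateDiskTemplate below to lay out the matching
    # DRBD disks.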
12501 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12502 instance.name, pnode, [snode],
12503                                       disk_info, None, None, 0, feedback_fn,
12504                                       self.diskparams)
12505 info = _GetInstanceInfoText(instance)
12506 feedback_fn("Creating additional volumes...")
12507 # first, create the missing data and meta devices
12508 for disk in new_disks:
12509 # unfortunately this is... not too nice
12510       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12511                             info, True)
12512 for child in disk.children:
12513 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12514     # at this stage, all new LVs have been created, we can rename the
12515     # old ones
12516 feedback_fn("Renaming original volumes...")
12517 rename_list = [(o, n.children[0].logical_id)
12518 for (o, n) in zip(instance.disks, new_disks)]
12519 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12520 result.Raise("Failed to rename original LVs")
12522 feedback_fn("Initializing DRBD devices...")
12523 # all child devices are in place, we can now create the DRBD devices
12524 for disk in new_disks:
12525 for node in [pnode, snode]:
12526 f_create = node == pnode
12527 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12529 # at this point, the instance has been modified
12530 instance.disk_template = constants.DT_DRBD8
12531 instance.disks = new_disks
12532 self.cfg.Update(instance, feedback_fn)
12534 # Release node locks while waiting for sync
12535 _ReleaseLocks(self, locking.LEVEL_NODE)
12537 # disks are created, waiting for sync
12538 disk_abort = not _WaitForSync(self, instance,
12539                                   oneshot=not self.op.wait_for_sync)
12540     if disk_abort:
12541       raise errors.OpExecError("There are some degraded disks for"
12542 " this instance, please cleanup manually")
12544 # Node resource locks will be released by caller
12546 def _ConvertDrbdToPlain(self, feedback_fn):
12547     """Converts an instance from drbd to plain.
12549     """
12550 instance = self.instance
12552 assert len(instance.secondary_nodes) == 1
12553 assert instance.disk_template == constants.DT_DRBD8
12555 pnode = instance.primary_node
12556 snode = instance.secondary_nodes[0]
12557 feedback_fn("Converting template to plain")
12559 old_disks = instance.disks
12560 new_disks = [d.children[0] for d in old_disks]
12562 # copy over size and mode
12563 for parent, child in zip(old_disks, new_disks):
12564 child.size = parent.size
12565 child.mode = parent.mode
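    # Illustrative note (not part of the original code): each DRBD8 disk has
    # two LV children, the data volume at children[0] (reused above as the new
    # plain disk) and the metadata volume at children[1], which is removed
    # further down once the configuration has been updated.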
12567 # update instance structure
12568 instance.disks = new_disks
12569 instance.disk_template = constants.DT_PLAIN
12570 self.cfg.Update(instance, feedback_fn)
12572 # Release locks in case removing disks takes a while
12573 _ReleaseLocks(self, locking.LEVEL_NODE)
12575 feedback_fn("Removing volumes on the secondary node...")
12576 for disk in old_disks:
12577 self.cfg.SetDiskID(disk, snode)
12578       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12579       if msg:
12580         self.LogWarning("Could not remove block device %s on node %s,"
12581 " continuing anyway: %s", disk.iv_name, snode, msg)
12583 feedback_fn("Removing unneeded volumes on the primary node...")
12584 for idx, disk in enumerate(old_disks):
12585 meta = disk.children[1]
12586 self.cfg.SetDiskID(meta, pnode)
12587       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12588       if msg:
12589         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12590 " continuing anyway: %s", idx, pnode, msg)
12592 # this is a DRBD disk, return its port to the pool
12593 for disk in old_disks:
12594 tcp_port = disk.logical_id[2]
12595 self.cfg.AddTcpUdpPort(tcp_port)
12597 # Node resource locks will be released by caller
12599 def _CreateNewDisk(self, idx, params, _):
12600     """Creates a new disk.
12602     """
12603 instance = self.instance
12606 if instance.disk_template in constants.DTS_FILEBASED:
12607 (file_driver, file_path) = instance.disks[0].logical_id
12608 file_path = os.path.dirname(file_path)
12609     else:
12610       file_driver = file_path = None
12612     disk = \
12613       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12614 instance.primary_node, instance.secondary_nodes,
12615 [params], file_path, file_driver, idx,
12616 self.Log, self.diskparams)[0]
12618 info = _GetInstanceInfoText(instance)
12620 logging.info("Creating volume %s for instance %s",
12621 disk.iv_name, instance.name)
12622 # Note: this needs to be kept in sync with _CreateDisks
12624 for node in instance.all_nodes:
12625       f_create = (node == instance.primary_node)
12626       try:
12627         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12628 except errors.OpExecError, err:
12629 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12630                         disk.iv_name, disk, node, err)
12632     return (disk, [
12633       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12634       ])
12636   @staticmethod
12637   def _ModifyDisk(idx, disk, params, _):
12638     """Modifies a disk.
12640     """
12641     disk.mode = params[constants.IDISK_MODE]
12643     return [
12644       ("disk.mode/%d" % idx, disk.mode),
12645       ]
12647   def _RemoveDisk(self, idx, root, _):
12648     """Removes a disk.
12650     """
12651     for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12652 self.cfg.SetDiskID(disk, node)
12653 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12655 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12656 " continuing anyway", idx, node, msg)
12658 # if this is a DRBD disk, return its port to the pool
12659 if root.dev_type in constants.LDS_DRBD:
12660 self.cfg.AddTcpUdpPort(root.logical_id[2])
12662   @staticmethod
12663   def _CreateNewNic(idx, params, private):
12664     """Creates data structure for a new network interface.
12666     """
12667 mac = params[constants.INIC_MAC]
12668 ip = params.get(constants.INIC_IP, None)
12669 nicparams = private.params
12671     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12672       ("nic.%d" % idx,
12673 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12674 (mac, ip, private.filled[constants.NIC_MODE],
12675        private.filled[constants.NIC_LINK])),
12676       ])
12678   @staticmethod
12679   def _ApplyNicMods(idx, nic, params, private):
12680     """Modifies a network interface.
12682     """
12683     changes = []
12685     for key in [constants.INIC_MAC, constants.INIC_IP]:
12686       if key in params:
12687 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12688         setattr(nic, key, params[key])
12690     if private.params:
12691       nic.nicparams = private.params
12693       for (key, val) in params.items():
12694         changes.append(("nic.%s/%d" % (key, idx), val))
12696     return changes
12698 def Exec(self, feedback_fn):
12699 """Modifies an instance.
12701     All parameters take effect only at the next restart of the instance.
12703     """
12704 # Process here the warnings from CheckPrereq, as we don't have a
12705 # feedback_fn there.
12706 # TODO: Replace with self.LogWarning
12707 for warn in self.warn:
12708 feedback_fn("WARNING: %s" % warn)
12710 assert ((self.op.disk_template is None) ^
12711 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12712 "Not owning any node resource locks"
12715 instance = self.instance
12718 if self.op.runtime_mem:
12719 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12721 self.op.runtime_mem)
12722 rpcres.Raise("Cannot modify instance runtime memory")
12723 result.append(("runtime_memory", self.op.runtime_mem))
12725 # Apply disk changes
12726 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12727 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12728 _UpdateIvNames(0, instance.disks)
12730 if self.op.disk_template:
12732 check_nodes = set(instance.all_nodes)
12733 if self.op.remote_node:
12734 check_nodes.add(self.op.remote_node)
12735 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12736 owned = self.owned_locks(level)
12737 assert not (check_nodes - owned), \
12738 ("Not owning the correct locks, owning %r, expected at least %r" %
12739 (owned, check_nodes))
12741 r_shut = _ShutdownInstanceDisks(self, instance)
12742 if not r_shut:
12743 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12744 " proceed with disk template conversion")
12745 mode = (instance.disk_template, self.op.disk_template)
12747 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12749 self.cfg.ReleaseDRBDMinors(instance.name)
12751 result.append(("disk_template", self.op.disk_template))
12753 assert instance.disk_template == self.op.disk_template, \
12754 ("Expected disk template '%s', found '%s'" %
12755 (self.op.disk_template, instance.disk_template))
12757 # Release node and resource locks if there are any (they might already have
12758 # been released during disk conversion)
12759 _ReleaseLocks(self, locking.LEVEL_NODE)
12760 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12762 # Apply NIC changes
12763 if self._new_nics is not None:
12764 instance.nics = self._new_nics
12765 result.extend(self._nic_chgdesc)
12768 if self.op.hvparams:
12769 instance.hvparams = self.hv_inst
12770 for key, val in self.op.hvparams.iteritems():
12771 result.append(("hv/%s" % key, val))
12774 if self.op.beparams:
12775 instance.beparams = self.be_inst
12776 for key, val in self.op.beparams.iteritems():
12777 result.append(("be/%s" % key, val))
12780 if self.op.os_name:
12781 instance.os = self.op.os_name
12784 if self.op.osparams:
12785 instance.osparams = self.os_inst
12786 for key, val in self.op.osparams.iteritems():
12787 result.append(("os/%s" % key, val))
12789 if self.op.offline is None:
12792 elif self.op.offline:
12793 # Mark instance as offline
12794 self.cfg.MarkInstanceOffline(instance.name)
12795 result.append(("admin_state", constants.ADMINST_OFFLINE))
12796 else:
12797 # Mark instance as online, but stopped
12798 self.cfg.MarkInstanceDown(instance.name)
12799 result.append(("admin_state", constants.ADMINST_DOWN))
12801 self.cfg.Update(instance, feedback_fn)
12803 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12804 self.owned_locks(locking.LEVEL_NODE)), \
12805 "All node locks should have been released by now"
12809 _DISK_CONVERSIONS = {
12810 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12811 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12812 }
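# Illustrative sketch of how this table is used (values are hypothetical):
# Exec() builds the key from the current and the requested disk template and
# dispatches through it, roughly:
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)   # runs _ConvertPlainToDrbd
# Only the template pairs listed here can be converted by this LU.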
12815 class LUInstanceChangeGroup(LogicalUnit):
12816 HPATH = "instance-change-group"
12817 HTYPE = constants.HTYPE_INSTANCE
12820 def ExpandNames(self):
12821 self.share_locks = _ShareAll()
12822 self.needed_locks = {
12823 locking.LEVEL_NODEGROUP: [],
12824 locking.LEVEL_NODE: [],
12827 self._ExpandAndLockInstance()
12829 if self.op.target_groups:
12830 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12831 self.op.target_groups)
12832 else:
12833 self.req_target_uuids = None
12835 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12837 def DeclareLocks(self, level):
12838 if level == locking.LEVEL_NODEGROUP:
12839 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12841 if self.req_target_uuids:
12842 lock_groups = set(self.req_target_uuids)
12844 # Lock all groups used by instance optimistically; this requires going
12845 # via the node before it's locked, requiring verification later on
12846 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12847 lock_groups.update(instance_groups)
12848 else:
12849 # No target groups, need to lock all of them
12850 lock_groups = locking.ALL_SET
12852 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12854 elif level == locking.LEVEL_NODE:
12855 if self.req_target_uuids:
12856 # Lock all nodes used by instances
12857 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12858 self._LockInstancesNodes()
12860 # Lock all nodes in all potential target groups
12861 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12862 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12863 member_nodes = [node_name
12864 for group in lock_groups
12865 for node_name in self.cfg.GetNodeGroup(group).members]
12866 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12867 else:
12868 # Lock all nodes as all groups are potential targets
12869 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12871 def CheckPrereq(self):
12872 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12873 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12874 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12876 assert (self.req_target_uuids is None or
12877 owned_groups.issuperset(self.req_target_uuids))
12878 assert owned_instances == set([self.op.instance_name])
12880 # Get instance information
12881 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12883 # Check if node groups for locked instance are still correct
12884 assert owned_nodes.issuperset(self.instance.all_nodes), \
12885 ("Instance %s's nodes changed while we kept the lock" %
12886 self.op.instance_name)
12888 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12891 if self.req_target_uuids:
12892 # User requested specific target groups
12893 self.target_uuids = self.req_target_uuids
12894 else:
12895 # All groups except those used by the instance are potential targets
12896 self.target_uuids = owned_groups - inst_groups
12898 conflicting_groups = self.target_uuids & inst_groups
12899 if conflicting_groups:
12900 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12901 " used by the instance '%s'" %
12902 (utils.CommaJoin(conflicting_groups),
12903 self.op.instance_name),
12904 errors.ECODE_INVAL)
12906 if not self.target_uuids:
12907 raise errors.OpPrereqError("There are no possible target groups",
12908 errors.ECODE_INVAL)
12910 def BuildHooksEnv(self):
12911 """Build hooks env.
12914 assert self.target_uuids
12917 "TARGET_GROUPS": " ".join(self.target_uuids),
12920 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12924 def BuildHooksNodes(self):
12925 """Build hooks nodes.
12928 mn = self.cfg.GetMasterNode()
12929 return ([mn], [mn])
12931 def Exec(self, feedback_fn):
12932 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12934 assert instances == [self.op.instance_name], "Instance not locked"
12936 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12937 instances=instances, target_groups=list(self.target_uuids))
12939 ial.Run(self.op.iallocator)
12941 if not ial.success:
12942 raise errors.OpPrereqError("Can't compute solution for changing group of"
12943 " instance '%s' using iallocator '%s': %s" %
12944 (self.op.instance_name, self.op.iallocator,
12946 errors.ECODE_NORES)
12948 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12950 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12951 " instance '%s'", len(jobs), self.op.instance_name)
12953 return ResultWithJobs(jobs)
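# Illustrative note (hypothetical example): _LoadNodeEvacResult above turns the
# iallocator's CHG_GROUP answer into job definitions, so the value wrapped in
# ResultWithJobs is a list with one entry per job to submit, each entry being
# the opcodes making up that job (e.g. the migration/replace-disks steps needed
# to move this instance to the chosen target group).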
12956 class LUBackupQuery(NoHooksLU):
12957 """Query the exports list
12962 def ExpandNames(self):
12963 self.needed_locks = {}
12964 self.share_locks[locking.LEVEL_NODE] = 1
12965 if not self.op.nodes:
12966 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12967 else:
12968 self.needed_locks[locking.LEVEL_NODE] = \
12969 _GetWantedNodes(self, self.op.nodes)
12971 def Exec(self, feedback_fn):
12972 """Compute the list of all the exported system images.
12975 @return: a dictionary with the structure node->(export-list)
12976 where export-list is a list of the instances exported on
12980 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12981 rpcresult = self.rpc.call_export_list(self.nodes)
12983 for node in rpcresult:
12984 if rpcresult[node].fail_msg:
12985 result[node] = False
12986 else:
12987 result[node] = rpcresult[node].payload
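# Illustrative example of the returned structure (hypothetical names): each
# queried node maps either to the list of exports found on it or to False when
# the export list could not be retrieved, e.g.:
#   {"node1.example.com": ["web1.example.com"], "node2.example.com": False}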
12992 class LUBackupPrepare(NoHooksLU):
12993 """Prepares an instance for an export and returns useful information.
12998 def ExpandNames(self):
12999 self._ExpandAndLockInstance()
13001 def CheckPrereq(self):
13002 """Check prerequisites.
13005 instance_name = self.op.instance_name
13007 self.instance = self.cfg.GetInstanceInfo(instance_name)
13008 assert self.instance is not None, \
13009 "Cannot retrieve locked instance %s" % self.op.instance_name
13010 _CheckNodeOnline(self, self.instance.primary_node)
13012 self._cds = _GetClusterDomainSecret()
13014 def Exec(self, feedback_fn):
13015 """Prepares an instance for an export.
13018 instance = self.instance
13020 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13021 salt = utils.GenerateSecret(8)
13023 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13024 result = self.rpc.call_x509_cert_create(instance.primary_node,
13025 constants.RIE_CERT_VALIDITY)
13026 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13028 (name, cert_pem) = result.payload
13030 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13034 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13035 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13037 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13043 class LUBackupExport(LogicalUnit):
13044 """Export an instance to an image in the cluster.
13047 HPATH = "instance-export"
13048 HTYPE = constants.HTYPE_INSTANCE
13051 def CheckArguments(self):
13052 """Check the arguments.
13055 self.x509_key_name = self.op.x509_key_name
13056 self.dest_x509_ca_pem = self.op.destination_x509_ca
13058 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13059 if not self.x509_key_name:
13060 raise errors.OpPrereqError("Missing X509 key name for encryption",
13061 errors.ECODE_INVAL)
13063 if not self.dest_x509_ca_pem:
13064 raise errors.OpPrereqError("Missing destination X509 CA",
13065 errors.ECODE_INVAL)
13067 def ExpandNames(self):
13068 self._ExpandAndLockInstance()
13070 # Lock all nodes for local exports
13071 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13072 # FIXME: lock only instance primary and destination node
13074 # Sad but true, for now we have to lock all nodes, as we don't know where
13075 # the previous export might be, and in this LU we search for it and
13076 # remove it from its current node. In the future we could fix this by:
13077 # - making a tasklet to search (share-lock all), then create the
13078 # new one, then one to remove, after
13079 # - removing the removal operation altogether
13080 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13082 def DeclareLocks(self, level):
13083 """Last minute lock declaration."""
13084 # All nodes are locked anyway, so nothing to do here.
13086 def BuildHooksEnv(self):
13087 """Build hooks env.
13089 This will run on the master, primary node and target node.
13093 "EXPORT_MODE": self.op.mode,
13094 "EXPORT_NODE": self.op.target_node,
13095 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13096 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13097 # TODO: Generic function for boolean env variables
13098 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13101 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13105 def BuildHooksNodes(self):
13106 """Build hooks nodes.
13109 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13111 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13112 nl.append(self.op.target_node)
13116 def CheckPrereq(self):
13117 """Check prerequisites.
13119 This checks that the instance and node names are valid.
13122 instance_name = self.op.instance_name
13124 self.instance = self.cfg.GetInstanceInfo(instance_name)
13125 assert self.instance is not None, \
13126 "Cannot retrieve locked instance %s" % self.op.instance_name
13127 _CheckNodeOnline(self, self.instance.primary_node)
13129 if (self.op.remove_instance and
13130 self.instance.admin_state == constants.ADMINST_UP and
13131 not self.op.shutdown):
13132 raise errors.OpPrereqError("Can not remove instance without shutting it"
13135 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13136 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13137 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13138 assert self.dst_node is not None
13140 _CheckNodeOnline(self, self.dst_node.name)
13141 _CheckNodeNotDrained(self, self.dst_node.name)
13144 self.dest_disk_info = None
13145 self.dest_x509_ca = None
13147 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13148 self.dst_node = None
13150 if len(self.op.target_node) != len(self.instance.disks):
13151 raise errors.OpPrereqError(("Received destination information for %s"
13152 " disks, but instance %s has %s disks") %
13153 (len(self.op.target_node), instance_name,
13154 len(self.instance.disks)),
13155 errors.ECODE_INVAL)
13157 cds = _GetClusterDomainSecret()
13159 # Check X509 key name
13160 try:
13161 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13162 except (TypeError, ValueError), err:
13163 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13165 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13166 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13167 errors.ECODE_INVAL)
13169 # Load and verify CA
13170 try:
13171 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13172 except OpenSSL.crypto.Error, err:
13173 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13174 (err, ), errors.ECODE_INVAL)
13176 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13177 if errcode is not None:
13178 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13179 (msg, ), errors.ECODE_INVAL)
13181 self.dest_x509_ca = cert
13183 # Verify target information
13184 disk_info = []
13185 for idx, disk_data in enumerate(self.op.target_node):
13186 try:
13187 (host, port, magic) = \
13188 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13189 except errors.GenericError, err:
13190 raise errors.OpPrereqError("Target info for disk %s: %s" %
13191 (idx, err), errors.ECODE_INVAL)
13193 disk_info.append((host, port, magic))
13195 assert len(disk_info) == len(self.op.target_node)
13196 self.dest_disk_info = disk_info
13198 else:
13199 raise errors.ProgrammerError("Unhandled export mode %r" %
13202 # instance disk type verification
13203 # TODO: Implement export support for file-based disks
13204 for disk in self.instance.disks:
13205 if disk.dev_type == constants.LD_FILE:
13206 raise errors.OpPrereqError("Export not supported for instances with"
13207 " file-based disks", errors.ECODE_INVAL)
13209 def _CleanupExports(self, feedback_fn):
13210 """Removes exports of current instance from all other nodes.
13212 If an instance in a cluster with nodes A..D was exported to node C, its
13213 exports will be removed from the nodes A, B and D.
13216 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13218 nodelist = self.cfg.GetNodeList()
13219 nodelist.remove(self.dst_node.name)
13221 # on one-node clusters nodelist will be empty after the removal
13222 # if we proceed the backup would be removed because OpBackupQuery
13223 # substitutes an empty list with the full cluster node list.
13224 iname = self.instance.name
13225 if nodelist:
13226 feedback_fn("Removing old exports for instance %s" % iname)
13227 exportlist = self.rpc.call_export_list(nodelist)
13228 for node in exportlist:
13229 if exportlist[node].fail_msg:
13230 continue
13231 if iname in exportlist[node].payload:
13232 msg = self.rpc.call_export_remove(node, iname).fail_msg
13233 if msg:
13234 self.LogWarning("Could not remove older export for instance %s"
13235 " on node %s: %s", iname, node, msg)
13237 def Exec(self, feedback_fn):
13238 """Export an instance to an image in the cluster.
13241 assert self.op.mode in constants.EXPORT_MODES
13243 instance = self.instance
13244 src_node = instance.primary_node
13246 if self.op.shutdown:
13247 # shutdown the instance, but not the disks
13248 feedback_fn("Shutting down instance %s" % instance.name)
13249 result = self.rpc.call_instance_shutdown(src_node, instance,
13250 self.op.shutdown_timeout)
13251 # TODO: Maybe ignore failures if ignore_remove_failures is set
13252 result.Raise("Could not shutdown instance %s on"
13253 " node %s" % (instance.name, src_node))
13255 # set the disks ID correctly since call_instance_start needs the
13256 # correct drbd minor to create the symlinks
13257 for disk in instance.disks:
13258 self.cfg.SetDiskID(disk, src_node)
13260 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13262 if activate_disks:
13263 # Activate the instance disks if we're exporting a stopped instance
13264 feedback_fn("Activating disks for %s" % instance.name)
13265 _StartInstanceDisks(self, instance, None)
13268 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13271 helper.CreateSnapshots()
13273 if (self.op.shutdown and
13274 instance.admin_state == constants.ADMINST_UP and
13275 not self.op.remove_instance):
13276 assert not activate_disks
13277 feedback_fn("Starting instance %s" % instance.name)
13278 result = self.rpc.call_instance_start(src_node,
13279 (instance, None, None), False)
13280 msg = result.fail_msg
13281 if msg:
13282 feedback_fn("Failed to start instance: %s" % msg)
13283 _ShutdownInstanceDisks(self, instance)
13284 raise errors.OpExecError("Could not start instance: %s" % msg)
13286 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13287 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13288 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13289 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13290 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13292 (key_name, _, _) = self.x509_key_name
13295 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13298 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13299 key_name, dest_ca_pem,
13304 # Check for backwards compatibility
13305 assert len(dresults) == len(instance.disks)
13306 assert compat.all(isinstance(i, bool) for i in dresults), \
13307 "Not all results are boolean: %r" % dresults
13311 feedback_fn("Deactivating disks for %s" % instance.name)
13312 _ShutdownInstanceDisks(self, instance)
13314 if not (compat.all(dresults) and fin_resu):
13315 failures = []
13316 if not fin_resu:
13317 failures.append("export finalization")
13318 if not compat.all(dresults):
13319 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13320 if not dsk)
13321 failures.append("disk export: disk(s) %s" % fdsk)
13323 raise errors.OpExecError("Export failed, errors in %s" %
13324 utils.CommaJoin(failures))
13326 # At this point, the export was successful, we can cleanup/finish
13328 # Remove instance if requested
13329 if self.op.remove_instance:
13330 feedback_fn("Removing instance %s" % instance.name)
13331 _RemoveInstance(self, feedback_fn, instance,
13332 self.op.ignore_remove_failures)
13334 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13335 self._CleanupExports(feedback_fn)
13337 return fin_resu, dresults
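# Illustrative example (hypothetical values): for a successful two-disk export
# the returned tuple looks like (True, [True, True]); a False finalization
# status or a False per-disk result would have raised OpExecError in the
# failure handling above instead of reaching this return.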
13340 class LUBackupRemove(NoHooksLU):
13341 """Remove exports related to the named instance.
13346 def ExpandNames(self):
13347 self.needed_locks = {}
13348 # We need all nodes to be locked in order for RemoveExport to work, but we
13349 # don't need to lock the instance itself, as nothing will happen to it (and
13350 # we can remove exports also for a removed instance)
13351 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13353 def Exec(self, feedback_fn):
13354 """Remove any export.
13357 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13358 # If the instance was not found we'll try with the name that was passed in.
13359 # This will only work if it was an FQDN, though.
13360 fqdn_warn = False
13361 if not instance_name:
13362 fqdn_warn = True
13363 instance_name = self.op.instance_name
13365 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13366 exportlist = self.rpc.call_export_list(locked_nodes)
13367 found = False
13368 for node in exportlist:
13369 msg = exportlist[node].fail_msg
13370 if msg:
13371 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13372 continue
13373 if instance_name in exportlist[node].payload:
13374 found = True
13375 result = self.rpc.call_export_remove(node, instance_name)
13376 msg = result.fail_msg
13378 logging.error("Could not remove export for instance %s"
13379 " on node %s: %s", instance_name, node, msg)
13381 if fqdn_warn and not found:
13382 feedback_fn("Export not found. If trying to remove an export belonging"
13383 " to a deleted instance please use its Fully Qualified"
13384 " Domain Name.")
13387 class LUGroupAdd(LogicalUnit):
13388 """Logical unit for creating node groups.
13391 HPATH = "group-add"
13392 HTYPE = constants.HTYPE_GROUP
13395 def ExpandNames(self):
13396 # We need the new group's UUID here so that we can create and acquire the
13397 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13398 # that it should not check whether the UUID exists in the configuration.
13399 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13400 self.needed_locks = {}
13401 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13403 def CheckPrereq(self):
13404 """Check prerequisites.
13406 This checks that the given group name is not an existing node group
13410 try:
13411 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13412 except errors.OpPrereqError:
13413 pass
13414 else:
13415 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13416 " node group (UUID: %s)" %
13417 (self.op.group_name, existing_uuid),
13418 errors.ECODE_EXISTS)
13420 if self.op.ndparams:
13421 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13423 if self.op.hv_state:
13424 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13425 else:
13426 self.new_hv_state = None
13428 if self.op.disk_state:
13429 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13430 else:
13431 self.new_disk_state = None
13433 if self.op.diskparams:
13434 for templ in constants.DISK_TEMPLATES:
13435 if templ not in self.op.diskparams:
13436 self.op.diskparams[templ] = {}
13437 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13438 else:
13439 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13441 if self.op.ipolicy:
13442 cluster = self.cfg.GetClusterInfo()
13443 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13445 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13446 except errors.ConfigurationError, err:
13447 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13448 errors.ECODE_INVAL)
13450 def BuildHooksEnv(self):
13451 """Build hooks env.
13455 "GROUP_NAME": self.op.group_name,
13458 def BuildHooksNodes(self):
13459 """Build hooks nodes.
13462 mn = self.cfg.GetMasterNode()
13463 return ([mn], [mn])
13465 def Exec(self, feedback_fn):
13466 """Add the node group to the cluster.
13469 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13470 uuid=self.group_uuid,
13471 alloc_policy=self.op.alloc_policy,
13472 ndparams=self.op.ndparams,
13473 diskparams=self.op.diskparams,
13474 ipolicy=self.op.ipolicy,
13475 hv_state_static=self.new_hv_state,
13476 disk_state_static=self.new_disk_state)
13478 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13479 del self.remove_locks[locking.LEVEL_NODEGROUP]
13482 class LUGroupAssignNodes(NoHooksLU):
13483 """Logical unit for assigning nodes to groups.
13488 def ExpandNames(self):
13489 # These raise errors.OpPrereqError on their own:
13490 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13491 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13493 # We want to lock all the affected nodes and groups. We have readily
13494 # available the list of nodes, and the *destination* group. To gather the
13495 # list of "source" groups, we need to fetch node information later on.
13496 self.needed_locks = {
13497 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13498 locking.LEVEL_NODE: self.op.nodes,
13501 def DeclareLocks(self, level):
13502 if level == locking.LEVEL_NODEGROUP:
13503 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13505 # Try to get all affected nodes' groups without having the group or node
13506 # lock yet. Needs verification later in the code flow.
13507 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13509 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13511 def CheckPrereq(self):
13512 """Check prerequisites.
13515 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13516 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13517 frozenset(self.op.nodes))
13519 expected_locks = (set([self.group_uuid]) |
13520 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13521 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13522 if actual_locks != expected_locks:
13523 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13524 " current groups are '%s', used to be '%s'" %
13525 (utils.CommaJoin(expected_locks),
13526 utils.CommaJoin(actual_locks)))
13528 self.node_data = self.cfg.GetAllNodesInfo()
13529 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13530 instance_data = self.cfg.GetAllInstancesInfo()
13532 if self.group is None:
13533 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13534 (self.op.group_name, self.group_uuid))
13536 (new_splits, previous_splits) = \
13537 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13538 for node in self.op.nodes],
13539 self.node_data, instance_data)
13541 if new_splits:
13542 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13544 if not self.op.force:
13545 raise errors.OpExecError("The following instances get split by this"
13546 " change and --force was not given: %s" %
13549 self.LogWarning("This operation will split the following instances: %s",
13552 if previous_splits:
13553 self.LogWarning("In addition, these already-split instances continue"
13554 " to be split across groups: %s",
13555 utils.CommaJoin(utils.NiceSort(previous_splits)))
13557 def Exec(self, feedback_fn):
13558 """Assign nodes to a new group.
13561 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13563 self.cfg.AssignGroupNodes(mods)
13566 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13567 """Check for split instances after a node assignment.
13569 This method considers a series of node assignments as an atomic operation,
13570 and returns information about split instances after applying the set of
13573 In particular, it returns information about newly split instances, and
13574 instances that were already split, and remain so after the change.
13576 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13579 @type changes: list of (node_name, new_group_uuid) pairs.
13580 @param changes: list of node assignments to consider.
13581 @param node_data: a dict with data for all nodes
13582 @param instance_data: a dict with all instances to consider
13583 @rtype: a two-tuple
13584 @return: a list of instances that were previously okay and end up split as a
13585 consequence of this change, and a list of instances that were previously
13586 split and this change does not fix.
13589 changed_nodes = dict((node, group) for node, group in changes
13590 if node_data[node].group != group)
13592 all_split_instances = set()
13593 previously_split_instances = set()
13595 def InstanceNodes(instance):
13596 return [instance.primary_node] + list(instance.secondary_nodes)
13598 for inst in instance_data.values():
13599 if inst.disk_template not in constants.DTS_INT_MIRROR:
13602 instance_nodes = InstanceNodes(inst)
13604 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13605 previously_split_instances.add(inst.name)
13607 if len(set(changed_nodes.get(node, node_data[node].group)
13608 for node in instance_nodes)) > 1:
13609 all_split_instances.add(inst.name)
13611 return (list(all_split_instances - previously_split_instances),
13612 list(previously_split_instances & all_split_instances))
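# Illustrative example (hypothetical names): consider a DRBD instance "inst1"
# with primary node "node1" and secondary "node2", both currently in group G1.
# For changes = [("node2", "G2")] its nodes end up in two groups, so "inst1" is
# returned in the first list (newly split). If "node2" had already been in G2
# before the call, "inst1" would instead appear in the second list (previously
# split and still split after the change).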
13615 class _GroupQuery(_QueryBase):
13616 FIELDS = query.GROUP_FIELDS
13618 def ExpandNames(self, lu):
13619 lu.needed_locks = {}
13621 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13622 self._cluster = lu.cfg.GetClusterInfo()
13623 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13625 if not self.names:
13626 self.wanted = [name_to_uuid[name]
13627 for name in utils.NiceSort(name_to_uuid.keys())]
13628 else:
13629 # Accept names to be either names or UUIDs.
13630 missing = []
13631 self.wanted = []
13632 all_uuid = frozenset(self._all_groups.keys())
13634 for name in self.names:
13635 if name in all_uuid:
13636 self.wanted.append(name)
13637 elif name in name_to_uuid:
13638 self.wanted.append(name_to_uuid[name])
13639 else:
13640 missing.append(name)
13642 if missing:
13643 raise errors.OpPrereqError("Some groups do not exist: %s" %
13644 utils.CommaJoin(missing),
13645 errors.ECODE_NOENT)
13647 def DeclareLocks(self, lu, level):
13650 def _GetQueryData(self, lu):
13651 """Computes the list of node groups and their attributes.
13654 do_nodes = query.GQ_NODE in self.requested_data
13655 do_instances = query.GQ_INST in self.requested_data
13657 group_to_nodes = None
13658 group_to_instances = None
13660 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13661 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13662 # latter GetAllInstancesInfo() is not enough, for we have to go through
13663 # instance->node. Hence, we will need to process nodes even if we only need
13664 # instance information.
13665 if do_nodes or do_instances:
13666 all_nodes = lu.cfg.GetAllNodesInfo()
13667 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13670 for node in all_nodes.values():
13671 if node.group in group_to_nodes:
13672 group_to_nodes[node.group].append(node.name)
13673 node_to_group[node.name] = node.group
13676 all_instances = lu.cfg.GetAllInstancesInfo()
13677 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13679 for instance in all_instances.values():
13680 node = instance.primary_node
13681 if node in node_to_group:
13682 group_to_instances[node_to_group[node]].append(instance.name)
13685 # Do not pass on node information if it was not requested.
13686 group_to_nodes = None
13688 return query.GroupQueryData(self._cluster,
13689 [self._all_groups[uuid]
13690 for uuid in self.wanted],
13691 group_to_nodes, group_to_instances)
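# Illustrative example (hypothetical names): when node and instance data are
# requested, the two mappings passed to GroupQueryData are keyed by group UUID:
#   group_to_nodes     = {"g1-uuid": ["node1", "node2"]}
#   group_to_instances = {"g1-uuid": ["inst1"]}   # grouped by primary node
# Either mapping is None if the corresponding data was not requested.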
13694 class LUGroupQuery(NoHooksLU):
13695 """Logical unit for querying node groups.
13700 def CheckArguments(self):
13701 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13702 self.op.output_fields, False)
13704 def ExpandNames(self):
13705 self.gq.ExpandNames(self)
13707 def DeclareLocks(self, level):
13708 self.gq.DeclareLocks(self, level)
13710 def Exec(self, feedback_fn):
13711 return self.gq.OldStyleQuery(self)
13714 class LUGroupSetParams(LogicalUnit):
13715 """Modifies the parameters of a node group.
13718 HPATH = "group-modify"
13719 HTYPE = constants.HTYPE_GROUP
13722 def CheckArguments(self):
13725 self.op.diskparams,
13726 self.op.alloc_policy,
13728 self.op.disk_state,
13732 if all_changes.count(None) == len(all_changes):
13733 raise errors.OpPrereqError("Please pass at least one modification",
13734 errors.ECODE_INVAL)
13736 def ExpandNames(self):
13737 # This raises errors.OpPrereqError on its own:
13738 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13740 self.needed_locks = {
13741 locking.LEVEL_INSTANCE: [],
13742 locking.LEVEL_NODEGROUP: [self.group_uuid],
13745 self.share_locks[locking.LEVEL_INSTANCE] = 1
13747 def DeclareLocks(self, level):
13748 if level == locking.LEVEL_INSTANCE:
13749 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13751 # Lock instances optimistically, needs verification once group lock has
13753 self.needed_locks[locking.LEVEL_INSTANCE] = \
13754 self.cfg.GetNodeGroupInstances(self.group_uuid)
13756 def CheckPrereq(self):
13757 """Check prerequisites.
13760 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13762 # Check if locked instances are still correct
13763 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13765 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13766 cluster = self.cfg.GetClusterInfo()
13768 if self.group is None:
13769 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13770 (self.op.group_name, self.group_uuid))
13772 if self.op.ndparams:
13773 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13774 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13775 self.new_ndparams = new_ndparams
13777 if self.op.diskparams:
13778 self.new_diskparams = dict()
13779 for templ in constants.DISK_TEMPLATES:
13780 if templ not in self.op.diskparams:
13781 self.op.diskparams[templ] = {}
13782 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13783 self.op.diskparams[templ])
13784 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13785 self.new_diskparams[templ] = new_templ_params
13787 if self.op.hv_state:
13788 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13789 self.group.hv_state_static)
13791 if self.op.disk_state:
13792 self.new_disk_state = \
13793 _MergeAndVerifyDiskState(self.op.disk_state,
13794 self.group.disk_state_static)
13796 if self.op.ipolicy:
13797 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13801 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13802 inst_filter = lambda inst: inst.name in owned_instances
13803 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13805 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13807 new_ipolicy, instances)
13810 self.LogWarning("After the ipolicy change the following instances"
13811 " violate them: %s",
13812 utils.CommaJoin(violations))
13814 def BuildHooksEnv(self):
13815 """Build hooks env.
13819 "GROUP_NAME": self.op.group_name,
13820 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13823 def BuildHooksNodes(self):
13824 """Build hooks nodes.
13827 mn = self.cfg.GetMasterNode()
13828 return ([mn], [mn])
13830 def Exec(self, feedback_fn):
13831 """Modifies the node group.
13836 if self.op.ndparams:
13837 self.group.ndparams = self.new_ndparams
13838 result.append(("ndparams", str(self.group.ndparams)))
13840 if self.op.diskparams:
13841 self.group.diskparams = self.new_diskparams
13842 result.append(("diskparams", str(self.group.diskparams)))
13844 if self.op.alloc_policy:
13845 self.group.alloc_policy = self.op.alloc_policy
13847 if self.op.hv_state:
13848 self.group.hv_state_static = self.new_hv_state
13850 if self.op.disk_state:
13851 self.group.disk_state_static = self.new_disk_state
13853 if self.op.ipolicy:
13854 self.group.ipolicy = self.new_ipolicy
13856 self.cfg.Update(self.group, feedback_fn)
13860 class LUGroupRemove(LogicalUnit):
13861 HPATH = "group-remove"
13862 HTYPE = constants.HTYPE_GROUP
13865 def ExpandNames(self):
13866 # This raises errors.OpPrereqError on its own:
13867 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13868 self.needed_locks = {
13869 locking.LEVEL_NODEGROUP: [self.group_uuid],
13872 def CheckPrereq(self):
13873 """Check prerequisites.
13875 This checks that the given group name exists as a node group, that it is
13876 empty (i.e., contains no nodes), and that it is not the last group of the
13877 cluster.
13880 # Verify that the group is empty.
13881 group_nodes = [node.name
13882 for node in self.cfg.GetAllNodesInfo().values()
13883 if node.group == self.group_uuid]
13886 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13888 (self.op.group_name,
13889 utils.CommaJoin(utils.NiceSort(group_nodes))),
13890 errors.ECODE_STATE)
13892 # Verify the cluster would not be left group-less.
13893 if len(self.cfg.GetNodeGroupList()) == 1:
13894 raise errors.OpPrereqError("Group '%s' is the only group,"
13895 " cannot be removed" %
13896 self.op.group_name,
13897 errors.ECODE_STATE)
13899 def BuildHooksEnv(self):
13900 """Build hooks env.
13904 "GROUP_NAME": self.op.group_name,
13907 def BuildHooksNodes(self):
13908 """Build hooks nodes.
13911 mn = self.cfg.GetMasterNode()
13912 return ([mn], [mn])
13914 def Exec(self, feedback_fn):
13915 """Remove the node group.
13918 try:
13919 self.cfg.RemoveNodeGroup(self.group_uuid)
13920 except errors.ConfigurationError:
13921 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13922 (self.op.group_name, self.group_uuid))
13924 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13927 class LUGroupRename(LogicalUnit):
13928 HPATH = "group-rename"
13929 HTYPE = constants.HTYPE_GROUP
13932 def ExpandNames(self):
13933 # This raises errors.OpPrereqError on its own:
13934 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13936 self.needed_locks = {
13937 locking.LEVEL_NODEGROUP: [self.group_uuid],
13940 def CheckPrereq(self):
13941 """Check prerequisites.
13943 Ensures requested new name is not yet used.
13946 try:
13947 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13948 except errors.OpPrereqError:
13949 pass
13950 else:
13951 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13952 " node group (UUID: %s)" %
13953 (self.op.new_name, new_name_uuid),
13954 errors.ECODE_EXISTS)
13956 def BuildHooksEnv(self):
13957 """Build hooks env.
13961 "OLD_NAME": self.op.group_name,
13962 "NEW_NAME": self.op.new_name,
13965 def BuildHooksNodes(self):
13966 """Build hooks nodes.
13969 mn = self.cfg.GetMasterNode()
13971 all_nodes = self.cfg.GetAllNodesInfo()
13972 all_nodes.pop(mn, None)
13975 run_nodes.extend(node.name for node in all_nodes.values()
13976 if node.group == self.group_uuid)
13978 return (run_nodes, run_nodes)
13980 def Exec(self, feedback_fn):
13981 """Rename the node group.
13984 group = self.cfg.GetNodeGroup(self.group_uuid)
13986 if group is None:
13987 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13988 (self.op.group_name, self.group_uuid))
13990 group.name = self.op.new_name
13991 self.cfg.Update(group, feedback_fn)
13993 return self.op.new_name
13996 class LUGroupEvacuate(LogicalUnit):
13997 HPATH = "group-evacuate"
13998 HTYPE = constants.HTYPE_GROUP
14001 def ExpandNames(self):
14002 # This raises errors.OpPrereqError on its own:
14003 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14005 if self.op.target_groups:
14006 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14007 self.op.target_groups)
14009 self.req_target_uuids = []
14011 if self.group_uuid in self.req_target_uuids:
14012 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14013 " as a target group (targets are %s)" %
14015 utils.CommaJoin(self.req_target_uuids)),
14016 errors.ECODE_INVAL)
14018 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14020 self.share_locks = _ShareAll()
14021 self.needed_locks = {
14022 locking.LEVEL_INSTANCE: [],
14023 locking.LEVEL_NODEGROUP: [],
14024 locking.LEVEL_NODE: [],
14027 def DeclareLocks(self, level):
14028 if level == locking.LEVEL_INSTANCE:
14029 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14031 # Lock instances optimistically, needs verification once node and group
14032 # locks have been acquired
14033 self.needed_locks[locking.LEVEL_INSTANCE] = \
14034 self.cfg.GetNodeGroupInstances(self.group_uuid)
14036 elif level == locking.LEVEL_NODEGROUP:
14037 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14039 if self.req_target_uuids:
14040 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14042 # Lock all groups used by instances optimistically; this requires going
14043 # via the node before it's locked, requiring verification later on
14044 lock_groups.update(group_uuid
14045 for instance_name in
14046 self.owned_locks(locking.LEVEL_INSTANCE)
14048 self.cfg.GetInstanceNodeGroups(instance_name))
14050 # No target groups, need to lock all of them
14051 lock_groups = locking.ALL_SET
14053 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14055 elif level == locking.LEVEL_NODE:
14056 # This will only lock the nodes in the group to be evacuated which
14057 # contain actual instances
14058 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14059 self._LockInstancesNodes()
14061 # Lock all nodes in group to be evacuated and target groups
14062 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14063 assert self.group_uuid in owned_groups
14064 member_nodes = [node_name
14065 for group in owned_groups
14066 for node_name in self.cfg.GetNodeGroup(group).members]
14067 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14069 def CheckPrereq(self):
14070 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14071 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14072 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14074 assert owned_groups.issuperset(self.req_target_uuids)
14075 assert self.group_uuid in owned_groups
14077 # Check if locked instances are still correct
14078 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14080 # Get instance information
14081 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14083 # Check if node groups for locked instances are still correct
14084 for instance_name in owned_instances:
14085 inst = self.instances[instance_name]
14086 assert owned_nodes.issuperset(inst.all_nodes), \
14087 "Instance %s's nodes changed while we kept the lock" % instance_name
14089 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
14092 assert self.group_uuid in inst_groups, \
14093 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
14095 if self.req_target_uuids:
14096 # User requested specific target groups
14097 self.target_uuids = self.req_target_uuids
14098 else:
14099 # All groups except the one to be evacuated are potential targets
14100 self.target_uuids = [group_uuid for group_uuid in owned_groups
14101 if group_uuid != self.group_uuid]
14103 if not self.target_uuids:
14104 raise errors.OpPrereqError("There are no possible target groups",
14105 errors.ECODE_INVAL)
14107 def BuildHooksEnv(self):
14108 """Build hooks env.
14112 "GROUP_NAME": self.op.group_name,
14113 "TARGET_GROUPS": " ".join(self.target_uuids),
14116 def BuildHooksNodes(self):
14117 """Build hooks nodes.
14120 mn = self.cfg.GetMasterNode()
14122 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14124 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14126 return (run_nodes, run_nodes)
14128 def Exec(self, feedback_fn):
14129 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14131 assert self.group_uuid not in self.target_uuids
14133 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14134 instances=instances, target_groups=self.target_uuids)
14136 ial.Run(self.op.iallocator)
14138 if not ial.success:
14139 raise errors.OpPrereqError("Can't compute group evacuation using"
14140 " iallocator '%s': %s" %
14141 (self.op.iallocator, ial.info),
14142 errors.ECODE_NORES)
14144 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14146 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14147 len(jobs), self.op.group_name)
14149 return ResultWithJobs(jobs)
14152 class TagsLU(NoHooksLU): # pylint: disable=W0223
14153 """Generic tags LU.
14155 This is an abstract class which is the parent of all the other tags LUs.
14158 def ExpandNames(self):
14159 self.group_uuid = None
14160 self.needed_locks = {}
14161 if self.op.kind == constants.TAG_NODE:
14162 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14163 self.needed_locks[locking.LEVEL_NODE] = self.op.name
14164 elif self.op.kind == constants.TAG_INSTANCE:
14165 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14166 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14167 elif self.op.kind == constants.TAG_NODEGROUP:
14168 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14170 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14171 # not possible to acquire the BGL based on opcode parameters)
14173 def CheckPrereq(self):
14174 """Check prerequisites.
14177 if self.op.kind == constants.TAG_CLUSTER:
14178 self.target = self.cfg.GetClusterInfo()
14179 elif self.op.kind == constants.TAG_NODE:
14180 self.target = self.cfg.GetNodeInfo(self.op.name)
14181 elif self.op.kind == constants.TAG_INSTANCE:
14182 self.target = self.cfg.GetInstanceInfo(self.op.name)
14183 elif self.op.kind == constants.TAG_NODEGROUP:
14184 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14186 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14187 str(self.op.kind), errors.ECODE_INVAL)
14190 class LUTagsGet(TagsLU):
14191 """Returns the tags of a given object.
14196 def ExpandNames(self):
14197 TagsLU.ExpandNames(self)
14199 # Share locks as this is only a read operation
14200 self.share_locks = _ShareAll()
14202 def Exec(self, feedback_fn):
14203 """Returns the tag list.
14206 return list(self.target.GetTags())
14209 class LUTagsSearch(NoHooksLU):
14210 """Searches the tags for a given pattern.
14215 def ExpandNames(self):
14216 self.needed_locks = {}
14218 def CheckPrereq(self):
14219 """Check prerequisites.
14221 This checks the pattern passed for validity by compiling it.
14224 try:
14225 self.re = re.compile(self.op.pattern)
14226 except re.error, err:
14227 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14228 (self.op.pattern, err), errors.ECODE_INVAL)
14230 def Exec(self, feedback_fn):
14231 """Returns the tag list.
14235 tgts = [("/cluster", cfg.GetClusterInfo())]
14236 ilist = cfg.GetAllInstancesInfo().values()
14237 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14238 nlist = cfg.GetAllNodesInfo().values()
14239 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14240 tgts.extend(("/nodegroup/%s" % n.name, n)
14241 for n in cfg.GetAllNodeGroupsInfo().values())
14243 for path, target in tgts:
14244 for tag in target.GetTags():
14245 if self.re.search(tag):
14246 results.append((path, tag))
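# Illustrative example (hypothetical names): the collected results are
# (path, tag) pairs covering cluster, instance, node and nodegroup tags, e.g.:
#   [("/cluster", "backup"), ("/instances/web1.example.com", "backup-daily")]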
14250 class LUTagsSet(TagsLU):
14251 """Sets a tag on a given object.
14256 def CheckPrereq(self):
14257 """Check prerequisites.
14259 This checks the type and length of the tag name and value.
14262 TagsLU.CheckPrereq(self)
14263 for tag in self.op.tags:
14264 objects.TaggableObject.ValidateTag(tag)
14266 def Exec(self, feedback_fn):
14270 try:
14271 for tag in self.op.tags:
14272 self.target.AddTag(tag)
14273 except errors.TagError, err:
14274 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14275 self.cfg.Update(self.target, feedback_fn)
14278 class LUTagsDel(TagsLU):
14279 """Delete a list of tags from a given object.
14284 def CheckPrereq(self):
14285 """Check prerequisites.
14287 This checks that we have the given tag.
14290 TagsLU.CheckPrereq(self)
14291 for tag in self.op.tags:
14292 objects.TaggableObject.ValidateTag(tag)
14293 del_tags = frozenset(self.op.tags)
14294 cur_tags = self.target.GetTags()
14296 diff_tags = del_tags - cur_tags
14298 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14299 raise errors.OpPrereqError("Tag(s) %s not found" %
14300 (utils.CommaJoin(diff_names), ),
14301 errors.ECODE_NOENT)
14303 def Exec(self, feedback_fn):
14304 """Remove the tag from the object.
14307 for tag in self.op.tags:
14308 self.target.RemoveTag(tag)
14309 self.cfg.Update(self.target, feedback_fn)
14312 class LUTestDelay(NoHooksLU):
14313 """Sleep for a specified amount of time.
14315 This LU sleeps on the master and/or nodes for a specified amount of
14316 time.
14321 def ExpandNames(self):
14322 """Expand names and set required locks.
14324 This expands the node list, if any.
14327 self.needed_locks = {}
14328 if self.op.on_nodes:
14329 # _GetWantedNodes can be used here, but is not always appropriate to use
14330 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14331 # more information.
14332 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14333 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14335 def _TestDelay(self):
14336 """Do the actual sleep.
14339 if self.op.on_master:
14340 if not utils.TestDelay(self.op.duration):
14341 raise errors.OpExecError("Error during master delay test")
14342 if self.op.on_nodes:
14343 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14344 for node, node_result in result.items():
14345 node_result.Raise("Failure during rpc call to node %s" % node)
14347 def Exec(self, feedback_fn):
14348 """Execute the test delay opcode, with the wanted repetitions.
14351 if self.op.repeat == 0:
14352 self._TestDelay()
14353 else:
14354 top_value = self.op.repeat - 1
14355 for i in range(self.op.repeat):
14356 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14357 self._TestDelay()
14360 class LUTestJqueue(NoHooksLU):
14361 """Utility LU to test some aspects of the job queue.
14366 # Must be lower than default timeout for WaitForJobChange to see whether it
14367 # notices changed jobs
14368 _CLIENT_CONNECT_TIMEOUT = 20.0
14369 _CLIENT_CONFIRM_TIMEOUT = 60.0
14372 def _NotifyUsingSocket(cls, cb, errcls):
14373 """Opens a Unix socket and waits for another program to connect.
14376 @param cb: Callback to send socket name to client
14377 @type errcls: class
14378 @param errcls: Exception class to use for errors
14381 # Using a temporary directory as there's no easy way to create temporary
14382 # sockets without writing a custom loop around tempfile.mktemp and
14384 tmpdir = tempfile.mkdtemp()
14386 tmpsock = utils.PathJoin(tmpdir, "sock")
14388 logging.debug("Creating temporary socket at %s", tmpsock)
14389 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14394 # Send details to client
14397 # Wait for client to connect before continuing
14398 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14400 (conn, _) = sock.accept()
14401 except socket.error, err:
14402 raise errcls("Client didn't connect in time (%s)" % err)
14406 # Remove as soon as client is connected
14407 shutil.rmtree(tmpdir)
14409 # Wait for client to close
14412 # pylint: disable=E1101
14413 # Instance of '_socketobject' has no ... member
14414 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14416 except socket.error, err:
14417 raise errcls("Client failed to confirm notification (%s)" % err)
14421 def _SendNotification(self, test, arg, sockname):
14422 """Sends a notification to the client.
14425 @param test: Test name
14426 @param arg: Test argument (depends on test)
14427 @type sockname: string
14428 @param sockname: Socket path
14431 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14433 def _Notify(self, prereq, test, arg):
14434 """Notifies the client of a test.
14437 @param prereq: Whether this is a prereq-phase test
14439 @param test: Test name
14440 @param arg: Test argument (depends on test)
14444 errcls = errors.OpPrereqError
14446 errcls = errors.OpExecError
14448 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14452 def CheckArguments(self):
14453 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14454 self.expandnames_calls = 0
14456 def ExpandNames(self):
14457 checkargs_calls = getattr(self, "checkargs_calls", 0)
14458 if checkargs_calls < 1:
14459 raise errors.ProgrammerError("CheckArguments was not called")
14461 self.expandnames_calls += 1
14463 if self.op.notify_waitlock:
14464 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14466 self.LogInfo("Expanding names")
14468 # Get lock on master node (just to get a lock, not for a particular reason)
14469 self.needed_locks = {
14470 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14473 def Exec(self, feedback_fn):
14474 if self.expandnames_calls < 1:
14475 raise errors.ProgrammerError("ExpandNames was not called")
14477 if self.op.notify_exec:
14478 self._Notify(False, constants.JQT_EXEC, None)
14480 self.LogInfo("Executing")
14482 if self.op.log_messages:
14483 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14484 for idx, msg in enumerate(self.op.log_messages):
14485 self.LogInfo("Sending log message %s", idx + 1)
14486 feedback_fn(constants.JQT_MSGPREFIX + msg)
14487 # Report how many test messages have been sent
14488 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14491 raise errors.OpExecError("Opcode failure was requested")
14496 class IAllocator(object):
14497 """IAllocator framework.
14499 An IAllocator instance has three sets of attributes:
14500 - cfg that is needed to query the cluster
14501 - input data (all members of the _KEYS class attribute are required)
14502 - four buffer attributes (in|out_data|text), that represent the
14503 input (to the external script) in text and data structure format,
14504 and the output from it, again in two formats
14505 - the result variables from the script (success, info, nodes) for
14509 # pylint: disable=R0902
14510 # lots of instance attributes
14512 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14514 self.rpc = rpc_runner
14515 # init buffer variables
14516 self.in_text = self.out_text = self.in_data = self.out_data = None
14517 # init all input fields so that pylint is happy
14519 self.memory = self.disks = self.disk_template = self.spindle_use = None
14520 self.os = self.tags = self.nics = self.vcpus = None
14521 self.hypervisor = None
14522 self.relocate_from = None
14524 self.instances = None
14525 self.evac_mode = None
14526 self.target_groups = []
14528 self.required_nodes = None
14529 # init result fields
14530 self.success = self.info = self.result = None
14533 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14535 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14536 " IAllocator" % self.mode)
14538 keyset = [n for (n, _) in keydata]
14541 if key not in keyset:
14542 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14543 " IAllocator" % key)
14544 setattr(self, key, kwargs[key])
14547 if key not in kwargs:
14548 raise errors.ProgrammerError("Missing input parameter '%s' to"
14549 " IAllocator" % key)
14550 self._BuildInputData(compat.partial(fn, self), keydata)
14552 def _ComputeClusterData(self):
14553 """Compute the generic allocator input data.
14555 This is the data that is independent of the actual operation.
14559 cluster_info = cfg.GetClusterInfo()
14562 "version": constants.IALLOCATOR_VERSION,
14563 "cluster_name": cfg.GetClusterName(),
14564 "cluster_tags": list(cluster_info.GetTags()),
14565 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14566 "ipolicy": cluster_info.ipolicy,
14568 ninfo = cfg.GetAllNodesInfo()
14569 iinfo = cfg.GetAllInstancesInfo().values()
14570 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14573 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14575 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14576 hypervisor_name = self.hypervisor
14577 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14578 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14580 hypervisor_name = cluster_info.primary_hypervisor
14582 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14585 self.rpc.call_all_instances_info(node_list,
14586 cluster_info.enabled_hypervisors)
14588 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14590 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14591 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14592 i_list, config_ndata)
14593 assert len(data["nodes"]) == len(ninfo), \
14594 "Incomplete node data computed"
14596 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14598 self.in_data = data
14601 def _ComputeNodeGroupData(cfg):
14602 """Compute node groups data.
14605 cluster = cfg.GetClusterInfo()
14606 ng = dict((guuid, {
14607 "name": gdata.name,
14608 "alloc_policy": gdata.alloc_policy,
14609 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14611 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
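# Illustrative example (hypothetical values): the "nodegroups" section of the
# iallocator input built above maps group UUIDs to their exported attributes,
# e.g.:
#   {"g1-uuid": {"name": "default", "alloc_policy": "preferred",
#                "ipolicy": {...}}}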
14616 def _ComputeBasicNodeData(cfg, node_cfg):
14617 """Compute global node data.
14620 @returns: a dict of name: (node dict, node config)
14623 # fill in static (config-based) values
14624 node_results = dict((ninfo.name, {
14625 "tags": list(ninfo.GetTags()),
14626 "primary_ip": ninfo.primary_ip,
14627 "secondary_ip": ninfo.secondary_ip,
14628 "offline": ninfo.offline,
14629 "drained": ninfo.drained,
14630 "master_candidate": ninfo.master_candidate,
14631 "group": ninfo.group,
14632 "master_capable": ninfo.master_capable,
14633 "vm_capable": ninfo.vm_capable,
14634 "ndparams": cfg.GetNdParams(ninfo),
14636 for ninfo in node_cfg.values())
14638 return node_results
14640 @staticmethod
14641 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14642 node_results):
14643 """Complete the node data with dynamic (runtime) information.
14645 @param node_results: the basic node structures as filled from the config
14647 """
14648 #TODO(dynmem): compute the right data on MAX and MIN memory
14649 # make a copy of the current dict
14650 node_results = dict(node_results)
14651 for nname, nresult in node_data.items():
14652 assert nname in node_results, "Missing basic data for node %s" % nname
14653 ninfo = node_cfg[nname]
14655 if not (ninfo.offline or ninfo.drained):
14656 nresult.Raise("Can't get data for node %s" % nname)
14657 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14658 nname)
14659 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14661 for attr in ["memory_total", "memory_free", "memory_dom0",
14662 "vg_size", "vg_free", "cpu_total"]:
14663 if attr not in remote_info:
14664 raise errors.OpExecError("Node '%s' didn't return attribute"
14665 " '%s'" % (nname, attr))
14666 if not isinstance(remote_info[attr], int):
14667 raise errors.OpExecError("Node '%s' returned invalid value"
14668 " for '%s': %s" %
14669 (nname, attr, remote_info[attr]))
14670 # compute memory used by primary instances
14671 i_p_mem = i_p_up_mem = 0
14672 for iinfo, beinfo in i_list:
14673 if iinfo.primary_node == nname:
14674 i_p_mem += beinfo[constants.BE_MAXMEM]
14675 if iinfo.name not in node_iinfo[nname].payload:
14676 i_used_mem = 0
14677 else:
14678 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14679 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14680 remote_info["memory_free"] -= max(0, i_mem_diff)
14682 if iinfo.admin_state == constants.ADMINST_UP:
14683 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14685 # assemble the per-node dynamic resource data
14686 pnr_dyn = {
14687 "total_memory": remote_info["memory_total"],
14688 "reserved_memory": remote_info["memory_dom0"],
14689 "free_memory": remote_info["memory_free"],
14690 "total_disk": remote_info["vg_size"],
14691 "free_disk": remote_info["vg_free"],
14692 "total_cpus": remote_info["cpu_total"],
14693 "i_pri_memory": i_p_mem,
14694 "i_pri_up_memory": i_p_up_mem,
14696 pnr_dyn.update(node_results[nname])
14697 node_results[nname] = pnr_dyn
14699 return node_results
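# Worked example of the free-memory adjustment above (illustrative numbers):
# if a primary instance has BE_MAXMEM of 1024 MiB but the hypervisor reports
# only 512 MiB in use for it, the node's reported "memory_free" is lowered by
# max(0, 1024 - 512) = 512 MiB, so the allocator treats the memory that the
# instance may still legitimately claim as unavailable.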
14701 @staticmethod
14702 def _ComputeInstanceData(cluster_info, i_list):
14703 """Compute global instance data.
14705 """
14706 instance_data = {}
14707 for iinfo, beinfo in i_list:
14708 nic_data = []
14709 for nic in iinfo.nics:
14710 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14711 nic_dict = {
14712 "mac": nic.mac,
14713 "ip": nic.ip,
14714 "mode": filled_params[constants.NIC_MODE],
14715 "link": filled_params[constants.NIC_LINK],
14717 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14718 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14719 nic_data.append(nic_dict)
14720 pir = {
14721 "tags": list(iinfo.GetTags()),
14722 "admin_state": iinfo.admin_state,
14723 "vcpus": beinfo[constants.BE_VCPUS],
14724 "memory": beinfo[constants.BE_MAXMEM],
14725 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14727 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14729 "disks": [{constants.IDISK_SIZE: dsk.size,
14730 constants.IDISK_MODE: dsk.mode}
14731 for dsk in iinfo.disks],
14732 "disk_template": iinfo.disk_template,
14733 "hypervisor": iinfo.hypervisor,
14735 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14737 instance_data[iinfo.name] = pir
14739 return instance_data
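# Illustrative shape of a single entry in the returned dict (values made up):
#   instance_data["inst1.example.com"] = {
#     "tags": [], "admin_state": "up", "vcpus": 1, "memory": 512,
#     "spindle_use": 1, "os": "debian-image", "nodes": ["node1", "node2"],
#     "nics": [{"mac": ..., "ip": None, "mode": "bridged", "link": ...}],
#     "disks": [{"size": 1024, "mode": "rw"}], "disk_template": "drbd",
#     "hypervisor": "xen-pvm",
#     "disk_space_total": <disk sizes plus mirroring/metadata overhead>}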
14741 def _AddNewInstance(self):
14742 """Add new instance data to allocator structure.
14744 This, in combination with _ComputeClusterData, will create the
14745 correct structure needed as input for the allocator.
14747 The checks for the completeness of the opcode must have already been
14748 done.
14750 """
14751 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14753 if self.disk_template in constants.DTS_INT_MIRROR:
14754 self.required_nodes = 2
14755 else:
14756 self.required_nodes = 1
14760 "disk_template": self.disk_template,
14763 "vcpus": self.vcpus,
14764 "memory": self.memory,
14765 "spindle_use": self.spindle_use,
14766 "disks": self.disks,
14767 "disk_space_total": disk_space,
14769 "required_nodes": self.required_nodes,
14770 "hypervisor": self.hypervisor,
14775 def _AddRelocateInstance(self):
14776 """Add relocate instance data to allocator structure.
14778 This, in combination with _ComputeClusterData, will create the
14779 correct structure needed as input for the allocator.
14781 The checks for the completeness of the opcode must have already been
14782 done.
14784 """
14785 instance = self.cfg.GetInstanceInfo(self.name)
14786 if instance is None:
14787 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14788 " IAllocator" % self.name)
14790 if instance.disk_template not in constants.DTS_MIRRORED:
14791 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14792 errors.ECODE_INVAL)
14794 if instance.disk_template in constants.DTS_INT_MIRROR and \
14795 len(instance.secondary_nodes) != 1:
14796 raise errors.OpPrereqError("Instance does not have exactly one"
14797 " secondary node", errors.ECODE_STATE)
14799 self.required_nodes = 1
14800 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14801 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14805 "disk_space_total": disk_space,
14806 "required_nodes": self.required_nodes,
14807 "relocate_from": self.relocate_from,
14811 def _AddNodeEvacuate(self):
14812 """Get data for node-evacuate requests.
14816 "instances": self.instances,
14817 "evac_mode": self.evac_mode,
14820 def _AddChangeGroup(self):
14821 """Get data for node-evacuate requests.
14825 "instances": self.instances,
14826 "target_groups": self.target_groups,
14829 def _BuildInputData(self, fn, keydata):
14830 """Build input data structures.
14833 self._ComputeClusterData()
14836 request["type"] = self.mode
14837 for keyname, keytype in keydata:
14838 if keyname not in request:
14839 raise errors.ProgrammerError("Request parameter %s is missing" %
14840 keyname)
14841 val = request[keyname]
14842 if not keytype(val):
14843 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14844 " validation, value %s, expected"
14845 " type %s" % (keyname, val, keytype))
14846 self.in_data["request"] = request
14848 self.in_text = serializer.Dump(self.in_data)
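# Sketch of the final request for an allocation run (illustrative values):
#   self.in_data["request"] = {"type": "allocate", "name": "inst1.example.com",
#                              "memory": 512, "vcpus": 1, "disks": [...],
#                              "required_nodes": 2, ...}
# self.in_text is simply the serialized form of self.in_data, which is what
# gets passed to the external iallocator script.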
14850 _STRING_LIST = ht.TListOf(ht.TString)
14851 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14852 # pylint: disable=E1101
14853 # Class '...' has no 'OP_ID' member
14854 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14855 opcodes.OpInstanceMigrate.OP_ID,
14856 opcodes.OpInstanceReplaceDisks.OP_ID])
14857 })))
14859 _NEVAC_MOVED = \
14860 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14861 ht.TItems([ht.TNonEmptyString,
14862 ht.TNonEmptyString,
14863 ht.TListOf(ht.TNonEmptyString),
14864 ])))
14865 _NEVAC_FAILED = \
14866 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14867 ht.TItems([ht.TNonEmptyString,
14868 ht.TMaybeString,
14869 ])))
14870 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14871 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
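# _MODE_DATA maps each allocator mode to a triple used by __init__ and
# _ValidateResult: (request-building method, list of (input key, validator)
# pairs, validator for the "result" field of the allocator's reply).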
14873 _MODE_DATA = {
14874 constants.IALLOCATOR_MODE_ALLOC:
14875 (_AddNewInstance,
14876 [
14877 ("name", ht.TString),
14878 ("memory", ht.TInt),
14879 ("spindle_use", ht.TInt),
14880 ("disks", ht.TListOf(ht.TDict)),
14881 ("disk_template", ht.TString),
14882 ("os", ht.TString),
14883 ("tags", _STRING_LIST),
14884 ("nics", ht.TListOf(ht.TDict)),
14885 ("vcpus", ht.TInt),
14886 ("hypervisor", ht.TString),
14888 constants.IALLOCATOR_MODE_RELOC:
14889 (_AddRelocateInstance,
14890 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14892 constants.IALLOCATOR_MODE_NODE_EVAC:
14893 (_AddNodeEvacuate, [
14894 ("instances", _STRING_LIST),
14895 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14897 constants.IALLOCATOR_MODE_CHG_GROUP:
14898 (_AddChangeGroup, [
14899 ("instances", _STRING_LIST),
14900 ("target_groups", _STRING_LIST),
14904 def Run(self, name, validate=True, call_fn=None):
14905 """Run an instance allocator and return the results.
14908 if call_fn is None:
14909 call_fn = self.rpc.call_iallocator_runner
14911 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14912 result.Raise("Failure while running the iallocator script")
14914 self.out_text = result.payload
14915 if validate:
14916 self._ValidateResult()
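# Typical use from a logical unit (hedged sketch, not quoted from this file):
#   ial = IAllocator(self.cfg, self.rpc, mode, **kwargs)
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info, ...)
#   nodes = ial.result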
14918 def _ValidateResult(self):
14919 """Process the allocator results.
14921 This will process and, if successful, save the result in
14922 self.out_data and the other output parameters.
14924 """
14925 try:
14926 rdict = serializer.Load(self.out_text)
14927 except Exception, err:
14928 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14930 if not isinstance(rdict, dict):
14931 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14933 # TODO: remove backwards compatibility in later versions
14934 if "nodes" in rdict and "result" not in rdict:
14935 rdict["result"] = rdict["nodes"]
14936 del rdict["nodes"]
14938 for key in "success", "info", "result":
14939 if key not in rdict:
14940 raise errors.OpExecError("Can't parse iallocator results:"
14941 " missing key '%s'" % key)
14942 setattr(self, key, rdict[key])
14944 if not self._result_check(self.result):
14945 raise errors.OpExecError("Iallocator returned invalid result,"
14946 " expected %s, got %s" %
14947 (self._result_check, self.result),
14948 errors.ECODE_INVAL)
14950 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14951 assert self.relocate_from is not None
14952 assert self.required_nodes == 1
14954 node2group = dict((name, ndata["group"])
14955 for (name, ndata) in self.in_data["nodes"].items())
14957 fn = compat.partial(self._NodesToGroups, node2group,
14958 self.in_data["nodegroups"])
14960 instance = self.cfg.GetInstanceInfo(self.name)
14961 request_groups = fn(self.relocate_from + [instance.primary_node])
14962 result_groups = fn(rdict["result"] + [instance.primary_node])
14964 if self.success and not set(result_groups).issubset(request_groups):
14965 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14966 " differ from original groups (%s)" %
14967 (utils.CommaJoin(result_groups),
14968 utils.CommaJoin(request_groups)))
14970 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14971 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14973 self.out_data = rdict
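# Illustrative allocator reply for an allocation request (made-up values):
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
# The "result" field must satisfy the mode's validator from _MODE_DATA; for
# node-evacuate and change-group requests that means a (moved, failed, jobs)
# triple matching _NEVAC_RESULT.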
14975 @staticmethod
14976 def _NodesToGroups(node2group, groups, nodes):
14977 """Returns a list of unique group names for a list of nodes.
14979 @type node2group: dict
14980 @param node2group: Map from node name to group UUID
14981 @type groups: dict
14982 @param groups: Group information
14983 @type nodes: list
14984 @param nodes: Node names
14986 """
14987 result = set()
14989 for node in nodes:
14990 try:
14991 group_uuid = node2group[node]
14992 except KeyError:
14993 # Ignore unknown node
14994 pass
14995 else:
14996 try:
14997 group = groups[group_uuid]
14998 except KeyError:
14999 # Can't find group, let's use UUID
15000 group_name = group_uuid
15001 else:
15002 group_name = group["name"]
15004 result.add(group_name)
15006 return sorted(result)
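# Illustrative call (made-up names and UUIDs):
#   _NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
#                  {"uuid-a": {"name": "group1"}},
#                  ["node1", "node2", "node3"])
# returns ["group1", "uuid-b"]: node3 is silently ignored (unknown node) and
# uuid-b is used verbatim because no group data exists for it.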
15009 class LUTestAllocator(NoHooksLU):
15010 """Run allocator tests.
15012 This LU runs the allocator tests.
15014 """
15015 def CheckPrereq(self):
15016 """Check prerequisites.
15018 This checks the opcode parameters depending on the direction and mode of the test.
15020 """
15021 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15022 for attr in ["memory", "disks", "disk_template",
15023 "os", "tags", "nics", "vcpus"]:
15024 if not hasattr(self.op, attr):
15025 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15026 attr, errors.ECODE_INVAL)
15027 iname = self.cfg.ExpandInstanceName(self.op.name)
15028 if iname is not None:
15029 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15030 iname, errors.ECODE_EXISTS)
15031 if not isinstance(self.op.nics, list):
15032 raise errors.OpPrereqError("Invalid parameter 'nics'",
15033 errors.ECODE_INVAL)
15034 if not isinstance(self.op.disks, list):
15035 raise errors.OpPrereqError("Invalid parameter 'disks'",
15036 errors.ECODE_INVAL)
15037 for row in self.op.disks:
15038 if (not isinstance(row, dict) or
15039 constants.IDISK_SIZE not in row or
15040 not isinstance(row[constants.IDISK_SIZE], int) or
15041 constants.IDISK_MODE not in row or
15042 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15043 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15044 " parameter", errors.ECODE_INVAL)
15045 if self.op.hypervisor is None:
15046 self.op.hypervisor = self.cfg.GetHypervisorType()
15047 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15048 fname = _ExpandInstanceName(self.cfg, self.op.name)
15049 self.op.name = fname
15050 self.relocate_from = \
15051 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15052 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15053 constants.IALLOCATOR_MODE_NODE_EVAC):
15054 if not self.op.instances:
15055 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15056 self.op.instances = _GetWantedInstances(self, self.op.instances)
15057 else:
15058 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15059 self.op.mode, errors.ECODE_INVAL)
15061 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15062 if self.op.allocator is None:
15063 raise errors.OpPrereqError("Missing allocator name",
15064 errors.ECODE_INVAL)
15065 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15066 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15067 self.op.direction, errors.ECODE_INVAL)
15069 def Exec(self, feedback_fn):
15070 """Run the allocator test.
15073 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15074 ial = IAllocator(self.cfg, self.rpc,
15075 mode=self.op.mode,
15076 name=self.op.name,
15077 memory=self.op.memory,
15078 disks=self.op.disks,
15079 disk_template=self.op.disk_template,
15080 os=self.op.os,
15081 tags=self.op.tags,
15082 nics=self.op.nics,
15083 vcpus=self.op.vcpus,
15084 hypervisor=self.op.hypervisor,
15085 )
15086 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15087 ial = IAllocator(self.cfg, self.rpc,
15088 mode=self.op.mode,
15089 name=self.op.name,
15090 relocate_from=list(self.relocate_from),
15091 )
15092 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15093 ial = IAllocator(self.cfg, self.rpc,
15094 mode=self.op.mode,
15095 instances=self.op.instances,
15096 target_groups=self.op.target_groups)
15097 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15098 ial = IAllocator(self.cfg, self.rpc,
15099 mode=self.op.mode,
15100 instances=self.op.instances,
15101 evac_mode=self.op.evac_mode)
15102 else:
15103 raise errors.ProgrammerError("Unhandled mode %s in"
15104 " LUTestAllocator.Exec", self.op.mode)
15106 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15107 result = ial.in_text
15108 else:
15109 ial.Run(self.op.allocator, validate=False)
15110 result = ial.out_text
15111 return result
15114 #: Query type implementations
15115 _QUERY_IMPL = {
15116 constants.QR_INSTANCE: _InstanceQuery,
15117 constants.QR_NODE: _NodeQuery,
15118 constants.QR_GROUP: _GroupQuery,
15119 constants.QR_OS: _OsQuery,
15120 }
15122 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15125 def _GetQueryImplementation(name):
15126 """Returns the implemtnation for a query type.
15128 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15132 return _QUERY_IMPL[name]
15134 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15135 errors.ECODE_INVAL)