4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
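Illustrative use from an LU's Exec (a sketch only; the opcode and the extra
keyword used here are assumptions, not taken from this module)::

  return ResultWithJobs([[opcodes.OpTestDelay(duration=1)]],
                        info="follow-up job submitted")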
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods need no longer worry about missing parameters.
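Illustrative sketch (the opcode attribute names are hypothetical, not taken
from this module)::

  def CheckArguments(self):
    if self.op.force and not self.op.ignore_consistency:
      raise errors.OpPrereqError("force requires ignore_consistency",
                                 errors.ECODE_INVAL)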
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
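# Share node locks instead of acquiring them exclusively (sketch):
self.share_locks[locking.LEVEL_NODE] = 1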
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
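A minimal sketch, mirroring the pattern described in
L{LogicalUnit._LockInstancesNodes}::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)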
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. No nodes should be returned as an
318 empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused argument and the
345 # "could be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
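# Illustrative call pattern from an instance LU's ExpandNames (sketch only;
# the LOCKS_REPLACE step is an assumption about the caller's needs):
#   self._ExpandAndLockInstance()
#   self.needed_locks[locking.LEVEL_NODE] = []
#   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE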
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
496 def __init__(self, qfilter, fields, use_locking):
497 """Initializes this class.
500 self.use_locking = use_locking
502 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
504 self.requested_data = self.query.RequestedData()
505 self.names = self.query.RequestedNames()
507 # Sort only if no names were requested
508 self.sort_by_name = not self.names
510 self.do_locking = None
513 def _GetNames(self, lu, all_names, lock_level):
514 """Helper function to determine names asked for in the query.
518 names = lu.owned_locks(lock_level)
522 if self.wanted == locking.ALL_SET:
523 assert not self.names
524 # caller didn't specify names, so ordering is not important
525 return utils.NiceSort(names)
527 # caller specified names and we must keep the same order
529 assert not self.do_locking or lu.glm.is_owned(lock_level)
531 missing = set(self.wanted).difference(names)
533 raise errors.OpExecError("Some items were removed before retrieving"
534 " their data: %s" % missing)
536 # Return expanded names
539 def ExpandNames(self, lu):
540 """Expand names for this query.
542 See L{LogicalUnit.ExpandNames}.
545 raise NotImplementedError()
547 def DeclareLocks(self, lu, level):
548 """Declare locks for this query.
550 See L{LogicalUnit.DeclareLocks}.
553 raise NotImplementedError()
555 def _GetQueryData(self, lu):
556 """Collects all data for this query.
558 @return: Query data object
561 raise NotImplementedError()
563 def NewStyleQuery(self, lu):
564 """Collect data and execute query.
567 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
568 sort_by_name=self.sort_by_name)
570 def OldStyleQuery(self, lu):
571 """Collect data and execute query.
574 return self.query.OldStyleQuery(self._GetQueryData(lu),
575 sort_by_name=self.sort_by_name)
579 """Returns a dict declaring all lock levels shared.
582 return dict.fromkeys(locking.LEVELS, 1)
585 def _MakeLegacyNodeInfo(data):
586 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
588 Converts the data into a single dictionary. This is fine for most use cases,
589 but some require information from more than one volume group or hypervisor.
592 (bootid, (vg_info, ), (hv_info, )) = data
594 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
599 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
600 """Checks if the owned node groups are still correct for an instance.
602 @type cfg: L{config.ConfigWriter}
603 @param cfg: The cluster configuration
604 @type instance_name: string
605 @param instance_name: Instance name
606 @type owned_groups: set or frozenset
607 @param owned_groups: List of currently owned node groups
610 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
612 if not owned_groups.issuperset(inst_groups):
613 raise errors.OpPrereqError("Instance %s's node groups changed since"
614 " locks were acquired, current groups are"
615 " are '%s', owning groups '%s'; retry the"
618 utils.CommaJoin(inst_groups),
619 utils.CommaJoin(owned_groups)),
625 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
626 """Checks if the instances in a node group are still correct.
628 @type cfg: L{config.ConfigWriter}
629 @param cfg: The cluster configuration
630 @type group_uuid: string
631 @param group_uuid: Node group UUID
632 @type owned_instances: set or frozenset
633 @param owned_instances: List of currently owned instances
636 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
637 if owned_instances != wanted_instances:
638 raise errors.OpPrereqError("Instances in node group '%s' changed since"
639 " locks were acquired, wanted '%s', have '%s';"
640 " retry the operation" %
642 utils.CommaJoin(wanted_instances),
643 utils.CommaJoin(owned_instances)),
646 return wanted_instances
649 def _SupportsOob(cfg, node):
650 """Tells if node supports OOB.
652 @type cfg: L{config.ConfigWriter}
653 @param cfg: The cluster configuration
654 @type node: L{objects.Node}
655 @param node: The node
656 @return: The OOB script if supported or an empty string otherwise
659 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
662 def _GetWantedNodes(lu, nodes):
663 """Returns list of checked and expanded node names.
665 @type lu: L{LogicalUnit}
666 @param lu: the logical unit on whose behalf we execute
668 @param nodes: list of node names or None for all nodes
670 @return: the list of nodes, sorted
671 @raise errors.ProgrammerError: if the nodes parameter is wrong type
675 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
677 return utils.NiceSort(lu.cfg.GetNodeList())
680 def _GetWantedInstances(lu, instances):
681 """Returns list of checked and expanded instance names.
683 @type lu: L{LogicalUnit}
684 @param lu: the logical unit on whose behalf we execute
685 @type instances: list
686 @param instances: list of instance names or None for all instances
688 @return: the list of instances, sorted
689 @raise errors.OpPrereqError: if the instances parameter is wrong type
690 @raise errors.OpPrereqError: if any of the passed instances is not found
694 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
696 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
700 def _GetUpdatedParams(old_params, update_dict,
701 use_default=True, use_none=False):
702 """Return the new version of a parameter dictionary.
704 @type old_params: dict
705 @param old_params: old parameters
706 @type update_dict: dict
707 @param update_dict: dict containing new parameter values, or
708 constants.VALUE_DEFAULT to reset the parameter to its default
710 @type use_default: boolean
711 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
712 values as 'to be deleted' values
713 @type use_none: boolean
714 @param use_none: whether to recognise C{None} values as 'to be
717 @return: the new parameter dictionary
720 params_copy = copy.deepcopy(old_params)
721 for key, val in update_dict.iteritems():
722 if ((use_default and val == constants.VALUE_DEFAULT) or
723 (use_none and val is None)):
729 params_copy[key] = val
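# Illustrative behaviour (hypothetical values): with use_default=True a
# constants.VALUE_DEFAULT entry removes the key, other entries overwrite it:
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   -> {"b": 2, "c": 3}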
733 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
734 """Return the new version of a instance policy.
736 @param group_policy: whether this policy applies to a group and thus
737 we should support removal of policy entries
740 use_none = use_default = group_policy
741 ipolicy = copy.deepcopy(old_ipolicy)
742 for key, value in new_ipolicy.items():
743 if key not in constants.IPOLICY_ALL_KEYS:
744 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
746 if key in constants.IPOLICY_ISPECS:
747 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
748 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
750 use_default=use_default)
752 if not value or value == [constants.VALUE_DEFAULT]:
756 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
757 " on the cluster'" % key,
760 if key in constants.IPOLICY_PARAMETERS:
761 # FIXME: we assume all such values are float
763 ipolicy[key] = float(value)
764 except (TypeError, ValueError), err:
765 raise errors.OpPrereqError("Invalid value for attribute"
766 " '%s': '%s', error: %s" %
767 (key, value, err), errors.ECODE_INVAL)
769 # FIXME: we assume all others are lists; this should be redone
771 ipolicy[key] = list(value)
773 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
774 except errors.ConfigurationError, err:
775 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
780 def _UpdateAndVerifySubDict(base, updates, type_check):
781 """Updates and verifies a dict with sub dicts of the same type.
783 @param base: The dict with the old data
784 @param updates: The dict with the new data
785 @param type_check: Dict suitable to ForceDictType to verify correct types
786 @returns: A new dict with updated and verified values
790 new = _GetUpdatedParams(old, value)
791 utils.ForceDictType(new, type_check)
794 ret = copy.deepcopy(base)
795 ret.update(dict((key, fn(base.get(key, {}), value))
796 for key, value in updates.items()))
800 def _MergeAndVerifyHvState(op_input, obj_input):
801 """Combines the hv state from an opcode with the one of the object
803 @param op_input: The input dict from the opcode
804 @param obj_input: The input dict from the objects
805 @return: The verified and updated dict
809 invalid_hvs = set(op_input) - constants.HYPER_TYPES
811 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
812 " %s" % utils.CommaJoin(invalid_hvs),
814 if obj_input is None:
816 type_check = constants.HVSTS_PARAMETER_TYPES
817 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
822 def _MergeAndVerifyDiskState(op_input, obj_input):
823 """Combines the disk state from an opcode with the one of the object
825 @param op_input: The input dict from the opcode
826 @param obj_input: The input dict from the objects
827 @return: The verified and updated dict
830 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
832 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
833 utils.CommaJoin(invalid_dst),
835 type_check = constants.DSS_PARAMETER_TYPES
836 if obj_input is None:
838 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
840 for key, value in op_input.items())
845 def _ReleaseLocks(lu, level, names=None, keep=None):
846 """Releases locks owned by an LU.
848 @type lu: L{LogicalUnit}
849 @param level: Lock level
850 @type names: list or None
851 @param names: Names of locks to release
852 @type keep: list or None
853 @param keep: Names of locks to retain
856 assert not (keep is not None and names is not None), \
857 "Only one of the 'names' and the 'keep' parameters can be given"
859 if names is not None:
860 should_release = names.__contains__
862 should_release = lambda name: name not in keep
864 should_release = None
866 owned = lu.owned_locks(level)
868 # Not owning any lock at this level, do nothing
875 # Determine which locks to release
877 if should_release(name):
882 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
884 # Release just some locks
885 lu.glm.release(level, names=release)
887 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
890 lu.glm.release(level)
892 assert not lu.glm.is_owned(level), "No locks should be owned"
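# Usage sketch (the lock name is hypothetical): keep only the node lock that
# is still needed and release everything else at that level:
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])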
895 def _MapInstanceDisksToNodes(instances):
896 """Creates a map from (node, volume) to instance name.
898 @type instances: list of L{objects.Instance}
899 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
902 return dict(((node, vol), inst.name)
903 for inst in instances
904 for (node, vols) in inst.MapLVsByNode().items()
908 def _RunPostHook(lu, node_name):
909 """Runs the post-hook for an opcode on a single node.
912 hm = lu.proc.BuildHooksManager(lu)
914 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
916 # pylint: disable=W0702
917 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
920 def _CheckOutputFields(static, dynamic, selected):
921 """Checks whether all selected fields are valid.
923 @type static: L{utils.FieldSet}
924 @param static: static fields set
925 @type dynamic: L{utils.FieldSet}
926 @param dynamic: dynamic fields set
933 delta = f.NonMatching(selected)
935 raise errors.OpPrereqError("Unknown output fields selected: %s"
936 % ",".join(delta), errors.ECODE_INVAL)
939 def _CheckGlobalHvParams(params):
940 """Validates that given hypervisor params are not global ones.
942 This will ensure that instances don't get customised versions of
946 used_globals = constants.HVC_GLOBALS.intersection(params)
948 msg = ("The following hypervisor parameters are global and cannot"
949 " be customized at instance level, please modify them at"
950 " cluster level: %s" % utils.CommaJoin(used_globals))
951 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
954 def _CheckNodeOnline(lu, node, msg=None):
955 """Ensure that a given node is online.
957 @param lu: the LU on behalf of which we make the check
958 @param node: the node to check
959 @param msg: if passed, should be a message to replace the default one
960 @raise errors.OpPrereqError: if the node is offline
964 msg = "Can't use offline node"
965 if lu.cfg.GetNodeInfo(node).offline:
966 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
969 def _CheckNodeNotDrained(lu, node):
970 """Ensure that a given node is not drained.
972 @param lu: the LU on behalf of which we make the check
973 @param node: the node to check
974 @raise errors.OpPrereqError: if the node is drained
977 if lu.cfg.GetNodeInfo(node).drained:
978 raise errors.OpPrereqError("Can't use drained node %s" % node,
982 def _CheckNodeVmCapable(lu, node):
983 """Ensure that a given node is vm capable.
985 @param lu: the LU on behalf of which we make the check
986 @param node: the node to check
987 @raise errors.OpPrereqError: if the node is not vm capable
990 if not lu.cfg.GetNodeInfo(node).vm_capable:
991 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
995 def _CheckNodeHasOS(lu, node, os_name, force_variant):
996 """Ensure that a node supports a given OS.
998 @param lu: the LU on behalf of which we make the check
999 @param node: the node to check
1000 @param os_name: the OS to query about
1001 @param force_variant: whether to ignore variant errors
1002 @raise errors.OpPrereqError: if the node does not support the OS
1005 result = lu.rpc.call_os_get(node, os_name)
1006 result.Raise("OS '%s' not in supported OS list for node %s" %
1008 prereq=True, ecode=errors.ECODE_INVAL)
1009 if not force_variant:
1010 _CheckOSVariant(result.payload, os_name)
1013 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1014 """Ensure that a node has the given secondary ip.
1016 @type lu: L{LogicalUnit}
1017 @param lu: the LU on behalf of which we make the check
1019 @param node: the node to check
1020 @type secondary_ip: string
1021 @param secondary_ip: the ip to check
1022 @type prereq: boolean
1023 @param prereq: whether to throw a prerequisite or an execute error
1024 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1025 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1028 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1029 result.Raise("Failure checking secondary ip on node %s" % node,
1030 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1031 if not result.payload:
1032 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1033 " please fix and re-run this command" % secondary_ip)
1035 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1037 raise errors.OpExecError(msg)
1040 def _GetClusterDomainSecret():
1041 """Reads the cluster domain secret.
1044 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1048 def _CheckInstanceState(lu, instance, req_states, msg=None):
1049 """Ensure that an instance is in one of the required states.
1051 @param lu: the LU on behalf of which we make the check
1052 @param instance: the instance to check
1053 @param msg: if passed, should be a message to replace the default one
1054 @raise errors.OpPrereqError: if the instance is not in the required state
1058 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1059 if instance.admin_state not in req_states:
1060 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1061 (instance.name, instance.admin_state, msg),
1064 if constants.ADMINST_UP not in req_states:
1065 pnode = instance.primary_node
1066 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1067 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1068 prereq=True, ecode=errors.ECODE_ENVIRON)
1070 if instance.name in ins_l.payload:
1071 raise errors.OpPrereqError("Instance %s is running, %s" %
1072 (instance.name, msg), errors.ECODE_STATE)
1075 def _ComputeMinMaxSpec(name, ipolicy, value):
1076 """Computes if value is in the desired range.
1078 @param name: name of the parameter for which we perform the check
1079 @param ipolicy: dictionary containing min, max and std values
1080 @param value: actual value that we want to use
1081 @return: None or element not meeting the criteria
1085 if value in [None, constants.VALUE_AUTO]:
1087 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1088 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1089 if value > max_v or min_v > value:
1090 return ("%s value %s is not in range [%s, %s]" %
1091 (name, value, min_v, max_v))
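# Illustrative use (the ipolicy dict and the violations list are assumed):
# returns None when the value is inside [min, max], otherwise a
# human-readable violation message:
#   err = _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 128)
#   if err:
#     violations.append(err)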
1095 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1096 nic_count, disk_sizes,
1097 _compute_fn=_ComputeMinMaxSpec):
1098 """Verifies ipolicy against provided specs.
1101 @param ipolicy: The ipolicy
1103 @param mem_size: The memory size
1104 @type cpu_count: int
1105 @param cpu_count: Used cpu cores
1106 @type disk_count: int
1107 @param disk_count: Number of disks used
1108 @type nic_count: int
1109 @param nic_count: Number of nics used
1110 @type disk_sizes: list of ints
1111 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1112 @param _compute_fn: The compute function (unittest only)
1113 @return: A list of violations, or an empty list if no violations are found
1116 assert disk_count == len(disk_sizes)
1119 (constants.ISPEC_MEM_SIZE, mem_size),
1120 (constants.ISPEC_CPU_COUNT, cpu_count),
1121 (constants.ISPEC_DISK_COUNT, disk_count),
1122 (constants.ISPEC_NIC_COUNT, nic_count),
1123 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1126 (_compute_fn(name, ipolicy, value)
1127 for (name, value) in test_settings))
1130 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1131 _compute_fn=_ComputeIPolicySpecViolation):
1132 """Compute if instance meets the specs of ipolicy.
1135 @param ipolicy: The ipolicy to verify against
1136 @type instance: L{objects.Instance}
1137 @param instance: The instance to verify
1138 @param _compute_fn: The function to verify ipolicy (unittest only)
1139 @see: L{_ComputeIPolicySpecViolation}
1142 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1143 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1144 disk_count = len(instance.disks)
1145 disk_sizes = [disk.size for disk in instance.disks]
1146 nic_count = len(instance.nics)
1148 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1152 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1153 _compute_fn=_ComputeIPolicySpecViolation):
1154 """Compute if instance specs meets the specs of ipolicy.
1157 @param ipolicy: The ipolicy to verify against
1158 @type instance_spec: dict
1159 @param instance_spec: The instance spec to verify
1160 @param _compute_fn: The function to verify ipolicy (unittest only)
1161 @see: L{_ComputeIPolicySpecViolation}
1164 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1165 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1166 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1167 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1168 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1170 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1174 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1176 _compute_fn=_ComputeIPolicyInstanceViolation):
1177 """Compute if instance meets the specs of the new target group.
1179 @param ipolicy: The ipolicy to verify
1180 @param instance: The instance object to verify
1181 @param current_group: The current group of the instance
1182 @param target_group: The new group of the instance
1183 @param _compute_fn: The function to verify ipolicy (unittest only)
1184 @see: L{_ComputeIPolicySpecViolation}
1187 if current_group == target_group:
1190 return _compute_fn(ipolicy, instance)
1193 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1194 _compute_fn=_ComputeIPolicyNodeViolation):
1195 """Checks that the target node is correct in terms of instance policy.
1197 @param ipolicy: The ipolicy to verify
1198 @param instance: The instance object to verify
1199 @param node: The new node to relocate
1200 @param ignore: Ignore violations of the ipolicy
1201 @param _compute_fn: The function to verify ipolicy (unittest only)
1202 @see: L{_ComputeIPolicySpecViolation}
1205 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1206 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1209 msg = ("Instance does not meet target node group's (%s) instance"
1210 " policy: %s") % (node.group, utils.CommaJoin(res))
1214 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1217 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1218 """Computes a set of any instances that would violate the new ipolicy.
1220 @param old_ipolicy: The current (still in-place) ipolicy
1221 @param new_ipolicy: The new (to become) ipolicy
1222 @param instances: List of instances to verify
1223 @return: A list of instances which violate the new ipolicy but did not before
1226 return (_ComputeViolatingInstances(old_ipolicy, instances) -
1227 _ComputeViolatingInstances(new_ipolicy, instances))
1230 def _ExpandItemName(fn, name, kind):
1231 """Expand an item name.
1233 @param fn: the function to use for expansion
1234 @param name: requested item name
1235 @param kind: text description ('Node' or 'Instance')
1236 @return: the resolved (full) name
1237 @raise errors.OpPrereqError: if the item is not found
1240 full_name = fn(name)
1241 if full_name is None:
1242 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1247 def _ExpandNodeName(cfg, name):
1248 """Wrapper over L{_ExpandItemName} for nodes."""
1249 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1252 def _ExpandInstanceName(cfg, name):
1253 """Wrapper over L{_ExpandItemName} for instance."""
1254 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1257 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1258 minmem, maxmem, vcpus, nics, disk_template, disks,
1259 bep, hvp, hypervisor_name, tags):
1260 """Builds instance related env variables for hooks
1262 This builds the hook environment from individual variables.
1265 @param name: the name of the instance
1266 @type primary_node: string
1267 @param primary_node: the name of the instance's primary node
1268 @type secondary_nodes: list
1269 @param secondary_nodes: list of secondary nodes as strings
1270 @type os_type: string
1271 @param os_type: the name of the instance's OS
1272 @type status: string
1273 @param status: the desired status of the instance
1274 @type minmem: string
1275 @param minmem: the minimum memory size of the instance
1276 @type maxmem: string
1277 @param maxmem: the maximum memory size of the instance
1279 @param vcpus: the count of VCPUs the instance has
1281 @param nics: list of tuples (ip, mac, mode, link) representing
1282 the NICs the instance has
1283 @type disk_template: string
1284 @param disk_template: the disk template of the instance
1286 @param disks: the list of (size, mode) pairs
1288 @param bep: the backend parameters for the instance
1290 @param hvp: the hypervisor parameters for the instance
1291 @type hypervisor_name: string
1292 @param hypervisor_name: the hypervisor for the instance
1294 @param tags: list of instance tags as strings
1296 @return: the hook environment for this instance
1301 "INSTANCE_NAME": name,
1302 "INSTANCE_PRIMARY": primary_node,
1303 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1304 "INSTANCE_OS_TYPE": os_type,
1305 "INSTANCE_STATUS": status,
1306 "INSTANCE_MINMEM": minmem,
1307 "INSTANCE_MAXMEM": maxmem,
1308 # TODO(2.7) remove deprecated "memory" value
1309 "INSTANCE_MEMORY": maxmem,
1310 "INSTANCE_VCPUS": vcpus,
1311 "INSTANCE_DISK_TEMPLATE": disk_template,
1312 "INSTANCE_HYPERVISOR": hypervisor_name,
1315 nic_count = len(nics)
1316 for idx, (ip, mac, mode, link) in enumerate(nics):
1319 env["INSTANCE_NIC%d_IP" % idx] = ip
1320 env["INSTANCE_NIC%d_MAC" % idx] = mac
1321 env["INSTANCE_NIC%d_MODE" % idx] = mode
1322 env["INSTANCE_NIC%d_LINK" % idx] = link
1323 if mode == constants.NIC_MODE_BRIDGED:
1324 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1328 env["INSTANCE_NIC_COUNT"] = nic_count
1331 disk_count = len(disks)
1332 for idx, (size, mode) in enumerate(disks):
1333 env["INSTANCE_DISK%d_SIZE" % idx] = size
1334 env["INSTANCE_DISK%d_MODE" % idx] = mode
1338 env["INSTANCE_DISK_COUNT"] = disk_count
1343 env["INSTANCE_TAGS"] = " ".join(tags)
1345 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1346 for key, value in source.items():
1347 env["INSTANCE_%s_%s" % (kind, key)] = value
1352 def _NICListToTuple(lu, nics):
1353 """Build a list of nic information tuples.
1355 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1356 value in LUInstanceQueryData.
1358 @type lu: L{LogicalUnit}
1359 @param lu: the logical unit on whose behalf we execute
1360 @type nics: list of L{objects.NIC}
1361 @param nics: list of nics to convert to hooks tuples
1365 cluster = lu.cfg.GetClusterInfo()
1369 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1370 mode = filled_params[constants.NIC_MODE]
1371 link = filled_params[constants.NIC_LINK]
1372 hooks_nics.append((ip, mac, mode, link))
1376 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1377 """Builds instance related env variables for hooks from an object.
1379 @type lu: L{LogicalUnit}
1380 @param lu: the logical unit on whose behalf we execute
1381 @type instance: L{objects.Instance}
1382 @param instance: the instance for which we should build the
1384 @type override: dict
1385 @param override: dictionary with key/values that will override
1388 @return: the hook environment dictionary
1391 cluster = lu.cfg.GetClusterInfo()
1392 bep = cluster.FillBE(instance)
1393 hvp = cluster.FillHV(instance)
1395 "name": instance.name,
1396 "primary_node": instance.primary_node,
1397 "secondary_nodes": instance.secondary_nodes,
1398 "os_type": instance.os,
1399 "status": instance.admin_state,
1400 "maxmem": bep[constants.BE_MAXMEM],
1401 "minmem": bep[constants.BE_MINMEM],
1402 "vcpus": bep[constants.BE_VCPUS],
1403 "nics": _NICListToTuple(lu, instance.nics),
1404 "disk_template": instance.disk_template,
1405 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1408 "hypervisor_name": instance.hypervisor,
1409 "tags": instance.tags,
1412 args.update(override)
1413 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1416 def _AdjustCandidatePool(lu, exceptions):
1417 """Adjust the candidate pool after node operations.
1420 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1422 lu.LogInfo("Promoted nodes to master candidate role: %s",
1423 utils.CommaJoin(node.name for node in mod_list))
1424 for name in mod_list:
1425 lu.context.ReaddNode(name)
1426 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1428 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1432 def _DecideSelfPromotion(lu, exceptions=None):
1433 """Decide whether I should promote myself as a master candidate.
1436 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1437 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1438 # the new node will increase mc_max by one, so:
1439 mc_should = min(mc_should + 1, cp_size)
1440 return mc_now < mc_should
1443 def _CalculateGroupIPolicy(cluster, group):
1444 """Calculate instance policy for group.
1447 return cluster.SimpleFillIPolicy(group.ipolicy)
1450 def _ComputeViolatingInstances(ipolicy, instances):
1451 """Computes a set of instances who violates given ipolicy.
1453 @param ipolicy: The ipolicy to verify
1454 @type instances: list of L{objects.Instance}
1455 @param instances: List of instances to verify
1456 @return: A frozenset of instance names violating the ipolicy
1459 return frozenset([inst.name for inst in instances
1460 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1463 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1464 """Check that the brigdes needed by a list of nics exist.
1467 cluster = lu.cfg.GetClusterInfo()
1468 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1469 brlist = [params[constants.NIC_LINK] for params in paramslist
1470 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1472 result = lu.rpc.call_bridges_exist(target_node, brlist)
1473 result.Raise("Error checking bridges on destination node '%s'" %
1474 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1477 def _CheckInstanceBridgesExist(lu, instance, node=None):
1478 """Check that the brigdes needed by an instance exist.
1482 node = instance.primary_node
1483 _CheckNicsBridgesExist(lu, instance.nics, node)
1486 def _CheckOSVariant(os_obj, name):
1487 """Check whether an OS name conforms to the os variants specification.
1489 @type os_obj: L{objects.OS}
1490 @param os_obj: OS object to check
1492 @param name: OS name passed by the user, to check for validity
1495 variant = objects.OS.GetVariant(name)
1496 if not os_obj.supported_variants:
1498 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1499 " passed)" % (os_obj.name, variant),
1503 raise errors.OpPrereqError("OS name must include a variant",
1506 if variant not in os_obj.supported_variants:
1507 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1510 def _GetNodeInstancesInner(cfg, fn):
1511 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1514 def _GetNodeInstances(cfg, node_name):
1515 """Returns a list of all primary and secondary instances on a node.
1519 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1522 def _GetNodePrimaryInstances(cfg, node_name):
1523 """Returns primary instances on a node.
1526 return _GetNodeInstancesInner(cfg,
1527 lambda inst: node_name == inst.primary_node)
1530 def _GetNodeSecondaryInstances(cfg, node_name):
1531 """Returns secondary instances on a node.
1534 return _GetNodeInstancesInner(cfg,
1535 lambda inst: node_name in inst.secondary_nodes)
1538 def _GetStorageTypeArgs(cfg, storage_type):
1539 """Returns the arguments for a storage type.
1542 # Special case for file storage
1543 if storage_type == constants.ST_FILE:
1544 # storage.FileStorage wants a list of storage directories
1545 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1550 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1553 for dev in instance.disks:
1554 cfg.SetDiskID(dev, node_name)
1556 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1557 result.Raise("Failed to get disk status from node %s" % node_name,
1558 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1560 for idx, bdev_status in enumerate(result.payload):
1561 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1567 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1568 """Check the sanity of iallocator and node arguments and use the
1569 cluster-wide iallocator if appropriate.
1571 Check that at most one of (iallocator, node) is specified. If none is
1572 specified, then the LU's opcode's iallocator slot is filled with the
1573 cluster-wide default iallocator.
1575 @type iallocator_slot: string
1576 @param iallocator_slot: the name of the opcode iallocator slot
1577 @type node_slot: string
1578 @param node_slot: the name of the opcode target node slot
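Example (a sketch; the slot names are illustrative)::

  _CheckIAllocatorOrNode(self, "iallocator", "remote_node")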
1581 node = getattr(lu.op, node_slot, None)
1582 iallocator = getattr(lu.op, iallocator_slot, None)
1584 if node is not None and iallocator is not None:
1585 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1587 elif node is None and iallocator is None:
1588 default_iallocator = lu.cfg.GetDefaultIAllocator()
1589 if default_iallocator:
1590 setattr(lu.op, iallocator_slot, default_iallocator)
1592 raise errors.OpPrereqError("No iallocator or node given and no"
1593 " cluster-wide default iallocator found;"
1594 " please specify either an iallocator or a"
1595 " node, or set a cluster-wide default"
1599 def _GetDefaultIAllocator(cfg, iallocator):
1600 """Decides on which iallocator to use.
1602 @type cfg: L{config.ConfigWriter}
1603 @param cfg: Cluster configuration object
1604 @type iallocator: string or None
1605 @param iallocator: Iallocator specified in opcode
1607 @return: Iallocator name
1611 # Use default iallocator
1612 iallocator = cfg.GetDefaultIAllocator()
1615 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1616 " opcode nor as a cluster-wide default",
1622 class LUClusterPostInit(LogicalUnit):
1623 """Logical unit for running hooks after cluster initialization.
1626 HPATH = "cluster-init"
1627 HTYPE = constants.HTYPE_CLUSTER
1629 def BuildHooksEnv(self):
1634 "OP_TARGET": self.cfg.GetClusterName(),
1637 def BuildHooksNodes(self):
1638 """Build hooks nodes.
1641 return ([], [self.cfg.GetMasterNode()])
1643 def Exec(self, feedback_fn):
1650 class LUClusterDestroy(LogicalUnit):
1651 """Logical unit for destroying the cluster.
1654 HPATH = "cluster-destroy"
1655 HTYPE = constants.HTYPE_CLUSTER
1657 def BuildHooksEnv(self):
1662 "OP_TARGET": self.cfg.GetClusterName(),
1665 def BuildHooksNodes(self):
1666 """Build hooks nodes.
1671 def CheckPrereq(self):
1672 """Check prerequisites.
1674 This checks whether the cluster is empty.
1676 Any errors are signaled by raising errors.OpPrereqError.
1679 master = self.cfg.GetMasterNode()
1681 nodelist = self.cfg.GetNodeList()
1682 if len(nodelist) != 1 or nodelist[0] != master:
1683 raise errors.OpPrereqError("There are still %d node(s) in"
1684 " this cluster." % (len(nodelist) - 1),
1686 instancelist = self.cfg.GetInstanceList()
1688 raise errors.OpPrereqError("There are still %d instance(s) in"
1689 " this cluster." % len(instancelist),
1692 def Exec(self, feedback_fn):
1693 """Destroys the cluster.
1696 master_params = self.cfg.GetMasterNetworkParameters()
1698 # Run post hooks on master node before it's removed
1699 _RunPostHook(self, master_params.name)
1701 ems = self.cfg.GetUseExternalMipScript()
1702 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1705 self.LogWarning("Error disabling the master IP address: %s",
1708 return master_params.name
1711 def _VerifyCertificate(filename):
1712 """Verifies a certificate for L{LUClusterVerifyConfig}.
1714 @type filename: string
1715 @param filename: Path to PEM file
1719 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1720 utils.ReadFile(filename))
1721 except Exception, err: # pylint: disable=W0703
1722 return (LUClusterVerifyConfig.ETYPE_ERROR,
1723 "Failed to load X509 certificate %s: %s" % (filename, err))
1726 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1727 constants.SSL_CERT_EXPIRATION_ERROR)
1730 fnamemsg = "While verifying %s: %s" % (filename, msg)
1735 return (None, fnamemsg)
1736 elif errcode == utils.CERT_WARNING:
1737 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1738 elif errcode == utils.CERT_ERROR:
1739 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1741 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1744 def _GetAllHypervisorParameters(cluster, instances):
1745 """Compute the set of all hypervisor parameters.
1747 @type cluster: L{objects.Cluster}
1748 @param cluster: the cluster object
1749 @type instances: list of L{objects.Instance}
1750 @param instances: additional instances from which to obtain parameters
1751 @rtype: list of (origin, hypervisor, parameters)
1752 @return: a list with all parameters found, indicating the hypervisor they
1753 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1758 for hv_name in cluster.enabled_hypervisors:
1759 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1761 for os_name, os_hvp in cluster.os_hvp.items():
1762 for hv_name, hv_params in os_hvp.items():
1764 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1765 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1767 # TODO: collapse identical parameter values in a single one
1768 for instance in instances:
1769 if instance.hvparams:
1770 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1771 cluster.FillHV(instance)))
1776 class _VerifyErrors(object):
1777 """Mix-in for cluster/group verify LUs.
1779 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1780 self.op and self._feedback_fn to be available.)
1784 ETYPE_FIELD = "code"
1785 ETYPE_ERROR = "ERROR"
1786 ETYPE_WARNING = "WARNING"
1788 def _Error(self, ecode, item, msg, *args, **kwargs):
1789 """Format an error message.
1791 Based on the opcode's error_codes parameter, either format a
1792 parseable error code, or a simpler error string.
1794 This must be called only from Exec and functions called from Exec.
1797 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1798 itype, etxt, _ = ecode
1799 # first complete the msg
1802 # then format the whole message
1803 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1804 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1810 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1811 # and finally report it via the feedback_fn
1812 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
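# Example of the parseable layout produced above when error_codes is set
# (all values illustrative):
#   ERROR:ECLUSTERCERT:cluster:server.pem:certificate is about to expire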
1814 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1815 """Log an error message if the passed condition is True.
1819 or self.op.debug_simulate_errors) # pylint: disable=E1101
1821 # If the error code is in the list of ignored errors, demote the error to a
1823 (_, etxt, _) = ecode
1824 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1825 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1828 self._Error(ecode, *args, **kwargs)
1830 # do not mark the operation as failed for WARN cases only
1831 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1832 self.bad = self.bad or cond
1835 class LUClusterVerify(NoHooksLU):
1836 """Submits all jobs necessary to verify the cluster.
1841 def ExpandNames(self):
1842 self.needed_locks = {}
1844 def Exec(self, feedback_fn):
1847 if self.op.group_name:
1848 groups = [self.op.group_name]
1849 depends_fn = lambda: None
1851 groups = self.cfg.GetNodeGroupList()
1853 # Verify global configuration
1855 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1858 # Always depend on global verification
1859 depends_fn = lambda: [(-len(jobs), [])]
1861 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1862 ignore_errors=self.op.ignore_errors,
1863 depends=depends_fn())]
1864 for group in groups)
1866 # Fix up all parameters
1867 for op in itertools.chain(*jobs): # pylint: disable=W0142
1868 op.debug_simulate_errors = self.op.debug_simulate_errors
1869 op.verbose = self.op.verbose
1870 op.error_codes = self.op.error_codes
1872 op.skip_checks = self.op.skip_checks
1873 except AttributeError:
1874 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1876 return ResultWithJobs(jobs)
1879 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1880 """Verifies the cluster config.
1885 def _VerifyHVP(self, hvp_data):
1886 """Verifies locally the syntax of the hypervisor parameters.
1889 for item, hv_name, hv_params in hvp_data:
1890 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1893 hv_class = hypervisor.GetHypervisor(hv_name)
1894 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1895 hv_class.CheckParameterSyntax(hv_params)
1896 except errors.GenericError, err:
1897 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1899 def ExpandNames(self):
1900 # Information can be safely retrieved as the BGL is acquired in exclusive
1902 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1903 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1904 self.all_node_info = self.cfg.GetAllNodesInfo()
1905 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1906 self.needed_locks = {}
1908 def Exec(self, feedback_fn):
1909 """Verify integrity of cluster, performing various test on nodes.
1913 self._feedback_fn = feedback_fn
1915 feedback_fn("* Verifying cluster config")
1917 for msg in self.cfg.VerifyConfig():
1918 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1920 feedback_fn("* Verifying cluster certificate files")
1922 for cert_filename in constants.ALL_CERT_FILES:
1923 (errcode, msg) = _VerifyCertificate(cert_filename)
1924 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1926 feedback_fn("* Verifying hypervisor parameters")
1928 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1929 self.all_inst_info.values()))
1931 feedback_fn("* Verifying all nodes belong to an existing group")
1933 # We do this verification here because, should this bogus circumstance
1934 # occur, it would never be caught by VerifyGroup, which only acts on
1935 # nodes/instances reachable from existing node groups.
1937 dangling_nodes = set(node.name for node in self.all_node_info.values()
1938 if node.group not in self.all_group_info)
1940 dangling_instances = {}
1941 no_node_instances = []
1943 for inst in self.all_inst_info.values():
1944 if inst.primary_node in dangling_nodes:
1945 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1946 elif inst.primary_node not in self.all_node_info:
1947 no_node_instances.append(inst.name)
1952 utils.CommaJoin(dangling_instances.get(node.name,
1954 for node in dangling_nodes]
1956 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1958 "the following nodes (and their instances) belong to a non"
1959 " existing group: %s", utils.CommaJoin(pretty_dangling))
1961 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1963 "the following instances have a non-existing primary-node:"
1964 " %s", utils.CommaJoin(no_node_instances))
1969 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1970 """Verifies the status of a node group.
1973 HPATH = "cluster-verify"
1974 HTYPE = constants.HTYPE_CLUSTER
1977 _HOOKS_INDENT_RE = re.compile("^", re.M)
1979 class NodeImage(object):
1980 """A class representing the logical and physical status of a node.
1983 @ivar name: the node name to which this object refers
1984 @ivar volumes: a structure as returned from
1985 L{ganeti.backend.GetVolumeList} (runtime)
1986 @ivar instances: a list of running instances (runtime)
1987 @ivar pinst: list of configured primary instances (config)
1988 @ivar sinst: list of configured secondary instances (config)
1989 @ivar sbp: dictionary of {primary-node: list of instances} for all
1990 instances for which this node is secondary (config)
1991 @ivar mfree: free memory, as reported by hypervisor (runtime)
1992 @ivar dfree: free disk, as reported by the node (runtime)
1993 @ivar offline: the offline status (config)
1994 @type rpc_fail: boolean
1995 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1996 not whether the individual keys were correct) (runtime)
1997 @type lvm_fail: boolean
1998 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1999 @type hyp_fail: boolean
2000 @ivar hyp_fail: whether the RPC call didn't return the instance list
2001 @type ghost: boolean
2002 @ivar ghost: whether this is a known node or not (config)
2003 @type os_fail: boolean
2004 @ivar os_fail: whether the RPC call didn't return valid OS data
2006 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2007 @type vm_capable: boolean
2008 @ivar vm_capable: whether the node can host instances
2011 def __init__(self, offline=False, name=None, vm_capable=True):
2020 self.offline = offline
2021 self.vm_capable = vm_capable
2022 self.rpc_fail = False
2023 self.lvm_fail = False
2024 self.hyp_fail = False
2026 self.os_fail = False
2029 def ExpandNames(self):
2030 # This raises errors.OpPrereqError on its own:
2031 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2033 # Get instances in node group; this is unsafe and needs verification later
2034 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2036 self.needed_locks = {
2037 locking.LEVEL_INSTANCE: inst_names,
2038 locking.LEVEL_NODEGROUP: [self.group_uuid],
2039 locking.LEVEL_NODE: [],
2042 self.share_locks = _ShareAll()
2044 def DeclareLocks(self, level):
2045 if level == locking.LEVEL_NODE:
2046 # Get members of node group; this is unsafe and needs verification later
2047 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2049 all_inst_info = self.cfg.GetAllInstancesInfo()
2051 # In Exec(), we warn about mirrored instances that have primary and
2052 # secondary living in separate node groups. To fully verify that
2053 # volumes for these instances are healthy, we will need to do an
2054 # extra call to their secondaries. We ensure here those nodes will
2056 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2057 # Important: access only the instances whose lock is owned
2058 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2059 nodes.update(all_inst_info[inst].secondary_nodes)
2061 self.needed_locks[locking.LEVEL_NODE] = nodes
2063 def CheckPrereq(self):
2064 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2065 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2067 group_nodes = set(self.group_info.members)
2068 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2071 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2073 unlocked_instances = \
2074 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2077 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2078 utils.CommaJoin(unlocked_nodes))
2080 if unlocked_instances:
2081 raise errors.OpPrereqError("Missing lock for instances: %s" %
2082 utils.CommaJoin(unlocked_instances))
2084 self.all_node_info = self.cfg.GetAllNodesInfo()
2085 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2087 self.my_node_names = utils.NiceSort(group_nodes)
2088 self.my_inst_names = utils.NiceSort(group_instances)
2090 self.my_node_info = dict((name, self.all_node_info[name])
2091 for name in self.my_node_names)
2093 self.my_inst_info = dict((name, self.all_inst_info[name])
2094 for name in self.my_inst_names)
2096 # We detect here the nodes that will need the extra RPC calls for verifying
2097 # split LV volumes; they should be locked.
2098 extra_lv_nodes = set()
2100 for inst in self.my_inst_info.values():
2101 if inst.disk_template in constants.DTS_INT_MIRROR:
2102 group = self.my_node_info[inst.primary_node].group
2103 for nname in inst.secondary_nodes:
2104 if self.all_node_info[nname].group != group:
2105 extra_lv_nodes.add(nname)
2107 unlocked_lv_nodes = \
2108 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2110 if unlocked_lv_nodes:
2111 raise errors.OpPrereqError("these nodes could be locked: %s" %
2112 utils.CommaJoin(unlocked_lv_nodes))
2113 self.extra_lv_nodes = list(extra_lv_nodes)
2115 def _VerifyNode(self, ninfo, nresult):
2116 """Perform some basic validation on data returned from a node.
2118 - check the result data structure is well formed and has all the
2120 - check ganeti version
2122 @type ninfo: L{objects.Node}
2123 @param ninfo: the node to check
2124 @param nresult: the results from the node
2126 @return: whether overall this call was successful (and we can expect
2127 reasonable values in the response)
2131 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2133 # main result, nresult should be a non-empty dict
2134 test = not nresult or not isinstance(nresult, dict)
2135 _ErrorIf(test, constants.CV_ENODERPC, node,
2136 "unable to verify node: no data returned")
2140 # compares ganeti version
2141 local_version = constants.PROTOCOL_VERSION
2142 remote_version = nresult.get("version", None)
2143 test = not (remote_version and
2144 isinstance(remote_version, (list, tuple)) and
2145 len(remote_version) == 2)
2146 _ErrorIf(test, constants.CV_ENODERPC, node,
2147 "connection to node returned invalid data")
2151 test = local_version != remote_version[0]
2152 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2153 "incompatible protocol versions: master %s,"
2154 " node %s", local_version, remote_version[0])
2158 # node seems compatible, we can actually try to look into its results
2160 # full package version
2161 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2162 constants.CV_ENODEVERSION, node,
2163 "software version mismatch: master %s, node %s",
2164 constants.RELEASE_VERSION, remote_version[1],
2165 code=self.ETYPE_WARNING)
2167 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2168 if ninfo.vm_capable and isinstance(hyp_result, dict):
2169 for hv_name, hv_result in hyp_result.iteritems():
2170 test = hv_result is not None
2171 _ErrorIf(test, constants.CV_ENODEHV, node,
2172 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2174 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2175 if ninfo.vm_capable and isinstance(hvp_result, list):
2176 for item, hv_name, hv_result in hvp_result:
2177 _ErrorIf(True, constants.CV_ENODEHV, node,
2178 "hypervisor %s parameter verify failure (source %s): %s",
2179 hv_name, item, hv_result)
2181 test = nresult.get(constants.NV_NODESETUP,
2182 ["Missing NODESETUP results"])
2183 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2188 def _VerifyNodeTime(self, ninfo, nresult,
2189 nvinfo_starttime, nvinfo_endtime):
2190 """Check the node time.
2192 @type ninfo: L{objects.Node}
2193 @param ninfo: the node to check
2194 @param nresult: the remote results for the node
2195 @param nvinfo_starttime: the start time of the RPC call
2196 @param nvinfo_endtime: the end time of the RPC call
2200 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2202 ntime = nresult.get(constants.NV_TIME, None)
2204 ntime_merged = utils.MergeTime(ntime)
2205 except (ValueError, TypeError):
2206 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
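# The node's clock is accepted if it falls inside the RPC window widened by
# NODE_MAX_CLOCK_SKEW on both sides; since the call itself takes time, only
# a lower bound on the divergence can be reported.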
2209 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2210 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2211 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2212 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2216 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2217 "Node time diverges by at least %s from master node time",
2220 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2221 """Check the node LVM results.
2223 @type ninfo: L{objects.Node}
2224 @param ninfo: the node to check
2225 @param nresult: the remote results for the node
2226 @param vg_name: the configured VG name
2233 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2235 # checks vg existence and size > 20G
2236 vglist = nresult.get(constants.NV_VGLIST, None)
2238 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2240 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2241 constants.MIN_VG_SIZE)
2242 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2245 pvlist = nresult.get(constants.NV_PVLIST, None)
2246 test = pvlist is None
2247 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2249 # check that ':' is not present in PV names, since it's a
2250 # special character for lvcreate (denotes the range of PEs to
2252 for _, pvname, owner_vg in pvlist:
2253 test = ":" in pvname
2254 _ErrorIf(test, constants.CV_ENODELVM, node,
2255 "Invalid character ':' in PV '%s' of VG '%s'",
2258 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2259 """Check the node bridges.
2261 @type ninfo: L{objects.Node}
2262 @param ninfo: the node to check
2263 @param nresult: the remote results for the node
2264 @param bridges: the expected list of bridges
2271 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2273 missing = nresult.get(constants.NV_BRIDGES, None)
2274 test = not isinstance(missing, list)
2275 _ErrorIf(test, constants.CV_ENODENET, node,
2276 "did not return valid bridge information")
2278 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2279 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2281 def _VerifyNodeUserScripts(self, ninfo, nresult):
2282 """Check the results of user scripts presence and executability on the node
2284 @type ninfo: L{objects.Node}
2285 @param ninfo: the node to check
2286 @param nresult: the remote results for the node
2291 test = constants.NV_USERSCRIPTS not in nresult
2292 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2293 "did not return user scripts information")
2295 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2297 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2298 "user scripts not present or not executable: %s" %
2299 utils.CommaJoin(sorted(broken_scripts)))
2301 def _VerifyNodeNetwork(self, ninfo, nresult):
2302 """Check the node network connectivity results.
2304 @type ninfo: L{objects.Node}
2305 @param ninfo: the node to check
2306 @param nresult: the remote results for the node
2310 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2312 test = constants.NV_NODELIST not in nresult
2313 _ErrorIf(test, constants.CV_ENODESSH, node,
2314 "node hasn't returned node ssh connectivity data")
2316 if nresult[constants.NV_NODELIST]:
2317 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2318 _ErrorIf(True, constants.CV_ENODESSH, node,
2319 "ssh communication with node '%s': %s", a_node, a_msg)
2321 test = constants.NV_NODENETTEST not in nresult
2322 _ErrorIf(test, constants.CV_ENODENET, node,
2323 "node hasn't returned node tcp connectivity data")
2325 if nresult[constants.NV_NODENETTEST]:
2326 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2328 _ErrorIf(True, constants.CV_ENODENET, node,
2329 "tcp communication with node '%s': %s",
2330 anode, nresult[constants.NV_NODENETTEST][anode])
2332 test = constants.NV_MASTERIP not in nresult
2333 _ErrorIf(test, constants.CV_ENODENET, node,
2334 "node hasn't returned node master IP reachability data")
2336 if not nresult[constants.NV_MASTERIP]:
2337 if node == self.master_node:
2338 msg = "the master node cannot reach the master IP (not configured?)"
2340 msg = "cannot reach the master IP"
2341 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2343 def _VerifyInstance(self, instance, instanceconfig, node_image,
2345 """Verify an instance.
2347 This function checks to see if the required block devices are
2348 available on the instance's node.
2351 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2352 node_current = instanceconfig.primary_node
2354 node_vol_should = {}
2355 instanceconfig.MapLVsByNode(node_vol_should)
2357 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2358 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2359 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2361 for node in node_vol_should:
2362 n_img = node_image[node]
2363 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2364 # ignore missing volumes on offline or broken nodes
2366 for volume in node_vol_should[node]:
2367 test = volume not in n_img.volumes
2368 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2369 "volume %s missing on node %s", volume, node)
2371 if instanceconfig.admin_state == constants.ADMINST_UP:
2372 pri_img = node_image[node_current]
2373 test = instance not in pri_img.instances and not pri_img.offline
2374 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2375 "instance not running on its primary node %s",
2378 diskdata = [(nname, success, status, idx)
2379 for (nname, disks) in diskstatus.items()
2380 for idx, (success, status) in enumerate(disks)]
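# diskdata flattens the per-node status into (node_name, success, status,
# disk_index) tuples, e.g. a healthy first disk on node1 would show up as
# ("node1", True, <blockdev status>, 0) (illustrative names only).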
2382 for nname, success, bdev_status, idx in diskdata:
2383 # the 'ghost node' construction in Exec() ensures that we have a
2385 snode = node_image[nname]
2386 bad_snode = snode.ghost or snode.offline
2387 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2388 not success and not bad_snode,
2389 constants.CV_EINSTANCEFAULTYDISK, instance,
2390 "couldn't retrieve status for disk/%s on %s: %s",
2391 idx, nname, bdev_status)
2392 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2393 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2394 constants.CV_EINSTANCEFAULTYDISK, instance,
2395 "disk/%s on %s is faulty", idx, nname)
2397 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2398 """Verify if there are any unknown volumes in the cluster.
2400 The .os, .swap and backup volumes are ignored. All other volumes are
2401 reported as unknown.
2403 @type reserved: L{ganeti.utils.FieldSet}
2404 @param reserved: a FieldSet of reserved volume names
2407 for node, n_img in node_image.items():
2408 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2409 # skip non-healthy nodes
2411 for volume in n_img.volumes:
2412 test = ((node not in node_vol_should or
2413 volume not in node_vol_should[node]) and
2414 not reserved.Matches(volume))
2415 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2416 "volume %s is unknown", volume)
2418 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2419 """Verify N+1 Memory Resilience.
2421 Check that if one single node dies we can still start all the
2422 instances it was primary for.
2425 cluster_info = self.cfg.GetClusterInfo()
2426 for node, n_img in node_image.items():
2427 # This code checks that every node which is now listed as
2428 # secondary has enough memory to host all instances it is
2429 # supposed to, should a single other node in the cluster fail.
2430 # FIXME: not ready for failover to an arbitrary node
2431 # FIXME: does not support file-backed instances
2432 # WARNING: we currently take into account down instances as well
2433 # as up ones, considering that even if they're down someone
2434 # might want to start them even in the event of a node failure.
2436 # we're skipping offline nodes from the N+1 warning, since
2437 # most likely we don't have good memory information from them;
2438 # we already list instances living on such nodes, and that's
2441 #TODO(dynmem): also consider ballooning out other instances
2442 for prinode, instances in n_img.sbp.items():
2444 for instance in instances:
2445 bep = cluster_info.FillBE(instance_cfg[instance])
2446 if bep[constants.BE_AUTO_BALANCE]:
2447 needed_mem += bep[constants.BE_MINMEM]
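# needed_mem accumulates the BE_MINMEM of every auto-balanced instance that
# has prinode as primary and this node as secondary; e.g. (illustrative
# figures) two such instances with minmem 2048 and 1024 MiB would require
# 3072 MiB free on this node should prinode fail.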
2448 test = n_img.mfree < needed_mem
2449 self._ErrorIf(test, constants.CV_ENODEN1, node,
2450 "not enough memory to accomodate instance failovers"
2451 " should node %s fail (%dMiB needed, %dMiB available)",
2452 prinode, needed_mem, n_img.mfree)
2455 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2456 (files_all, files_opt, files_mc, files_vm)):
2457 """Verifies file checksums collected from all nodes.
2459 @param errorif: Callback for reporting errors
2460 @param nodeinfo: List of L{objects.Node} objects
2461 @param master_node: Name of master node
2462 @param all_nvinfo: RPC results
2465 # Define functions determining which nodes to consider for a file
2468 (files_mc, lambda node: (node.master_candidate or
2469 node.name == master_node)),
2470 (files_vm, lambda node: node.vm_capable),
2473 # Build mapping from filename to list of nodes which should have the file
2475 for (files, fn) in files2nodefn:
2477 filenodes = nodeinfo
2479 filenodes = filter(fn, nodeinfo)
2480 nodefiles.update((filename,
2481 frozenset(map(operator.attrgetter("name"), filenodes)))
2482 for filename in files)
2484 assert set(nodefiles) == (files_all | files_mc | files_vm)
2486 fileinfo = dict((filename, {}) for filename in nodefiles)
2487 ignore_nodes = set()
2489 for node in nodeinfo:
2491 ignore_nodes.add(node.name)
2494 nresult = all_nvinfo[node.name]
2496 if nresult.fail_msg or not nresult.payload:
2499 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2501 test = not (node_files and isinstance(node_files, dict))
2502 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2503 "Node did not return file checksum data")
2505 ignore_nodes.add(node.name)
2508 # Build per-checksum mapping from filename to nodes having it
2509 for (filename, checksum) in node_files.items():
2510 assert filename in nodefiles
2511 fileinfo[filename].setdefault(checksum, set()).add(node.name)
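# fileinfo now maps each filename to {checksum: set(node names)}, so a file
# with a single entry in that inner dict is identical everywhere it exists.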
2513 for (filename, checksums) in fileinfo.items():
2514 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2516 # Nodes having the file
2517 with_file = frozenset(node_name
2518 for nodes in fileinfo[filename].values()
2519 for node_name in nodes) - ignore_nodes
2521 expected_nodes = nodefiles[filename] - ignore_nodes
2523 # Nodes missing file
2524 missing_file = expected_nodes - with_file
2526 if filename in files_opt:
2528 errorif(missing_file and missing_file != expected_nodes,
2529 constants.CV_ECLUSTERFILECHECK, None,
2530 "File %s is optional, but it must exist on all or no"
2531 " nodes (not found on %s)",
2532 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2534 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2535 "File %s is missing from node(s) %s", filename,
2536 utils.CommaJoin(utils.NiceSort(missing_file)))
2538 # Warn if a node has a file it shouldn't
2539 unexpected = with_file - expected_nodes
2541 constants.CV_ECLUSTERFILECHECK, None,
2542 "File %s should not exist on node(s) %s",
2543 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2545 # See if there are multiple versions of the file
2546 test = len(checksums) > 1
2548 variants = ["variant %s on %s" %
2549 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2550 for (idx, (checksum, nodes)) in
2551 enumerate(sorted(checksums.items()))]
2555 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2556 "File %s found with %s different checksums (%s)",
2557 filename, len(checksums), "; ".join(variants))
2559 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2561 """Verifies and the node DRBD status.
2563 @type ninfo: L{objects.Node}
2564 @param ninfo: the node to check
2565 @param nresult: the remote results for the node
2566 @param instanceinfo: the dict of instances
2567 @param drbd_helper: the configured DRBD usermode helper
2568 @param drbd_map: the DRBD map as returned by
2569 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2573 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2576 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2577 test = (helper_result is None)
2578 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2579 "no drbd usermode helper returned")
2581 status, payload = helper_result
2583 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2584 "drbd usermode helper check unsuccessful: %s", payload)
2585 test = status and (payload != drbd_helper)
2586 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2587 "wrong drbd usermode helper: %s", payload)
2589 # compute the DRBD minors
2591 for minor, instance in drbd_map[node].items():
2592 test = instance not in instanceinfo
2593 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2594 "ghost instance '%s' in temporary DRBD map", instance)
2595 # ghost instance should not be running, but otherwise we
2596 # don't give double warnings (both ghost instance and
2597 # unallocated minor in use)
2599 node_drbd[minor] = (instance, False)
2601 instance = instanceinfo[instance]
2602 node_drbd[minor] = (instance.name,
2603 instance.admin_state == constants.ADMINST_UP)
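# node_drbd maps each configured minor to (instance name, should_be_active);
# minors belonging to ghost instances stay in the map (with False) so they
# are not additionally reported as unallocated below.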
2605 # and now check them
2606 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2607 test = not isinstance(used_minors, (tuple, list))
2608 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2609 "cannot parse drbd status file: %s", str(used_minors))
2611 # we cannot check drbd status
2614 for minor, (iname, must_exist) in node_drbd.items():
2615 test = minor not in used_minors and must_exist
2616 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2617 "drbd minor %d of instance %s is not active", minor, iname)
2618 for minor in used_minors:
2619 test = minor not in node_drbd
2620 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2621 "unallocated drbd minor %d is in use", minor)
2623 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2624 """Builds the node OS structures.
2626 @type ninfo: L{objects.Node}
2627 @param ninfo: the node to check
2628 @param nresult: the remote results for the node
2629 @param nimg: the node image object
2633 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2635 remote_os = nresult.get(constants.NV_OSLIST, None)
2636 test = (not isinstance(remote_os, list) or
2637 not compat.all(isinstance(v, list) and len(v) == 7
2638 for v in remote_os))
2640 _ErrorIf(test, constants.CV_ENODEOS, node,
2641 "node hasn't returned valid OS data")
2650 for (name, os_path, status, diagnose,
2651 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2653 if name not in os_dict:
2656 # parameters is a list of lists instead of list of tuples due to
2657 # JSON lacking a real tuple type, fix it:
2658 parameters = [tuple(v) for v in parameters]
2659 os_dict[name].append((os_path, status, diagnose,
2660 set(variants), set(parameters), set(api_ver)))
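# os_dict maps each OS name to a list of (path, status, diagnose, variants,
# parameters, api_versions) tuples, one entry per occurrence reported by the
# node; _VerifyNodeOS later flags names with more than one entry.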
2662 nimg.oslist = os_dict
2664 def _VerifyNodeOS(self, ninfo, nimg, base):
2665 """Verifies the node OS list.
2667 @type ninfo: L{objects.Node}
2668 @param ninfo: the node to check
2669 @param nimg: the node image object
2670 @param base: the 'template' node we match against (e.g. from the master)
2674 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2676 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2678 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2679 for os_name, os_data in nimg.oslist.items():
2680 assert os_data, "Empty OS status for OS %s?!" % os_name
2681 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2682 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2683 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2684 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2685 "OS '%s' has multiple entries (first one shadows the rest): %s",
2686 os_name, utils.CommaJoin([v[0] for v in os_data]))
2687 # comparisons with the 'base' image
2688 test = os_name not in base.oslist
2689 _ErrorIf(test, constants.CV_ENODEOS, node,
2690 "Extra OS %s not present on reference node (%s)",
2694 assert base.oslist[os_name], "Base node has empty OS status?"
2695 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2697 # base OS is invalid, skipping
2699 for kind, a, b in [("API version", f_api, b_api),
2700 ("variants list", f_var, b_var),
2701 ("parameters", beautify_params(f_param),
2702 beautify_params(b_param))]:
2703 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2704 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2705 kind, os_name, base.name,
2706 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2708 # check any missing OSes
2709 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2710 _ErrorIf(missing, constants.CV_ENODEOS, node,
2711 "OSes present on reference node %s but missing on this node: %s",
2712 base.name, utils.CommaJoin(missing))
2714 def _VerifyOob(self, ninfo, nresult):
2715 """Verifies out of band functionality of a node.
2717 @type ninfo: L{objects.Node}
2718 @param ninfo: the node to check
2719 @param nresult: the remote results for the node
2723 # We just have to verify the paths on master and/or master candidates
2724 # as the oob helper is invoked on the master
2725 if ((ninfo.master_candidate or ninfo.master_capable) and
2726 constants.NV_OOB_PATHS in nresult):
2727 for path_result in nresult[constants.NV_OOB_PATHS]:
2728 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2730 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2731 """Verifies and updates the node volume data.
2733 This function will update a L{NodeImage}'s internal structures
2734 with data from the remote call.
2736 @type ninfo: L{objects.Node}
2737 @param ninfo: the node to check
2738 @param nresult: the remote results for the node
2739 @param nimg: the node image object
2740 @param vg_name: the configured VG name
2744 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2746 nimg.lvm_fail = True
2747 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2750 elif isinstance(lvdata, basestring):
2751 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2752 utils.SafeEncode(lvdata))
2753 elif not isinstance(lvdata, dict):
2754 _ErrorIf(True, constants.CV_ENODELVM, node,
2755 "rpc call to node failed (lvlist)")
2757 nimg.volumes = lvdata
2758 nimg.lvm_fail = False
2760 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2761 """Verifies and updates the node instance list.
2763 If the listing was successful, then updates this node's instance
2764 list. Otherwise, it marks the RPC call as failed for the instance
2767 @type ninfo: L{objects.Node}
2768 @param ninfo: the node to check
2769 @param nresult: the remote results for the node
2770 @param nimg: the node image object
2773 idata = nresult.get(constants.NV_INSTANCELIST, None)
2774 test = not isinstance(idata, list)
2775 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2776 "rpc call to node failed (instancelist): %s",
2777 utils.SafeEncode(str(idata)))
2779 nimg.hyp_fail = True
2781 nimg.instances = idata
2783 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2784 """Verifies and computes a node information map
2786 @type ninfo: L{objects.Node}
2787 @param ninfo: the node to check
2788 @param nresult: the remote results for the node
2789 @param nimg: the node image object
2790 @param vg_name: the configured VG name
2794 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2796 # try to read free memory (from the hypervisor)
2797 hv_info = nresult.get(constants.NV_HVINFO, None)
2798 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2799 _ErrorIf(test, constants.CV_ENODEHV, node,
2800 "rpc call to node failed (hvinfo)")
2803 nimg.mfree = int(hv_info["memory_free"])
2804 except (ValueError, TypeError):
2805 _ErrorIf(True, constants.CV_ENODERPC, node,
2806 "node returned invalid nodeinfo, check hypervisor")
2808 # FIXME: devise a free space model for file based instances as well
2809 if vg_name is not None:
2810 test = (constants.NV_VGLIST not in nresult or
2811 vg_name not in nresult[constants.NV_VGLIST])
2812 _ErrorIf(test, constants.CV_ENODELVM, node,
2813 "node didn't return data for the volume group '%s'"
2814 " - it is either missing or broken", vg_name)
2817 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2818 except (ValueError, TypeError):
2819 _ErrorIf(True, constants.CV_ENODERPC, node,
2820 "node returned invalid LVM info, check LVM status")
2822 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2823 """Gets per-disk status information for all instances.
2825 @type nodelist: list of strings
2826 @param nodelist: Node names
2827 @type node_image: dict of (name, L{objects.Node})
2828 @param node_image: Node objects
2829 @type instanceinfo: dict of (name, L{objects.Instance})
2830 @param instanceinfo: Instance objects
2831 @rtype: {instance: {node: [(success, payload)]}}
2832 @return: a dictionary of per-instance dictionaries with nodes as
2833 keys and disk information as values; the disk information is a
2834 list of tuples (success, payload)
2837 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2840 node_disks_devonly = {}
2841 diskless_instances = set()
2842 diskless = constants.DT_DISKLESS
2844 for nname in nodelist:
2845 node_instances = list(itertools.chain(node_image[nname].pinst,
2846 node_image[nname].sinst))
2847 diskless_instances.update(inst for inst in node_instances
2848 if instanceinfo[inst].disk_template == diskless)
2849 disks = [(inst, disk)
2850 for inst in node_instances
2851 for disk in instanceinfo[inst].disks]
2854 # No need to collect data
2857 node_disks[nname] = disks
2859 # Creating copies as SetDiskID below will modify the objects and that can
2860 # lead to incorrect data returned from nodes
2861 devonly = [dev.Copy() for (_, dev) in disks]
2864 self.cfg.SetDiskID(dev, nname)
2866 node_disks_devonly[nname] = devonly
2868 assert len(node_disks) == len(node_disks_devonly)
2870 # Collect data from all nodes with disks
2871 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2874 assert len(result) == len(node_disks)
2878 for (nname, nres) in result.items():
2879 disks = node_disks[nname]
2882 # No data from this node
2883 data = len(disks) * [(False, "node offline")]
2886 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2887 "while getting disk information: %s", msg)
2889 # No data from this node
2890 data = len(disks) * [(False, msg)]
2893 for idx, i in enumerate(nres.payload):
2894 if isinstance(i, (tuple, list)) and len(i) == 2:
2897 logging.warning("Invalid result from node %s, entry %d: %s",
2899 data.append((False, "Invalid result from the remote node"))
2901 for ((inst, _), status) in zip(disks, data):
2902 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2904 # Add empty entries for diskless instances.
2905 for inst in diskless_instances:
2906 assert inst not in instdisk
2909 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2910 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2911 compat.all(isinstance(s, (tuple, list)) and
2912 len(s) == 2 for s in statuses)
2913 for inst, nnames in instdisk.items()
2914 for nname, statuses in nnames.items())
2915 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2920 def _SshNodeSelector(group_uuid, all_nodes):
2921 """Create endless iterators for all potential SSH check hosts.
2924 nodes = [node for node in all_nodes
2925 if (node.group != group_uuid and
2927 keyfunc = operator.attrgetter("group")
2929 return map(itertools.cycle,
2930 [sorted(map(operator.attrgetter("name"), names))
2931 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2935 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2936 """Choose which nodes should talk to which other nodes.
2938 We will make nodes contact all nodes in their group, and one node from
2941 @warning: This algorithm has a known issue if one node group is much
2942 smaller than others (e.g. just one node). In such a case all other
2943 nodes will talk to the single node.
2946 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
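# Every online node in this group gets paired with one node from each other
# group, picked round-robin from that group's sorted member names, so
# cross-group SSH connectivity is sampled without contacting every node.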
2947 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2949 return (online_nodes,
2950 dict((name, sorted([i.next() for i in sel]))
2951 for name in online_nodes))
2953 def BuildHooksEnv(self):
2956 Cluster-Verify hooks just ran in the post phase and their failure makes
2957 the output be logged in the verify output and the verification to fail.
2961 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2964 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2965 for node in self.my_node_info.values())
2969 def BuildHooksNodes(self):
2970 """Build hooks nodes.
2973 return ([], self.my_node_names)
2975 def Exec(self, feedback_fn):
2976 """Verify integrity of the node group, performing various test on nodes.
2979 # This method has too many local variables. pylint: disable=R0914
2980 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2982 if not self.my_node_names:
2984 feedback_fn("* Empty node group, skipping verification")
2988 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2989 verbose = self.op.verbose
2990 self._feedback_fn = feedback_fn
2992 vg_name = self.cfg.GetVGName()
2993 drbd_helper = self.cfg.GetDRBDHelper()
2994 cluster = self.cfg.GetClusterInfo()
2995 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2996 hypervisors = cluster.enabled_hypervisors
2997 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2999 i_non_redundant = [] # Non redundant instances
3000 i_non_a_balanced = [] # Non auto-balanced instances
3001 i_offline = 0 # Count of offline instances
3002 n_offline = 0 # Count of offline nodes
3003 n_drained = 0 # Count of nodes being drained
3004 node_vol_should = {}
3006 # FIXME: verify OS list
3009 filemap = _ComputeAncillaryFiles(cluster, False)
3011 # do local checksums
3012 master_node = self.master_node = self.cfg.GetMasterNode()
3013 master_ip = self.cfg.GetMasterIP()
3015 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3018 if self.cfg.GetUseExternalMipScript():
3019 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3021 node_verify_param = {
3022 constants.NV_FILELIST:
3023 utils.UniqueSequence(filename
3024 for files in filemap
3025 for filename in files),
3026 constants.NV_NODELIST:
3027 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3028 self.all_node_info.values()),
3029 constants.NV_HYPERVISOR: hypervisors,
3030 constants.NV_HVPARAMS:
3031 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3032 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3033 for node in node_data_list
3034 if not node.offline],
3035 constants.NV_INSTANCELIST: hypervisors,
3036 constants.NV_VERSION: None,
3037 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3038 constants.NV_NODESETUP: None,
3039 constants.NV_TIME: None,
3040 constants.NV_MASTERIP: (master_node, master_ip),
3041 constants.NV_OSLIST: None,
3042 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3043 constants.NV_USERSCRIPTS: user_scripts,
3046 if vg_name is not None:
3047 node_verify_param[constants.NV_VGLIST] = None
3048 node_verify_param[constants.NV_LVLIST] = vg_name
3049 node_verify_param[constants.NV_PVLIST] = [vg_name]
3050 node_verify_param[constants.NV_DRBDLIST] = None
3053 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3056 # FIXME: this needs to be changed per node-group, not cluster-wide
3058 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3059 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3060 bridges.add(default_nicpp[constants.NIC_LINK])
3061 for instance in self.my_inst_info.values():
3062 for nic in instance.nics:
3063 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3064 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3065 bridges.add(full_nic[constants.NIC_LINK])
3068 node_verify_param[constants.NV_BRIDGES] = list(bridges)
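# The node RPC reports which of these bridges are missing; the result is
# evaluated per node in _VerifyNodeBridges above.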
3070 # Build our expected cluster state
3071 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3073 vm_capable=node.vm_capable))
3074 for node in node_data_list)
3078 for node in self.all_node_info.values():
3079 path = _SupportsOob(self.cfg, node)
3080 if path and path not in oob_paths:
3081 oob_paths.append(path)
3084 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3086 for instance in self.my_inst_names:
3087 inst_config = self.my_inst_info[instance]
3089 for nname in inst_config.all_nodes:
3090 if nname not in node_image:
3091 gnode = self.NodeImage(name=nname)
3092 gnode.ghost = (nname not in self.all_node_info)
3093 node_image[nname] = gnode
3095 inst_config.MapLVsByNode(node_vol_should)
3097 pnode = inst_config.primary_node
3098 node_image[pnode].pinst.append(instance)
3100 for snode in inst_config.secondary_nodes:
3101 nimg = node_image[snode]
3102 nimg.sinst.append(instance)
3103 if pnode not in nimg.sbp:
3104 nimg.sbp[pnode] = []
3105 nimg.sbp[pnode].append(instance)
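# nimg.sbp now maps each primary node to the instances for which this node
# acts as secondary; the N+1 memory check in _VerifyNPlusOneMemory relies on
# exactly this structure.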
3107 # At this point, we have the in-memory data structures complete,
3108 # except for the runtime information, which we'll gather next
3110 # Due to the way our RPC system works, exact response times cannot be
3111 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3112 # time before and after executing the request, we can at least have a time
3114 nvinfo_starttime = time.time()
3115 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3117 self.cfg.GetClusterName())
3118 nvinfo_endtime = time.time()
3120 if self.extra_lv_nodes and vg_name is not None:
3122 self.rpc.call_node_verify(self.extra_lv_nodes,
3123 {constants.NV_LVLIST: vg_name},
3124 self.cfg.GetClusterName())
3126 extra_lv_nvinfo = {}
3128 all_drbd_map = self.cfg.ComputeDRBDMap()
3130 feedback_fn("* Gathering disk information (%s nodes)" %
3131 len(self.my_node_names))
3132 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3135 feedback_fn("* Verifying configuration file consistency")
3137 # If not all nodes are being checked, we need to make sure the master node
3138 # and a non-checked vm_capable node are in the list.
3139 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3141 vf_nvinfo = all_nvinfo.copy()
3142 vf_node_info = list(self.my_node_info.values())
3143 additional_nodes = []
3144 if master_node not in self.my_node_info:
3145 additional_nodes.append(master_node)
3146 vf_node_info.append(self.all_node_info[master_node])
3147 # Add the first vm_capable node we find which is not included
3148 for node in absent_nodes:
3149 nodeinfo = self.all_node_info[node]
3150 if nodeinfo.vm_capable and not nodeinfo.offline:
3151 additional_nodes.append(node)
3152 vf_node_info.append(self.all_node_info[node])
3154 key = constants.NV_FILELIST
3155 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3156 {key: node_verify_param[key]},
3157 self.cfg.GetClusterName()))
3159 vf_nvinfo = all_nvinfo
3160 vf_node_info = self.my_node_info.values()
3162 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3164 feedback_fn("* Verifying node status")
3168 for node_i in node_data_list:
3170 nimg = node_image[node]
3174 feedback_fn("* Skipping offline node %s" % (node,))
3178 if node == master_node:
3180 elif node_i.master_candidate:
3181 ntype = "master candidate"
3182 elif node_i.drained:
3188 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3190 msg = all_nvinfo[node].fail_msg
3191 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3194 nimg.rpc_fail = True
3197 nresult = all_nvinfo[node].payload
3199 nimg.call_ok = self._VerifyNode(node_i, nresult)
3200 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3201 self._VerifyNodeNetwork(node_i, nresult)
3202 self._VerifyNodeUserScripts(node_i, nresult)
3203 self._VerifyOob(node_i, nresult)
3206 self._VerifyNodeLVM(node_i, nresult, vg_name)
3207 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3210 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3211 self._UpdateNodeInstances(node_i, nresult, nimg)
3212 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3213 self._UpdateNodeOS(node_i, nresult, nimg)
3215 if not nimg.os_fail:
3216 if refos_img is None:
3218 self._VerifyNodeOS(node_i, nimg, refos_img)
3219 self._VerifyNodeBridges(node_i, nresult, bridges)
3221 # Check whether all running instances are primary for the node. (This
3222 # can no longer be done from _VerifyInstance below, since some of the
3223 # wrong instances could be from other node groups.)
3224 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
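# Instances reported by the hypervisor but not configured as primaries here
# are either mis-placed (known in the cluster, flagged per instance) or
# completely unknown to the configuration (flagged per node).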
3226 for inst in non_primary_inst:
3227 # FIXME: investigate best way to handle offline insts
3228 if inst.admin_state == constants.ADMINST_OFFLINE:
3230 feedback_fn("* Skipping offline instance %s" % inst.name)
3233 test = inst in self.all_inst_info
3234 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3235 "instance should not run on node %s", node_i.name)
3236 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3237 "node is running unknown instance %s", inst)
3239 for node, result in extra_lv_nvinfo.items():
3240 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3241 node_image[node], vg_name)
3243 feedback_fn("* Verifying instance status")
3244 for instance in self.my_inst_names:
3246 feedback_fn("* Verifying instance %s" % instance)
3247 inst_config = self.my_inst_info[instance]
3248 self._VerifyInstance(instance, inst_config, node_image,
3250 inst_nodes_offline = []
3252 pnode = inst_config.primary_node
3253 pnode_img = node_image[pnode]
3254 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3255 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3256 " primary node failed", instance)
3258 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3260 constants.CV_EINSTANCEBADNODE, instance,
3261 "instance is marked as running and lives on offline node %s",
3262 inst_config.primary_node)
3264 # If the instance is non-redundant we cannot survive losing its primary
3265 # node, so we are not N+1 compliant. On the other hand we have no disk
3266 # templates with more than one secondary so that situation is not well
3268 # FIXME: does not support file-backed instances
3269 if not inst_config.secondary_nodes:
3270 i_non_redundant.append(instance)
3272 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3273 constants.CV_EINSTANCELAYOUT,
3274 instance, "instance has multiple secondary nodes: %s",
3275 utils.CommaJoin(inst_config.secondary_nodes),
3276 code=self.ETYPE_WARNING)
3278 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3279 pnode = inst_config.primary_node
3280 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3281 instance_groups = {}
3283 for node in instance_nodes:
3284 instance_groups.setdefault(self.all_node_info[node].group,
3288 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3289 # Sort so that we always list the primary node first.
3290 for group, nodes in sorted(instance_groups.items(),
3291 key=lambda (_, nodes): pnode in nodes,
3294 self._ErrorIf(len(instance_groups) > 1,
3295 constants.CV_EINSTANCESPLITGROUPS,
3296 instance, "instance has primary and secondary nodes in"
3297 " different groups: %s", utils.CommaJoin(pretty_list),
3298 code=self.ETYPE_WARNING)
3300 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3301 i_non_a_balanced.append(instance)
3303 for snode in inst_config.secondary_nodes:
3304 s_img = node_image[snode]
3305 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3306 snode, "instance %s, connection to secondary node failed",
3310 inst_nodes_offline.append(snode)
3312 # warn that the instance lives on offline nodes
3313 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3314 "instance has offline secondary node(s) %s",
3315 utils.CommaJoin(inst_nodes_offline))
3316 # ... or ghost/non-vm_capable nodes
3317 for node in inst_config.all_nodes:
3318 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3319 instance, "instance lives on ghost node %s", node)
3320 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3321 instance, "instance lives on non-vm_capable node %s", node)
3323 feedback_fn("* Verifying orphan volumes")
3324 reserved = utils.FieldSet(*cluster.reserved_lvs)
3326 # We will get spurious "unknown volume" warnings if any node of this group
3327 # is secondary for an instance whose primary is in another group. To avoid
3328 # them, we find these instances and add their volumes to node_vol_should.
3329 for inst in self.all_inst_info.values():
3330 for secondary in inst.secondary_nodes:
3331 if (secondary in self.my_node_info
3332 and inst.name not in self.my_inst_info):
3333 inst.MapLVsByNode(node_vol_should)
3336 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3338 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3339 feedback_fn("* Verifying N+1 Memory redundancy")
3340 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3342 feedback_fn("* Other Notes")
3344 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3345 % len(i_non_redundant))
3347 if i_non_a_balanced:
3348 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3349 % len(i_non_a_balanced))
3352 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3355 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3358 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3362 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3363 """Analyze the post-hooks' result
3365 This method analyses the hook result, handles it, and sends some
3366 nicely-formatted feedback back to the user.
3368 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3369 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3370 @param hooks_results: the results of the multi-node hooks rpc call
3371 @param feedback_fn: function used to send feedback back to the caller
3372 @param lu_result: previous Exec result
3373 @return: the new Exec result, based on the previous result
3377 # We only really run POST phase hooks, only for non-empty groups,
3378 # and are only interested in their results
3379 if not self.my_node_names:
3382 elif phase == constants.HOOKS_PHASE_POST:
3383 # Used to change hooks' output to proper indentation
3384 feedback_fn("* Hooks Results")
3385 assert hooks_results, "invalid result from hooks"
3387 for node_name in hooks_results:
3388 res = hooks_results[node_name]
3390 test = msg and not res.offline
3391 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3392 "Communication failure in hooks execution: %s", msg)
3393 if res.offline or msg:
3394 # No need to investigate payload if node is offline or gave
3397 for script, hkr, output in res.payload:
3398 test = hkr == constants.HKR_FAIL
3399 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3400 "Script %s failed, output:", script)
3402 output = self._HOOKS_INDENT_RE.sub(" ", output)
3403 feedback_fn("%s" % output)
3409 class LUClusterVerifyDisks(NoHooksLU):
3410 """Verifies the cluster disks status.
3415 def ExpandNames(self):
3416 self.share_locks = _ShareAll()
3417 self.needed_locks = {
3418 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3421 def Exec(self, feedback_fn):
3422 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3424 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3425 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3426 for group in group_names])
3429 class LUGroupVerifyDisks(NoHooksLU):
3430 """Verifies the status of all disks in a node group.
3435 def ExpandNames(self):
3436 # Raises errors.OpPrereqError on its own if group can't be found
3437 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3439 self.share_locks = _ShareAll()
3440 self.needed_locks = {
3441 locking.LEVEL_INSTANCE: [],
3442 locking.LEVEL_NODEGROUP: [],
3443 locking.LEVEL_NODE: [],
3446 def DeclareLocks(self, level):
3447 if level == locking.LEVEL_INSTANCE:
3448 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3450 # Lock instances optimistically, needs verification once node and group
3451 # locks have been acquired
3452 self.needed_locks[locking.LEVEL_INSTANCE] = \
3453 self.cfg.GetNodeGroupInstances(self.group_uuid)
3455 elif level == locking.LEVEL_NODEGROUP:
3456 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3458 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3459 set([self.group_uuid] +
3460 # Lock all groups used by instances optimistically; this requires
3461 # going via the node before it's locked, requiring verification
3464 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3465 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3467 elif level == locking.LEVEL_NODE:
3468 # This will only lock the nodes in the group to be verified which contain
3470 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3471 self._LockInstancesNodes()
3473 # Lock all nodes in group to be verified
3474 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3475 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3476 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3478 def CheckPrereq(self):
3479 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3480 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3481 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3483 assert self.group_uuid in owned_groups
3485 # Check if locked instances are still correct
3486 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3488 # Get instance information
3489 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3491 # Check if node groups for locked instances are still correct
3492 for (instance_name, inst) in self.instances.items():
3493 assert owned_nodes.issuperset(inst.all_nodes), \
3494 "Instance %s's nodes changed while we kept the lock" % instance_name
3496 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3499 assert self.group_uuid in inst_groups, \
3500 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3502 def Exec(self, feedback_fn):
3503 """Verify integrity of cluster disks.
3505 @rtype: tuple of three items
3506 @return: a tuple of (dict of node-to-node_error, list of instances
3507 which need activate-disks, dict of instance: (node, volume) for
3512 res_instances = set()
3515 nv_dict = _MapInstanceDisksToNodes([inst
3516 for inst in self.instances.values()
3517 if inst.admin_state == constants.ADMINST_UP])
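# nv_dict maps (node, lv_name) to the owning instance for every LV of a
# running instance; entries whose LV is reported offline mark the instance
# as needing activate-disks, and whatever is left over afterwards is missing.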
3520 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3521 set(self.cfg.GetVmCapableNodeList()))
3523 node_lvs = self.rpc.call_lv_list(nodes, [])
3525 for (node, node_res) in node_lvs.items():
3526 if node_res.offline:
3529 msg = node_res.fail_msg
3531 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3532 res_nodes[node] = msg
3535 for lv_name, (_, _, lv_online) in node_res.payload.items():
3536 inst = nv_dict.pop((node, lv_name), None)
3537 if not (lv_online or inst is None):
3538 res_instances.add(inst)
3540 # any leftover items in nv_dict are missing LVs, let's arrange the data
3542 for key, inst in nv_dict.iteritems():
3543 res_missing.setdefault(inst, []).append(list(key))
3545 return (res_nodes, list(res_instances), res_missing)
3548 class LUClusterRepairDiskSizes(NoHooksLU):
3549 """Verifies the cluster disks sizes.
3554 def ExpandNames(self):
3555 if self.op.instances:
3556 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3557 self.needed_locks = {
3558 locking.LEVEL_NODE_RES: [],
3559 locking.LEVEL_INSTANCE: self.wanted_names,
3561 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3563 self.wanted_names = None
3564 self.needed_locks = {
3565 locking.LEVEL_NODE_RES: locking.ALL_SET,
3566 locking.LEVEL_INSTANCE: locking.ALL_SET,
3568 self.share_locks = {
3569 locking.LEVEL_NODE_RES: 1,
3570 locking.LEVEL_INSTANCE: 0,
3573 def DeclareLocks(self, level):
3574 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3575 self._LockInstancesNodes(primary_only=True, level=level)
3577 def CheckPrereq(self):
3578 """Check prerequisites.
3580 This only checks the optional instance list against the existing names.
3583 if self.wanted_names is None:
3584 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3586 self.wanted_instances = \
3587 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3589 def _EnsureChildSizes(self, disk):
3590 """Ensure children of the disk have the needed disk size.
3592 This is valid mainly for DRBD8 and fixes an issue where the
3593 children have smaller disk size.
3595 @param disk: an L{ganeti.objects.Disk} object
3598 if disk.dev_type == constants.LD_DRBD8:
3599 assert disk.children, "Empty children for DRBD8?"
3600 fchild = disk.children[0]
3601 mismatch = fchild.size < disk.size
3603 self.LogInfo("Child disk has size %d, parent %d, fixing",
3604 fchild.size, disk.size)
3605 fchild.size = disk.size
3607 # and we recurse on this child only, not on the metadev
3608 return self._EnsureChildSizes(fchild) or mismatch
3612 def Exec(self, feedback_fn):
3613 """Verify the size of cluster disks.
3616 # TODO: check child disks too
3617 # TODO: check differences in size between primary/secondary nodes
3619 for instance in self.wanted_instances:
3620 pnode = instance.primary_node
3621 if pnode not in per_node_disks:
3622 per_node_disks[pnode] = []
3623 for idx, disk in enumerate(instance.disks):
3624 per_node_disks[pnode].append((instance, idx, disk))
3626 assert not (frozenset(per_node_disks.keys()) -
3627 self.owned_locks(locking.LEVEL_NODE_RES)), \
3628 "Not owning correct locks"
3629 assert not self.owned_locks(locking.LEVEL_NODE)
3632 for node, dskl in per_node_disks.items():
3633 newl = [v[2].Copy() for v in dskl]
3635 self.cfg.SetDiskID(dsk, node)
3636 result = self.rpc.call_blockdev_getsize(node, newl)
3638 self.LogWarning("Failure in blockdev_getsize call to node"
3639 " %s, ignoring", node)
3641 if len(result.payload) != len(dskl):
3642 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3643 " result.payload=%s", node, len(dskl), result.payload)
3644 self.LogWarning("Invalid result from node %s, ignoring node results",
3647 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3649 self.LogWarning("Disk %d of instance %s did not return size"
3650 " information, ignoring", idx, instance.name)
3652 if not isinstance(size, (int, long)):
3653 self.LogWarning("Disk %d of instance %s did not return valid"
3654 " size information, ignoring", idx, instance.name)
3657 if size != disk.size:
3658 self.LogInfo("Disk %d of instance %s has mismatched size,"
3659 " correcting: recorded %d, actual %d", idx,
3660 instance.name, disk.size, size)
3662 self.cfg.Update(instance, feedback_fn)
3663 changed.append((instance.name, idx, size))
3664 if self._EnsureChildSizes(disk):
3665 self.cfg.Update(instance, feedback_fn)
3666 changed.append((instance.name, idx, disk.size))
3670 class LUClusterRename(LogicalUnit):
3671 """Rename the cluster.
3674 HPATH = "cluster-rename"
3675 HTYPE = constants.HTYPE_CLUSTER
3677 def BuildHooksEnv(self):
3682 "OP_TARGET": self.cfg.GetClusterName(),
3683 "NEW_NAME": self.op.name,
3686 def BuildHooksNodes(self):
3687 """Build hooks nodes.
3690 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3692 def CheckPrereq(self):
3693 """Verify that the passed name is a valid one.
3696 hostname = netutils.GetHostname(name=self.op.name,
3697 family=self.cfg.GetPrimaryIPFamily())
3699 new_name = hostname.name
3700 self.ip = new_ip = hostname.ip
3701 old_name = self.cfg.GetClusterName()
3702 old_ip = self.cfg.GetMasterIP()
3703 if new_name == old_name and new_ip == old_ip:
3704 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3705 " cluster has changed",
3707 if new_ip != old_ip:
3708 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3709 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3710 " reachable on the network" %
3711 new_ip, errors.ECODE_NOTUNIQUE)
3713 self.op.name = new_name
3715 def Exec(self, feedback_fn):
3716 """Rename the cluster.
3719 clustername = self.op.name
3722 # shutdown the master IP
3723 master_params = self.cfg.GetMasterNetworkParameters()
3724 ems = self.cfg.GetUseExternalMipScript()
3725 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3727 result.Raise("Could not disable the master role")
3730 cluster = self.cfg.GetClusterInfo()
3731 cluster.cluster_name = clustername
3732 cluster.master_ip = new_ip
3733 self.cfg.Update(cluster, feedback_fn)
3735 # update the known hosts file
3736 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3737 node_list = self.cfg.GetOnlineNodeList()
3739 node_list.remove(master_params.name)
3742 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3744 master_params.ip = new_ip
3745 result = self.rpc.call_node_activate_master_ip(master_params.name,
3747 msg = result.fail_msg
3749 self.LogWarning("Could not re-enable the master role on"
3750 " the master, please restart manually: %s", msg)
3755 def _ValidateNetmask(cfg, netmask):
3756 """Checks if a netmask is valid.
3758 @type cfg: L{config.ConfigWriter}
3759 @param cfg: The cluster configuration
3761 @param netmask: the netmask to be verified
3762 @raise errors.OpPrereqError: if the validation fails
3765 ip_family = cfg.GetPrimaryIPFamily()
3767 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3768 except errors.ProgrammerError:
3769 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3771 if not ipcls.ValidateNetmask(netmask):
3772 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
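# Illustrative sketch only (not used by any LU): the netmask validated above is
# a CIDR prefix length (e.g. 24 for an IPv4 cluster, 64 for IPv6), as passed in
# from LUClusterSetParams via self.op.master_netmask.  The hypothetical helper
# below mirrors the plausibility check without needing a ConfigWriter instance.
def _ExampleNetmaskPlausible(prefix_len, ipv6=False):
  """Return True if prefix_len looks like a valid CIDR prefix length."""
  upper = 128 if ipv6 else 32
  return isinstance(prefix_len, int) and 0 <= prefix_len <= upper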
3776 class LUClusterSetParams(LogicalUnit):
3777 """Change the parameters of the cluster.
3780 HPATH = "cluster-modify"
3781 HTYPE = constants.HTYPE_CLUSTER
3784 def CheckArguments(self):
3788 if self.op.uid_pool:
3789 uidpool.CheckUidPool(self.op.uid_pool)
3791 if self.op.add_uids:
3792 uidpool.CheckUidPool(self.op.add_uids)
3794 if self.op.remove_uids:
3795 uidpool.CheckUidPool(self.op.remove_uids)
3797 if self.op.master_netmask is not None:
3798 _ValidateNetmask(self.cfg, self.op.master_netmask)
3800 if self.op.diskparams:
3801 for dt_params in self.op.diskparams.values():
3802 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3804 def ExpandNames(self):
3805 # FIXME: in the future maybe other cluster params won't require checking on
3806 # all nodes to be modified.
3807 self.needed_locks = {
3808 locking.LEVEL_NODE: locking.ALL_SET,
3809 locking.LEVEL_INSTANCE: locking.ALL_SET,
3810 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3812 self.share_locks = {
3813 locking.LEVEL_NODE: 1,
3814 locking.LEVEL_INSTANCE: 1,
3815 locking.LEVEL_NODEGROUP: 1,
3818 def BuildHooksEnv(self):
3823 "OP_TARGET": self.cfg.GetClusterName(),
3824 "NEW_VG_NAME": self.op.vg_name,
3827 def BuildHooksNodes(self):
3828 """Build hooks nodes.
3831 mn = self.cfg.GetMasterNode()
3834 def CheckPrereq(self):
3835 """Check prerequisites.
3837 This checks whether the given params don't conflict and
3838 if the given volume group is valid.
3841 if self.op.vg_name is not None and not self.op.vg_name:
3842 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3843 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3844 " instances exist", errors.ECODE_INVAL)
3846 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3847 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3848 raise errors.OpPrereqError("Cannot disable drbd helper while"
3849 " drbd-based instances exist",
3852 node_list = self.owned_locks(locking.LEVEL_NODE)
3854 # if vg_name not None, checks given volume group on all nodes
3856 vglist = self.rpc.call_vg_list(node_list)
3857 for node in node_list:
3858 msg = vglist[node].fail_msg
3860 # ignoring down node
3861 self.LogWarning("Error while gathering data on node %s"
3862 " (ignoring node): %s", node, msg)
3864 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3866 constants.MIN_VG_SIZE)
3868 raise errors.OpPrereqError("Error on node '%s': %s" %
3869 (node, vgstatus), errors.ECODE_ENVIRON)
3871 if self.op.drbd_helper:
3872 # checks given drbd helper on all nodes
3873 helpers = self.rpc.call_drbd_helper(node_list)
3874 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3876 self.LogInfo("Not checking drbd helper on offline node %s", node)
3878 msg = helpers[node].fail_msg
3880 raise errors.OpPrereqError("Error checking drbd helper on node"
3881 " '%s': %s" % (node, msg),
3882 errors.ECODE_ENVIRON)
3883 node_helper = helpers[node].payload
3884 if node_helper != self.op.drbd_helper:
3885 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3886 (node, node_helper), errors.ECODE_ENVIRON)
3888 self.cluster = cluster = self.cfg.GetClusterInfo()
3889 # validate params changes
3890 if self.op.beparams:
3891 objects.UpgradeBeParams(self.op.beparams)
3892 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3893 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3895 if self.op.ndparams:
3896 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3897 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3899 # TODO: we need a more general way to handle resetting
3900 # cluster-level parameters to default values
3901 if self.new_ndparams["oob_program"] == "":
3902 self.new_ndparams["oob_program"] = \
3903 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3905 if self.op.hv_state:
3906 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3907 self.cluster.hv_state_static)
3908 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3909 for hv, values in new_hv_state.items())
3911 if self.op.disk_state:
3912 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3913 self.cluster.disk_state_static)
3914 self.new_disk_state = \
3915 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3916 for name, values in svalues.items()))
3917 for storage, svalues in new_disk_state.items())
3920 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3923 all_instances = self.cfg.GetAllInstancesInfo().values()
3925 for group in self.cfg.GetAllNodeGroupsInfo().values():
3926 instances = frozenset([inst for inst in all_instances
3927 if compat.any(node in group.members
3928 for node in inst.all_nodes)])
3929 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3930 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3932 new_ipolicy, instances)
3934 violations.update(new)
3937 self.LogWarning("After the ipolicy change the following instances"
3938 " violate them: %s",
3939 utils.CommaJoin(violations))
3941 if self.op.nicparams:
3942 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3943 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3944 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3947 # check all instances for consistency
3948 for instance in self.cfg.GetAllInstancesInfo().values():
3949 for nic_idx, nic in enumerate(instance.nics):
3950 params_copy = copy.deepcopy(nic.nicparams)
3951 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3953 # check parameter syntax
3955 objects.NIC.CheckParameterSyntax(params_filled)
3956 except errors.ConfigurationError, err:
3957 nic_errors.append("Instance %s, nic/%d: %s" %
3958 (instance.name, nic_idx, err))
3960 # if we're moving instances to routed, check that they have an ip
3961 target_mode = params_filled[constants.NIC_MODE]
3962 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3963 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3964 " address" % (instance.name, nic_idx))
3966 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3967 "\n".join(nic_errors))
3969 # hypervisor list/parameters
3970 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3971 if self.op.hvparams:
3972 for hv_name, hv_dict in self.op.hvparams.items():
3973 if hv_name not in self.new_hvparams:
3974 self.new_hvparams[hv_name] = hv_dict
3976 self.new_hvparams[hv_name].update(hv_dict)
3978 # disk template parameters
3979 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3980 if self.op.diskparams:
3981 for dt_name, dt_params in self.op.diskparams.items():
3982 if dt_name not in self.new_diskparams:
3983 self.new_diskparams[dt_name] = dt_params
3985 self.new_diskparams[dt_name].update(dt_params)
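# Hedged illustration of the merge semantics used for hvparams and diskparams
# above: the cluster-level dict is copied and per-type updates are layered on
# top, so keys not mentioned in the opcode keep their current values, e.g.
#   cluster.diskparams = {"drbd": {"resync-rate": 1024, "barriers": "n"}}
#   op.diskparams      = {"drbd": {"resync-rate": 2048}}
#   new_diskparams     = {"drbd": {"resync-rate": 2048, "barriers": "n"}}
# (parameter names and values here are purely illustrative).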
3987 # os hypervisor parameters
3988 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3990 for os_name, hvs in self.op.os_hvp.items():
3991 if os_name not in self.new_os_hvp:
3992 self.new_os_hvp[os_name] = hvs
3994 for hv_name, hv_dict in hvs.items():
3995 if hv_name not in self.new_os_hvp[os_name]:
3996 self.new_os_hvp[os_name][hv_name] = hv_dict
3998 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4001 self.new_osp = objects.FillDict(cluster.osparams, {})
4002 if self.op.osparams:
4003 for os_name, osp in self.op.osparams.items():
4004 if os_name not in self.new_osp:
4005 self.new_osp[os_name] = {}
4007 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4010 if not self.new_osp[os_name]:
4011 # we removed all parameters
4012 del self.new_osp[os_name]
4014 # check the parameter validity (remote check)
4015 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4016 os_name, self.new_osp[os_name])
4018 # changes to the hypervisor list
4019 if self.op.enabled_hypervisors is not None:
4020 self.hv_list = self.op.enabled_hypervisors
4021 for hv in self.hv_list:
4022 # if the hypervisor doesn't already exist in the cluster
4023 # hvparams, we initialize it to empty, and then (in both
4024 # cases) we make sure to fill the defaults, as we might not
4025 # have a complete defaults list if the hypervisor wasn't enabled before
4027 if hv not in new_hvp:
4029 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4030 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4032 self.hv_list = cluster.enabled_hypervisors
4034 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4035 # either the enabled list has changed, or the parameters have, validate
4036 for hv_name, hv_params in self.new_hvparams.items():
4037 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4038 (self.op.enabled_hypervisors and
4039 hv_name in self.op.enabled_hypervisors)):
4040 # either this is a new hypervisor, or its parameters have changed
4041 hv_class = hypervisor.GetHypervisor(hv_name)
4042 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4043 hv_class.CheckParameterSyntax(hv_params)
4044 _CheckHVParams(self, node_list, hv_name, hv_params)
4047 # no need to check any newly-enabled hypervisors, since the
4048 # defaults have already been checked in the above code-block
4049 for os_name, os_hvp in self.new_os_hvp.items():
4050 for hv_name, hv_params in os_hvp.items():
4051 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4052 # we need to fill in the new os_hvp on top of the actual hv_p
4053 cluster_defaults = self.new_hvparams.get(hv_name, {})
4054 new_osp = objects.FillDict(cluster_defaults, hv_params)
4055 hv_class = hypervisor.GetHypervisor(hv_name)
4056 hv_class.CheckParameterSyntax(new_osp)
4057 _CheckHVParams(self, node_list, hv_name, new_osp)
4059 if self.op.default_iallocator:
4060 alloc_script = utils.FindFile(self.op.default_iallocator,
4061 constants.IALLOCATOR_SEARCH_PATH,
4063 if alloc_script is None:
4064 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4065 " specified" % self.op.default_iallocator,
4068 def Exec(self, feedback_fn):
4069 """Change the parameters of the cluster.
4072 if self.op.vg_name is not None:
4073 new_volume = self.op.vg_name
4076 if new_volume != self.cfg.GetVGName():
4077 self.cfg.SetVGName(new_volume)
4079 feedback_fn("Cluster LVM configuration already in desired"
4080 " state, not changing")
4081 if self.op.drbd_helper is not None:
4082 new_helper = self.op.drbd_helper
4085 if new_helper != self.cfg.GetDRBDHelper():
4086 self.cfg.SetDRBDHelper(new_helper)
4088 feedback_fn("Cluster DRBD helper already in desired state,"
4090 if self.op.hvparams:
4091 self.cluster.hvparams = self.new_hvparams
4093 self.cluster.os_hvp = self.new_os_hvp
4094 if self.op.enabled_hypervisors is not None:
4095 self.cluster.hvparams = self.new_hvparams
4096 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4097 if self.op.beparams:
4098 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4099 if self.op.nicparams:
4100 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4102 self.cluster.ipolicy = self.new_ipolicy
4103 if self.op.osparams:
4104 self.cluster.osparams = self.new_osp
4105 if self.op.ndparams:
4106 self.cluster.ndparams = self.new_ndparams
4107 if self.op.diskparams:
4108 self.cluster.diskparams = self.new_diskparams
4109 if self.op.hv_state:
4110 self.cluster.hv_state_static = self.new_hv_state
4111 if self.op.disk_state:
4112 self.cluster.disk_state_static = self.new_disk_state
4114 if self.op.candidate_pool_size is not None:
4115 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4116 # we need to update the pool size here, otherwise the save will fail
4117 _AdjustCandidatePool(self, [])
4119 if self.op.maintain_node_health is not None:
4120 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4121 feedback_fn("Note: CONFD was disabled at build time, node health"
4122 " maintenance is not useful (still enabling it)")
4123 self.cluster.maintain_node_health = self.op.maintain_node_health
4125 if self.op.prealloc_wipe_disks is not None:
4126 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4128 if self.op.add_uids is not None:
4129 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4131 if self.op.remove_uids is not None:
4132 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4134 if self.op.uid_pool is not None:
4135 self.cluster.uid_pool = self.op.uid_pool
4137 if self.op.default_iallocator is not None:
4138 self.cluster.default_iallocator = self.op.default_iallocator
4140 if self.op.reserved_lvs is not None:
4141 self.cluster.reserved_lvs = self.op.reserved_lvs
4143 if self.op.use_external_mip_script is not None:
4144 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4146 def helper_os(aname, mods, desc):
4148 lst = getattr(self.cluster, aname)
4149 for key, val in mods:
4150 if key == constants.DDM_ADD:
4152 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4155 elif key == constants.DDM_REMOVE:
4159 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4161 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4163 if self.op.hidden_os:
4164 helper_os("hidden_os", self.op.hidden_os, "hidden")
4166 if self.op.blacklisted_os:
4167 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
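# Hedged example of the modification lists handled by helper_os above: each
# entry is a (DDM_ADD|DDM_REMOVE, os_name) pair, e.g. an opcode carrying
#   hidden_os=[(constants.DDM_ADD, "example-os")]
# appends "example-os" to cluster.hidden_os unless it is already there
# ("example-os" is a made-up name used only for illustration).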
4169 if self.op.master_netdev:
4170 master_params = self.cfg.GetMasterNetworkParameters()
4171 ems = self.cfg.GetUseExternalMipScript()
4172 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4173 self.cluster.master_netdev)
4174 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4176 result.Raise("Could not disable the master ip")
4177 feedback_fn("Changing master_netdev from %s to %s" %
4178 (master_params.netdev, self.op.master_netdev))
4179 self.cluster.master_netdev = self.op.master_netdev
4181 if self.op.master_netmask:
4182 master_params = self.cfg.GetMasterNetworkParameters()
4183 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4184 result = self.rpc.call_node_change_master_netmask(master_params.name,
4185 master_params.netmask,
4186 self.op.master_netmask,
4188 master_params.netdev)
4190 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4193 self.cluster.master_netmask = self.op.master_netmask
4195 self.cfg.Update(self.cluster, feedback_fn)
4197 if self.op.master_netdev:
4198 master_params = self.cfg.GetMasterNetworkParameters()
4199 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4200 self.op.master_netdev)
4201 ems = self.cfg.GetUseExternalMipScript()
4202 result = self.rpc.call_node_activate_master_ip(master_params.name,
4205 self.LogWarning("Could not re-enable the master ip on"
4206 " the master, please restart manually: %s",
4210 def _UploadHelper(lu, nodes, fname):
4211 """Helper for uploading a file and showing warnings.
4214 if os.path.exists(fname):
4215 result = lu.rpc.call_upload_file(nodes, fname)
4216 for to_node, to_result in result.items():
4217 msg = to_result.fail_msg
4219 msg = ("Copy of file %s to node %s failed: %s" %
4220 (fname, to_node, msg))
4221 lu.proc.LogWarning(msg)
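# Typical use, as seen elsewhere in this module (e.g. LUClusterRename.Exec):
#   _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
# Failures are only warned about, so a single unreachable node does not abort
# the calling LU.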
4224 def _ComputeAncillaryFiles(cluster, redist):
4225 """Compute files external to Ganeti which need to be consistent.
4227 @type redist: boolean
4228 @param redist: Whether to include files which need to be redistributed
4231 # Compute files for all nodes
4233 constants.SSH_KNOWN_HOSTS_FILE,
4234 constants.CONFD_HMAC_KEY,
4235 constants.CLUSTER_DOMAIN_SECRET_FILE,
4236 constants.SPICE_CERT_FILE,
4237 constants.SPICE_CACERT_FILE,
4238 constants.RAPI_USERS_FILE,
4242 files_all.update(constants.ALL_CERT_FILES)
4243 files_all.update(ssconf.SimpleStore().GetFileList())
4245 # we need to ship at least the RAPI certificate
4246 files_all.add(constants.RAPI_CERT_FILE)
4248 if cluster.modify_etc_hosts:
4249 files_all.add(constants.ETC_HOSTS)
4251 # Files which are optional, these must:
4252 # - be present in one other category as well
4253 # - either exist or not exist on all nodes of that category (mc, vm all)
4255 constants.RAPI_USERS_FILE,
4258 # Files which should only be on master candidates
4262 files_mc.add(constants.CLUSTER_CONF_FILE)
4264 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4266 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4268 # Files which should only be on VM-capable nodes
4269 files_vm = set(filename
4270 for hv_name in cluster.enabled_hypervisors
4271 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4273 files_opt |= set(filename
4274 for hv_name in cluster.enabled_hypervisors
4275 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4277 # Filenames in each category must be unique
4278 all_files_set = files_all | files_mc | files_vm
4279 assert (len(all_files_set) ==
4280 sum(map(len, [files_all, files_mc, files_vm]))), \
4281 "Found file listed in more than one file list"
4283 # Optional files must be present in one other category
4284 assert all_files_set.issuperset(files_opt), \
4285 "Optional file not in a different required list"
4287 return (files_all, files_opt, files_mc, files_vm)
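# Minimal sketch, assuming plain sets of file names: the two assertions above
# enforce that (a) no file appears in more than one of the all/mc/vm lists and
# (b) every optional file is also listed in one of those lists.  The
# hypothetical helper below restates the same invariants for illustration.
def _ExampleAncillaryInvariantsHold(files_all, files_opt, files_mc, files_vm):
  """Return True if the ancillary-file category invariants hold."""
  union = files_all | files_mc | files_vm
  disjoint = len(union) == len(files_all) + len(files_mc) + len(files_vm)
  return disjoint and union.issuperset(files_opt)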
4290 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4291 """Distribute additional files which are part of the cluster configuration.
4293 ConfigWriter takes care of distributing the config and ssconf files, but
4294 there are more files which should be distributed to all nodes. This function
4295 makes sure those are copied.
4297 @param lu: calling logical unit
4298 @param additional_nodes: list of nodes not in the config to distribute to
4299 @type additional_vm: boolean
4300 @param additional_vm: whether the additional nodes are vm-capable or not
4303 # Gather target nodes
4304 cluster = lu.cfg.GetClusterInfo()
4305 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4307 online_nodes = lu.cfg.GetOnlineNodeList()
4308 vm_nodes = lu.cfg.GetVmCapableNodeList()
4310 if additional_nodes is not None:
4311 online_nodes.extend(additional_nodes)
4313 vm_nodes.extend(additional_nodes)
4315 # Never distribute to master node
4316 for nodelist in [online_nodes, vm_nodes]:
4317 if master_info.name in nodelist:
4318 nodelist.remove(master_info.name)
4321 (files_all, _, files_mc, files_vm) = \
4322 _ComputeAncillaryFiles(cluster, True)
4324 # Never re-distribute configuration file from here
4325 assert not (constants.CLUSTER_CONF_FILE in files_all or
4326 constants.CLUSTER_CONF_FILE in files_vm)
4327 assert not files_mc, "Master candidates not handled in this function"
4330 (online_nodes, files_all),
4331 (vm_nodes, files_vm),
4335 for (node_list, files) in filemap:
4337 _UploadHelper(lu, node_list, fname)
4340 class LUClusterRedistConf(NoHooksLU):
4341 """Force the redistribution of cluster configuration.
4343 This is a very simple LU.
4348 def ExpandNames(self):
4349 self.needed_locks = {
4350 locking.LEVEL_NODE: locking.ALL_SET,
4352 self.share_locks[locking.LEVEL_NODE] = 1
4354 def Exec(self, feedback_fn):
4355 """Redistribute the configuration.
4358 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4359 _RedistributeAncillaryFiles(self)
4362 class LUClusterActivateMasterIp(NoHooksLU):
4363 """Activate the master IP on the master node.
4366 def Exec(self, feedback_fn):
4367 """Activate the master IP.
4370 master_params = self.cfg.GetMasterNetworkParameters()
4371 ems = self.cfg.GetUseExternalMipScript()
4372 result = self.rpc.call_node_activate_master_ip(master_params.name,
4374 result.Raise("Could not activate the master IP")
4377 class LUClusterDeactivateMasterIp(NoHooksLU):
4378 """Deactivate the master IP on the master node.
4381 def Exec(self, feedback_fn):
4382 """Deactivate the master IP.
4385 master_params = self.cfg.GetMasterNetworkParameters()
4386 ems = self.cfg.GetUseExternalMipScript()
4387 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4389 result.Raise("Could not deactivate the master IP")
4392 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4393 """Sleep and poll for an instance's disk to sync.
4396 if not instance.disks or disks is not None and not disks:
4399 disks = _ExpandCheckDisks(instance, disks)
4402 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4404 node = instance.primary_node
4407 lu.cfg.SetDiskID(dev, node)
4409 # TODO: Convert to utils.Retry
4412 degr_retries = 10 # in seconds, as we sleep 1 second each time
4416 cumul_degraded = False
4417 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4418 msg = rstats.fail_msg
4420 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4423 raise errors.RemoteError("Can't contact node %s for mirror data,"
4424 " aborting." % node)
4427 rstats = rstats.payload
4429 for i, mstat in enumerate(rstats):
4431 lu.LogWarning("Can't compute data for node %s/%s",
4432 node, disks[i].iv_name)
4435 cumul_degraded = (cumul_degraded or
4436 (mstat.is_degraded and mstat.sync_percent is None))
4437 if mstat.sync_percent is not None:
4439 if mstat.estimated_time is not None:
4440 rem_time = ("%s remaining (estimated)" %
4441 utils.FormatSeconds(mstat.estimated_time))
4442 max_time = mstat.estimated_time
4444 rem_time = "no time estimate"
4445 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4446 (disks[i].iv_name, mstat.sync_percent, rem_time))
4448 # if we're done but degraded, let's do a few small retries, to
4449 # make sure we see a stable and not transient situation; therefore
4450 # we force restart of the loop
4451 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4452 logging.info("Degraded disks found, %d retries left", degr_retries)
4460 time.sleep(min(60, max_time))
4463 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4464 return not cumul_degraded
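# Rough sketch for the "Convert to utils.Retry" TODO above, using only plain
# Python (the real utils.Retry helper may differ; this is not its API).  The
# idea is to separate the "poll once" step from the sleep/give-up policy that
# _WaitForSync currently mixes into one loop.  The time module is already used
# elsewhere in this function, so no extra import is needed here.
def _ExamplePollUntil(poll_fn, delay, max_tries):
  """Call poll_fn until it returns True or max_tries attempts are used up."""
  for _ in range(max_tries):
    if poll_fn():
      return True
    time.sleep(delay)
  return False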
4467 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4468 """Check that mirrors are not degraded.
4470 The ldisk parameter, if True, will change the test from the
4471 is_degraded attribute (which represents overall non-ok status for
4472 the device(s)) to the ldisk (representing the local storage status).
4475 lu.cfg.SetDiskID(dev, node)
4479 if on_primary or dev.AssembleOnSecondary():
4480 rstats = lu.rpc.call_blockdev_find(node, dev)
4481 msg = rstats.fail_msg
4483 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4485 elif not rstats.payload:
4486 lu.LogWarning("Can't find disk on node %s", node)
4490 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4492 result = result and not rstats.payload.is_degraded
4495 for child in dev.children:
4496 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
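# Hedged usage note: callers pass ldisk=True when only the local storage state
# of a DRBD device matters (tested against constants.LDS_OKAY), and ldisk=False
# when the overall is_degraded flag, which also covers an out-of-sync peer, is
# the right test.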
4501 class LUOobCommand(NoHooksLU):
4502 """Logical unit for OOB handling.
4506 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4508 def ExpandNames(self):
4509 """Gather locks we need.
4512 if self.op.node_names:
4513 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4514 lock_names = self.op.node_names
4516 lock_names = locking.ALL_SET
4518 self.needed_locks = {
4519 locking.LEVEL_NODE: lock_names,
4522 def CheckPrereq(self):
4523 """Check prerequisites.
4526 - the node exists in the configuration
4529 Any errors are signaled by raising errors.OpPrereqError.
4533 self.master_node = self.cfg.GetMasterNode()
4535 assert self.op.power_delay >= 0.0
4537 if self.op.node_names:
4538 if (self.op.command in self._SKIP_MASTER and
4539 self.master_node in self.op.node_names):
4540 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4541 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4543 if master_oob_handler:
4544 additional_text = ("run '%s %s %s' if you want to operate on the"
4545 " master regardless") % (master_oob_handler,
4549 additional_text = "it does not support out-of-band operations"
4551 raise errors.OpPrereqError(("Operating on the master node %s is not"
4552 " allowed for %s; %s") %
4553 (self.master_node, self.op.command,
4554 additional_text), errors.ECODE_INVAL)
4556 self.op.node_names = self.cfg.GetNodeList()
4557 if self.op.command in self._SKIP_MASTER:
4558 self.op.node_names.remove(self.master_node)
4560 if self.op.command in self._SKIP_MASTER:
4561 assert self.master_node not in self.op.node_names
4563 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4565 raise errors.OpPrereqError("Node %s not found" % node_name,
4568 self.nodes.append(node)
4570 if (not self.op.ignore_status and
4571 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4572 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4573 " not marked offline") % node_name,
4576 def Exec(self, feedback_fn):
4577 """Execute OOB and return result if we expect any.
4580 master_node = self.master_node
4583 for idx, node in enumerate(utils.NiceSort(self.nodes,
4584 key=lambda node: node.name)):
4585 node_entry = [(constants.RS_NORMAL, node.name)]
4586 ret.append(node_entry)
4588 oob_program = _SupportsOob(self.cfg, node)
4591 node_entry.append((constants.RS_UNAVAIL, None))
4594 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4595 self.op.command, oob_program, node.name)
4596 result = self.rpc.call_run_oob(master_node, oob_program,
4597 self.op.command, node.name,
4601 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4602 node.name, result.fail_msg)
4603 node_entry.append((constants.RS_NODATA, None))
4606 self._CheckPayload(result)
4607 except errors.OpExecError, err:
4608 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4610 node_entry.append((constants.RS_NODATA, None))
4612 if self.op.command == constants.OOB_HEALTH:
4613 # For health we should log important events
4614 for item, status in result.payload:
4615 if status in [constants.OOB_STATUS_WARNING,
4616 constants.OOB_STATUS_CRITICAL]:
4617 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4618 item, node.name, status)
4620 if self.op.command == constants.OOB_POWER_ON:
4622 elif self.op.command == constants.OOB_POWER_OFF:
4623 node.powered = False
4624 elif self.op.command == constants.OOB_POWER_STATUS:
4625 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4626 if powered != node.powered:
4627 logging.warning(("Recorded power state (%s) of node '%s' does not"
4628 " match actual power state (%s)"), node.powered,
4631 # For configuration changing commands we should update the node
4632 if self.op.command in (constants.OOB_POWER_ON,
4633 constants.OOB_POWER_OFF):
4634 self.cfg.Update(node, feedback_fn)
4636 node_entry.append((constants.RS_NORMAL, result.payload))
4638 if (self.op.command == constants.OOB_POWER_ON and
4639 idx < len(self.nodes) - 1):
4640 time.sleep(self.op.power_delay)
4644 def _CheckPayload(self, result):
4645 """Checks if the payload is valid.
4647 @param result: RPC result
4648 @raises errors.OpExecError: If payload is not valid
4652 if self.op.command == constants.OOB_HEALTH:
4653 if not isinstance(result.payload, list):
4654 errs.append("command 'health' is expected to return a list but got %s" %
4655 type(result.payload))
4657 for item, status in result.payload:
4658 if status not in constants.OOB_STATUSES:
4659 errs.append("health item '%s' has invalid status '%s'" %
4662 if self.op.command == constants.OOB_POWER_STATUS:
4663 if not isinstance(result.payload, dict):
4664 errs.append("power-status is expected to return a dict but got %s" %
4665 type(result.payload))
4667 if self.op.command in [
4668 constants.OOB_POWER_ON,
4669 constants.OOB_POWER_OFF,
4670 constants.OOB_POWER_CYCLE,
4672 if result.payload is not None:
4673 errs.append("%s is expected not to return a payload but got '%s'" %
4674 (self.op.command, result.payload))
4677 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4678 utils.CommaJoin(errs))
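# Illustrative payload shapes accepted by _CheckPayload above (values are
# made up; only the structure matters):
#   OOB_HEALTH       -> [("item-name", status), ...] with each status taken
#                       from constants.OOB_STATUSES
#   OOB_POWER_STATUS -> {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE -> None (any other payload is rejected)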
4681 class _OsQuery(_QueryBase):
4682 FIELDS = query.OS_FIELDS
4684 def ExpandNames(self, lu):
4685 # Lock all nodes in shared mode
4686 # Temporary removal of locks, should be reverted later
4687 # TODO: reintroduce locks when they are lighter-weight
4688 lu.needed_locks = {}
4689 #self.share_locks[locking.LEVEL_NODE] = 1
4690 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4692 # The following variables interact with _QueryBase._GetNames
4694 self.wanted = self.names
4696 self.wanted = locking.ALL_SET
4698 self.do_locking = self.use_locking
4700 def DeclareLocks(self, lu, level):
4704 def _DiagnoseByOS(rlist):
4705 """Remaps a per-node return list into an a per-os per-node dictionary
4707 @param rlist: a map with node names as keys and OS objects as values
4710 @return: a dictionary with osnames as keys and as value another
4711 map, with nodes as keys and tuples of (path, status, diagnose,
4712 variants, parameters, api_versions) as values, eg::
4714 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4715 (/srv/..., False, "invalid api")],
4716 "node2": [(/srv/..., True, "", [], [])]}
4721 # we build here the list of nodes that didn't fail the RPC (at RPC
4722 # level), so that nodes with a non-responding node daemon don't
4723 # make all OSes invalid
4724 good_nodes = [node_name for node_name in rlist
4725 if not rlist[node_name].fail_msg]
4726 for node_name, nr in rlist.items():
4727 if nr.fail_msg or not nr.payload:
4729 for (name, path, status, diagnose, variants,
4730 params, api_versions) in nr.payload:
4731 if name not in all_os:
4732 # build a list of nodes for this os containing empty lists
4733 # for each node in node_list
4735 for nname in good_nodes:
4736 all_os[name][nname] = []
4737 # convert params from [name, help] to (name, help)
4738 params = [tuple(v) for v in params]
4739 all_os[name][node_name].append((path, status, diagnose,
4740 variants, params, api_versions))
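# Tiny worked example (made-up data) of the remapping performed above: an RPC
# result {"node1": <payload advertising OS "foo">, "node2": <failed call>}
# becomes
#   {"foo": {"node1": [(path, status, diagnose, variants, params,
#                       api_versions)]}}
# where "node2" is excluded from good_nodes, so its failure cannot mark "foo"
# as invalid everywhere.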
4743 def _GetQueryData(self, lu):
4744 """Computes the list of nodes and their attributes.
4747 # Locking is not used
4748 assert not (compat.any(lu.glm.is_owned(level)
4749 for level in locking.LEVELS
4750 if level != locking.LEVEL_CLUSTER) or
4751 self.do_locking or self.use_locking)
4753 valid_nodes = [node.name
4754 for node in lu.cfg.GetAllNodesInfo().values()
4755 if not node.offline and node.vm_capable]
4756 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4757 cluster = lu.cfg.GetClusterInfo()
4761 for (os_name, os_data) in pol.items():
4762 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4763 hidden=(os_name in cluster.hidden_os),
4764 blacklisted=(os_name in cluster.blacklisted_os))
4768 api_versions = set()
4770 for idx, osl in enumerate(os_data.values()):
4771 info.valid = bool(info.valid and osl and osl[0][1])
4775 (node_variants, node_params, node_api) = osl[0][3:6]
4778 variants.update(node_variants)
4779 parameters.update(node_params)
4780 api_versions.update(node_api)
4782 # Filter out inconsistent values
4783 variants.intersection_update(node_variants)
4784 parameters.intersection_update(node_params)
4785 api_versions.intersection_update(node_api)
4787 info.variants = list(variants)
4788 info.parameters = list(parameters)
4789 info.api_versions = list(api_versions)
4791 data[os_name] = info
4793 # Prepare data in requested order
4794 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4798 class LUOsDiagnose(NoHooksLU):
4799 """Logical unit for OS diagnose/query.
4805 def _BuildFilter(fields, names):
4806 """Builds a filter for querying OSes.
4809 name_filter = qlang.MakeSimpleFilter("name", names)
4811 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4812 # respective field is not requested
4813 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4814 for fname in ["hidden", "blacklisted"]
4815 if fname not in fields]
4816 if "valid" not in fields:
4817 status_filter.append([qlang.OP_TRUE, "valid"])
4820 status_filter.insert(0, qlang.OP_AND)
4822 status_filter = None
4824 if name_filter and status_filter:
4825 return [qlang.OP_AND, name_filter, status_filter]
4829 return status_filter
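# Hedged example of the filter built above for a plain "gnt-os list"-style
# query (fields=["name"], no names given): the legacy status filter hides
# hidden, blacklisted and invalid OSes, producing roughly
#   [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#            [OP_NOT, [OP_TRUE, "blacklisted"]],
#            [OP_TRUE, "valid"]]
# (qlang operator names abbreviated; when explicit names are requested this is
# wrapped in a further [OP_AND, name_filter, status_filter]).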
4831 def CheckArguments(self):
4832 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4833 self.op.output_fields, False)
4835 def ExpandNames(self):
4836 self.oq.ExpandNames(self)
4838 def Exec(self, feedback_fn):
4839 return self.oq.OldStyleQuery(self)
4842 class LUNodeRemove(LogicalUnit):
4843 """Logical unit for removing a node.
4846 HPATH = "node-remove"
4847 HTYPE = constants.HTYPE_NODE
4849 def BuildHooksEnv(self):
4854 "OP_TARGET": self.op.node_name,
4855 "NODE_NAME": self.op.node_name,
4858 def BuildHooksNodes(self):
4859 """Build hooks nodes.
4861 This doesn't run on the target node in the pre phase as a failed
4862 node would then be impossible to remove.
4865 all_nodes = self.cfg.GetNodeList()
4867 all_nodes.remove(self.op.node_name)
4870 return (all_nodes, all_nodes)
4872 def CheckPrereq(self):
4873 """Check prerequisites.
4876 - the node exists in the configuration
4877 - it does not have primary or secondary instances
4878 - it's not the master
4880 Any errors are signaled by raising errors.OpPrereqError.
4883 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4884 node = self.cfg.GetNodeInfo(self.op.node_name)
4885 assert node is not None
4887 masternode = self.cfg.GetMasterNode()
4888 if node.name == masternode:
4889 raise errors.OpPrereqError("Node is the master node, failover to another"
4890 " node is required", errors.ECODE_INVAL)
4892 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4893 if node.name in instance.all_nodes:
4894 raise errors.OpPrereqError("Instance %s is still running on the node,"
4895 " please remove first" % instance_name,
4897 self.op.node_name = node.name
4900 def Exec(self, feedback_fn):
4901 """Removes the node from the cluster.
4905 logging.info("Stopping the node daemon and removing configs from node %s",
4908 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4910 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4913 # Promote nodes to master candidate as needed
4914 _AdjustCandidatePool(self, exceptions=[node.name])
4915 self.context.RemoveNode(node.name)
4917 # Run post hooks on the node before it's removed
4918 _RunPostHook(self, node.name)
4920 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4921 msg = result.fail_msg
4923 self.LogWarning("Errors encountered on the remote node while leaving"
4924 " the cluster: %s", msg)
4926 # Remove node from our /etc/hosts
4927 if self.cfg.GetClusterInfo().modify_etc_hosts:
4928 master_node = self.cfg.GetMasterNode()
4929 result = self.rpc.call_etc_hosts_modify(master_node,
4930 constants.ETC_HOSTS_REMOVE,
4932 result.Raise("Can't update hosts file with new host data")
4933 _RedistributeAncillaryFiles(self)
4936 class _NodeQuery(_QueryBase):
4937 FIELDS = query.NODE_FIELDS
4939 def ExpandNames(self, lu):
4940 lu.needed_locks = {}
4941 lu.share_locks = _ShareAll()
4944 self.wanted = _GetWantedNodes(lu, self.names)
4946 self.wanted = locking.ALL_SET
4948 self.do_locking = (self.use_locking and
4949 query.NQ_LIVE in self.requested_data)
4952 # If any non-static field is requested we need to lock the nodes
4953 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4955 def DeclareLocks(self, lu, level):
4958 def _GetQueryData(self, lu):
4959 """Computes the list of nodes and their attributes.
4962 all_info = lu.cfg.GetAllNodesInfo()
4964 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4966 # Gather data as requested
4967 if query.NQ_LIVE in self.requested_data:
4968 # filter out non-vm_capable nodes
4969 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4971 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4972 [lu.cfg.GetHypervisorType()])
4973 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4974 for (name, nresult) in node_data.items()
4975 if not nresult.fail_msg and nresult.payload)
4979 if query.NQ_INST in self.requested_data:
4980 node_to_primary = dict([(name, set()) for name in nodenames])
4981 node_to_secondary = dict([(name, set()) for name in nodenames])
4983 inst_data = lu.cfg.GetAllInstancesInfo()
4985 for inst in inst_data.values():
4986 if inst.primary_node in node_to_primary:
4987 node_to_primary[inst.primary_node].add(inst.name)
4988 for secnode in inst.secondary_nodes:
4989 if secnode in node_to_secondary:
4990 node_to_secondary[secnode].add(inst.name)
4992 node_to_primary = None
4993 node_to_secondary = None
4995 if query.NQ_OOB in self.requested_data:
4996 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4997 for name, node in all_info.iteritems())
5001 if query.NQ_GROUP in self.requested_data:
5002 groups = lu.cfg.GetAllNodeGroupsInfo()
5006 return query.NodeQueryData([all_info[name] for name in nodenames],
5007 live_data, lu.cfg.GetMasterNode(),
5008 node_to_primary, node_to_secondary, groups,
5009 oob_support, lu.cfg.GetClusterInfo())
5012 class LUNodeQuery(NoHooksLU):
5013 """Logical unit for querying nodes.
5016 # pylint: disable=W0142
5019 def CheckArguments(self):
5020 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5021 self.op.output_fields, self.op.use_locking)
5023 def ExpandNames(self):
5024 self.nq.ExpandNames(self)
5026 def DeclareLocks(self, level):
5027 self.nq.DeclareLocks(self, level)
5029 def Exec(self, feedback_fn):
5030 return self.nq.OldStyleQuery(self)
5033 class LUNodeQueryvols(NoHooksLU):
5034 """Logical unit for getting volumes on node(s).
5038 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5039 _FIELDS_STATIC = utils.FieldSet("node")
5041 def CheckArguments(self):
5042 _CheckOutputFields(static=self._FIELDS_STATIC,
5043 dynamic=self._FIELDS_DYNAMIC,
5044 selected=self.op.output_fields)
5046 def ExpandNames(self):
5047 self.share_locks = _ShareAll()
5048 self.needed_locks = {}
5050 if not self.op.nodes:
5051 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5053 self.needed_locks[locking.LEVEL_NODE] = \
5054 _GetWantedNodes(self, self.op.nodes)
5056 def Exec(self, feedback_fn):
5057 """Computes the list of nodes and their attributes.
5060 nodenames = self.owned_locks(locking.LEVEL_NODE)
5061 volumes = self.rpc.call_node_volumes(nodenames)
5063 ilist = self.cfg.GetAllInstancesInfo()
5064 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5067 for node in nodenames:
5068 nresult = volumes[node]
5071 msg = nresult.fail_msg
5073 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5076 node_vols = sorted(nresult.payload,
5077 key=operator.itemgetter("dev"))
5079 for vol in node_vols:
5081 for field in self.op.output_fields:
5084 elif field == "phys":
5088 elif field == "name":
5090 elif field == "size":
5091 val = int(float(vol["size"]))
5092 elif field == "instance":
5093 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5095 raise errors.ParameterError(field)
5096 node_output.append(str(val))
5098 output.append(node_output)
5103 class LUNodeQueryStorage(NoHooksLU):
5104 """Logical unit for getting information on storage units on node(s).
5107 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5110 def CheckArguments(self):
5111 _CheckOutputFields(static=self._FIELDS_STATIC,
5112 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5113 selected=self.op.output_fields)
5115 def ExpandNames(self):
5116 self.share_locks = _ShareAll()
5117 self.needed_locks = {}
5120 self.needed_locks[locking.LEVEL_NODE] = \
5121 _GetWantedNodes(self, self.op.nodes)
5123 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5125 def Exec(self, feedback_fn):
5126 """Computes the list of nodes and their attributes.
5129 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5131 # Always get name to sort by
5132 if constants.SF_NAME in self.op.output_fields:
5133 fields = self.op.output_fields[:]
5135 fields = [constants.SF_NAME] + self.op.output_fields
5137 # Never ask for node or type as it's only known to the LU
5138 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5139 while extra in fields:
5140 fields.remove(extra)
5142 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5143 name_idx = field_idx[constants.SF_NAME]
5145 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5146 data = self.rpc.call_storage_list(self.nodes,
5147 self.op.storage_type, st_args,
5148 self.op.name, fields)
5152 for node in utils.NiceSort(self.nodes):
5153 nresult = data[node]
5157 msg = nresult.fail_msg
5159 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5162 rows = dict([(row[name_idx], row) for row in nresult.payload])
5164 for name in utils.NiceSort(rows.keys()):
5169 for field in self.op.output_fields:
5170 if field == constants.SF_NODE:
5172 elif field == constants.SF_TYPE:
5173 val = self.op.storage_type
5174 elif field in field_idx:
5175 val = row[field_idx[field]]
5177 raise errors.ParameterError(field)
5186 class _InstanceQuery(_QueryBase):
5187 FIELDS = query.INSTANCE_FIELDS
5189 def ExpandNames(self, lu):
5190 lu.needed_locks = {}
5191 lu.share_locks = _ShareAll()
5194 self.wanted = _GetWantedInstances(lu, self.names)
5196 self.wanted = locking.ALL_SET
5198 self.do_locking = (self.use_locking and
5199 query.IQ_LIVE in self.requested_data)
5201 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5202 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5203 lu.needed_locks[locking.LEVEL_NODE] = []
5204 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5206 self.do_grouplocks = (self.do_locking and
5207 query.IQ_NODES in self.requested_data)
5209 def DeclareLocks(self, lu, level):
5211 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5212 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5214 # Lock all groups used by instances optimistically; this requires going
5215 # via the node before it's locked, requiring verification later on
5216 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5218 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5219 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5220 elif level == locking.LEVEL_NODE:
5221 lu._LockInstancesNodes() # pylint: disable=W0212
5224 def _CheckGroupLocks(lu):
5225 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5226 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5228 # Check if node groups for locked instances are still correct
5229 for instance_name in owned_instances:
5230 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5232 def _GetQueryData(self, lu):
5233 """Computes the list of instances and their attributes.
5236 if self.do_grouplocks:
5237 self._CheckGroupLocks(lu)
5239 cluster = lu.cfg.GetClusterInfo()
5240 all_info = lu.cfg.GetAllInstancesInfo()
5242 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5244 instance_list = [all_info[name] for name in instance_names]
5245 nodes = frozenset(itertools.chain(*(inst.all_nodes
5246 for inst in instance_list)))
5247 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5250 wrongnode_inst = set()
5252 # Gather data as requested
5253 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5255 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5257 result = node_data[name]
5259 # offline nodes will be in both lists
5260 assert result.fail_msg
5261 offline_nodes.append(name)
5263 bad_nodes.append(name)
5264 elif result.payload:
5265 for inst in result.payload:
5266 if inst in all_info:
5267 if all_info[inst].primary_node == name:
5268 live_data.update(result.payload)
5270 wrongnode_inst.add(inst)
5272 # orphan instance; we don't list it here as we don't
5273 # handle this case yet in the output of instance listing
5274 logging.warning("Orphan instance '%s' found on node %s",
5276 # else no instance is alive
5280 if query.IQ_DISKUSAGE in self.requested_data:
5281 disk_usage = dict((inst.name,
5282 _ComputeDiskSize(inst.disk_template,
5283 [{constants.IDISK_SIZE: disk.size}
5284 for disk in inst.disks]))
5285 for inst in instance_list)
5289 if query.IQ_CONSOLE in self.requested_data:
5291 for inst in instance_list:
5292 if inst.name in live_data:
5293 # Instance is running
5294 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5296 consinfo[inst.name] = None
5297 assert set(consinfo.keys()) == set(instance_names)
5301 if query.IQ_NODES in self.requested_data:
5302 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5304 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5305 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5306 for uuid in set(map(operator.attrgetter("group"),
5312 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5313 disk_usage, offline_nodes, bad_nodes,
5314 live_data, wrongnode_inst, consinfo,
5318 class LUQuery(NoHooksLU):
5319 """Query for resources/items of a certain kind.
5322 # pylint: disable=W0142
5325 def CheckArguments(self):
5326 qcls = _GetQueryImplementation(self.op.what)
5328 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5330 def ExpandNames(self):
5331 self.impl.ExpandNames(self)
5333 def DeclareLocks(self, level):
5334 self.impl.DeclareLocks(self, level)
5336 def Exec(self, feedback_fn):
5337 return self.impl.NewStyleQuery(self)
5340 class LUQueryFields(NoHooksLU):
5341 """Query for resources/items of a certain kind.
5344 # pylint: disable=W0142
5347 def CheckArguments(self):
5348 self.qcls = _GetQueryImplementation(self.op.what)
5350 def ExpandNames(self):
5351 self.needed_locks = {}
5353 def Exec(self, feedback_fn):
5354 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5357 class LUNodeModifyStorage(NoHooksLU):
5358 """Logical unit for modifying a storage volume on a node.
5363 def CheckArguments(self):
5364 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5366 storage_type = self.op.storage_type
5369 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5371 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5372 " modified" % storage_type,
5375 diff = set(self.op.changes.keys()) - modifiable
5377 raise errors.OpPrereqError("The following fields can not be modified for"
5378 " storage units of type '%s': %r" %
5379 (storage_type, list(diff)),
5382 def ExpandNames(self):
5383 self.needed_locks = {
5384 locking.LEVEL_NODE: self.op.node_name,
5387 def Exec(self, feedback_fn):
5388 """Computes the list of nodes and their attributes.
5391 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5392 result = self.rpc.call_storage_modify(self.op.node_name,
5393 self.op.storage_type, st_args,
5394 self.op.name, self.op.changes)
5395 result.Raise("Failed to modify storage unit '%s' on %s" %
5396 (self.op.name, self.op.node_name))
5399 class LUNodeAdd(LogicalUnit):
5400 """Logical unit for adding node to the cluster.
5404 HTYPE = constants.HTYPE_NODE
5405 _NFLAGS = ["master_capable", "vm_capable"]
5407 def CheckArguments(self):
5408 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5409 # validate/normalize the node name
5410 self.hostname = netutils.GetHostname(name=self.op.node_name,
5411 family=self.primary_ip_family)
5412 self.op.node_name = self.hostname.name
5414 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5415 raise errors.OpPrereqError("Cannot readd the master node",
5418 if self.op.readd and self.op.group:
5419 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5420 " being readded", errors.ECODE_INVAL)
5422 def BuildHooksEnv(self):
5425 This will run on all nodes before, and on all nodes + the new node after.
5429 "OP_TARGET": self.op.node_name,
5430 "NODE_NAME": self.op.node_name,
5431 "NODE_PIP": self.op.primary_ip,
5432 "NODE_SIP": self.op.secondary_ip,
5433 "MASTER_CAPABLE": str(self.op.master_capable),
5434 "VM_CAPABLE": str(self.op.vm_capable),
5437 def BuildHooksNodes(self):
5438 """Build hooks nodes.
5441 # Exclude added node
5442 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5443 post_nodes = pre_nodes + [self.op.node_name, ]
5445 return (pre_nodes, post_nodes)
5447 def CheckPrereq(self):
5448 """Check prerequisites.
5451 - the new node is not already in the config
5453 - its parameters (single/dual homed) matches the cluster
5455 Any errors are signaled by raising errors.OpPrereqError.
5459 hostname = self.hostname
5460 node = hostname.name
5461 primary_ip = self.op.primary_ip = hostname.ip
5462 if self.op.secondary_ip is None:
5463 if self.primary_ip_family == netutils.IP6Address.family:
5464 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5465 " IPv4 address must be given as secondary",
5467 self.op.secondary_ip = primary_ip
5469 secondary_ip = self.op.secondary_ip
5470 if not netutils.IP4Address.IsValid(secondary_ip):
5471 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5472 " address" % secondary_ip, errors.ECODE_INVAL)
5474 node_list = cfg.GetNodeList()
5475 if not self.op.readd and node in node_list:
5476 raise errors.OpPrereqError("Node %s is already in the configuration" %
5477 node, errors.ECODE_EXISTS)
5478 elif self.op.readd and node not in node_list:
5479 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5482 self.changed_primary_ip = False
5484 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5485 if self.op.readd and node == existing_node_name:
5486 if existing_node.secondary_ip != secondary_ip:
5487 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5488 " address configuration as before",
5490 if existing_node.primary_ip != primary_ip:
5491 self.changed_primary_ip = True
5495 if (existing_node.primary_ip == primary_ip or
5496 existing_node.secondary_ip == primary_ip or
5497 existing_node.primary_ip == secondary_ip or
5498 existing_node.secondary_ip == secondary_ip):
5499 raise errors.OpPrereqError("New node ip address(es) conflict with"
5500 " existing node %s" % existing_node.name,
5501 errors.ECODE_NOTUNIQUE)
5503 # After this 'if' block, None is no longer a valid value for the
5504 # _capable op attributes
5506 old_node = self.cfg.GetNodeInfo(node)
5507 assert old_node is not None, "Can't retrieve locked node %s" % node
5508 for attr in self._NFLAGS:
5509 if getattr(self.op, attr) is None:
5510 setattr(self.op, attr, getattr(old_node, attr))
5512 for attr in self._NFLAGS:
5513 if getattr(self.op, attr) is None:
5514 setattr(self.op, attr, True)
5516 if self.op.readd and not self.op.vm_capable:
5517 pri, sec = cfg.GetNodeInstances(node)
5519 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5520 " flag set to false, but it already holds"
5521 " instances" % node,
5524 # check that the type of the node (single versus dual homed) is the
5525 # same as for the master
5526 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5527 master_singlehomed = myself.secondary_ip == myself.primary_ip
5528 newbie_singlehomed = secondary_ip == primary_ip
5529 if master_singlehomed != newbie_singlehomed:
5530 if master_singlehomed:
5531 raise errors.OpPrereqError("The master has no secondary ip but the"
5532 " new node has one",
5535 raise errors.OpPrereqError("The master has a secondary ip but the"
5536 " new node doesn't have one",
5539 # checks reachability
5540 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5541 raise errors.OpPrereqError("Node not reachable by ping",
5542 errors.ECODE_ENVIRON)
5544 if not newbie_singlehomed:
5545 # check reachability from my secondary ip to newbie's secondary ip
5546 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5547 source=myself.secondary_ip):
5548 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5549 " based ping to node daemon port",
5550 errors.ECODE_ENVIRON)
5557 if self.op.master_capable:
5558 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5560 self.master_candidate = False
5563 self.new_node = old_node
5565 node_group = cfg.LookupNodeGroup(self.op.group)
5566 self.new_node = objects.Node(name=node,
5567 primary_ip=primary_ip,
5568 secondary_ip=secondary_ip,
5569 master_candidate=self.master_candidate,
5570 offline=False, drained=False,
5573 if self.op.ndparams:
5574 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5576 if self.op.hv_state:
5577 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5579 if self.op.disk_state:
5580 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5582 def Exec(self, feedback_fn):
5583 """Adds the new node to the cluster.
5586 new_node = self.new_node
5587 node = new_node.name
5589 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5592 # We are adding a new node, so we assume it's powered
5593 new_node.powered = True
5595 # for re-adds, reset the offline/drained/master-candidate flags;
5596 # we need to reset here, otherwise offline would prevent RPC calls
5597 # later in the procedure; this also means that if the re-add
5598 # fails, we are left with a non-offlined, broken node
5600 new_node.drained = new_node.offline = False # pylint: disable=W0201
5601 self.LogInfo("Readding a node, the offline/drained flags were reset")
5602 # if we demote the node, we do cleanup later in the procedure
5603 new_node.master_candidate = self.master_candidate
5604 if self.changed_primary_ip:
5605 new_node.primary_ip = self.op.primary_ip
5607 # copy the master/vm_capable flags
5608 for attr in self._NFLAGS:
5609 setattr(new_node, attr, getattr(self.op, attr))
5611 # notify the user about any possible mc promotion
5612 if new_node.master_candidate:
5613 self.LogInfo("Node will be a master candidate")
5615 if self.op.ndparams:
5616 new_node.ndparams = self.op.ndparams
5618 new_node.ndparams = {}
5620 if self.op.hv_state:
5621 new_node.hv_state_static = self.new_hv_state
5623 if self.op.disk_state:
5624 new_node.disk_state_static = self.new_disk_state
5626 # check connectivity
5627 result = self.rpc.call_version([node])[node]
5628 result.Raise("Can't get version information from node %s" % node)
5629 if constants.PROTOCOL_VERSION == result.payload:
5630 logging.info("Communication to node %s fine, sw version %s match",
5631 node, result.payload)
5633 raise errors.OpExecError("Version mismatch master version %s,"
5634 " node version %s" %
5635 (constants.PROTOCOL_VERSION, result.payload))
5637 # Add node to our /etc/hosts, and add key to known_hosts
5638 if self.cfg.GetClusterInfo().modify_etc_hosts:
5639 master_node = self.cfg.GetMasterNode()
5640 result = self.rpc.call_etc_hosts_modify(master_node,
5641 constants.ETC_HOSTS_ADD,
5644 result.Raise("Can't update hosts file with new host data")
5646 if new_node.secondary_ip != new_node.primary_ip:
5647 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5650 node_verify_list = [self.cfg.GetMasterNode()]
5651 node_verify_param = {
5652 constants.NV_NODELIST: ([node], {}),
5653 # TODO: do a node-net-test as well?
5656 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5657 self.cfg.GetClusterName())
5658 for verifier in node_verify_list:
5659 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5660 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5662 for failed in nl_payload:
5663 feedback_fn("ssh/hostname verification failed"
5664 " (checking from %s): %s" %
5665 (verifier, nl_payload[failed]))
5666 raise errors.OpExecError("ssh/hostname verification failed")
5669 _RedistributeAncillaryFiles(self)
5670 self.context.ReaddNode(new_node)
5671 # make sure we redistribute the config
5672 self.cfg.Update(new_node, feedback_fn)
5673 # and make sure the new node will not have old files around
5674 if not new_node.master_candidate:
5675 result = self.rpc.call_node_demote_from_mc(new_node.name)
5676 msg = result.fail_msg
5678 self.LogWarning("Node failed to demote itself from master"
5679 " candidate status: %s" % msg)
5681 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5682 additional_vm=self.op.vm_capable)
5683 self.context.AddNode(new_node, self.proc.GetECId())
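  # Illustrative sketch (editor's note, not part of the LU): the verification
  # step above asks the master to contact the new node and reports failures in
  # the NV_NODELIST payload. With a hypothetical master "master1" and new node
  # "node4", the shape of the data is roughly:
  #
  #   node_verify_param = {constants.NV_NODELIST: (["node4"], {})}
  #   result = self.rpc.call_node_verify(["master1"], node_verify_param,
  #                                      self.cfg.GetClusterName())
  #   nl_payload = result["master1"].payload[constants.NV_NODELIST]
  #   # nl_payload maps each unreachable node name to an error string;
  #   # an empty dict means ssh/hostname verification succeeded.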
5686 class LUNodeSetParams(LogicalUnit):
5687 """Modifies the parameters of a node.
5689 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5690 to the node role (as _ROLE_*)
5691 @cvar _R2F: a dictionary from node role to tuples of flags
5692 @cvar _FLAGS: a list of attribute names corresponding to the flags
5695 HPATH = "node-modify"
5696 HTYPE = constants.HTYPE_NODE
5698 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5700 (True, False, False): _ROLE_CANDIDATE,
5701 (False, True, False): _ROLE_DRAINED,
5702 (False, False, True): _ROLE_OFFLINE,
5703 (False, False, False): _ROLE_REGULAR,
5705 _R2F = dict((v, k) for k, v in _F2R.items())
5706 _FLAGS = ["master_candidate", "drained", "offline"]
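  # Illustrative sketch (editor's note): how the tables above relate. A node's
  # (master_candidate, drained, offline) flag tuple maps to exactly one role,
  # _R2F inverts the mapping, and _FLAGS lists the attribute names in the same
  # order as the tuple elements. For example:
  #
  #   flags = (True, False, False)   # master candidate, not drained/offline
  #   role = _F2R[flags]             # -> _ROLE_CANDIDATE
  #   assert _R2F[role] == flags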
5708 def CheckArguments(self):
5709 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5710 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5711 self.op.master_capable, self.op.vm_capable,
5712 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5714 if all_mods.count(None) == len(all_mods):
5715 raise errors.OpPrereqError("Please pass at least one modification",
5717 if all_mods.count(True) > 1:
5718 raise errors.OpPrereqError("Can't set the node into more than one"
5719 " state at the same time",
5722 # Boolean value that tells us whether we might be demoting from MC
5723 self.might_demote = (self.op.master_candidate == False or
5724 self.op.offline == True or
5725 self.op.drained == True or
5726 self.op.master_capable == False)
5728 if self.op.secondary_ip:
5729 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5730 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5731 " address" % self.op.secondary_ip,
5734 self.lock_all = self.op.auto_promote and self.might_demote
5735 self.lock_instances = self.op.secondary_ip is not None
5737 def _InstanceFilter(self, instance):
5738 """Filter for getting affected instances.
5741 return (instance.disk_template in constants.DTS_INT_MIRROR and
5742 self.op.node_name in instance.all_nodes)
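  # Illustrative sketch (editor's note): _InstanceFilter is used as a
  # predicate for the configuration query, both when declaring instance locks
  # and when re-checking them in CheckPrereq, e.g.:
  #
  #   affected = self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
  #   # affected maps instance names to objects.Instance for all internally
  #   # mirrored (e.g. DRBD) instances that use the node being modified.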
5744 def ExpandNames(self):
5746 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5748 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5750 # Since modifying a node can have severe effects on currently running
5751 # operations, the resource lock is at least acquired in shared mode
5752 self.needed_locks[locking.LEVEL_NODE_RES] = \
5753 self.needed_locks[locking.LEVEL_NODE]
5755 # Get node resource and instance locks in shared mode; they are not used
5756 # for anything but read-only access
5757 self.share_locks[locking.LEVEL_NODE_RES] = 1
5758 self.share_locks[locking.LEVEL_INSTANCE] = 1
5760 if self.lock_instances:
5761 self.needed_locks[locking.LEVEL_INSTANCE] = \
5762 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5764 def BuildHooksEnv(self):
5767 This runs on the master node.
5771 "OP_TARGET": self.op.node_name,
5772 "MASTER_CANDIDATE": str(self.op.master_candidate),
5773 "OFFLINE": str(self.op.offline),
5774 "DRAINED": str(self.op.drained),
5775 "MASTER_CAPABLE": str(self.op.master_capable),
5776 "VM_CAPABLE": str(self.op.vm_capable),
5779 def BuildHooksNodes(self):
5780 """Build hooks nodes.
5783 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5786 def CheckPrereq(self):
5787 """Check prerequisites.
5789 This only checks the instance list against the existing names.
5792 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5794 if self.lock_instances:
5795 affected_instances = \
5796 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5798 # Verify instance locks
5799 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5800 wanted_instances = frozenset(affected_instances.keys())
5801 if wanted_instances - owned_instances:
5802 raise errors.OpPrereqError("Instances affected by changing node %s's"
5803 " secondary IP address have changed since"
5804 " locks were acquired, wanted '%s', have"
5805 " '%s'; retry the operation" %
5807 utils.CommaJoin(wanted_instances),
5808 utils.CommaJoin(owned_instances)),
5811 affected_instances = None
5813 if (self.op.master_candidate is not None or
5814 self.op.drained is not None or
5815 self.op.offline is not None):
5816 # we can't change the master's node flags
5817 if self.op.node_name == self.cfg.GetMasterNode():
5818 raise errors.OpPrereqError("The master role can be changed"
5819 " only via master-failover",
5822 if self.op.master_candidate and not node.master_capable:
5823 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5824 " it a master candidate" % node.name,
5827 if self.op.vm_capable == False:
5828 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5830 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5831 " the vm_capable flag" % node.name,
5834 if node.master_candidate and self.might_demote and not self.lock_all:
5835 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5836 # check if after removing the current node, we're missing master
5838 (mc_remaining, mc_should, _) = \
5839 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5840 if mc_remaining < mc_should:
5841 raise errors.OpPrereqError("Not enough master candidates, please"
5842 " pass auto promote option to allow"
5843 " promotion", errors.ECODE_STATE)
5845 self.old_flags = old_flags = (node.master_candidate,
5846 node.drained, node.offline)
5847 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5848 self.old_role = old_role = self._F2R[old_flags]
5850 # Check for ineffective changes
5851 for attr in self._FLAGS:
5852 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5853 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5854 setattr(self.op, attr, None)
5856 # Past this point, any flag change to False means a transition
5857 # away from the respective state, as only real changes are kept
5859 # TODO: We might query the real power state if it supports OOB
5860 if _SupportsOob(self.cfg, node):
5861 if self.op.offline is False and not (node.powered or
5862 self.op.powered == True):
5863 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5864 " offline status can be reset") %
5866 elif self.op.powered is not None:
5867 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5868 " as it does not support out-of-band"
5869 " handling") % self.op.node_name)
5871 # If we're being de-offlined or drained, we'll promote ourselves to MC if needed
5872 if (self.op.drained == False or self.op.offline == False or
5873 (self.op.master_capable and not node.master_capable)):
5874 if _DecideSelfPromotion(self):
5875 self.op.master_candidate = True
5876 self.LogInfo("Auto-promoting node to master candidate")
5878 # If we're no longer master capable, we'll demote ourselves from MC
5879 if self.op.master_capable == False and node.master_candidate:
5880 self.LogInfo("Demoting from master candidate")
5881 self.op.master_candidate = False
5884 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5885 if self.op.master_candidate:
5886 new_role = self._ROLE_CANDIDATE
5887 elif self.op.drained:
5888 new_role = self._ROLE_DRAINED
5889 elif self.op.offline:
5890 new_role = self._ROLE_OFFLINE
5891 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5892 # False is still in new flags, which means we're un-setting (the
5894 new_role = self._ROLE_REGULAR
5895 else: # no new flags, nothing, keep old role
5898 self.new_role = new_role
5900 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5901 # Trying to transition out of offline status
5902 # TODO: Use standard RPC runner, but make sure it works when the node is
5903 # still marked offline
5904 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5906 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5907 " to report its version: %s" %
5908 (node.name, result.fail_msg),
5911 self.LogWarning("Transitioning node from offline to online state"
5912 " without using re-add. Please make sure the node"
5915 if self.op.secondary_ip:
5916 # Ok even without locking, because this can't be changed by any LU
5917 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5918 master_singlehomed = master.secondary_ip == master.primary_ip
5919 if master_singlehomed and self.op.secondary_ip:
5920 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5921 " homed cluster", errors.ECODE_INVAL)
5923 assert not (frozenset(affected_instances) -
5924 self.owned_locks(locking.LEVEL_INSTANCE))
5927 if affected_instances:
5928 raise errors.OpPrereqError("Cannot change secondary IP address:"
5929 " offline node has instances (%s)"
5930 " configured to use it" %
5931 utils.CommaJoin(affected_instances.keys()))
5933 # On online nodes, check that no instances are running, and that
5934 # the node has the new ip and we can reach it.
5935 for instance in affected_instances.values():
5936 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5937 msg="cannot change secondary ip")
5939 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5940 if master.name != node.name:
5941 # check reachability from master secondary ip to new secondary ip
5942 if not netutils.TcpPing(self.op.secondary_ip,
5943 constants.DEFAULT_NODED_PORT,
5944 source=master.secondary_ip):
5945 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5946 " based ping to node daemon port",
5947 errors.ECODE_ENVIRON)
5949 if self.op.ndparams:
5950 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5951 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5952 self.new_ndparams = new_ndparams
5954 if self.op.hv_state:
5955 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5956 self.node.hv_state_static)
5958 if self.op.disk_state:
5959 self.new_disk_state = \
5960 _MergeAndVerifyDiskState(self.op.disk_state,
5961 self.node.disk_state_static)
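  # Worked example (editor's note, hypothetical values): offlining a node that
  # is currently a master candidate.
  #
  #   old_flags = (True, False, False)   # (mc, drained, offline)
  #   old_role = self._F2R[old_flags]    # _ROLE_CANDIDATE
  #   # self.op.offline is True, so:
  #   new_role = self._ROLE_OFFLINE
  #   # Exec() then demotes the node from MC and applies self._R2F[new_role].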
5963 def Exec(self, feedback_fn):
5968 old_role = self.old_role
5969 new_role = self.new_role
5973 if self.op.ndparams:
5974 node.ndparams = self.new_ndparams
5976 if self.op.powered is not None:
5977 node.powered = self.op.powered
5979 if self.op.hv_state:
5980 node.hv_state_static = self.new_hv_state
5982 if self.op.disk_state:
5983 node.disk_state_static = self.new_disk_state
5985 for attr in ["master_capable", "vm_capable"]:
5986 val = getattr(self.op, attr)
5988 setattr(node, attr, val)
5989 result.append((attr, str(val)))
5991 if new_role != old_role:
5992 # Tell the node to demote itself, if no longer MC and not offline
5993 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5994 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5996 self.LogWarning("Node failed to demote itself: %s", msg)
5998 new_flags = self._R2F[new_role]
5999 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6001 result.append((desc, str(nf)))
6002 (node.master_candidate, node.drained, node.offline) = new_flags
6004 # we locked all nodes, so we adjust the CP before updating this node
6006 _AdjustCandidatePool(self, [node.name])
6008 if self.op.secondary_ip:
6009 node.secondary_ip = self.op.secondary_ip
6010 result.append(("secondary_ip", self.op.secondary_ip))
6012 # this will trigger configuration file update, if needed
6013 self.cfg.Update(node, feedback_fn)
6015 # this will trigger job queue propagation or cleanup if the mc
6017 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6018 self.context.ReaddNode(node)
6023 class LUNodePowercycle(NoHooksLU):
6024 """Powercycles a node.
6029 def CheckArguments(self):
6030 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6031 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6032 raise errors.OpPrereqError("The node is the master and the force"
6033 " parameter was not set",
6036 def ExpandNames(self):
6037 """Locking for PowercycleNode.
6039 This is a last-resort option and shouldn't block on other
6040 jobs. Therefore, we grab no locks.
6043 self.needed_locks = {}
6045 def Exec(self, feedback_fn):
6049 result = self.rpc.call_node_powercycle(self.op.node_name,
6050 self.cfg.GetHypervisorType())
6051 result.Raise("Failed to schedule the reboot")
6052 return result.payload
6055 class LUClusterQuery(NoHooksLU):
6056 """Query cluster configuration.
6061 def ExpandNames(self):
6062 self.needed_locks = {}
6064 def Exec(self, feedback_fn):
6065 """Return cluster config.
6068 cluster = self.cfg.GetClusterInfo()
6071 # Filter just for enabled hypervisors
6072 for os_name, hv_dict in cluster.os_hvp.items():
6073 os_hvp[os_name] = {}
6074 for hv_name, hv_params in hv_dict.items():
6075 if hv_name in cluster.enabled_hypervisors:
6076 os_hvp[os_name][hv_name] = hv_params
6078 # Convert ip_family to ip_version
6079 primary_ip_version = constants.IP4_VERSION
6080 if cluster.primary_ip_family == netutils.IP6Address.family:
6081 primary_ip_version = constants.IP6_VERSION
6084 "software_version": constants.RELEASE_VERSION,
6085 "protocol_version": constants.PROTOCOL_VERSION,
6086 "config_version": constants.CONFIG_VERSION,
6087 "os_api_version": max(constants.OS_API_VERSIONS),
6088 "export_version": constants.EXPORT_VERSION,
6089 "architecture": (platform.architecture()[0], platform.machine()),
6090 "name": cluster.cluster_name,
6091 "master": cluster.master_node,
6092 "default_hypervisor": cluster.primary_hypervisor,
6093 "enabled_hypervisors": cluster.enabled_hypervisors,
6094 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6095 for hypervisor_name in cluster.enabled_hypervisors]),
6097 "beparams": cluster.beparams,
6098 "osparams": cluster.osparams,
6099 "ipolicy": cluster.ipolicy,
6100 "nicparams": cluster.nicparams,
6101 "ndparams": cluster.ndparams,
6102 "candidate_pool_size": cluster.candidate_pool_size,
6103 "master_netdev": cluster.master_netdev,
6104 "master_netmask": cluster.master_netmask,
6105 "use_external_mip_script": cluster.use_external_mip_script,
6106 "volume_group_name": cluster.volume_group_name,
6107 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6108 "file_storage_dir": cluster.file_storage_dir,
6109 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6110 "maintain_node_health": cluster.maintain_node_health,
6111 "ctime": cluster.ctime,
6112 "mtime": cluster.mtime,
6113 "uuid": cluster.uuid,
6114 "tags": list(cluster.GetTags()),
6115 "uid_pool": cluster.uid_pool,
6116 "default_iallocator": cluster.default_iallocator,
6117 "reserved_lvs": cluster.reserved_lvs,
6118 "primary_ip_version": primary_ip_version,
6119 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6120 "hidden_os": cluster.hidden_os,
6121 "blacklisted_os": cluster.blacklisted_os,
6127 class LUClusterConfigQuery(NoHooksLU):
6128 """Return configuration values.
6132 _FIELDS_DYNAMIC = utils.FieldSet()
6133 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6134 "watcher_pause", "volume_group_name")
6136 def CheckArguments(self):
6137 _CheckOutputFields(static=self._FIELDS_STATIC,
6138 dynamic=self._FIELDS_DYNAMIC,
6139 selected=self.op.output_fields)
6141 def ExpandNames(self):
6142 self.needed_locks = {}
6144 def Exec(self, feedback_fn):
6145 """Dump a representation of the cluster config to the standard output.
6149 for field in self.op.output_fields:
6150 if field == "cluster_name":
6151 entry = self.cfg.GetClusterName()
6152 elif field == "master_node":
6153 entry = self.cfg.GetMasterNode()
6154 elif field == "drain_flag":
6155 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6156 elif field == "watcher_pause":
6157 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6158 elif field == "volume_group_name":
6159 entry = self.cfg.GetVGName()
6161 raise errors.ParameterError(field)
6162 values.append(entry)
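  # Illustrative sketch (editor's note): the field dispatch above turns a
  # request such as
  #
  #   output_fields = ["cluster_name", "master_node", "drain_flag"]
  #
  # into a values list like (hypothetical data):
  #
  #   ["cluster.example.com", "node1.example.com", False]
  #
  # i.e. one entry per requested field, in the same order.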
6166 class LUInstanceActivateDisks(NoHooksLU):
6167 """Bring up an instance's disks.
6172 def ExpandNames(self):
6173 self._ExpandAndLockInstance()
6174 self.needed_locks[locking.LEVEL_NODE] = []
6175 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6177 def DeclareLocks(self, level):
6178 if level == locking.LEVEL_NODE:
6179 self._LockInstancesNodes()
6181 def CheckPrereq(self):
6182 """Check prerequisites.
6184 This checks that the instance is in the cluster.
6187 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6188 assert self.instance is not None, \
6189 "Cannot retrieve locked instance %s" % self.op.instance_name
6190 _CheckNodeOnline(self, self.instance.primary_node)
6192 def Exec(self, feedback_fn):
6193 """Activate the disks.
6196 disks_ok, disks_info = \
6197 _AssembleInstanceDisks(self, self.instance,
6198 ignore_size=self.op.ignore_size)
6200 raise errors.OpExecError("Cannot activate block devices")
6205 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6207 """Prepare the block devices for an instance.
6209 This sets up the block devices on all nodes.
6211 @type lu: L{LogicalUnit}
6212 @param lu: the logical unit on whose behalf we execute
6213 @type instance: L{objects.Instance}
6214 @param instance: the instance for whose disks we assemble
6215 @type disks: list of L{objects.Disk} or None
6216 @param disks: which disks to assemble (or all, if None)
6217 @type ignore_secondaries: boolean
6218 @param ignore_secondaries: if true, errors on secondary nodes
6219 won't result in an error return from the function
6220 @type ignore_size: boolean
6221 @param ignore_size: if true, the current known size of the disk
6222 will not be used during the disk activation, useful for cases
6223 when the size is wrong
6224 @return: False if the operation failed, otherwise a list of
6225 (host, instance_visible_name, node_visible_name)
6226 with the mapping from node devices to instance devices
6231 iname = instance.name
6232 disks = _ExpandCheckDisks(instance, disks)
6234 # With the two-pass mechanism we try to reduce the window of
6235 # opportunity for the race condition of switching DRBD to primary
6236 # before the handshake has occurred, but we do not eliminate it
6238 # The proper fix would be to wait (with some limits) until the
6239 # connection has been made and drbd transitions from WFConnection
6240 # into any other network-connected state (Connected, SyncTarget,
6243 # 1st pass, assemble on all nodes in secondary mode
6244 for idx, inst_disk in enumerate(disks):
6245 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6247 node_disk = node_disk.Copy()
6248 node_disk.UnsetSize()
6249 lu.cfg.SetDiskID(node_disk, node)
6250 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6251 msg = result.fail_msg
6253 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6254 " (is_primary=False, pass=1): %s",
6255 inst_disk.iv_name, node, msg)
6256 if not ignore_secondaries:
6259 # FIXME: race condition on drbd migration to primary
6261 # 2nd pass, do only the primary node
6262 for idx, inst_disk in enumerate(disks):
6265 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6266 if node != instance.primary_node:
6269 node_disk = node_disk.Copy()
6270 node_disk.UnsetSize()
6271 lu.cfg.SetDiskID(node_disk, node)
6272 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6273 msg = result.fail_msg
6275 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6276 " (is_primary=True, pass=2): %s",
6277 inst_disk.iv_name, node, msg)
6280 dev_path = result.payload
6282 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6284 # leave the disks configured for the primary node
6285 # this is a workaround that would be fixed better by
6286 # improving the logical/physical id handling
6288 lu.cfg.SetDiskID(disk, instance.primary_node)
6290 return disks_ok, device_info
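# Illustrative sketch (editor's note): how a caller typically consumes the
# return value of _AssembleInstanceDisks (see _StartInstanceDisks below and
# LUInstanceActivateDisks above). Hypothetical usage, with feedback_fn
# standing in for whatever reporting callback the caller has:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     # at least one device failed to assemble (on the primary, or on a
#     # secondary with ignore_secondaries=False)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     # device_info only covers the primary node's view of the disks
#     feedback_fn("%s: disk %s is visible as %s" % (node, iv_name, dev_path))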
6293 def _StartInstanceDisks(lu, instance, force):
6294 """Start the disks of an instance.
6297 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6298 ignore_secondaries=force)
6300 _ShutdownInstanceDisks(lu, instance)
6301 if force is not None and not force:
6302 lu.proc.LogWarning("", hint="If the message above refers to a"
6304 " you can retry the operation using '--force'.")
6305 raise errors.OpExecError("Disk consistency error")
6308 class LUInstanceDeactivateDisks(NoHooksLU):
6309 """Shutdown an instance's disks.
6314 def ExpandNames(self):
6315 self._ExpandAndLockInstance()
6316 self.needed_locks[locking.LEVEL_NODE] = []
6317 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6319 def DeclareLocks(self, level):
6320 if level == locking.LEVEL_NODE:
6321 self._LockInstancesNodes()
6323 def CheckPrereq(self):
6324 """Check prerequisites.
6326 This checks that the instance is in the cluster.
6329 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6330 assert self.instance is not None, \
6331 "Cannot retrieve locked instance %s" % self.op.instance_name
6333 def Exec(self, feedback_fn):
6334 """Deactivate the disks
6337 instance = self.instance
6339 _ShutdownInstanceDisks(self, instance)
6341 _SafeShutdownInstanceDisks(self, instance)
6344 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6345 """Shutdown block devices of an instance.
6347 This function checks if an instance is running before calling
6348 _ShutdownInstanceDisks.
6351 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6352 _ShutdownInstanceDisks(lu, instance, disks=disks)
6355 def _ExpandCheckDisks(instance, disks):
6356 """Return the instance disks selected by the disks list
6358 @type disks: list of L{objects.Disk} or None
6359 @param disks: selected disks
6360 @rtype: list of L{objects.Disk}
6361 @return: selected instance disks to act on
6365 return instance.disks
6367 if not set(disks).issubset(instance.disks):
6368 raise errors.ProgrammerError("Can only act on disks belonging to the"
6373 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6374 """Shutdown block devices of an instance.
6376 This does the shutdown on all nodes of the instance.
6378 If ignore_primary is false, errors on the primary node are
6383 disks = _ExpandCheckDisks(instance, disks)
6386 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6387 lu.cfg.SetDiskID(top_disk, node)
6388 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6389 msg = result.fail_msg
6391 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6392 disk.iv_name, node, msg)
6393 if ((node == instance.primary_node and not ignore_primary) or
6394 (node != instance.primary_node and not result.offline)):
6399 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6400 """Checks if a node has enough free memory.
6402 This function checks whether a given node has the needed amount of free
6403 memory. If the node has less memory, or we cannot get the
6404 information from the node, this function raises an OpPrereqError
6407 @type lu: C{LogicalUnit}
6408 @param lu: a logical unit from which we get configuration data
6410 @param node: the node to check
6411 @type reason: C{str}
6412 @param reason: string to use in the error message
6413 @type requested: C{int}
6414 @param requested: the amount of memory in MiB to check for
6415 @type hypervisor_name: C{str}
6416 @param hypervisor_name: the hypervisor to ask for memory stats
6418 @return: node current free memory
6419 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6420 we cannot check the node
6423 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6424 nodeinfo[node].Raise("Can't get data from node %s" % node,
6425 prereq=True, ecode=errors.ECODE_ENVIRON)
6426 (_, _, (hv_info, )) = nodeinfo[node].payload
6428 free_mem = hv_info.get("memory_free", None)
6429 if not isinstance(free_mem, int):
6430 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6431 " was '%s'" % (node, free_mem),
6432 errors.ECODE_ENVIRON)
6433 if requested > free_mem:
6434 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6435 " needed %s MiB, available %s MiB" %
6436 (node, reason, requested, free_mem),
6441 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6442 """Checks if nodes have enough free disk space in the all VGs.
6444 This function checks whether all given nodes have the needed amount of
6445 free disk space. If any node has less disk space, or we cannot get the
6446 information from the node, this function raises an OpPrereqError
6449 @type lu: C{LogicalUnit}
6450 @param lu: a logical unit from which we get configuration data
6451 @type nodenames: C{list}
6452 @param nodenames: the list of node names to check
6453 @type req_sizes: C{dict}
6454 @param req_sizes: the hash of vg and corresponding amount of disk in
6456 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6457 or we cannot check the node
6460 for vg, req_size in req_sizes.items():
6461 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
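# Illustrative sketch (editor's note): req_sizes is a mapping from VG name to
# the total space (in MiB) required on that VG, so a hypothetical call could
# look like:
#
#   req_sizes = {"xenvg": 10240, "metavg": 128}
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"], req_sizes)
#   # raises OpPrereqError if either node lacks the space in either VG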
6464 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6465 """Checks if nodes have enough free disk space in the specified VG.
6467 This function checks whether all given nodes have the needed amount of
6468 free disk space. If any node has less disk space, or we cannot get the
6469 information from the node, this function raises an OpPrereqError
6472 @type lu: C{LogicalUnit}
6473 @param lu: a logical unit from which we get configuration data
6474 @type nodenames: C{list}
6475 @param nodenames: the list of node names to check
6477 @param vg: the volume group to check
6478 @type requested: C{int}
6479 @param requested: the amount of disk in MiB to check for
6480 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6481 or we cannot check the node
6484 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6485 for node in nodenames:
6486 info = nodeinfo[node]
6487 info.Raise("Cannot get current information from node %s" % node,
6488 prereq=True, ecode=errors.ECODE_ENVIRON)
6489 (_, (vg_info, ), _) = info.payload
6490 vg_free = vg_info.get("vg_free", None)
6491 if not isinstance(vg_free, int):
6492 raise errors.OpPrereqError("Can't compute free disk space on node"
6493 " %s for vg %s, result was '%s'" %
6494 (node, vg, vg_free), errors.ECODE_ENVIRON)
6495 if requested > vg_free:
6496 raise errors.OpPrereqError("Not enough disk space on target node %s"
6497 " vg %s: required %d MiB, available %d MiB" %
6498 (node, vg, requested, vg_free),
6502 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6503 """Checks if nodes have enough physical CPUs
6505 This function checks if all given nodes have the needed number of
6506 physical CPUs. If any node has fewer CPUs, or we cannot get the
6507 information from the node, this function raises an OpPrereqError
6510 @type lu: C{LogicalUnit}
6511 @param lu: a logical unit from which we get configuration data
6512 @type nodenames: C{list}
6513 @param nodenames: the list of node names to check
6514 @type requested: C{int}
6515 @param requested: the minimum acceptable number of physical CPUs
6516 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6517 or we cannot check the node
6520 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6521 for node in nodenames:
6522 info = nodeinfo[node]
6523 info.Raise("Cannot get current information from node %s" % node,
6524 prereq=True, ecode=errors.ECODE_ENVIRON)
6525 (_, _, (hv_info, )) = info.payload
6526 num_cpus = hv_info.get("cpu_total", None)
6527 if not isinstance(num_cpus, int):
6528 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6529 " on node %s, result was '%s'" %
6530 (node, num_cpus), errors.ECODE_ENVIRON)
6531 if requested > num_cpus:
6532 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6533 "required" % (node, num_cpus, requested),
6537 class LUInstanceStartup(LogicalUnit):
6538 """Starts an instance.
6541 HPATH = "instance-start"
6542 HTYPE = constants.HTYPE_INSTANCE
6545 def CheckArguments(self):
6547 if self.op.beparams:
6548 # fill the beparams dict
6549 objects.UpgradeBeParams(self.op.beparams)
6550 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6552 def ExpandNames(self):
6553 self._ExpandAndLockInstance()
6554 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6556 def DeclareLocks(self, level):
6557 if level == locking.LEVEL_NODE_RES:
6558 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6560 def BuildHooksEnv(self):
6563 This runs on master, primary and secondary nodes of the instance.
6567 "FORCE": self.op.force,
6570 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6574 def BuildHooksNodes(self):
6575 """Build hooks nodes.
6578 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6581 def CheckPrereq(self):
6582 """Check prerequisites.
6584 This checks that the instance is in the cluster.
6587 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6588 assert self.instance is not None, \
6589 "Cannot retrieve locked instance %s" % self.op.instance_name
6592 if self.op.hvparams:
6593 # check hypervisor parameter syntax (locally)
6594 cluster = self.cfg.GetClusterInfo()
6595 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6596 filled_hvp = cluster.FillHV(instance)
6597 filled_hvp.update(self.op.hvparams)
6598 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6599 hv_type.CheckParameterSyntax(filled_hvp)
6600 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6602 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6604 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6606 if self.primary_offline and self.op.ignore_offline_nodes:
6607 self.proc.LogWarning("Ignoring offline primary node")
6609 if self.op.hvparams or self.op.beparams:
6610 self.proc.LogWarning("Overridden parameters are ignored")
6612 _CheckNodeOnline(self, instance.primary_node)
6614 bep = self.cfg.GetClusterInfo().FillBE(instance)
6615 bep.update(self.op.beparams)
6617 # check bridges existence
6618 _CheckInstanceBridgesExist(self, instance)
6620 remote_info = self.rpc.call_instance_info(instance.primary_node,
6622 instance.hypervisor)
6623 remote_info.Raise("Error checking node %s" % instance.primary_node,
6624 prereq=True, ecode=errors.ECODE_ENVIRON)
6625 if not remote_info.payload: # not running already
6626 _CheckNodeFreeMemory(self, instance.primary_node,
6627 "starting instance %s" % instance.name,
6628 bep[constants.BE_MINMEM], instance.hypervisor)
6630 def Exec(self, feedback_fn):
6631 """Start the instance.
6634 instance = self.instance
6635 force = self.op.force
6637 if not self.op.no_remember:
6638 self.cfg.MarkInstanceUp(instance.name)
6640 if self.primary_offline:
6641 assert self.op.ignore_offline_nodes
6642 self.proc.LogInfo("Primary node offline, marked instance as started")
6644 node_current = instance.primary_node
6646 _StartInstanceDisks(self, instance, force)
6649 self.rpc.call_instance_start(node_current,
6650 (instance, self.op.hvparams,
6652 self.op.startup_paused)
6653 msg = result.fail_msg
6655 _ShutdownInstanceDisks(self, instance)
6656 raise errors.OpExecError("Could not start instance: %s" % msg)
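  # Illustrative sketch (editor's note): how the one-off parameter overrides
  # accepted by this LU are combined with the cluster defaults in CheckPrereq
  # (hypothetical values):
  #
  #   cluster = self.cfg.GetClusterInfo()
  #   filled_hvp = cluster.FillHV(instance)   # cluster + instance hvparams
  #   filled_hvp.update(self.op.hvparams)     # temporary per-start overrides
  #   bep = cluster.FillBE(instance)
  #   bep.update(self.op.beparams)            # e.g. {"minmem": 512}
  #   # the merged dicts are only validated and passed to the start RPC for
  #   # this one start; they are not written back to the configuration.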
6659 class LUInstanceReboot(LogicalUnit):
6660 """Reboot an instance.
6663 HPATH = "instance-reboot"
6664 HTYPE = constants.HTYPE_INSTANCE
6667 def ExpandNames(self):
6668 self._ExpandAndLockInstance()
6670 def BuildHooksEnv(self):
6673 This runs on master, primary and secondary nodes of the instance.
6677 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6678 "REBOOT_TYPE": self.op.reboot_type,
6679 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6682 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6686 def BuildHooksNodes(self):
6687 """Build hooks nodes.
6690 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6693 def CheckPrereq(self):
6694 """Check prerequisites.
6696 This checks that the instance is in the cluster.
6699 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6700 assert self.instance is not None, \
6701 "Cannot retrieve locked instance %s" % self.op.instance_name
6702 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6703 _CheckNodeOnline(self, instance.primary_node)
6705 # check bridges existence
6706 _CheckInstanceBridgesExist(self, instance)
6708 def Exec(self, feedback_fn):
6709 """Reboot the instance.
6712 instance = self.instance
6713 ignore_secondaries = self.op.ignore_secondaries
6714 reboot_type = self.op.reboot_type
6716 remote_info = self.rpc.call_instance_info(instance.primary_node,
6718 instance.hypervisor)
6719 remote_info.Raise("Error checking node %s" % instance.primary_node)
6720 instance_running = bool(remote_info.payload)
6722 node_current = instance.primary_node
6724 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6725 constants.INSTANCE_REBOOT_HARD]:
6726 for disk in instance.disks:
6727 self.cfg.SetDiskID(disk, node_current)
6728 result = self.rpc.call_instance_reboot(node_current, instance,
6730 self.op.shutdown_timeout)
6731 result.Raise("Could not reboot instance")
6733 if instance_running:
6734 result = self.rpc.call_instance_shutdown(node_current, instance,
6735 self.op.shutdown_timeout)
6736 result.Raise("Could not shutdown instance for full reboot")
6737 _ShutdownInstanceDisks(self, instance)
6739 self.LogInfo("Instance %s was already stopped, starting now",
6741 _StartInstanceDisks(self, instance, ignore_secondaries)
6742 result = self.rpc.call_instance_start(node_current,
6743 (instance, None, None), False)
6744 msg = result.fail_msg
6746 _ShutdownInstanceDisks(self, instance)
6747 raise errors.OpExecError("Could not start instance for"
6748 " full reboot: %s" % msg)
6750 self.cfg.MarkInstanceUp(instance.name)
6753 class LUInstanceShutdown(LogicalUnit):
6754 """Shutdown an instance.
6757 HPATH = "instance-stop"
6758 HTYPE = constants.HTYPE_INSTANCE
6761 def ExpandNames(self):
6762 self._ExpandAndLockInstance()
6764 def BuildHooksEnv(self):
6767 This runs on master, primary and secondary nodes of the instance.
6770 env = _BuildInstanceHookEnvByObject(self, self.instance)
6771 env["TIMEOUT"] = self.op.timeout
6774 def BuildHooksNodes(self):
6775 """Build hooks nodes.
6778 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6781 def CheckPrereq(self):
6782 """Check prerequisites.
6784 This checks that the instance is in the cluster.
6787 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6788 assert self.instance is not None, \
6789 "Cannot retrieve locked instance %s" % self.op.instance_name
6791 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6793 self.primary_offline = \
6794 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6796 if self.primary_offline and self.op.ignore_offline_nodes:
6797 self.proc.LogWarning("Ignoring offline primary node")
6799 _CheckNodeOnline(self, self.instance.primary_node)
6801 def Exec(self, feedback_fn):
6802 """Shutdown the instance.
6805 instance = self.instance
6806 node_current = instance.primary_node
6807 timeout = self.op.timeout
6809 if not self.op.no_remember:
6810 self.cfg.MarkInstanceDown(instance.name)
6812 if self.primary_offline:
6813 assert self.op.ignore_offline_nodes
6814 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6816 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6817 msg = result.fail_msg
6819 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6821 _ShutdownInstanceDisks(self, instance)
6824 class LUInstanceReinstall(LogicalUnit):
6825 """Reinstall an instance.
6828 HPATH = "instance-reinstall"
6829 HTYPE = constants.HTYPE_INSTANCE
6832 def ExpandNames(self):
6833 self._ExpandAndLockInstance()
6835 def BuildHooksEnv(self):
6838 This runs on master, primary and secondary nodes of the instance.
6841 return _BuildInstanceHookEnvByObject(self, self.instance)
6843 def BuildHooksNodes(self):
6844 """Build hooks nodes.
6847 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6850 def CheckPrereq(self):
6851 """Check prerequisites.
6853 This checks that the instance is in the cluster and is not running.
6856 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6857 assert instance is not None, \
6858 "Cannot retrieve locked instance %s" % self.op.instance_name
6859 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6860 " offline, cannot reinstall")
6861 for node in instance.secondary_nodes:
6862 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6863 " cannot reinstall")
6865 if instance.disk_template == constants.DT_DISKLESS:
6866 raise errors.OpPrereqError("Instance '%s' has no disks" %
6867 self.op.instance_name,
6869 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6871 if self.op.os_type is not None:
6873 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6874 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6875 instance_os = self.op.os_type
6877 instance_os = instance.os
6879 nodelist = list(instance.all_nodes)
6881 if self.op.osparams:
6882 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6883 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6884 self.os_inst = i_osdict # the new dict (without defaults)
6888 self.instance = instance
6890 def Exec(self, feedback_fn):
6891 """Reinstall the instance.
6894 inst = self.instance
6896 if self.op.os_type is not None:
6897 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6898 inst.os = self.op.os_type
6899 # Write to configuration
6900 self.cfg.Update(inst, feedback_fn)
6902 _StartInstanceDisks(self, inst, None)
6904 feedback_fn("Running the instance OS create scripts...")
6905 # FIXME: pass debug option from opcode to backend
6906 result = self.rpc.call_instance_os_add(inst.primary_node,
6907 (inst, self.os_inst), True,
6908 self.op.debug_level)
6909 result.Raise("Could not install OS for instance %s on node %s" %
6910 (inst.name, inst.primary_node))
6912 _ShutdownInstanceDisks(self, inst)
6915 class LUInstanceRecreateDisks(LogicalUnit):
6916 """Recreate an instance's missing disks.
6919 HPATH = "instance-recreate-disks"
6920 HTYPE = constants.HTYPE_INSTANCE
6923 _MODIFYABLE = frozenset([
6924 constants.IDISK_SIZE,
6925 constants.IDISK_MODE,
6928 # New or changed disk parameters may have different semantics
6929 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6930 constants.IDISK_ADOPT,
6932 # TODO: Implement support for changing the VG while recreating
6934 constants.IDISK_METAVG,
6937 def CheckArguments(self):
6938 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6939 # Normalize and convert deprecated list of disk indices
6940 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
6942 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6944 raise errors.OpPrereqError("Some disks have been specified more than"
6945 " once: %s" % utils.CommaJoin(duplicates),
6948 for (idx, params) in self.op.disks:
6949 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6950 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6952 raise errors.OpPrereqError("Parameters for disk %s try to change"
6953 " unmodifyable parameter(s): %s" %
6954 (idx, utils.CommaJoin(unsupported)),
6957 def ExpandNames(self):
6958 self._ExpandAndLockInstance()
6959 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6961 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6962 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6964 self.needed_locks[locking.LEVEL_NODE] = []
6965 self.needed_locks[locking.LEVEL_NODE_RES] = []
6967 def DeclareLocks(self, level):
6968 if level == locking.LEVEL_NODE:
6969 # if we replace the nodes, we only need to lock the old primary,
6970 # otherwise we need to lock all nodes for disk re-creation
6971 primary_only = bool(self.op.nodes)
6972 self._LockInstancesNodes(primary_only=primary_only)
6973 elif level == locking.LEVEL_NODE_RES:
6975 self.needed_locks[locking.LEVEL_NODE_RES] = \
6976 self.needed_locks[locking.LEVEL_NODE][:]
6978 def BuildHooksEnv(self):
6981 This runs on master, primary and secondary nodes of the instance.
6984 return _BuildInstanceHookEnvByObject(self, self.instance)
6986 def BuildHooksNodes(self):
6987 """Build hooks nodes.
6990 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6993 def CheckPrereq(self):
6994 """Check prerequisites.
6996 This checks that the instance is in the cluster and is not running.
6999 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7000 assert instance is not None, \
7001 "Cannot retrieve locked instance %s" % self.op.instance_name
7003 if len(self.op.nodes) != len(instance.all_nodes):
7004 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7005 " %d replacement nodes were specified" %
7006 (instance.name, len(instance.all_nodes),
7007 len(self.op.nodes)),
7009 assert instance.disk_template != constants.DT_DRBD8 or \
7010 len(self.op.nodes) == 2
7011 assert instance.disk_template != constants.DT_PLAIN or \
7012 len(self.op.nodes) == 1
7013 primary_node = self.op.nodes[0]
7015 primary_node = instance.primary_node
7016 _CheckNodeOnline(self, primary_node)
7018 if instance.disk_template == constants.DT_DISKLESS:
7019 raise errors.OpPrereqError("Instance '%s' has no disks" %
7020 self.op.instance_name, errors.ECODE_INVAL)
7022 # if we replace nodes *and* the old primary is offline, we don't
7024 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7025 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7026 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7027 if not (self.op.nodes and old_pnode.offline):
7028 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7029 msg="cannot recreate disks")
7032 self.disks = dict(self.op.disks)
7034 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7036 maxidx = max(self.disks.keys())
7037 if maxidx >= len(instance.disks):
7038 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7041 if (self.op.nodes and
7042 sorted(self.disks.keys()) != range(len(instance.disks))):
7043 raise errors.OpPrereqError("Can't recreate disks partially and"
7044 " change the nodes at the same time",
7047 self.instance = instance
7049 def Exec(self, feedback_fn):
7050 """Recreate the disks.
7053 instance = self.instance
7055 assert (self.owned_locks(locking.LEVEL_NODE) ==
7056 self.owned_locks(locking.LEVEL_NODE_RES))
7059 mods = [] # keeps track of needed changes
7061 for idx, disk in enumerate(instance.disks):
7063 changes = self.disks[idx]
7065 # Disk should not be recreated
7069 # update secondaries for disks, if needed
7070 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7071 # need to update the nodes and minors
7072 assert len(self.op.nodes) == 2
7073 assert len(disk.logical_id) == 6 # otherwise disk internals
7075 (_, _, old_port, _, _, old_secret) = disk.logical_id
7076 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7077 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7078 new_minors[0], new_minors[1], old_secret)
7079 assert len(disk.logical_id) == len(new_id)
7083 mods.append((idx, new_id, changes))
7085 # now that we have passed all asserts above, we can apply the mods
7086 # in a single run (to avoid partial changes)
7087 for idx, new_id, changes in mods:
7088 disk = instance.disks[idx]
7089 if new_id is not None:
7090 assert disk.dev_type == constants.LD_DRBD8
7091 disk.logical_id = new_id
7093 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7094 mode=changes.get(constants.IDISK_MODE, None))
7096 # change primary node, if needed
7098 instance.primary_node = self.op.nodes[0]
7099 self.LogWarning("Changing the instance's nodes, you will have to"
7100 " remove any disks left on the older nodes manually")
7103 self.cfg.Update(instance, feedback_fn)
7105 _CreateDisks(self, instance, to_skip=to_skip)
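  # Worked example (editor's note, hypothetical values): recreating a DRBD
  # disk on new nodes rebuilds its 6-element logical_id, keeping the port and
  # shared secret but substituting the nodes and freshly allocated minors:
  #
  #   disk.logical_id = ("nodeA", "nodeB", 11000, 0, 0, "secret")
  #   # with self.op.nodes == ["nodeC", "nodeD"] and new minors (3, 5):
  #   new_id = ("nodeC", "nodeD", 11000, 3, 5, "secret")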
7108 class LUInstanceRename(LogicalUnit):
7109 """Rename an instance.
7112 HPATH = "instance-rename"
7113 HTYPE = constants.HTYPE_INSTANCE
7115 def CheckArguments(self):
7119 if self.op.ip_check and not self.op.name_check:
7120 # TODO: make the ip check more flexible and not depend on the name check
7121 raise errors.OpPrereqError("IP address check requires a name check",
7124 def BuildHooksEnv(self):
7127 This runs on master, primary and secondary nodes of the instance.
7130 env = _BuildInstanceHookEnvByObject(self, self.instance)
7131 env["INSTANCE_NEW_NAME"] = self.op.new_name
7134 def BuildHooksNodes(self):
7135 """Build hooks nodes.
7138 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7141 def CheckPrereq(self):
7142 """Check prerequisites.
7144 This checks that the instance is in the cluster and is not running.
7147 self.op.instance_name = _ExpandInstanceName(self.cfg,
7148 self.op.instance_name)
7149 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7150 assert instance is not None
7151 _CheckNodeOnline(self, instance.primary_node)
7152 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7153 msg="cannot rename")
7154 self.instance = instance
7156 new_name = self.op.new_name
7157 if self.op.name_check:
7158 hostname = netutils.GetHostname(name=new_name)
7159 if hostname.name != new_name:
7160 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7162 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7163 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7164 " same as given hostname '%s'") %
7165 (hostname.name, self.op.new_name),
7167 new_name = self.op.new_name = hostname.name
7168 if (self.op.ip_check and
7169 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7170 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7171 (hostname.ip, new_name),
7172 errors.ECODE_NOTUNIQUE)
7174 instance_list = self.cfg.GetInstanceList()
7175 if new_name in instance_list and new_name != instance.name:
7176 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7177 new_name, errors.ECODE_EXISTS)
7179 def Exec(self, feedback_fn):
7180 """Rename the instance.
7183 inst = self.instance
7184 old_name = inst.name
7186 rename_file_storage = False
7187 if (inst.disk_template in constants.DTS_FILEBASED and
7188 self.op.new_name != inst.name):
7189 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7190 rename_file_storage = True
7192 self.cfg.RenameInstance(inst.name, self.op.new_name)
7193 # Change the instance lock. This is definitely safe while we hold the BGL.
7194 # Otherwise the new lock would have to be added in acquired mode.
7196 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7197 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7199 # re-read the instance from the configuration after rename
7200 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7202 if rename_file_storage:
7203 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7204 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7205 old_file_storage_dir,
7206 new_file_storage_dir)
7207 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7208 " (but the instance has been renamed in Ganeti)" %
7209 (inst.primary_node, old_file_storage_dir,
7210 new_file_storage_dir))
7212 _StartInstanceDisks(self, inst, None)
7214 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7215 old_name, self.op.debug_level)
7216 msg = result.fail_msg
7218 msg = ("Could not run OS rename script for instance %s on node %s"
7219 " (but the instance has been renamed in Ganeti): %s" %
7220 (inst.name, inst.primary_node, msg))
7221 self.proc.LogWarning(msg)
7223 _ShutdownInstanceDisks(self, inst)
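  # Illustrative sketch (editor's note): for file-based disk templates the
  # storage directory is derived from the first disk's logical_id, whose
  # second element is the file path, so a rename moves the whole directory
  # (hypothetical paths):
  #
  #   inst.disks[0].logical_id[1]     # ".../file-storage/old-name/disk0"
  #   old_dir = os.path.dirname(...)  # ".../file-storage/old-name"
  #   new_dir = os.path.dirname(...)  # ".../file-storage/new-name", after the
  #                                   # configuration rename updated the path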
7228 class LUInstanceRemove(LogicalUnit):
7229 """Remove an instance.
7232 HPATH = "instance-remove"
7233 HTYPE = constants.HTYPE_INSTANCE
7236 def ExpandNames(self):
7237 self._ExpandAndLockInstance()
7238 self.needed_locks[locking.LEVEL_NODE] = []
7239 self.needed_locks[locking.LEVEL_NODE_RES] = []
7240 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7242 def DeclareLocks(self, level):
7243 if level == locking.LEVEL_NODE:
7244 self._LockInstancesNodes()
7245 elif level == locking.LEVEL_NODE_RES:
7247 self.needed_locks[locking.LEVEL_NODE_RES] = \
7248 self.needed_locks[locking.LEVEL_NODE][:]
7250 def BuildHooksEnv(self):
7253 This runs on master, primary and secondary nodes of the instance.
7256 env = _BuildInstanceHookEnvByObject(self, self.instance)
7257 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7260 def BuildHooksNodes(self):
7261 """Build hooks nodes.
7264 nl = [self.cfg.GetMasterNode()]
7265 nl_post = list(self.instance.all_nodes) + nl
7266 return (nl, nl_post)
7268 def CheckPrereq(self):
7269 """Check prerequisites.
7271 This checks that the instance is in the cluster.
7274 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7275 assert self.instance is not None, \
7276 "Cannot retrieve locked instance %s" % self.op.instance_name
7278 def Exec(self, feedback_fn):
7279 """Remove the instance.
7282 instance = self.instance
7283 logging.info("Shutting down instance %s on node %s",
7284 instance.name, instance.primary_node)
7286 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7287 self.op.shutdown_timeout)
7288 msg = result.fail_msg
7290 if self.op.ignore_failures:
7291 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7293 raise errors.OpExecError("Could not shutdown instance %s on"
7295 (instance.name, instance.primary_node, msg))
7297 assert (self.owned_locks(locking.LEVEL_NODE) ==
7298 self.owned_locks(locking.LEVEL_NODE_RES))
7299 assert not (set(instance.all_nodes) -
7300 self.owned_locks(locking.LEVEL_NODE)), \
7301 "Not owning correct locks"
7303 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7306 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7307 """Utility function to remove an instance.
7310 logging.info("Removing block devices for instance %s", instance.name)
7312 if not _RemoveDisks(lu, instance):
7313 if not ignore_failures:
7314 raise errors.OpExecError("Can't remove instance's disks")
7315 feedback_fn("Warning: can't remove instance's disks")
7317 logging.info("Removing instance %s out of cluster config", instance.name)
7319 lu.cfg.RemoveInstance(instance.name)
7321 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7322 "Instance lock removal conflict"
7324 # Remove lock for the instance
7325 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7328 class LUInstanceQuery(NoHooksLU):
7329 """Logical unit for querying instances.
7332 # pylint: disable=W0142
7335 def CheckArguments(self):
7336 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7337 self.op.output_fields, self.op.use_locking)
7339 def ExpandNames(self):
7340 self.iq.ExpandNames(self)
7342 def DeclareLocks(self, level):
7343 self.iq.DeclareLocks(self, level)
7345 def Exec(self, feedback_fn):
7346 return self.iq.OldStyleQuery(self)
7349 class LUInstanceFailover(LogicalUnit):
7350 """Failover an instance.
7353 HPATH = "instance-failover"
7354 HTYPE = constants.HTYPE_INSTANCE
7357 def CheckArguments(self):
7358 """Check the arguments.
7361 self.iallocator = getattr(self.op, "iallocator", None)
7362 self.target_node = getattr(self.op, "target_node", None)
7364 def ExpandNames(self):
7365 self._ExpandAndLockInstance()
7367 if self.op.target_node is not None:
7368 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7370 self.needed_locks[locking.LEVEL_NODE] = []
7371 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7373 self.needed_locks[locking.LEVEL_NODE_RES] = []
7374 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7376 ignore_consistency = self.op.ignore_consistency
7377 shutdown_timeout = self.op.shutdown_timeout
7378 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7381 ignore_consistency=ignore_consistency,
7382 shutdown_timeout=shutdown_timeout,
7383 ignore_ipolicy=self.op.ignore_ipolicy)
7384 self.tasklets = [self._migrater]
7386 def DeclareLocks(self, level):
7387 if level == locking.LEVEL_NODE:
7388 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7389 if instance.disk_template in constants.DTS_EXT_MIRROR:
7390 if self.op.target_node is None:
7391 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7393 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7394 self.op.target_node]
7395 del self.recalculate_locks[locking.LEVEL_NODE]
7397 self._LockInstancesNodes()
7398 elif level == locking.LEVEL_NODE_RES:
7400 self.needed_locks[locking.LEVEL_NODE_RES] = \
7401 self.needed_locks[locking.LEVEL_NODE][:]
7403 def BuildHooksEnv(self):
7406 This runs on master, primary and secondary nodes of the instance.
7409 instance = self._migrater.instance
7410 source_node = instance.primary_node
7411 target_node = self.op.target_node
7413 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7414 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7415 "OLD_PRIMARY": source_node,
7416 "NEW_PRIMARY": target_node,
7419 if instance.disk_template in constants.DTS_INT_MIRROR:
7420 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7421 env["NEW_SECONDARY"] = source_node
7423 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7425 env.update(_BuildInstanceHookEnvByObject(self, instance))
7429 def BuildHooksNodes(self):
7430 """Build hooks nodes.
7433 instance = self._migrater.instance
7434 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7435 return (nl, nl + [instance.primary_node])
7438 class LUInstanceMigrate(LogicalUnit):
7439 """Migrate an instance.
7441 This is migration without shutting down, as opposed to failover,
7442 which is done with a shutdown.
7445 HPATH = "instance-migrate"
7446 HTYPE = constants.HTYPE_INSTANCE
7449 def ExpandNames(self):
7450 self._ExpandAndLockInstance()
7452 if self.op.target_node is not None:
7453 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7455 self.needed_locks[locking.LEVEL_NODE] = []
7456 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7458 self.needed_locks[locking.LEVEL_NODE] = []
7459 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7462 TLMigrateInstance(self, self.op.instance_name,
7463 cleanup=self.op.cleanup,
7465 fallback=self.op.allow_failover,
7466 allow_runtime_changes=self.op.allow_runtime_changes,
7467 ignore_ipolicy=self.op.ignore_ipolicy)
7468 self.tasklets = [self._migrater]
7470 def DeclareLocks(self, level):
7471 if level == locking.LEVEL_NODE:
7472 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7473 if instance.disk_template in constants.DTS_EXT_MIRROR:
7474 if self.op.target_node is None:
7475 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7477 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7478 self.op.target_node]
7479 del self.recalculate_locks[locking.LEVEL_NODE]
7481 self._LockInstancesNodes()
7482 elif level == locking.LEVEL_NODE_RES:
7484 self.needed_locks[locking.LEVEL_NODE_RES] = \
7485 self.needed_locks[locking.LEVEL_NODE][:]
7487 def BuildHooksEnv(self):
7490 This runs on master, primary and secondary nodes of the instance.
7493 instance = self._migrater.instance
7494 source_node = instance.primary_node
7495 target_node = self.op.target_node
7496 env = _BuildInstanceHookEnvByObject(self, instance)
7498 "MIGRATE_LIVE": self._migrater.live,
7499 "MIGRATE_CLEANUP": self.op.cleanup,
7500 "OLD_PRIMARY": source_node,
7501 "NEW_PRIMARY": target_node,
7502 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7505 if instance.disk_template in constants.DTS_INT_MIRROR:
7506 env["OLD_SECONDARY"] = target_node
7507 env["NEW_SECONDARY"] = source_node
7509 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7513 def BuildHooksNodes(self):
7514 """Build hooks nodes.
7517 instance = self._migrater.instance
7518 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7519 return (nl, nl + [instance.primary_node])
7522 class LUInstanceMove(LogicalUnit):
7523 """Move an instance by data-copying.
7526 HPATH = "instance-move"
7527 HTYPE = constants.HTYPE_INSTANCE
7530 def ExpandNames(self):
7531 self._ExpandAndLockInstance()
7532 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7533 self.op.target_node = target_node
7534 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7535 self.needed_locks[locking.LEVEL_NODE_RES] = []
7536 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7538 def DeclareLocks(self, level):
7539 if level == locking.LEVEL_NODE:
7540 self._LockInstancesNodes(primary_only=True)
7541 elif level == locking.LEVEL_NODE_RES:
7543 self.needed_locks[locking.LEVEL_NODE_RES] = \
7544 self.needed_locks[locking.LEVEL_NODE][:]
7546 def BuildHooksEnv(self):
7549 This runs on master, primary and secondary nodes of the instance.
7553 "TARGET_NODE": self.op.target_node,
7554 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7556 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7559 def BuildHooksNodes(self):
7560 """Build hooks nodes.
7564 self.cfg.GetMasterNode(),
7565 self.instance.primary_node,
7566 self.op.target_node,
7570 def CheckPrereq(self):
7571 """Check prerequisites.
7573 This checks that the instance is in the cluster.
7576 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7577 assert self.instance is not None, \
7578 "Cannot retrieve locked instance %s" % self.op.instance_name
7580 node = self.cfg.GetNodeInfo(self.op.target_node)
7581 assert node is not None, \
7582 "Cannot retrieve locked node %s" % self.op.target_node
7584 self.target_node = target_node = node.name
7586 if target_node == instance.primary_node:
7587 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7588 (instance.name, target_node),
7591 bep = self.cfg.GetClusterInfo().FillBE(instance)
7593 for idx, dsk in enumerate(instance.disks):
7594 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7595 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7596 " cannot copy" % idx, errors.ECODE_STATE)
7598 _CheckNodeOnline(self, target_node)
7599 _CheckNodeNotDrained(self, target_node)
7600 _CheckNodeVmCapable(self, target_node)
7601 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7602 self.cfg.GetNodeGroup(node.group))
7603 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7604 ignore=self.op.ignore_ipolicy)
7606 if instance.admin_state == constants.ADMINST_UP:
7607 # check memory requirements on the target node
7608 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7609 instance.name, bep[constants.BE_MAXMEM],
7610 instance.hypervisor)
7612 self.LogInfo("Not checking memory on the target node as"
7613 " instance will not be started")
7615 # check bridge existence
7616 _CheckInstanceBridgesExist(self, instance, node=target_node)
7618 def Exec(self, feedback_fn):
7619 """Move an instance.
7621 The move is done by shutting it down on its present node, copying
7622 the data over (slow) and starting it on the new node.
7625 instance = self.instance
7627 source_node = instance.primary_node
7628 target_node = self.target_node
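# Overall flow of the move: shut the instance down on the source node, create
# empty disks on the target node, copy each disk over via blockdev_assemble
# and blockdev_export, switch the configured primary node, remove the disks
# on the source node and, if the instance was marked up, restart it on the
# target node.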
7630 self.LogInfo("Shutting down instance %s on source node %s",
7631 instance.name, source_node)
7633 assert (self.owned_locks(locking.LEVEL_NODE) ==
7634 self.owned_locks(locking.LEVEL_NODE_RES))
7636 result = self.rpc.call_instance_shutdown(source_node, instance,
7637 self.op.shutdown_timeout)
7638 msg = result.fail_msg
7640 if self.op.ignore_consistency:
7641 self.proc.LogWarning("Could not shut down instance %s on node %s."
7642 " Proceeding anyway. Please make sure node"
7643 " %s is down. Error details: %s",
7644 instance.name, source_node, source_node, msg)
7646 raise errors.OpExecError("Could not shut down instance %s on"
7648 (instance.name, source_node, msg))
7650 # create the target disks
7652 _CreateDisks(self, instance, target_node=target_node)
7653 except errors.OpExecError:
7654 self.LogWarning("Device creation failed, reverting...")
7656 _RemoveDisks(self, instance, target_node=target_node)
7658 self.cfg.ReleaseDRBDMinors(instance.name)
7661 cluster_name = self.cfg.GetClusterInfo().cluster_name
7664 # activate, get path, copy the data over
7665 for idx, disk in enumerate(instance.disks):
7666 self.LogInfo("Copying data for disk %d", idx)
7667 result = self.rpc.call_blockdev_assemble(target_node, disk,
7668 instance.name, True, idx)
7670 self.LogWarning("Can't assemble newly created disk %d: %s",
7671 idx, result.fail_msg)
7672 errs.append(result.fail_msg)
7674 dev_path = result.payload
7675 result = self.rpc.call_blockdev_export(source_node, disk,
7676 target_node, dev_path,
7679 self.LogWarning("Can't copy data over for disk %d: %s",
7680 idx, result.fail_msg)
7681 errs.append(result.fail_msg)
7685 self.LogWarning("Some disks failed to copy, aborting")
7687 _RemoveDisks(self, instance, target_node=target_node)
7689 self.cfg.ReleaseDRBDMinors(instance.name)
7690 raise errors.OpExecError("Errors during disk copy: %s" %
7693 instance.primary_node = target_node
7694 self.cfg.Update(instance, feedback_fn)
7696 self.LogInfo("Removing the disks on the original node")
7697 _RemoveDisks(self, instance, target_node=source_node)
7699 # Only start the instance if it's marked as up
7700 if instance.admin_state == constants.ADMINST_UP:
7701 self.LogInfo("Starting instance %s on node %s",
7702 instance.name, target_node)
7704 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7705 ignore_secondaries=True)
7707 _ShutdownInstanceDisks(self, instance)
7708 raise errors.OpExecError("Can't activate the instance's disks")
7710 result = self.rpc.call_instance_start(target_node,
7711 (instance, None, None), False)
7712 msg = result.fail_msg
7714 _ShutdownInstanceDisks(self, instance)
7715 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7716 (instance.name, target_node, msg))
7719 class LUNodeMigrate(LogicalUnit):
7720 """Migrate all instances from a node.
7723 HPATH = "node-migrate"
7724 HTYPE = constants.HTYPE_NODE
7727 def CheckArguments(self):
7730 def ExpandNames(self):
7731 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7733 self.share_locks = _ShareAll()
7734 self.needed_locks = {
7735 locking.LEVEL_NODE: [self.op.node_name],
7738 def BuildHooksEnv(self):
7741 This runs on the master, the primary and all the secondaries.
7745 "NODE_NAME": self.op.node_name,
7746 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7749 def BuildHooksNodes(self):
7750 """Build hooks nodes.
7753 nl = [self.cfg.GetMasterNode()]
7756 def CheckPrereq(self):
7759 def Exec(self, feedback_fn):
7760 # Prepare jobs for migration instances
7761 allow_runtime_changes = self.op.allow_runtime_changes
7763 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7766 iallocator=self.op.iallocator,
7767 target_node=self.op.target_node,
7768 allow_runtime_changes=allow_runtime_changes,
7769 ignore_ipolicy=self.op.ignore_ipolicy)]
7770 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7773 # TODO: Run iallocator in this opcode and pass correct placement options to
7774 # OpInstanceMigrate. Since other jobs can modify the cluster between
7775 # running the iallocator and the actual migration, a good consistency model
7776 # will have to be found.
7778 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7779 frozenset([self.op.node_name]))
7781 return ResultWithJobs(jobs)
7784 class TLMigrateInstance(Tasklet):
7785 """Tasklet class for instance migration.
7788 @ivar live: whether the migration will be done live or non-live;
7789 this variable is initialized only after CheckPrereq has run
7790 @type cleanup: boolean
7791 @ivar cleanup: Whether we clean up from a failed migration
7792 @type iallocator: string
7793 @ivar iallocator: The iallocator used to determine target_node
7794 @type target_node: string
7795 @ivar target_node: If given, the target_node to reallocate the instance to
7796 @type failover: boolean
7797 @ivar failover: Whether operation results in failover or migration
7798 @type fallback: boolean
7799 @ivar fallback: Whether fallback to failover is allowed if migration not
7801 @type ignore_consistency: boolean
7802 @ivar ignore_consistency: Whether we should ignore consistency between source
7804 @type shutdown_timeout: int
7805 @ivar shutdown_timeout: In case of failover, timeout of the shutdown
7806 @type ignore_ipolicy: bool
7807 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7812 _MIGRATION_POLL_INTERVAL = 1 # seconds
7813 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
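# Usage sketch: this tasklet is instantiated by LUInstanceFailover and
# LUInstanceMigrate (see above), e.g.
#   TLMigrateInstance(self, self.op.instance_name, cleanup=self.op.cleanup,
#                     fallback=self.op.allow_failover, ...)
# and appended to the LU's tasklet list, which then runs the tasklet's
# CheckPrereq and Exec.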
7815 def __init__(self, lu, instance_name, cleanup=False,
7816 failover=False, fallback=False,
7817 ignore_consistency=False,
7818 allow_runtime_changes=True,
7819 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7820 ignore_ipolicy=False):
7821 """Initializes this class.
7824 Tasklet.__init__(self, lu)
7827 self.instance_name = instance_name
7828 self.cleanup = cleanup
7829 self.live = False # will be overridden later
7830 self.failover = failover
7831 self.fallback = fallback
7832 self.ignore_consistency = ignore_consistency
7833 self.shutdown_timeout = shutdown_timeout
7834 self.ignore_ipolicy = ignore_ipolicy
7835 self.allow_runtime_changes = allow_runtime_changes
7837 def CheckPrereq(self):
7838 """Check prerequisites.
7840 This checks that the instance is in the cluster.
7843 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7844 instance = self.cfg.GetInstanceInfo(instance_name)
7845 assert instance is not None
7846 self.instance = instance
7847 cluster = self.cfg.GetClusterInfo()
7849 if (not self.cleanup and
7850 not instance.admin_state == constants.ADMINST_UP and
7851 not self.failover and self.fallback):
7852 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7853 " switching to failover")
7854 self.failover = True
7856 if instance.disk_template not in constants.DTS_MIRRORED:
7861 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7862 " %s" % (instance.disk_template, text),
7865 if instance.disk_template in constants.DTS_EXT_MIRROR:
7866 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7868 if self.lu.op.iallocator:
7869 self._RunAllocator()
7871 # We set self.target_node as it is required by
7873 self.target_node = self.lu.op.target_node
7875 # Check that the target node is correct in terms of instance policy
7876 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7877 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7878 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7879 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7880 ignore=self.ignore_ipolicy)
7882 # self.target_node is already populated, either directly or by the
7884 target_node = self.target_node
7885 if self.target_node == instance.primary_node:
7886 raise errors.OpPrereqError("Cannot migrate instance %s"
7887 " to its primary (%s)" %
7888 (instance.name, instance.primary_node))
7890 if len(self.lu.tasklets) == 1:
7891 # It is safe to release locks only when we're the only tasklet
7893 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7894 keep=[instance.primary_node, self.target_node])
7897 secondary_nodes = instance.secondary_nodes
7898 if not secondary_nodes:
7899 raise errors.ConfigurationError("No secondary node but using"
7900 " %s disk template" %
7901 instance.disk_template)
7902 target_node = secondary_nodes[0]
7903 if self.lu.op.iallocator or (self.lu.op.target_node and
7904 self.lu.op.target_node != target_node):
7906 text = "failed over"
7909 raise errors.OpPrereqError("Instances with disk template %s cannot"
7910 " be %s to arbitrary nodes"
7911 " (neither an iallocator nor a target"
7912 " node can be passed)" %
7913 (instance.disk_template, text),
7915 nodeinfo = self.cfg.GetNodeInfo(target_node)
7916 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7917 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7918 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7919 ignore=self.ignore_ipolicy)
7921 i_be = cluster.FillBE(instance)
7923 # check memory requirements on the target node
7924 if (not self.cleanup and
7925 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7926 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7927 "migrating instance %s" %
7929 i_be[constants.BE_MINMEM],
7930 instance.hypervisor)
7932 self.lu.LogInfo("Not checking memory on the target node as"
7933 " instance will not be started")
7935 # check if failover must be forced instead of migration
7936 if (not self.cleanup and not self.failover and
7937 i_be[constants.BE_ALWAYS_FAILOVER]):
7939 self.lu.LogInfo("Instance configured to always failover; fallback"
7941 self.failover = True
7943 raise errors.OpPrereqError("This instance has been configured to"
7944 " always failover, please allow failover",
7947 # check bridge existence
7948 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7950 if not self.cleanup:
7951 _CheckNodeNotDrained(self.lu, target_node)
7952 if not self.failover:
7953 result = self.rpc.call_instance_migratable(instance.primary_node,
7955 if result.fail_msg and self.fallback:
7956 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7958 self.failover = True
7960 result.Raise("Can't migrate, please use failover",
7961 prereq=True, ecode=errors.ECODE_STATE)
7963 assert not (self.failover and self.cleanup)
7965 if not self.failover:
7966 if self.lu.op.live is not None and self.lu.op.mode is not None:
7967 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7968 " parameters are accepted",
7970 if self.lu.op.live is not None:
7972 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7974 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7975 # reset the 'live' parameter to None so that repeated
7976 # invocations of CheckPrereq do not raise an exception
7977 self.lu.op.live = None
7978 elif self.lu.op.mode is None:
7979 # read the default value from the hypervisor
7980 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7981 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7983 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
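# At this point self.lu.op.mode is one of constants.HT_MIGRATION_LIVE or
# constants.HT_MIGRATION_NONLIVE: given explicitly in the opcode, derived
# from the boolean 'live' flag handled above, or taken from the hypervisor's
# HV_MIGRATION_MODE default.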
7985 # Failover is never live
7988 if not (self.failover or self.cleanup):
7989 remote_info = self.rpc.call_instance_info(instance.primary_node,
7991 instance.hypervisor)
7992 remote_info.Raise("Error checking instance on node %s" %
7993 instance.primary_node)
7994 instance_running = bool(remote_info.payload)
7995 if instance_running:
7996 self.current_mem = int(remote_info.payload["memory"])
7998 def _RunAllocator(self):
7999 """Run the allocator based on input opcode.
8002 # FIXME: add a self.ignore_ipolicy option
8003 ial = IAllocator(self.cfg, self.rpc,
8004 mode=constants.IALLOCATOR_MODE_RELOC,
8005 name=self.instance_name,
8006 # TODO See why hail breaks with a single node below
8007 relocate_from=[self.instance.primary_node,
8008 self.instance.primary_node],
8011 ial.Run(self.lu.op.iallocator)
8014 raise errors.OpPrereqError("Can't compute nodes using"
8015 " iallocator '%s': %s" %
8016 (self.lu.op.iallocator, ial.info),
8018 if len(ial.result) != ial.required_nodes:
8019 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8020 " of nodes (%s), required %s" %
8021 (self.lu.op.iallocator, len(ial.result),
8022 ial.required_nodes), errors.ECODE_FAULT)
8023 self.target_node = ial.result[0]
8024 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8025 self.instance_name, self.lu.op.iallocator,
8026 utils.CommaJoin(ial.result))
8028 def _WaitUntilSync(self):
8029 """Poll with custom rpc for disk sync.
8031 This uses our own step-based rpc call.
8034 self.feedback_fn("* wait until resync is done")
8038 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8040 self.instance.disks)
8042 for node, nres in result.items():
8043 nres.Raise("Cannot resync disks on node %s" % node)
8044 node_done, node_percent = nres.payload
8045 all_done = all_done and node_done
8046 if node_percent is not None:
8047 min_percent = min(min_percent, node_percent)
8049 if min_percent < 100:
8050 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8053 def _EnsureSecondary(self, node):
8054 """Demote a node to secondary.
8057 self.feedback_fn("* switching node %s to secondary mode" % node)
8059 for dev in self.instance.disks:
8060 self.cfg.SetDiskID(dev, node)
8062 result = self.rpc.call_blockdev_close(node, self.instance.name,
8063 self.instance.disks)
8064 result.Raise("Cannot change disk to secondary on node %s" % node)
8066 def _GoStandalone(self):
8067 """Disconnect from the network.
8070 self.feedback_fn("* changing into standalone mode")
8071 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8072 self.instance.disks)
8073 for node, nres in result.items():
8074 nres.Raise("Cannot disconnect disks on node %s" % node)
8076 def _GoReconnect(self, multimaster):
8077 """Reconnect to the network.
8083 msg = "single-master"
8084 self.feedback_fn("* changing disks into %s mode" % msg)
8085 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8086 self.instance.disks,
8087 self.instance.name, multimaster)
8088 for node, nres in result.items():
8089 nres.Raise("Cannot change disks config on node %s" % node)
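# Note: _EnsureSecondary, _GoStandalone and _GoReconnect are the building
# blocks used by _ExecCleanup and _ExecMigration below to move the DRBD
# pairs between connected single-master, disconnected (standalone) and
# dual-master ("multimaster") modes around the actual migration.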
8091 def _ExecCleanup(self):
8092 """Try to cleanup after a failed migration.
8094 The cleanup is done by:
8095 - check that the instance is running only on one node
8096 (and update the config if needed)
8097 - change disks on its secondary node to secondary
8098 - wait until disks are fully synchronized
8099 - disconnect from the network
8100 - change disks into single-master mode
8101 - wait again until disks are fully synchronized
8104 instance = self.instance
8105 target_node = self.target_node
8106 source_node = self.source_node
8108 # check running on only one node
8109 self.feedback_fn("* checking where the instance actually runs"
8110 " (if this hangs, the hypervisor might be in"
8112 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8113 for node, result in ins_l.items():
8114 result.Raise("Can't contact node %s" % node)
8116 runningon_source = instance.name in ins_l[source_node].payload
8117 runningon_target = instance.name in ins_l[target_node].payload
8119 if runningon_source and runningon_target:
8120 raise errors.OpExecError("Instance seems to be running on two nodes,"
8121 " or the hypervisor is confused; you will have"
8122 " to ensure manually that it runs only on one"
8123 " and restart this operation")
8125 if not (runningon_source or runningon_target):
8126 raise errors.OpExecError("Instance does not seem to be running at all;"
8127 " in this case it's safer to repair by"
8128 " running 'gnt-instance stop' to ensure disk"
8129 " shutdown, and then restarting it")
8131 if runningon_target:
8132 # the migration has actually succeeded, we need to update the config
8133 self.feedback_fn("* instance running on secondary node (%s),"
8134 " updating config" % target_node)
8135 instance.primary_node = target_node
8136 self.cfg.Update(instance, self.feedback_fn)
8137 demoted_node = source_node
8139 self.feedback_fn("* instance confirmed to be running on its"
8140 " primary node (%s)" % source_node)
8141 demoted_node = target_node
8143 if instance.disk_template in constants.DTS_INT_MIRROR:
8144 self._EnsureSecondary(demoted_node)
8146 self._WaitUntilSync()
8147 except errors.OpExecError:
8148 # we ignore errors here, since if the device is standalone, it
8149 # won't be able to sync
8151 self._GoStandalone()
8152 self._GoReconnect(False)
8153 self._WaitUntilSync()
8155 self.feedback_fn("* done")
8157 def _RevertDiskStatus(self):
8158 """Try to revert the disk status after a failed migration.
8161 target_node = self.target_node
8162 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8166 self._EnsureSecondary(target_node)
8167 self._GoStandalone()
8168 self._GoReconnect(False)
8169 self._WaitUntilSync()
8170 except errors.OpExecError, err:
8171 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8172 " please try to recover the instance manually;"
8173 " error '%s'" % str(err))
8175 def _AbortMigration(self):
8176 """Call the hypervisor code to abort a started migration.
8179 instance = self.instance
8180 target_node = self.target_node
8181 source_node = self.source_node
8182 migration_info = self.migration_info
8184 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8188 abort_msg = abort_result.fail_msg
8190 logging.error("Aborting migration failed on target node %s: %s",
8191 target_node, abort_msg)
8192 # Don't raise an exception here, as we still have to try to revert the
8193 # disk status, even if this step failed.
8195 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8196 instance, False, self.live)
8197 abort_msg = abort_result.fail_msg
8199 logging.error("Aborting migration failed on source node %s: %s",
8200 source_node, abort_msg)
8202 def _ExecMigration(self):
8203 """Migrate an instance.
8205 The migrate is done by:
8206 - change the disks into dual-master mode
8207 - wait until disks are fully synchronized again
8208 - migrate the instance
8209 - change disks on the new secondary node (the old primary) to secondary
8210 - wait until disks are fully synchronized
8211 - change disks into single-master mode
8214 instance = self.instance
8215 target_node = self.target_node
8216 source_node = self.source_node
8218 # Check for hypervisor version mismatch and warn the user.
8219 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8220 None, [self.instance.hypervisor])
8221 for ninfo in nodeinfo.values():
8222 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8224 (_, _, (src_info, )) = nodeinfo[source_node].payload
8225 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8227 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8228 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8229 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8230 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8231 if src_version != dst_version:
8232 self.feedback_fn("* warning: hypervisor version mismatch between"
8233 " source (%s) and target (%s) node" %
8234 (src_version, dst_version))
8236 self.feedback_fn("* checking disk consistency between source and target")
8237 for (idx, dev) in enumerate(instance.disks):
8238 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8239 raise errors.OpExecError("Disk %s is degraded or not fully"
8240 " synchronized on target node,"
8241 " aborting migration" % idx)
8243 if self.current_mem > self.tgt_free_mem:
8244 if not self.allow_runtime_changes:
8245 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8246 " free memory to fit instance %s on target"
8247 " node %s (have %dMB, need %dMB)" %
8248 (instance.name, target_node,
8249 self.tgt_free_mem, self.current_mem))
8250 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8251 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8254 rpcres.Raise("Cannot modify instance runtime memory")
8256 # First get the migration information from the remote node
8257 result = self.rpc.call_migration_info(source_node, instance)
8258 msg = result.fail_msg
8260 log_err = ("Failed fetching source migration information from %s: %s" %
8262 logging.error(log_err)
8263 raise errors.OpExecError(log_err)
8265 self.migration_info = migration_info = result.payload
8267 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8268 # Then switch the disks to master/master mode
8269 self._EnsureSecondary(target_node)
8270 self._GoStandalone()
8271 self._GoReconnect(True)
8272 self._WaitUntilSync()
8274 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8275 result = self.rpc.call_accept_instance(target_node,
8278 self.nodes_ip[target_node])
8280 msg = result.fail_msg
8282 logging.error("Instance pre-migration failed, trying to revert"
8283 " disk status: %s", msg)
8284 self.feedback_fn("Pre-migration failed, aborting")
8285 self._AbortMigration()
8286 self._RevertDiskStatus()
8287 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8288 (instance.name, msg))
8290 self.feedback_fn("* migrating instance to %s" % target_node)
8291 result = self.rpc.call_instance_migrate(source_node, instance,
8292 self.nodes_ip[target_node],
8294 msg = result.fail_msg
8296 logging.error("Instance migration failed, trying to revert"
8297 " disk status: %s", msg)
8298 self.feedback_fn("Migration failed, aborting")
8299 self._AbortMigration()
8300 self._RevertDiskStatus()
8301 raise errors.OpExecError("Could not migrate instance %s: %s" %
8302 (instance.name, msg))
8304 self.feedback_fn("* starting memory transfer")
8305 last_feedback = time.time()
8307 result = self.rpc.call_instance_get_migration_status(source_node,
8309 msg = result.fail_msg
8310 ms = result.payload # MigrationStatus instance
8311 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8312 logging.error("Instance migration failed, trying to revert"
8313 " disk status: %s", msg)
8314 self.feedback_fn("Migration failed, aborting")
8315 self._AbortMigration()
8316 self._RevertDiskStatus()
8317 raise errors.OpExecError("Could not migrate instance %s: %s" %
8318 (instance.name, msg))
8320 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8321 self.feedback_fn("* memory transfer complete")
8324 if (utils.TimeoutExpired(last_feedback,
8325 self._MIGRATION_FEEDBACK_INTERVAL) and
8326 ms.transferred_ram is not None):
8327 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8328 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8329 last_feedback = time.time()
8331 time.sleep(self._MIGRATION_POLL_INTERVAL)
8333 result = self.rpc.call_instance_finalize_migration_src(source_node,
8337 msg = result.fail_msg
8339 logging.error("Instance migration succeeded, but finalization failed"
8340 " on the source node: %s", msg)
8341 raise errors.OpExecError("Could not finalize instance migration: %s" %
8344 instance.primary_node = target_node
8346 # distribute new instance config to the other nodes
8347 self.cfg.Update(instance, self.feedback_fn)
8349 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8353 msg = result.fail_msg
8355 logging.error("Instance migration succeeded, but finalization failed"
8356 " on the target node: %s", msg)
8357 raise errors.OpExecError("Could not finalize instance migration: %s" %
8360 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8361 self._EnsureSecondary(source_node)
8362 self._WaitUntilSync()
8363 self._GoStandalone()
8364 self._GoReconnect(False)
8365 self._WaitUntilSync()
8367 # If the instance's disk template is `rbd' and there was a successful
8368 # migration, unmap the device from the source node.
8369 if self.instance.disk_template == constants.DT_RBD:
8370 disks = _ExpandCheckDisks(instance, instance.disks)
8371 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8373 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8374 msg = result.fail_msg
8376 logging.error("Migration was successful, but couldn't unmap the"
8377 " block device %s on source node %s: %s",
8378 disk.iv_name, source_node, msg)
8379 logging.error("You need to unmap the device %s manually on %s",
8380 disk.iv_name, source_node)
8382 self.feedback_fn("* done")
8384 def _ExecFailover(self):
8385 """Failover an instance.
8387 The failover is done by shutting it down on its present node and
8388 starting it on the secondary.
8391 instance = self.instance
8392 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8394 source_node = instance.primary_node
8395 target_node = self.target_node
8397 if instance.admin_state == constants.ADMINST_UP:
8398 self.feedback_fn("* checking disk consistency between source and target")
8399 for (idx, dev) in enumerate(instance.disks):
8400 # for drbd, these are drbd over lvm
8401 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8402 if primary_node.offline:
8403 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8405 (primary_node.name, idx, target_node))
8406 elif not self.ignore_consistency:
8407 raise errors.OpExecError("Disk %s is degraded on target node,"
8408 " aborting failover" % idx)
8410 self.feedback_fn("* not checking disk consistency as instance is not"
8413 self.feedback_fn("* shutting down instance on source node")
8414 logging.info("Shutting down instance %s on node %s",
8415 instance.name, source_node)
8417 result = self.rpc.call_instance_shutdown(source_node, instance,
8418 self.shutdown_timeout)
8419 msg = result.fail_msg
8421 if self.ignore_consistency or primary_node.offline:
8422 self.lu.LogWarning("Could not shut down instance %s on node %s,"
8423 " proceeding anyway; please make sure node"
8424 " %s is down; error details: %s",
8425 instance.name, source_node, source_node, msg)
8427 raise errors.OpExecError("Could not shut down instance %s on"
8429 (instance.name, source_node, msg))
8431 self.feedback_fn("* deactivating the instance's disks on source node")
8432 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8433 raise errors.OpExecError("Can't shut down the instance's disks")
8435 instance.primary_node = target_node
8436 # distribute new instance config to the other nodes
8437 self.cfg.Update(instance, self.feedback_fn)
8439 # Only start the instance if it's marked as up
8440 if instance.admin_state == constants.ADMINST_UP:
8441 self.feedback_fn("* activating the instance's disks on target node %s" %
8443 logging.info("Starting instance %s on node %s",
8444 instance.name, target_node)
8446 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8447 ignore_secondaries=True)
8449 _ShutdownInstanceDisks(self.lu, instance)
8450 raise errors.OpExecError("Can't activate the instance's disks")
8452 self.feedback_fn("* starting the instance on the target node %s" %
8454 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8456 msg = result.fail_msg
8458 _ShutdownInstanceDisks(self.lu, instance)
8459 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8460 (instance.name, target_node, msg))
8462 def Exec(self, feedback_fn):
8463 """Perform the migration.
8466 self.feedback_fn = feedback_fn
8467 self.source_node = self.instance.primary_node
8469 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8470 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8471 self.target_node = self.instance.secondary_nodes[0]
8472 # Otherwise self.target_node has been populated either
8473 # directly, or through an iallocator.
8475 self.all_nodes = [self.source_node, self.target_node]
8476 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8477 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8480 feedback_fn("Failover instance %s" % self.instance.name)
8481 self._ExecFailover()
8483 feedback_fn("Migrating instance %s" % self.instance.name)
8486 return self._ExecCleanup()
8488 return self._ExecMigration()
8491 def _CreateBlockDev(lu, node, instance, device, force_create,
8493 """Create a tree of block devices on a given node.
8495 If this device type has to be created on secondaries, create it and
8498 If not, just recurse to children keeping the same 'force' value.
8500 @param lu: the lu on whose behalf we execute
8501 @param node: the node on which to create the device
8502 @type instance: L{objects.Instance}
8503 @param instance: the instance which owns the device
8504 @type device: L{objects.Disk}
8505 @param device: the device to create
8506 @type force_create: boolean
8507 @param force_create: whether to force creation of this device; this
8508 will be changed to True whenever we find a device which has the
8509 CreateOnSecondary() attribute
8510 @param info: the extra 'metadata' we should attach to the device
8511 (this will be represented as a LVM tag)
8512 @type force_open: boolean
8513 @param force_open: this parameter will be passed to the
8514 L{backend.BlockdevCreate} function where it specifies
8515 whether we run on primary or not, and it affects both
8516 the child assembly and the device's own Open() execution
8519 if device.CreateOnSecondary():
8523 for child in device.children:
8524 _CreateBlockDev(lu, node, instance, child, force_create,
8527 if not force_create:
8530 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
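# In other words: force_create arrives as True for the primary node and False
# for secondaries (see _CreateDisks below), is promoted to True as soon as a
# device in the tree reports CreateOnSecondary(), and only devices reached
# with force_create True are actually created via _CreateSingleBlockDev.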
8533 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8534 """Create a single block device on a given node.
8536 This will not recurse over children of the device, so they must be
8539 @param lu: the lu on whose behalf we execute
8540 @param node: the node on which to create the device
8541 @type instance: L{objects.Instance}
8542 @param instance: the instance which owns the device
8543 @type device: L{objects.Disk}
8544 @param device: the device to create
8545 @param info: the extra 'metadata' we should attach to the device
8546 (this will be represented as a LVM tag)
8547 @type force_open: boolean
8548 @param force_open: this parameter will be passed to the
8549 L{backend.BlockdevCreate} function where it specifies
8550 whether we run on primary or not, and it affects both
8551 the child assembly and the device's own Open() execution
8554 lu.cfg.SetDiskID(device, node)
8555 result = lu.rpc.call_blockdev_create(node, device, device.size,
8556 instance.name, force_open, info)
8557 result.Raise("Can't create block device %s on"
8558 " node %s for instance %s" % (device, node, instance.name))
8559 if device.physical_id is None:
8560 device.physical_id = result.payload
8563 def _GenerateUniqueNames(lu, exts):
8564 """Generate a suitable LV name.
8566 This will generate a logical volume name for the given instance.
8571 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8572 results.append("%s%s" % (new_id, val))
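# Illustrative example: _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns
# names of the form "<unique-id>.disk0" and "<unique-id>.disk1", where each
# <unique-id> comes from the configuration's GenerateUniqueID.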
8576 def _ComputeLDParams(disk_template, disk_params):
8577 """Computes Logical Disk parameters from Disk Template parameters.
8579 @type disk_template: string
8580 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8581 @type disk_params: dict
8582 @param disk_params: disk template parameters; dict(template_name -> parameters)
8584 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8585 contains the LD parameters of the node. The tree is flattened in-order.
8588 if disk_template not in constants.DISK_TEMPLATES:
8589 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8592 dt_params = disk_params[disk_template]
8593 if disk_template == constants.DT_DRBD8:
8595 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8596 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8597 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8598 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8599 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8600 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8601 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8602 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8603 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8604 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8605 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8606 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8610 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8613 result.append(drbd_params)
8617 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8620 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8622 result.append(data_params)
8626 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8629 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8631 result.append(meta_params)
8633 elif (disk_template == constants.DT_FILE or
8634 disk_template == constants.DT_SHARED_FILE):
8635 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8637 elif disk_template == constants.DT_PLAIN:
8639 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8642 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8644 result.append(params)
8646 elif disk_template == constants.DT_BLOCK:
8647 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8649 elif disk_template == constants.DT_RBD:
8651 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8654 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8656 result.append(params)
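# The caller receives the flattened parameter list in the order in which the
# disk tree is built; e.g. for DT_DRBD8 the list is
# [drbd_params, data_lv_params, meta_lv_params] (unpacked exactly that way in
# _GenerateDiskTemplate below), while DT_PLAIN and DT_RBD yield a single
# entry.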
8661 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8662 iv_name, p_minor, s_minor, drbd_params, data_params,
8664 """Generate a drbd8 device complete with its children.
8667 assert len(vgnames) == len(names) == 2
8668 port = lu.cfg.AllocatePort()
8669 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8671 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8672 logical_id=(vgnames[0], names[0]),
8674 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8675 logical_id=(vgnames[1], names[1]),
8677 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8678 logical_id=(primary, secondary, port,
8681 children=[dev_data, dev_meta],
8682 iv_name=iv_name, params=drbd_params)
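# The resulting device is an LD_DRBD8 disk whose two children are the data LV
# (of the requested size) and the metadata LV (DRBD_META_SIZE), with one DRBD
# minor per node and a freshly allocated port and shared secret.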
8686 _DISK_TEMPLATE_NAME_PREFIX = {
8687 constants.DT_PLAIN: "",
8688 constants.DT_RBD: ".rbd",
8692 _DISK_TEMPLATE_DEVICE_TYPE = {
8693 constants.DT_PLAIN: constants.LD_LV,
8694 constants.DT_FILE: constants.LD_FILE,
8695 constants.DT_SHARED_FILE: constants.LD_FILE,
8696 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8697 constants.DT_RBD: constants.LD_RBD,
8701 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8702 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8703 feedback_fn, disk_params,
8704 _req_file_storage=opcodes.RequireFileStorage,
8705 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8706 """Generate the entire disk layout for a given template type.
8709 # TODO: compute space requirements
8711 vgname = lu.cfg.GetVGName()
8712 disk_count = len(disk_info)
8714 ld_params = _ComputeLDParams(template_name, disk_params)
8716 if template_name == constants.DT_DISKLESS:
8718 elif template_name == constants.DT_DRBD8:
8719 drbd_params, data_params, meta_params = ld_params
8720 if len(secondary_nodes) != 1:
8721 raise errors.ProgrammerError("Wrong template configuration")
8722 remote_node = secondary_nodes[0]
8723 minors = lu.cfg.AllocateDRBDMinor(
8724 [primary_node, remote_node] * len(disk_info), instance_name)
8727 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8728 for i in range(disk_count)]):
8729 names.append(lv_prefix + "_data")
8730 names.append(lv_prefix + "_meta")
8731 for idx, disk in enumerate(disk_info):
8732 disk_index = idx + base_index
8733 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8734 data_vg = disk.get(constants.IDISK_VG, vgname)
8735 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8736 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8737 disk[constants.IDISK_SIZE],
8739 names[idx * 2:idx * 2 + 2],
8740 "disk/%d" % disk_index,
8741 minors[idx * 2], minors[idx * 2 + 1],
8742 drbd_params, data_params, meta_params)
8743 disk_dev.mode = disk[constants.IDISK_MODE]
8744 disks.append(disk_dev)
8747 raise errors.ProgrammerError("Wrong template configuration")
8749 if template_name == constants.DT_FILE:
8751 elif template_name == constants.DT_SHARED_FILE:
8752 _req_shr_file_storage()
8754 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8755 if name_prefix is None:
8758 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8759 (name_prefix, base_index + i)
8760 for i in range(disk_count)])
8762 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8764 if template_name == constants.DT_PLAIN:
8765 def logical_id_fn(idx, _, disk):
8766 vg = disk.get(constants.IDISK_VG, vgname)
8767 return (vg, names[idx])
8768 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8770 lambda _, disk_index, disk: (file_driver,
8771 "%s/disk%d" % (file_storage_dir,
8773 elif template_name == constants.DT_BLOCK:
8775 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8776 disk[constants.IDISK_ADOPT])
8777 elif template_name == constants.DT_RBD:
8778 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8780 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8782 for idx, disk in enumerate(disk_info):
8783 disk_index = idx + base_index
8784 size = disk[constants.IDISK_SIZE]
8785 feedback_fn("* disk %s, size %s" %
8786 (disk_index, utils.FormatUnit(size, "h")))
8787 disks.append(objects.Disk(dev_type=dev_type, size=size,
8788 logical_id=logical_id_fn(idx, disk_index, disk),
8789 iv_name="disk/%d" % disk_index,
8790 mode=disk[constants.IDISK_MODE],
8791 params=ld_params[0]))
8796 def _GetInstanceInfoText(instance):
8797 Compute the text that should be added to the disk's metadata.
8800 return "originstname+%s" % instance.name
8803 def _CalcEta(time_taken, written, total_size):
8804 """Calculates the ETA based on size written and total size.
8806 @param time_taken: The time taken so far
8807 @param written: amount written so far
8808 @param total_size: The total size of data to be written
8809 @return: The remaining time in seconds
8812 avg_time = time_taken / float(written)
8813 return (total_size - written) * avg_time
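# Worked example: if 1024 MiB out of 4096 MiB have been written in 30 seconds,
# avg_time is 30/1024 seconds per MiB and the ETA is
# (4096 - 1024) * 30 / 1024 = 90 seconds.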
8816 def _WipeDisks(lu, instance):
8817 """Wipes instance disks.
8819 @type lu: L{LogicalUnit}
8820 @param lu: the logical unit on whose behalf we execute
8821 @type instance: L{objects.Instance}
8822 @param instance: the instance whose disks we should wipe
8823 @return: the success of the wipe
8826 node = instance.primary_node
8828 for device in instance.disks:
8829 lu.cfg.SetDiskID(device, node)
8831 logging.info("Pause sync of instance %s disks", instance.name)
8832 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8834 for idx, success in enumerate(result.payload):
8836 logging.warn("pause-sync of instance %s for disk %d failed",
8840 for idx, device in enumerate(instance.disks):
8841 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8842 # MAX_WIPE_CHUNK at max
8843 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8844 constants.MIN_WIPE_CHUNK_PERCENT)
8845 # we _must_ make this an int, otherwise rounding errors will
8847 wipe_chunk_size = int(wipe_chunk_size)
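# Illustrative example (assuming MIN_WIPE_CHUNK_PERCENT is 10 and
# MAX_WIPE_CHUNK is 1024 MiB): a 4096 MiB disk is wiped in chunks of
# min(1024, 4096 / 100.0 * 10) = 409 MiB, while very large disks are capped
# at 1024 MiB per chunk.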
8849 lu.LogInfo("* Wiping disk %d", idx)
8850 logging.info("Wiping disk %d for instance %s, node %s using"
8851 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8856 start_time = time.time()
8858 while offset < size:
8859 wipe_size = min(wipe_chunk_size, size - offset)
8860 logging.debug("Wiping disk %d, offset %s, chunk %s",
8861 idx, offset, wipe_size)
8862 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8863 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8864 (idx, offset, wipe_size))
8867 if now - last_output >= 60:
8868 eta = _CalcEta(now - start_time, offset, size)
8869 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8870 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8873 logging.info("Resume sync of instance %s disks", instance.name)
8875 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8877 for idx, success in enumerate(result.payload):
8879 lu.LogWarning("Resume sync of disk %d failed, please have a"
8880 " look at the status and troubleshoot the issue", idx)
8881 logging.warn("resume-sync of instance %s for disk %d failed",
8885 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8886 """Create all disks for an instance.
8888 This abstracts away some work from AddInstance.
8890 @type lu: L{LogicalUnit}
8891 @param lu: the logical unit on whose behalf we execute
8892 @type instance: L{objects.Instance}
8893 @param instance: the instance whose disks we should create
8895 @param to_skip: list of indices to skip
8896 @type target_node: string
8897 @param target_node: if passed, overrides the target node for creation
8899 @return: the success of the creation
8902 info = _GetInstanceInfoText(instance)
8903 if target_node is None:
8904 pnode = instance.primary_node
8905 all_nodes = instance.all_nodes
8910 if instance.disk_template in constants.DTS_FILEBASED:
8911 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8912 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8914 result.Raise("Failed to create directory '%s' on"
8915 " node %s" % (file_storage_dir, pnode))
8917 # Note: this needs to be kept in sync with adding of disks in
8918 # LUInstanceSetParams
8919 for idx, device in enumerate(instance.disks):
8920 if to_skip and idx in to_skip:
8922 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8924 for node in all_nodes:
8925 f_create = node == pnode
8926 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8929 def _RemoveDisks(lu, instance, target_node=None):
8930 """Remove all disks for an instance.
8932 This abstracts away some work from `AddInstance()` and
8933 `RemoveInstance()`. Note that in case some of the devices couldn't
8934 be removed, the removal will continue with the other ones (compare
8935 with `_CreateDisks()`).
8937 @type lu: L{LogicalUnit}
8938 @param lu: the logical unit on whose behalf we execute
8939 @type instance: L{objects.Instance}
8940 @param instance: the instance whose disks we should remove
8941 @type target_node: string
8942 @param target_node: used to override the node on which to remove the disks
8944 @return: the success of the removal
8947 logging.info("Removing block devices for instance %s", instance.name)
8950 for (idx, device) in enumerate(instance.disks):
8952 edata = [(target_node, device)]
8954 edata = device.ComputeNodeTree(instance.primary_node)
8955 for node, disk in edata:
8956 lu.cfg.SetDiskID(disk, node)
8957 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8959 lu.LogWarning("Could not remove disk %s on node %s,"
8960 " continuing anyway: %s", idx, node, msg)
8963 # if this is a DRBD disk, return its port to the pool
8964 if device.dev_type in constants.LDS_DRBD:
8965 tcp_port = device.logical_id[2]
8966 lu.cfg.AddTcpUdpPort(tcp_port)
8968 if instance.disk_template == constants.DT_FILE:
8969 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8973 tgt = instance.primary_node
8974 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8976 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8977 file_storage_dir, instance.primary_node, result.fail_msg)
8983 def _ComputeDiskSizePerVG(disk_template, disks):
8984 """Compute disk size requirements in the volume group
8987 def _compute(disks, payload):
8988 """Universal algorithm.
8993 vgs[disk[constants.IDISK_VG]] = \
8994 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8998 # Required free disk space as a function of disk and swap space
9000 constants.DT_DISKLESS: {},
9001 constants.DT_PLAIN: _compute(disks, 0),
9002 # 128 MB are added for drbd metadata for each disk
9003 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9004 constants.DT_FILE: {},
9005 constants.DT_SHARED_FILE: {},
9008 if disk_template not in req_size_dict:
9009 raise errors.ProgrammerError("Disk template '%s' size requirement"
9010 " is unknown" % disk_template)
9012 return req_size_dict[disk_template]
9015 def _ComputeDiskSize(disk_template, disks):
9016 """Compute disk size requirements in the volume group
9019 # Required free disk space as a function of disk and swap space
9021 constants.DT_DISKLESS: None,
9022 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9023 # 128 MB are added for drbd metadata for each disk
9025 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9026 constants.DT_FILE: None,
9027 constants.DT_SHARED_FILE: 0,
9028 constants.DT_BLOCK: 0,
9029 constants.DT_RBD: 0,
9032 if disk_template not in req_size_dict:
9033 raise errors.ProgrammerError("Disk template '%s' size requirement"
9034 " is unknown" % disk_template)
9036 return req_size_dict[disk_template]
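# Illustrative example: for two DRBD8 disks of 1024 MiB and 2048 MiB,
# _ComputeDiskSize(constants.DT_DRBD8, disks) returns
# (1024 + 128) + (2048 + 128) = 3328 MiB, the extra 128 MiB per disk being the
# DRBD metadata mentioned above.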
9039 def _FilterVmNodes(lu, nodenames):
9040 """Filters out non-vm_capable nodes from a list.
9042 @type lu: L{LogicalUnit}
9043 @param lu: the logical unit for which we check
9044 @type nodenames: list
9045 @param nodenames: the list of nodes on which we should check
9047 @return: the list of vm-capable nodes
9050 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9051 return [name for name in nodenames if name not in vm_nodes]
9054 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9055 """Hypervisor parameter validation.
9057 This function abstracts the hypervisor parameter validation to be
9058 used in both instance create and instance modify.
9060 @type lu: L{LogicalUnit}
9061 @param lu: the logical unit for which we check
9062 @type nodenames: list
9063 @param nodenames: the list of nodes on which we should check
9064 @type hvname: string
9065 @param hvname: the name of the hypervisor we should use
9066 @type hvparams: dict
9067 @param hvparams: the parameters which we need to check
9068 @raise errors.OpPrereqError: if the parameters are not valid
9071 nodenames = _FilterVmNodes(lu, nodenames)
9073 cluster = lu.cfg.GetClusterInfo()
9074 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9076 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9077 for node in nodenames:
9081 info.Raise("Hypervisor parameter validation failed on node %s" % node)
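# Usage sketch (hypothetical variable names): a caller changing an instance's
# hypervisor parameters would run something like
#   _CheckHVParams(self, list(instance.all_nodes), instance.hypervisor, hv_new)
# so that every vm-capable node validates the merged parameter set before the
# change is committed.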
9084 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9085 """OS parameters validation.
9087 @type lu: L{LogicalUnit}
9088 @param lu: the logical unit for which we check
9089 @type required: boolean
9090 @param required: whether the validation should fail if the OS is not
9092 @type nodenames: list
9093 @param nodenames: the list of nodes on which we should check
9094 @type osname: string
9095 @param osname: the name of the OS we should use
9096 @type osparams: dict
9097 @param osparams: the parameters which we need to check
9098 @raise errors.OpPrereqError: if the parameters are not valid
9101 nodenames = _FilterVmNodes(lu, nodenames)
9102 result = lu.rpc.call_os_validate(nodenames, required, osname,
9103 [constants.OS_VALIDATE_PARAMETERS],
9105 for node, nres in result.items():
9106 # we don't check for offline cases since this should be run only
9107 # against the master node and/or an instance's nodes
9108 nres.Raise("OS Parameters validation failed on node %s" % node)
9109 if not nres.payload:
9110 lu.LogInfo("OS %s not found on node %s, validation skipped",
9114 class LUInstanceCreate(LogicalUnit):
9115 """Create an instance.
9118 HPATH = "instance-add"
9119 HTYPE = constants.HTYPE_INSTANCE
9122 def CheckArguments(self):
9126 # do not require name_check to ease forward/backward compatibility
9128 if self.op.no_install and self.op.start:
9129 self.LogInfo("No-installation mode selected, disabling startup")
9130 self.op.start = False
9131 # validate/normalize the instance name
9132 self.op.instance_name = \
9133 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9135 if self.op.ip_check and not self.op.name_check:
9136 # TODO: make the ip check more flexible and not depend on the name check
9137 raise errors.OpPrereqError("Cannot do IP address check without a name"
9138 " check", errors.ECODE_INVAL)
9140 # check nics' parameter names
9141 for nic in self.op.nics:
9142 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9144 # check disks. parameter names and consistent adopt/no-adopt strategy
9145 has_adopt = has_no_adopt = False
9146 for disk in self.op.disks:
9147 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9148 if constants.IDISK_ADOPT in disk:
9152 if has_adopt and has_no_adopt:
9153 raise errors.OpPrereqError("Either all disks are adopted or none is",
9156 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9157 raise errors.OpPrereqError("Disk adoption is not supported for the"
9158 " '%s' disk template" %
9159 self.op.disk_template,
9161 if self.op.iallocator is not None:
9162 raise errors.OpPrereqError("Disk adoption not allowed with an"
9163 " iallocator script", errors.ECODE_INVAL)
9164 if self.op.mode == constants.INSTANCE_IMPORT:
9165 raise errors.OpPrereqError("Disk adoption not allowed for"
9166 " instance import", errors.ECODE_INVAL)
9168 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9169 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9170 " but no 'adopt' parameter given" %
9171 self.op.disk_template,
9174 self.adopt_disks = has_adopt
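# Illustrative disk specifications: a regular creation request passes entries
# such as {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR},
# while an adoption request additionally carries constants.IDISK_ADOPT naming
# the existing volume to take over; mixing the two styles is rejected above.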
9176 # instance name verification
9177 if self.op.name_check:
9178 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9179 self.op.instance_name = self.hostname1.name
9180 # used in CheckPrereq for ip ping check
9181 self.check_ip = self.hostname1.ip
9183 self.check_ip = None
9185 # file storage checks
9186 if (self.op.file_driver and
9187 not self.op.file_driver in constants.FILE_DRIVER):
9188 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9189 self.op.file_driver, errors.ECODE_INVAL)
9191 if self.op.disk_template == constants.DT_FILE:
9192 opcodes.RequireFileStorage()
9193 elif self.op.disk_template == constants.DT_SHARED_FILE:
9194 opcodes.RequireSharedFileStorage()
9196 ### Node/iallocator related checks
9197 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9199 if self.op.pnode is not None:
9200 if self.op.disk_template in constants.DTS_INT_MIRROR:
9201 if self.op.snode is None:
9202 raise errors.OpPrereqError("The networked disk templates need"
9203 " a mirror node", errors.ECODE_INVAL)
9205 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9207 self.op.snode = None
9209 self._cds = _GetClusterDomainSecret()
9211 if self.op.mode == constants.INSTANCE_IMPORT:
9212 # On import force_variant must be True, because if we forced it at
9213 # initial install, our only chance when importing it back is that it
9215 self.op.force_variant = True
9217 if self.op.no_install:
9218 self.LogInfo("No-installation mode has no effect during import")
9220 elif self.op.mode == constants.INSTANCE_CREATE:
9221 if self.op.os_type is None:
9222 raise errors.OpPrereqError("No guest OS specified",
9224 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9225 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9226 " installation" % self.op.os_type,
9228 if self.op.disk_template is None:
9229 raise errors.OpPrereqError("No disk template specified",
9232 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9233 # Check handshake to ensure both clusters have the same domain secret
9234 src_handshake = self.op.source_handshake
9235 if not src_handshake:
9236 raise errors.OpPrereqError("Missing source handshake",
9239 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9242 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9245 # Load and check source CA
9246 self.source_x509_ca_pem = self.op.source_x509_ca
9247 if not self.source_x509_ca_pem:
9248 raise errors.OpPrereqError("Missing source X509 CA",
9252 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9254 except OpenSSL.crypto.Error, err:
9255 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9256 (err, ), errors.ECODE_INVAL)
9258 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9259 if errcode is not None:
9260 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9263 self.source_x509_ca = cert
9265 src_instance_name = self.op.source_instance_name
9266 if not src_instance_name:
9267 raise errors.OpPrereqError("Missing source instance name",
9270 self.source_instance_name = \
9271 netutils.GetHostname(name=src_instance_name).name
9274 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9275 self.op.mode, errors.ECODE_INVAL)
9277 def ExpandNames(self):
9278 """ExpandNames for CreateInstance.
9280 Figure out the right locks for instance creation.
9283 self.needed_locks = {}
9285 instance_name = self.op.instance_name
9286 # this is just a preventive check, but someone might still add this
9287 # instance in the meantime, and creation will fail at lock-add time
9288 if instance_name in self.cfg.GetInstanceList():
9289 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9290 instance_name, errors.ECODE_EXISTS)
9292 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9294 if self.op.iallocator:
9295 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9296 # specifying a group on instance creation and then selecting nodes from
9298 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9299 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9301 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9302 nodelist = [self.op.pnode]
9303 if self.op.snode is not None:
9304 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9305 nodelist.append(self.op.snode)
9306 self.needed_locks[locking.LEVEL_NODE] = nodelist
9307 # Lock resources of instance's primary and secondary nodes (copy to
9308 # prevent accidental modification)
9309 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
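# note: the node resource locks (LEVEL_NODE_RES) are taken in addition to
# the plain node locks and are released separately later on, once the
# disks have been created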
9311 # in case of import lock the source node too
9312 if self.op.mode == constants.INSTANCE_IMPORT:
9313 src_node = self.op.src_node
9314 src_path = self.op.src_path
9316 if src_path is None:
9317 self.op.src_path = src_path = self.op.instance_name
9319 if src_node is None:
9320 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9321 self.op.src_node = None
9322 if os.path.isabs(src_path):
9323 raise errors.OpPrereqError("Importing an instance from a path"
9324 " requires a source node option",
9327 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9328 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9329 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9330 if not os.path.isabs(src_path):
9331 self.op.src_path = src_path = \
9332 utils.PathJoin(constants.EXPORT_DIR, src_path)
9334 def _RunAllocator(self):
9335 """Run the allocator based on input opcode.
9338 nics = [n.ToDict() for n in self.nics]
9339 ial = IAllocator(self.cfg, self.rpc,
9340 mode=constants.IALLOCATOR_MODE_ALLOC,
9341 name=self.op.instance_name,
9342 disk_template=self.op.disk_template,
9345 vcpus=self.be_full[constants.BE_VCPUS],
9346 memory=self.be_full[constants.BE_MAXMEM],
9349 hypervisor=self.op.hypervisor,
9352 ial.Run(self.op.iallocator)
9355 raise errors.OpPrereqError("Can't compute nodes using"
9356 " iallocator '%s': %s" %
9357 (self.op.iallocator, ial.info),
9359 if len(ial.result) != ial.required_nodes:
9360 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9361 " of nodes (%s), required %s" %
9362 (self.op.iallocator, len(ial.result),
9363 ial.required_nodes), errors.ECODE_FAULT)
9364 self.op.pnode = ial.result[0]
9365 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9366 self.op.instance_name, self.op.iallocator,
9367 utils.CommaJoin(ial.result))
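# for disk templates that need a mirror the allocator is asked for two
# nodes; the second entry of the result becomes the secondary node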
9368 if ial.required_nodes == 2:
9369 self.op.snode = ial.result[1]
9371 def BuildHooksEnv(self):
9374 This runs on master, primary and secondary nodes of the instance.
9378 "ADD_MODE": self.op.mode,
9380 if self.op.mode == constants.INSTANCE_IMPORT:
9381 env["SRC_NODE"] = self.op.src_node
9382 env["SRC_PATH"] = self.op.src_path
9383 env["SRC_IMAGES"] = self.src_images
9385 env.update(_BuildInstanceHookEnv(
9386 name=self.op.instance_name,
9387 primary_node=self.op.pnode,
9388 secondary_nodes=self.secondaries,
9389 status=self.op.start,
9390 os_type=self.op.os_type,
9391 minmem=self.be_full[constants.BE_MINMEM],
9392 maxmem=self.be_full[constants.BE_MAXMEM],
9393 vcpus=self.be_full[constants.BE_VCPUS],
9394 nics=_NICListToTuple(self, self.nics),
9395 disk_template=self.op.disk_template,
9396 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9397 for d in self.disks],
9400 hypervisor_name=self.op.hypervisor,
9406 def BuildHooksNodes(self):
9407 """Build hooks nodes.
9410 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9413 def _ReadExportInfo(self):
9414 """Reads the export information from disk.
9416 It will override the opcode source node and path with the actual
9417 information, if these two were not specified before.
9419 @return: the export information
9422 assert self.op.mode == constants.INSTANCE_IMPORT
9424 src_node = self.op.src_node
9425 src_path = self.op.src_path
9427 if src_node is None:
9428 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9429 exp_list = self.rpc.call_export_list(locked_nodes)
9431 for node in exp_list:
9432 if exp_list[node].fail_msg:
9434 if src_path in exp_list[node].payload:
9436 self.op.src_node = src_node = node
9437 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9441 raise errors.OpPrereqError("No export found for relative path %s" %
9442 src_path, errors.ECODE_INVAL)
9444 _CheckNodeOnline(self, src_node)
9445 result = self.rpc.call_export_info(src_node, src_path)
9446 result.Raise("No export or invalid export found in dir %s" % src_path)
9448 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9449 if not export_info.has_section(constants.INISECT_EXP):
9450 raise errors.ProgrammerError("Corrupted export config",
9451 errors.ECODE_ENVIRON)
9453 ei_version = export_info.get(constants.INISECT_EXP, "version")
9454 if (int(ei_version) != constants.EXPORT_VERSION):
9455 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9456 (ei_version, constants.EXPORT_VERSION),
9457 errors.ECODE_ENVIRON)
9459 return export_info
9460 def _ReadExportParams(self, einfo):
9461 """Use export parameters as defaults.
9463 In case the opcode doesn't specify (as in override) some instance
9464 parameters, then try to use them from the export information, if
9465 that declares them.
9468 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9470 if self.op.disk_template is None:
9471 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9472 self.op.disk_template = einfo.get(constants.INISECT_INS,
9474 if self.op.disk_template not in constants.DISK_TEMPLATES:
9475 raise errors.OpPrereqError("Disk template specified in configuration"
9476 " file is not one of the allowed values:"
9477 " %s" % " ".join(constants.DISK_TEMPLATES))
9479 raise errors.OpPrereqError("No disk template specified and the export"
9480 " is missing the disk_template information",
9483 if not self.op.disks:
9484 disks = []
9485 # TODO: import the disk iv_name too
9486 for idx in range(constants.MAX_DISKS):
9487 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9488 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9489 disks.append({constants.IDISK_SIZE: disk_sz})
9490 self.op.disks = disks
9491 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9492 raise errors.OpPrereqError("No disk info specified and the export"
9493 " is missing the disk information",
9496 if not self.op.nics:
9498 for idx in range(constants.MAX_NICS):
9499 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9501 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9502 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9509 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9510 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9512 if (self.op.hypervisor is None and
9513 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9514 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9516 if einfo.has_section(constants.INISECT_HYP):
9517 # use the export parameters but do not override the ones
9518 # specified by the user
9519 for name, value in einfo.items(constants.INISECT_HYP):
9520 if name not in self.op.hvparams:
9521 self.op.hvparams[name] = value
9523 if einfo.has_section(constants.INISECT_BEP):
9524 # use the parameters, without overriding
9525 for name, value in einfo.items(constants.INISECT_BEP):
9526 if name not in self.op.beparams:
9527 self.op.beparams[name] = value
9528 # Compatibility for the old "memory" be param
9529 if name == constants.BE_MEMORY:
9530 if constants.BE_MAXMEM not in self.op.beparams:
9531 self.op.beparams[constants.BE_MAXMEM] = value
9532 if constants.BE_MINMEM not in self.op.beparams:
9533 self.op.beparams[constants.BE_MINMEM] = value
9535 # try to read the parameters old style, from the main section
9536 for name in constants.BES_PARAMETERS:
9537 if (name not in self.op.beparams and
9538 einfo.has_option(constants.INISECT_INS, name)):
9539 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9541 if einfo.has_section(constants.INISECT_OSP):
9542 # use the parameters, without overriding
9543 for name, value in einfo.items(constants.INISECT_OSP):
9544 if name not in self.op.osparams:
9545 self.op.osparams[name] = value
9547 def _RevertToDefaults(self, cluster):
9548 """Revert the instance parameters to the default values.
9552 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9553 for name in self.op.hvparams.keys():
9554 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9555 del self.op.hvparams[name]
9557 be_defs = cluster.SimpleFillBE({})
9558 for name in self.op.beparams.keys():
9559 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9560 del self.op.beparams[name]
9562 nic_defs = cluster.SimpleFillNIC({})
9563 for nic in self.op.nics:
9564 for name in constants.NICS_PARAMETERS:
9565 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9568 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9569 for name in self.op.osparams.keys():
9570 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9571 del self.op.osparams[name]
9573 def _CalculateFileStorageDir(self):
9574 """Calculate final instance file storage dir.
9577 # file storage dir calculation/check
9578 self.instance_file_storage_dir = None
9579 if self.op.disk_template in constants.DTS_FILEBASED:
9580 # build the full file storage dir path
9583 if self.op.disk_template == constants.DT_SHARED_FILE:
9584 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9586 get_fsd_fn = self.cfg.GetFileStorageDir
9588 cfg_storagedir = get_fsd_fn()
9589 if not cfg_storagedir:
9590 raise errors.OpPrereqError("Cluster file storage dir not defined")
9591 joinargs.append(cfg_storagedir)
9593 if self.op.file_storage_dir is not None:
9594 joinargs.append(self.op.file_storage_dir)
9596 joinargs.append(self.op.instance_name)
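# the resulting path is
# <cluster file storage dir>[/<user-supplied dir>]/<instance name>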
9598 # pylint: disable=W0142
9599 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9601 def CheckPrereq(self): # pylint: disable=R0914
9602 """Check prerequisites.
9605 self._CalculateFileStorageDir()
9607 if self.op.mode == constants.INSTANCE_IMPORT:
9608 export_info = self._ReadExportInfo()
9609 self._ReadExportParams(export_info)
9611 if (not self.cfg.GetVGName() and
9612 self.op.disk_template not in constants.DTS_NOT_LVM):
9613 raise errors.OpPrereqError("Cluster does not support lvm-based"
9614 " instances", errors.ECODE_STATE)
9616 if (self.op.hypervisor is None or
9617 self.op.hypervisor == constants.VALUE_AUTO):
9618 self.op.hypervisor = self.cfg.GetHypervisorType()
9620 cluster = self.cfg.GetClusterInfo()
9621 enabled_hvs = cluster.enabled_hypervisors
9622 if self.op.hypervisor not in enabled_hvs:
9623 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9624 " cluster (%s)" % (self.op.hypervisor,
9625 ",".join(enabled_hvs)),
9628 # Check tag validity
9629 for tag in self.op.tags:
9630 objects.TaggableObject.ValidateTag(tag)
9632 # check hypervisor parameter syntax (locally)
9633 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9634 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9636 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9637 hv_type.CheckParameterSyntax(filled_hvp)
9638 self.hv_full = filled_hvp
9639 # check that we don't specify global parameters on an instance
9640 _CheckGlobalHvParams(self.op.hvparams)
9642 # fill and remember the beparams dict
9643 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9644 for param, value in self.op.beparams.iteritems():
9645 if value == constants.VALUE_AUTO:
9646 self.op.beparams[param] = default_beparams[param]
9647 objects.UpgradeBeParams(self.op.beparams)
9648 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9649 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9651 # build os parameters
9652 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9654 # now that hvp/bep are in final format, let's reset to defaults,
9656 if self.op.identify_defaults:
9657 self._RevertToDefaults(cluster)
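# NIC buildup: normalize every requested NIC into an objects.NIC object
# with its final MAC, IP and filled-in nicparams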
9660 self.nics = []
9661 for idx, nic in enumerate(self.op.nics):
9662 nic_mode_req = nic.get(constants.INIC_MODE, None)
9663 nic_mode = nic_mode_req
9664 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9665 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9667 # in routed mode, for the first nic, the default ip is 'auto'
9668 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9669 default_ip_mode = constants.VALUE_AUTO
9671 default_ip_mode = constants.VALUE_NONE
9673 # ip validity checks
9674 ip = nic.get(constants.INIC_IP, default_ip_mode)
9675 if ip is None or ip.lower() == constants.VALUE_NONE:
9677 elif ip.lower() == constants.VALUE_AUTO:
9678 if not self.op.name_check:
9679 raise errors.OpPrereqError("IP address set to auto but name checks"
9680 " have been skipped",
9682 nic_ip = self.hostname1.ip
9684 if not netutils.IPAddress.IsValid(ip):
9685 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9689 # TODO: check the ip address for uniqueness
9690 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9691 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9694 # MAC address verification
9695 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9696 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9697 mac = utils.NormalizeAndValidateMac(mac)
9699 try:
9700 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9701 except errors.ReservationError:
9702 raise errors.OpPrereqError("MAC address %s already in use"
9703 " in cluster" % mac,
9704 errors.ECODE_NOTUNIQUE)
9706 # Build nic parameters
9707 link = nic.get(constants.INIC_LINK, None)
9708 if link == constants.VALUE_AUTO:
9709 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9712 nicparams[constants.NIC_MODE] = nic_mode
9714 nicparams[constants.NIC_LINK] = link
9716 check_params = cluster.SimpleFillNIC(nicparams)
9717 objects.NIC.CheckParameterSyntax(check_params)
9718 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9720 # disk checks/pre-build
9721 default_vg = self.cfg.GetVGName()
9722 self.disks = []
9723 for disk in self.op.disks:
9724 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9725 if mode not in constants.DISK_ACCESS_SET:
9726 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9727 mode, errors.ECODE_INVAL)
9728 size = disk.get(constants.IDISK_SIZE, None)
9729 if size is None:
9730 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9731 try:
9732 size = int(size)
9733 except (TypeError, ValueError):
9734 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9737 data_vg = disk.get(constants.IDISK_VG, default_vg)
9739 constants.IDISK_SIZE: size,
9740 constants.IDISK_MODE: mode,
9741 constants.IDISK_VG: data_vg,
9743 if constants.IDISK_METAVG in disk:
9744 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9745 if constants.IDISK_ADOPT in disk:
9746 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9747 self.disks.append(new_disk)
9749 if self.op.mode == constants.INSTANCE_IMPORT:
9750 disk_images = []
9751 for idx in range(len(self.disks)):
9752 option = "disk%d_dump" % idx
9753 if export_info.has_option(constants.INISECT_INS, option):
9754 # FIXME: are the old os-es, disk sizes, etc. useful?
9755 export_name = export_info.get(constants.INISECT_INS, option)
9756 image = utils.PathJoin(self.op.src_path, export_name)
9757 disk_images.append(image)
9759 disk_images.append(False)
9761 self.src_images = disk_images
9763 old_name = export_info.get(constants.INISECT_INS, "name")
9764 if self.op.instance_name == old_name:
9765 for idx, nic in enumerate(self.nics):
9766 if nic.mac == constants.VALUE_AUTO:
9767 nic_mac_ini = "nic%d_mac" % idx
9768 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9770 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9772 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9773 if self.op.ip_check:
9774 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9775 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9776 (self.check_ip, self.op.instance_name),
9777 errors.ECODE_NOTUNIQUE)
9779 #### mac address generation
9780 # By generating here the mac address both the allocator and the hooks get
9781 # the real final mac address rather than the 'auto' or 'generate' value.
9782 # There is a race condition between the generation and the instance object
9783 # creation, which means that we know the mac is valid now, but we're not
9784 # sure it will be when we actually add the instance. If things go bad
9785 # adding the instance will abort because of a duplicate mac, and the
9786 # creation job will fail.
9787 for nic in self.nics:
9788 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9789 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
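# with an iallocator the primary (and, for mirrored templates, the
# secondary) node is only chosen now, once NICs, disks and backend
# parameters are in their final form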
9793 if self.op.iallocator is not None:
9794 self._RunAllocator()
9796 # Release all unneeded node locks
9797 _ReleaseLocks(self, locking.LEVEL_NODE,
9798 keep=filter(None, [self.op.pnode, self.op.snode,
9800 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9801 keep=filter(None, [self.op.pnode, self.op.snode,
9804 #### node related checks
9806 # check primary node
9807 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9808 assert self.pnode is not None, \
9809 "Cannot retrieve locked node %s" % self.op.pnode
9811 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9812 pnode.name, errors.ECODE_STATE)
9814 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9815 pnode.name, errors.ECODE_STATE)
9816 if not pnode.vm_capable:
9817 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9818 " '%s'" % pnode.name, errors.ECODE_STATE)
9820 self.secondaries = []
9822 # mirror node verification
9823 if self.op.disk_template in constants.DTS_INT_MIRROR:
9824 if self.op.snode == pnode.name:
9825 raise errors.OpPrereqError("The secondary node cannot be the"
9826 " primary node", errors.ECODE_INVAL)
9827 _CheckNodeOnline(self, self.op.snode)
9828 _CheckNodeNotDrained(self, self.op.snode)
9829 _CheckNodeVmCapable(self, self.op.snode)
9830 self.secondaries.append(self.op.snode)
9832 snode = self.cfg.GetNodeInfo(self.op.snode)
9833 if pnode.group != snode.group:
9834 self.LogWarning("The primary and secondary nodes are in two"
9835 " different node groups; the disk parameters"
9836 " from the first disk's node group will be"
9839 nodenames = [pnode.name] + self.secondaries
9841 # Verify instance specs
9843 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9844 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9845 constants.ISPEC_DISK_COUNT: len(self.disks),
9846 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9847 constants.ISPEC_NIC_COUNT: len(self.nics),
9850 group_info = self.cfg.GetNodeGroup(pnode.group)
9851 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9852 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9853 if not self.op.ignore_ipolicy and res:
9854 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9855 " policy: %s") % (pnode.group,
9856 utils.CommaJoin(res)),
9859 # disk parameters (not customizable at instance or node level)
9860 # just use the primary node parameters, ignoring the secondary.
9861 self.diskparams = group_info.diskparams
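# disk adoption reuses existing volumes or block devices instead of
# creating new ones, hence the checks below differ per disk template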
9863 if not self.adopt_disks:
9864 if self.op.disk_template == constants.DT_RBD:
9865 # _CheckRADOSFreeSpace() is just a placeholder.
9866 # Any function that checks prerequisites can be placed here.
9867 # Check if there is enough space on the RADOS cluster.
9868 _CheckRADOSFreeSpace()
9870 # Check lv size requirements, if not adopting
9871 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9872 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9874 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9875 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9876 disk[constants.IDISK_ADOPT])
9877 for disk in self.disks])
9878 if len(all_lvs) != len(self.disks):
9879 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9881 for lv_name in all_lvs:
9882 try:
9883 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9884 # to ReserveLV use the same syntax
9885 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9886 except errors.ReservationError:
9887 raise errors.OpPrereqError("LV named %s used by another instance" %
9888 lv_name, errors.ECODE_NOTUNIQUE)
9890 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9891 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9893 node_lvs = self.rpc.call_lv_list([pnode.name],
9894 vg_names.payload.keys())[pnode.name]
9895 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9896 node_lvs = node_lvs.payload
9898 delta = all_lvs.difference(node_lvs.keys())
9900 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9901 utils.CommaJoin(delta),
9903 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9905 raise errors.OpPrereqError("Online logical volumes found, cannot"
9906 " adopt: %s" % utils.CommaJoin(online_lvs),
9908 # update the size of disk based on what is found
9909 for dsk in self.disks:
9910 dsk[constants.IDISK_SIZE] = \
9911 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9912 dsk[constants.IDISK_ADOPT])][0]))
9914 elif self.op.disk_template == constants.DT_BLOCK:
9915 # Normalize and de-duplicate device paths
9916 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9917 for disk in self.disks])
9918 if len(all_disks) != len(self.disks):
9919 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9921 baddisks = [d for d in all_disks
9922 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9924 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9925 " cannot be adopted" %
9926 (", ".join(baddisks),
9927 constants.ADOPTABLE_BLOCKDEV_ROOT),
9930 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9931 list(all_disks))[pnode.name]
9932 node_disks.Raise("Cannot get block device information from node %s" %
9934 node_disks = node_disks.payload
9935 delta = all_disks.difference(node_disks.keys())
9937 raise errors.OpPrereqError("Missing block device(s): %s" %
9938 utils.CommaJoin(delta),
9940 for dsk in self.disks:
9941 dsk[constants.IDISK_SIZE] = \
9942 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9944 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9946 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9947 # check OS parameters (remotely)
9948 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9950 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9952 # memory check on primary node
9953 #TODO(dynmem): use MINMEM for checking
9955 _CheckNodeFreeMemory(self, self.pnode.name,
9956 "creating instance %s" % self.op.instance_name,
9957 self.be_full[constants.BE_MAXMEM],
9960 self.dry_run_result = list(nodenames)
9962 def Exec(self, feedback_fn):
9963 """Create and add the instance to the cluster.
9966 instance = self.op.instance_name
9967 pnode_name = self.pnode.name
9969 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9970 self.owned_locks(locking.LEVEL_NODE)), \
9971 "Node locks differ from node resource locks"
9973 ht_kind = self.op.hypervisor
9974 if ht_kind in constants.HTS_REQ_PORT:
9975 network_port = self.cfg.AllocatePort()
9976 else:
9977 network_port = None
9979 disks = _GenerateDiskTemplate(self,
9980 self.op.disk_template,
9981 instance, pnode_name,
9984 self.instance_file_storage_dir,
9985 self.op.file_driver,
9990 iobj = objects.Instance(name=instance, os=self.op.os_type,
9991 primary_node=pnode_name,
9992 nics=self.nics, disks=disks,
9993 disk_template=self.op.disk_template,
9994 admin_state=constants.ADMINST_DOWN,
9995 network_port=network_port,
9996 beparams=self.op.beparams,
9997 hvparams=self.op.hvparams,
9998 hypervisor=self.op.hypervisor,
9999 osparams=self.op.osparams,
10003 for tag in self.op.tags:
10006 if self.adopt_disks:
10007 if self.op.disk_template == constants.DT_PLAIN:
10008 # rename LVs to the newly-generated names; we need to construct
10009 # 'fake' LV disks with the old data, plus the new unique_id
10010 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10011 rename_to = []
10012 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10013 rename_to.append(t_dsk.logical_id)
10014 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10015 self.cfg.SetDiskID(t_dsk, pnode_name)
10016 result = self.rpc.call_blockdev_rename(pnode_name,
10017 zip(tmp_disks, rename_to))
10018 result.Raise("Failed to rename adoped LVs")
10020 feedback_fn("* creating instance disks...")
10021 try:
10022 _CreateDisks(self, iobj)
10023 except errors.OpExecError:
10024 self.LogWarning("Device creation failed, reverting...")
10026 _RemoveDisks(self, iobj)
10028 self.cfg.ReleaseDRBDMinors(instance)
10031 feedback_fn("adding instance %s to cluster config" % instance)
10033 self.cfg.AddInstance(iobj, self.proc.GetECId())
10035 # Declare that we don't want to remove the instance lock anymore, as we've
10036 # added the instance to the config
10037 del self.remove_locks[locking.LEVEL_INSTANCE]
10039 if self.op.mode == constants.INSTANCE_IMPORT:
10040 # Release unused nodes
10041 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10043 # Release all nodes
10044 _ReleaseLocks(self, locking.LEVEL_NODE)
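# decide how to make sure the new disks are usable: wipe them if the
# cluster requests it, explicitly wait for sync, or (for internally
# mirrored templates) just check that the mirrors are not degraded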
10046 disk_abort = False
10047 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10048 feedback_fn("* wiping instance disks...")
10049 try:
10050 _WipeDisks(self, iobj)
10051 except errors.OpExecError, err:
10052 logging.exception("Wiping disks failed")
10053 self.LogWarning("Wiping instance disks failed (%s)", err)
10057 # Something is already wrong with the disks, don't do anything else
10059 elif self.op.wait_for_sync:
10060 disk_abort = not _WaitForSync(self, iobj)
10061 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10062 # make sure the disks are not degraded (still sync-ing is ok)
10063 feedback_fn("* checking mirrors status")
10064 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10065 else:
10066 disk_abort = False
10068 if disk_abort:
10069 _RemoveDisks(self, iobj)
10070 self.cfg.RemoveInstance(iobj.name)
10071 # Make sure the instance lock gets removed
10072 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10073 raise errors.OpExecError("There are some degraded disks for"
10076 # Release all node resource locks
10077 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10079 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10080 if self.op.mode == constants.INSTANCE_CREATE:
10081 if not self.op.no_install:
10082 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10083 not self.op.wait_for_sync)
10085 feedback_fn("* pausing disk sync to install instance OS")
10086 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10088 for idx, success in enumerate(result.payload):
10090 logging.warn("pause-sync of instance %s for disk %d failed",
10093 feedback_fn("* running the instance OS create scripts...")
10094 # FIXME: pass debug option from opcode to backend
10095 os_add_result = \
10096 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10097 self.op.debug_level)
10099 feedback_fn("* resuming disk sync")
10100 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10102 for idx, success in enumerate(result.payload):
10104 logging.warn("resume-sync of instance %s for disk %d failed",
10107 os_add_result.Raise("Could not add os for instance %s"
10108 " on node %s" % (instance, pnode_name))
10110 elif self.op.mode == constants.INSTANCE_IMPORT:
10111 feedback_fn("* running the instance OS import scripts...")
10113 transfers = []
10115 for idx, image in enumerate(self.src_images):
10119 # FIXME: pass debug option from opcode to backend
10120 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10121 constants.IEIO_FILE, (image, ),
10122 constants.IEIO_SCRIPT,
10123 (iobj.disks[idx], idx),
10125 transfers.append(dt)
10127 import_result = \
10128 masterd.instance.TransferInstanceData(self, feedback_fn,
10129 self.op.src_node, pnode_name,
10130 self.pnode.secondary_ip,
10132 if not compat.all(import_result):
10133 self.LogWarning("Some disks for instance %s on node %s were not"
10134 " imported successfully" % (instance, pnode_name))
10136 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10137 feedback_fn("* preparing remote import...")
10138 # The source cluster will stop the instance before attempting to make a
10139 # connection. In some cases stopping an instance can take a long time,
10140 # hence the shutdown timeout is added to the connection timeout.
10141 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10142 self.op.source_shutdown_timeout)
10143 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10145 assert iobj.primary_node == self.pnode.name
10146 disk_results = \
10147 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10148 self.source_x509_ca,
10149 self._cds, timeouts)
10150 if not compat.all(disk_results):
10151 # TODO: Should the instance still be started, even if some disks
10152 # failed to import (valid for local imports, too)?
10153 self.LogWarning("Some disks for instance %s on node %s were not"
10154 " imported successfully" % (instance, pnode_name))
10156 # Run rename script on newly imported instance
10157 assert iobj.name == instance
10158 feedback_fn("Running rename script for %s" % instance)
10159 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10160 self.source_instance_name,
10161 self.op.debug_level)
10162 if result.fail_msg:
10163 self.LogWarning("Failed to run rename script for %s on node"
10164 " %s: %s" % (instance, pnode_name, result.fail_msg))
10167 # also checked in the prereq part
10168 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10171 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10174 iobj.admin_state = constants.ADMINST_UP
10175 self.cfg.Update(iobj, feedback_fn)
10176 logging.info("Starting instance %s on node %s", instance, pnode_name)
10177 feedback_fn("* starting instance...")
10178 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10180 result.Raise("Could not start instance")
10182 return list(iobj.all_nodes)
10185 def _CheckRADOSFreeSpace():
10186 """Compute disk size requirements inside the RADOS cluster.
10189 # For the RADOS cluster we assume there is always enough space.
10193 class LUInstanceConsole(NoHooksLU):
10194 """Connect to an instance's console.
10196 This is somewhat special in that it returns the command line that
10197 you need to run on the master node in order to connect to the
10203 def ExpandNames(self):
10204 self.share_locks = _ShareAll()
10205 self._ExpandAndLockInstance()
10207 def CheckPrereq(self):
10208 """Check prerequisites.
10210 This checks that the instance is in the cluster.
10213 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10214 assert self.instance is not None, \
10215 "Cannot retrieve locked instance %s" % self.op.instance_name
10216 _CheckNodeOnline(self, self.instance.primary_node)
10218 def Exec(self, feedback_fn):
10219 """Connect to the console of an instance
10222 instance = self.instance
10223 node = instance.primary_node
10225 node_insts = self.rpc.call_instance_list([node],
10226 [instance.hypervisor])[node]
10227 node_insts.Raise("Can't get node information from %s" % node)
10229 if instance.name not in node_insts.payload:
10230 if instance.admin_state == constants.ADMINST_UP:
10231 state = constants.INSTST_ERRORDOWN
10232 elif instance.admin_state == constants.ADMINST_DOWN:
10233 state = constants.INSTST_ADMINDOWN
10235 state = constants.INSTST_ADMINOFFLINE
10236 raise errors.OpExecError("Instance %s is not running (state %s)" %
10237 (instance.name, state))
10239 logging.debug("Connecting to console of %s on %s", instance.name, node)
10241 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10244 def _GetInstanceConsole(cluster, instance):
10245 """Returns console information for an instance.
10247 @type cluster: L{objects.Cluster}
10248 @type instance: L{objects.Instance}
10252 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10253 # beparams and hvparams are passed separately, to avoid editing the
10254 # instance and then saving the defaults in the instance itself.
10255 hvparams = cluster.FillHV(instance)
10256 beparams = cluster.FillBE(instance)
10257 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10259 assert console.instance == instance.name
10260 assert console.Validate()
10262 return console.ToDict()
10265 class LUInstanceReplaceDisks(LogicalUnit):
10266 """Replace the disks of an instance.
10269 HPATH = "mirrors-replace"
10270 HTYPE = constants.HTYPE_INSTANCE
10273 def CheckArguments(self):
10274 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10275 self.op.iallocator)
10277 def ExpandNames(self):
10278 self._ExpandAndLockInstance()
10280 assert locking.LEVEL_NODE not in self.needed_locks
10281 assert locking.LEVEL_NODE_RES not in self.needed_locks
10282 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10284 assert self.op.iallocator is None or self.op.remote_node is None, \
10285 "Conflicting options"
10287 if self.op.remote_node is not None:
10288 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10290 # Warning: do not remove the locking of the new secondary here
10291 # unless DRBD8.AddChildren is changed to work in parallel;
10292 # currently it doesn't since parallel invocations of
10293 # FindUnusedMinor will conflict
10294 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10295 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10297 self.needed_locks[locking.LEVEL_NODE] = []
10298 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10300 if self.op.iallocator is not None:
10301 # iallocator will select a new node in the same group
10302 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10304 self.needed_locks[locking.LEVEL_NODE_RES] = []
10306 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10307 self.op.iallocator, self.op.remote_node,
10308 self.op.disks, False, self.op.early_release,
10309 self.op.ignore_ipolicy)
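# the actual disk replacement work is delegated to the TLReplaceDisks
# tasklet; this LU only takes care of locking and hooks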
10311 self.tasklets = [self.replacer]
10313 def DeclareLocks(self, level):
10314 if level == locking.LEVEL_NODEGROUP:
10315 assert self.op.remote_node is None
10316 assert self.op.iallocator is not None
10317 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10319 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10320 # Lock all groups used by instance optimistically; this requires going
10321 # via the node before it's locked, requiring verification later on
10322 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10323 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10325 elif level == locking.LEVEL_NODE:
10326 if self.op.iallocator is not None:
10327 assert self.op.remote_node is None
10328 assert not self.needed_locks[locking.LEVEL_NODE]
10330 # Lock member nodes of all locked groups
10331 self.needed_locks[locking.LEVEL_NODE] = [node_name
10332 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10333 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10334 else:
10335 self._LockInstancesNodes()
10336 elif level == locking.LEVEL_NODE_RES:
10338 self.needed_locks[locking.LEVEL_NODE_RES] = \
10339 self.needed_locks[locking.LEVEL_NODE]
10341 def BuildHooksEnv(self):
10342 """Build hooks env.
10344 This runs on the master, the primary and all the secondaries.
10347 instance = self.replacer.instance
10349 "MODE": self.op.mode,
10350 "NEW_SECONDARY": self.op.remote_node,
10351 "OLD_SECONDARY": instance.secondary_nodes[0],
10353 env.update(_BuildInstanceHookEnvByObject(self, instance))
10356 def BuildHooksNodes(self):
10357 """Build hooks nodes.
10360 instance = self.replacer.instance
10362 self.cfg.GetMasterNode(),
10363 instance.primary_node,
10365 if self.op.remote_node is not None:
10366 nl.append(self.op.remote_node)
10369 def CheckPrereq(self):
10370 """Check prerequisites.
10373 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10374 self.op.iallocator is None)
10376 # Verify if node group locks are still correct
10377 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10379 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10381 return LogicalUnit.CheckPrereq(self)
10384 class TLReplaceDisks(Tasklet):
10385 """Replaces disks for an instance.
10387 Note: Locking is not within the scope of this class.
10390 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10391 disks, delay_iallocator, early_release, ignore_ipolicy):
10392 """Initializes this class.
10395 Tasklet.__init__(self, lu)
10398 self.instance_name = instance_name
10400 self.iallocator_name = iallocator_name
10401 self.remote_node = remote_node
10403 self.delay_iallocator = delay_iallocator
10404 self.early_release = early_release
10405 self.ignore_ipolicy = ignore_ipolicy
10408 self.instance = None
10409 self.new_node = None
10410 self.target_node = None
10411 self.other_node = None
10412 self.remote_node_info = None
10413 self.node_secondary_ip = None
10415 @staticmethod
10416 def CheckArguments(mode, remote_node, iallocator):
10417 """Helper function for users of this class.
10420 # check for valid parameter combination
10421 if mode == constants.REPLACE_DISK_CHG:
10422 if remote_node is None and iallocator is None:
10423 raise errors.OpPrereqError("When changing the secondary either an"
10424 " iallocator script must be used or the"
10425 " new node given", errors.ECODE_INVAL)
10427 if remote_node is not None and iallocator is not None:
10428 raise errors.OpPrereqError("Give either the iallocator or the new"
10429 " secondary, not both", errors.ECODE_INVAL)
10431 elif remote_node is not None or iallocator is not None:
10432 # Not replacing the secondary
10433 raise errors.OpPrereqError("The iallocator and new node options can"
10434 " only be used when changing the"
10435 " secondary node", errors.ECODE_INVAL)
10437 @staticmethod
10438 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10439 """Compute a new secondary node using an IAllocator.
10442 ial = IAllocator(lu.cfg, lu.rpc,
10443 mode=constants.IALLOCATOR_MODE_RELOC,
10444 name=instance_name,
10445 relocate_from=list(relocate_from))
10447 ial.Run(iallocator_name)
10449 if not ial.success:
10450 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10451 " %s" % (iallocator_name, ial.info),
10452 errors.ECODE_NORES)
10454 if len(ial.result) != ial.required_nodes:
10455 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10456 " of nodes (%s), required %s" %
10458 len(ial.result), ial.required_nodes),
10459 errors.ECODE_FAULT)
10461 remote_node_name = ial.result[0]
10463 lu.LogInfo("Selected new secondary for instance '%s': %s",
10464 instance_name, remote_node_name)
10466 return remote_node_name
10468 def _FindFaultyDisks(self, node_name):
10469 """Wrapper for L{_FindFaultyInstanceDisks}.
10472 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10475 def _CheckDisksActivated(self, instance):
10476 """Checks if the instance disks are activated.
10478 @param instance: The instance to check disks
10479 @return: True if they are activated, False otherwise
10482 nodes = instance.all_nodes
10484 for idx, dev in enumerate(instance.disks):
10486 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10487 self.cfg.SetDiskID(dev, node)
10489 result = self.rpc.call_blockdev_find(node, dev)
10493 elif result.fail_msg or not result.payload:
10498 def CheckPrereq(self):
10499 """Check prerequisites.
10501 This checks that the instance is in the cluster.
10504 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10505 assert instance is not None, \
10506 "Cannot retrieve locked instance %s" % self.instance_name
10508 if instance.disk_template != constants.DT_DRBD8:
10509 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10510 " instances", errors.ECODE_INVAL)
10512 if len(instance.secondary_nodes) != 1:
10513 raise errors.OpPrereqError("The instance has a strange layout,"
10514 " expected one secondary but found %d" %
10515 len(instance.secondary_nodes),
10516 errors.ECODE_FAULT)
10518 if not self.delay_iallocator:
10519 self._CheckPrereq2()
10521 def _CheckPrereq2(self):
10522 """Check prerequisites, second part.
10524 This function should always be part of CheckPrereq. It was separated and is
10525 now called from Exec because during node evacuation iallocator was only
10526 called with an unmodified cluster model, not taking planned changes into
10527 account.
10530 instance = self.instance
10531 secondary_node = instance.secondary_nodes[0]
10533 if self.iallocator_name is None:
10534 remote_node = self.remote_node
10536 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10537 instance.name, instance.secondary_nodes)
10539 if remote_node is None:
10540 self.remote_node_info = None
10542 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10543 "Remote node '%s' is not locked" % remote_node
10545 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10546 assert self.remote_node_info is not None, \
10547 "Cannot retrieve locked node %s" % remote_node
10549 if remote_node == self.instance.primary_node:
10550 raise errors.OpPrereqError("The specified node is the primary node of"
10551 " the instance", errors.ECODE_INVAL)
10553 if remote_node == secondary_node:
10554 raise errors.OpPrereqError("The specified node is already the"
10555 " secondary node of the instance",
10556 errors.ECODE_INVAL)
10558 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10559 constants.REPLACE_DISK_CHG):
10560 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10561 errors.ECODE_INVAL)
10563 if self.mode == constants.REPLACE_DISK_AUTO:
10564 if not self._CheckDisksActivated(instance):
10565 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10566 " first" % self.instance_name,
10567 errors.ECODE_STATE)
10568 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10569 faulty_secondary = self._FindFaultyDisks(secondary_node)
10571 if faulty_primary and faulty_secondary:
10572 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10573 " one node and can not be repaired"
10574 " automatically" % self.instance_name,
10575 errors.ECODE_STATE)
10578 self.disks = faulty_primary
10579 self.target_node = instance.primary_node
10580 self.other_node = secondary_node
10581 check_nodes = [self.target_node, self.other_node]
10582 elif faulty_secondary:
10583 self.disks = faulty_secondary
10584 self.target_node = secondary_node
10585 self.other_node = instance.primary_node
10586 check_nodes = [self.target_node, self.other_node]
10592 # Non-automatic modes
10593 if self.mode == constants.REPLACE_DISK_PRI:
10594 self.target_node = instance.primary_node
10595 self.other_node = secondary_node
10596 check_nodes = [self.target_node, self.other_node]
10598 elif self.mode == constants.REPLACE_DISK_SEC:
10599 self.target_node = secondary_node
10600 self.other_node = instance.primary_node
10601 check_nodes = [self.target_node, self.other_node]
10603 elif self.mode == constants.REPLACE_DISK_CHG:
10604 self.new_node = remote_node
10605 self.other_node = instance.primary_node
10606 self.target_node = secondary_node
10607 check_nodes = [self.new_node, self.other_node]
10609 _CheckNodeNotDrained(self.lu, remote_node)
10610 _CheckNodeVmCapable(self.lu, remote_node)
10612 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10613 assert old_node_info is not None
10614 if old_node_info.offline and not self.early_release:
10615 # doesn't make sense to delay the release
10616 self.early_release = True
10617 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10618 " early-release mode", secondary_node)
10620 else:
10621 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10624 # If not specified, all disks should be replaced
10625 if not self.disks:
10626 self.disks = range(len(self.instance.disks))
10628 # TODO: This is ugly, but right now we can't distinguish between
10629 # internally submitted opcodes and external ones. We should fix that.
10630 if self.remote_node_info:
10631 # We change the node, lets verify it still meets instance policy
10632 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10633 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10635 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10636 ignore=self.ignore_ipolicy)
10638 # TODO: compute disk parameters
10639 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10640 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10641 if primary_node_info.group != secondary_node_info.group:
10642 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10643 " different node groups; the disk parameters of the"
10644 " primary node's group will be applied.")
10646 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10648 for node in check_nodes:
10649 _CheckNodeOnline(self.lu, node)
10651 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10654 if node_name is not None)
10656 # Release unneeded node and node resource locks
10657 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10658 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10660 # Release any owned node group
10661 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10662 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10664 # Check whether disks are valid
10665 for disk_idx in self.disks:
10666 instance.FindDisk(disk_idx)
10668 # Get secondary node IP addresses
10669 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10670 in self.cfg.GetMultiNodeInfo(touched_nodes))
10672 def Exec(self, feedback_fn):
10673 """Execute disk replacement.
10675 This dispatches the disk replacement to the appropriate handler.
10678 if self.delay_iallocator:
10679 self._CheckPrereq2()
10682 # Verify owned locks before starting operation
10683 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10684 assert set(owned_nodes) == set(self.node_secondary_ip), \
10685 ("Incorrect node locks, owning %s, expected %s" %
10686 (owned_nodes, self.node_secondary_ip.keys()))
10687 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10688 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10690 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10691 assert list(owned_instances) == [self.instance_name], \
10692 "Instance '%s' not locked" % self.instance_name
10694 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10695 "Should not own any node group lock at this point"
10698 feedback_fn("No disks need replacement")
10701 feedback_fn("Replacing disk(s) %s for %s" %
10702 (utils.CommaJoin(self.disks), self.instance.name))
10704 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10706 # Activate the instance disks if we're replacing them on a down instance
10707 if activate_disks:
10708 _StartInstanceDisks(self.lu, self.instance, True)
10711 # Should we replace the secondary node?
10712 if self.new_node is not None:
10713 fn = self._ExecDrbd8Secondary
10714 else:
10715 fn = self._ExecDrbd8DiskOnly
10717 result = fn(feedback_fn)
10719 # Deactivate the instance disks if we're replacing them on a
10720 # down instance
10721 if activate_disks:
10722 _SafeShutdownInstanceDisks(self.lu, self.instance)
10724 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10727 # Verify owned locks
10728 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10729 nodes = frozenset(self.node_secondary_ip)
10730 assert ((self.early_release and not owned_nodes) or
10731 (not self.early_release and not (set(owned_nodes) - nodes))), \
10732 ("Not owning the correct locks, early_release=%s, owned=%r,"
10733 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10737 def _CheckVolumeGroup(self, nodes):
10738 self.lu.LogInfo("Checking volume groups")
10740 vgname = self.cfg.GetVGName()
10742 # Make sure volume group exists on all involved nodes
10743 results = self.rpc.call_vg_list(nodes)
10745 raise errors.OpExecError("Can't list volume groups on the nodes")
10748 res = results[node]
10749 res.Raise("Error checking node %s" % node)
10750 if vgname not in res.payload:
10751 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10754 def _CheckDisksExistence(self, nodes):
10755 # Check disk existence
10756 for idx, dev in enumerate(self.instance.disks):
10757 if idx not in self.disks:
10761 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10762 self.cfg.SetDiskID(dev, node)
10764 result = self.rpc.call_blockdev_find(node, dev)
10766 msg = result.fail_msg
10767 if msg or not result.payload:
10769 msg = "disk not found"
10770 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10773 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10774 for idx, dev in enumerate(self.instance.disks):
10775 if idx not in self.disks:
10778 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10781 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10783 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10784 " replace disks for instance %s" %
10785 (node_name, self.instance.name))
10787 def _CreateNewStorage(self, node_name):
10788 """Create new storage on the primary or secondary node.
10790 This is only used for same-node replaces, not for changing the
10791 secondary node, hence we don't want to modify the existing disk.
10795 iv_names = {}
10796 for idx, dev in enumerate(self.instance.disks):
10797 if idx not in self.disks:
10800 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10802 self.cfg.SetDiskID(dev, node_name)
10804 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10805 names = _GenerateUniqueNames(self.lu, lv_names)
10807 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10809 vg_data = dev.children[0].logical_id[0]
10810 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10811 logical_id=(vg_data, names[0]), params=data_p)
10812 vg_meta = dev.children[1].logical_id[0]
10813 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10814 logical_id=(vg_meta, names[1]), params=meta_p)
10816 new_lvs = [lv_data, lv_meta]
10817 old_lvs = [child.Copy() for child in dev.children]
10818 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10820 # we pass force_create=True to force the LVM creation
10821 for new_lv in new_lvs:
10822 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10823 _GetInstanceInfoText(self.instance), False)
10825 return iv_names
10827 def _CheckDevices(self, node_name, iv_names):
10828 for name, (dev, _, _) in iv_names.iteritems():
10829 self.cfg.SetDiskID(dev, node_name)
10831 result = self.rpc.call_blockdev_find(node_name, dev)
10833 msg = result.fail_msg
10834 if msg or not result.payload:
10836 msg = "disk not found"
10837 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10840 if result.payload.is_degraded:
10841 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10843 def _RemoveOldStorage(self, node_name, iv_names):
10844 for name, (_, old_lvs, _) in iv_names.iteritems():
10845 self.lu.LogInfo("Remove logical volumes for %s" % name)
10848 self.cfg.SetDiskID(lv, node_name)
10850 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10852 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10853 hint="remove unused LVs manually")
10855 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10856 """Replace a disk on the primary or secondary for DRBD 8.
10858 The algorithm for replace is quite complicated:
10860 1. for each disk to be replaced:
10862 1. create new LVs on the target node with unique names
10863 1. detach old LVs from the drbd device
10864 1. rename old LVs to name_replaced.<time_t>
10865 1. rename new LVs to old LVs
10866 1. attach the new LVs (with the old names now) to the drbd device
10868 1. wait for sync across all devices
10870 1. for each modified disk:
10872 1. remove old LVs (which have the name name_replaces.<time_t>)
10874 Failures are not very well handled.
10879 # Step: check device activation
10880 self.lu.LogStep(1, steps_total, "Check device existence")
10881 self._CheckDisksExistence([self.other_node, self.target_node])
10882 self._CheckVolumeGroup([self.target_node, self.other_node])
10884 # Step: check other node consistency
10885 self.lu.LogStep(2, steps_total, "Check peer consistency")
10886 self._CheckDisksConsistency(self.other_node,
10887 self.other_node == self.instance.primary_node,
10890 # Step: create new storage
10891 self.lu.LogStep(3, steps_total, "Allocate new storage")
10892 iv_names = self._CreateNewStorage(self.target_node)
10894 # Step: for each lv, detach+rename*2+attach
10895 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10896 for dev, old_lvs, new_lvs in iv_names.itervalues():
10897 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10899 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10901 result.Raise("Can't detach drbd from local storage on node"
10902 " %s for device %s" % (self.target_node, dev.iv_name))
10904 #cfg.Update(instance)
10906 # ok, we created the new LVs, so now we know we have the needed
10907 # storage; as such, we proceed on the target node to rename
10908 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10909 # using the assumption that logical_id == physical_id (which in
10910 # turn is the unique_id on that node)
10912 # FIXME(iustin): use a better name for the replaced LVs
10913 temp_suffix = int(time.time())
10914 ren_fn = lambda d, suff: (d.physical_id[0],
10915 d.physical_id[1] + "_replaced-%s" % suff)
10917 # Build the rename list based on what LVs exist on the node
10918 rename_old_to_new = []
10919 for to_ren in old_lvs:
10920 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10921 if not result.fail_msg and result.payload:
10923 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10925 self.lu.LogInfo("Renaming the old LVs on the target node")
10926 result = self.rpc.call_blockdev_rename(self.target_node,
10928 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10930 # Now we rename the new LVs to the old LVs
10931 self.lu.LogInfo("Renaming the new LVs on the target node")
10932 rename_new_to_old = [(new, old.physical_id)
10933 for old, new in zip(old_lvs, new_lvs)]
10934 result = self.rpc.call_blockdev_rename(self.target_node,
10936 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10938 # Intermediate steps of in memory modifications
10939 for old, new in zip(old_lvs, new_lvs):
10940 new.logical_id = old.logical_id
10941 self.cfg.SetDiskID(new, self.target_node)
10943 # We need to modify old_lvs so that removal later removes the
10944 # right LVs, not the newly added ones; note that old_lvs is a
10946 for disk in old_lvs:
10947 disk.logical_id = ren_fn(disk, temp_suffix)
10948 self.cfg.SetDiskID(disk, self.target_node)
10950 # Now that the new lvs have the old name, we can add them to the device
10951 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10952 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10954 msg = result.fail_msg
10956 for new_lv in new_lvs:
10957 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10960 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10961 hint=("cleanup manually the unused logical"
10963 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10965 cstep = itertools.count(5)
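# steps 1-4 have been logged above; cstep numbers the remaining steps
# (old storage removal, device sync), whose order depends on early_release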
10967 if self.early_release:
10968 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10969 self._RemoveOldStorage(self.target_node, iv_names)
10970 # TODO: Check if releasing locks early still makes sense
10971 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10973 # Release all resource locks except those used by the instance
10974 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10975 keep=self.node_secondary_ip.keys())
10977 # Release all node locks while waiting for sync
10978 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10980 # TODO: Can the instance lock be downgraded here? Take the optional disk
10981 # shutdown in the caller into consideration.
10984 # This can fail as the old devices are degraded and _WaitForSync
10985 # does a combined result over all disks, so we don't check its return value
10986 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10987 _WaitForSync(self.lu, self.instance)
10989 # Check all devices manually
10990 self._CheckDevices(self.instance.primary_node, iv_names)
10992 # Step: remove old storage
10993 if not self.early_release:
10994 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10995 self._RemoveOldStorage(self.target_node, iv_names)
10997 def _ExecDrbd8Secondary(self, feedback_fn):
10998 """Replace the secondary node for DRBD 8.
11000 The algorithm for replace is quite complicated:
11001 - for all disks of the instance:
11002 - create new LVs on the new node with same names
11003 - shutdown the drbd device on the old secondary
11004 - disconnect the drbd network on the primary
11005 - create the drbd device on the new secondary
11006 - network attach the drbd on the primary, using an artifice:
11007 the drbd code for Attach() will connect to the network if it
11008 finds a device which is connected to the good local disks but
11009 not network enabled
11010 - wait for sync across all devices
11011 - remove all disks from the old secondary
11013 Failures are not very well handled.
11018 pnode = self.instance.primary_node
11020 # Step: check device activation
11021 self.lu.LogStep(1, steps_total, "Check device existence")
11022 self._CheckDisksExistence([self.instance.primary_node])
11023 self._CheckVolumeGroup([self.instance.primary_node])
11025 # Step: check other node consistency
11026 self.lu.LogStep(2, steps_total, "Check peer consistency")
11027 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11029 # Step: create new storage
11030 self.lu.LogStep(3, steps_total, "Allocate new storage")
11031 for idx, dev in enumerate(self.instance.disks):
11032 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11033 (self.new_node, idx))
11034 # we pass force_create=True to force LVM creation
11035 for new_lv in dev.children:
11036 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11037 _GetInstanceInfoText(self.instance), False)
11039 # Step 4: drbd minors and drbd setup changes
11040 # after this, we must manually remove the drbd minors on both the
11041 # error and the success paths
11042 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11043 minors = self.cfg.AllocateDRBDMinor([self.new_node
11044 for dev in self.instance.disks],
11045 self.instance.name)
11046 logging.debug("Allocated minors %r", minors)
11048 iv_names = {}
11049 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11050 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11051 (self.new_node, idx))
11052 # create new devices on new_node; note that we create two IDs:
11053 # one without port, so the drbd will be activated without
11054 # networking information on the new node at this stage, and one
11055 # with network, for the latter activation in step 4
11056 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11057 if self.instance.primary_node == o_node1:
11058 p_minor = o_minor1
11059 else:
11060 assert self.instance.primary_node == o_node2, "Three-node instance?"
11061 p_minor = o_minor2
11063 new_alone_id = (self.instance.primary_node, self.new_node, None,
11064 p_minor, new_minor, o_secret)
11065 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11066 p_minor, new_minor, o_secret)
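# A hypothetical illustration of the two IDs (values assumed): with primary
# node "node1", new secondary "node3", port 11000, p_minor 0, new_minor 3 and
# shared secret "s3cr3t":
#   new_alone_id = ("node1", "node3", None, 0, 3, "s3cr3t")
#   new_net_id   = ("node1", "node3", 11000, 0, 3, "s3cr3t")
# i.e. the standalone ID carries None instead of the DRBD port, so the device
# can first be created without networking and attached to the network later.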
11068 iv_names[idx] = (dev, dev.children, new_net_id)
11069 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11070 new_net_id)
11071 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11072 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11073 logical_id=new_alone_id,
11074 children=dev.children,
11075 size=dev.size,
11076 params=drbd_params)
11077 try:
11078 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11079 _GetInstanceInfoText(self.instance), False)
11080 except errors.GenericError:
11081 self.cfg.ReleaseDRBDMinors(self.instance.name)
11082 raise
11084 # We have new devices, shutdown the drbd on the old secondary
11085 for idx, dev in enumerate(self.instance.disks):
11086 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11087 self.cfg.SetDiskID(dev, self.target_node)
11088 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11089 if msg:
11090 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11091 " node: %s" % (idx, msg),
11092 hint=("Please cleanup this device manually as"
11093 " soon as possible"))
11095 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11096 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11097 self.instance.disks)[pnode]
11099 msg = result.fail_msg
11100 if msg:
11101 # detaches didn't succeed (unlikely)
11102 self.cfg.ReleaseDRBDMinors(self.instance.name)
11103 raise errors.OpExecError("Can't detach the disks from the network on"
11104 " old node: %s" % (msg,))
11106 # if we managed to detach at least one, we update all the disks of
11107 # the instance to point to the new secondary
11108 self.lu.LogInfo("Updating instance configuration")
11109 for dev, _, new_logical_id in iv_names.itervalues():
11110 dev.logical_id = new_logical_id
11111 self.cfg.SetDiskID(dev, self.instance.primary_node)
11113 self.cfg.Update(self.instance, feedback_fn)
11115 # Release all node locks (the configuration has been updated)
11116 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11118 # and now perform the drbd attach
11119 self.lu.LogInfo("Attaching primary drbds to new secondary"
11120 " (standalone => connected)")
11121 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11122 self.new_node],
11123 self.node_secondary_ip,
11124 self.instance.disks,
11125 self.instance.name,
11126 False)
11127 for to_node, to_result in result.items():
11128 msg = to_result.fail_msg
11129 if msg:
11130 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11131 to_node, msg,
11132 hint=("please do a gnt-instance info to see the"
11133 " status of disks"))
11135 cstep = itertools.count(5)
11137 if self.early_release:
11138 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11139 self._RemoveOldStorage(self.target_node, iv_names)
11140 # TODO: Check if releasing locks early still makes sense
11141 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11143 # Release all resource locks except those used by the instance
11144 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11145 keep=self.node_secondary_ip.keys())
11147 # TODO: Can the instance lock be downgraded here? Take the optional disk
11148 # shutdown in the caller into consideration.
11151 # This can fail as the old devices are degraded and _WaitForSync
11152 # does a combined result over all disks, so we don't check its return value
11153 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11154 _WaitForSync(self.lu, self.instance)
11156 # Check all devices manually
11157 self._CheckDevices(self.instance.primary_node, iv_names)
11159 # Step: remove old storage
11160 if not self.early_release:
11161 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11162 self._RemoveOldStorage(self.target_node, iv_names)
11165 class LURepairNodeStorage(NoHooksLU):
11166 """Repairs the volume group on a node.
11171 def CheckArguments(self):
11172 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11174 storage_type = self.op.storage_type
11176 if (constants.SO_FIX_CONSISTENCY not in
11177 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11178 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11179 " repaired" % storage_type,
11180 errors.ECODE_INVAL)
11182 def ExpandNames(self):
11183 self.needed_locks = {
11184 locking.LEVEL_NODE: [self.op.node_name],
11185 }
11187 def _CheckFaultyDisks(self, instance, node_name):
11188 """Ensure faulty disks abort the opcode or at least warn."""
11189 try:
11190 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11191 node_name, True):
11192 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11193 " node '%s'" % (instance.name, node_name),
11194 errors.ECODE_STATE)
11195 except errors.OpPrereqError, err:
11196 if self.op.ignore_consistency:
11197 self.proc.LogWarning(str(err.args[0]))
11198 else:
11199 raise
11201 def CheckPrereq(self):
11202 """Check prerequisites.
11205 # Check whether any instance on this node has faulty disks
11206 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11207 if inst.admin_state != constants.ADMINST_UP:
11208 continue
11209 check_nodes = set(inst.all_nodes)
11210 check_nodes.discard(self.op.node_name)
11211 for inst_node_name in check_nodes:
11212 self._CheckFaultyDisks(inst, inst_node_name)
11214 def Exec(self, feedback_fn):
11215 feedback_fn("Repairing storage unit '%s' on %s ..." %
11216 (self.op.name, self.op.node_name))
11218 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11219 result = self.rpc.call_storage_execute(self.op.node_name,
11220 self.op.storage_type, st_args,
11221 self.op.name,
11222 constants.SO_FIX_CONSISTENCY)
11223 result.Raise("Failed to repair storage unit '%s' on %s" %
11224 (self.op.name, self.op.node_name))
11227 class LUNodeEvacuate(NoHooksLU):
11228 """Evacuates instances off a list of nodes.
11233 _MODE2IALLOCATOR = {
11234 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11235 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11236 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11238 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11239 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11240 constants.IALLOCATOR_NEVAC_MODES)
11242 def CheckArguments(self):
11243 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11245 def ExpandNames(self):
11246 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11248 if self.op.remote_node is not None:
11249 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11250 assert self.op.remote_node
11252 if self.op.remote_node == self.op.node_name:
11253 raise errors.OpPrereqError("Can not use evacuated node as a new"
11254 " secondary node", errors.ECODE_INVAL)
11256 if self.op.mode != constants.NODE_EVAC_SEC:
11257 raise errors.OpPrereqError("Without the use of an iallocator only"
11258 " secondary instances can be evacuated",
11259 errors.ECODE_INVAL)
11262 self.share_locks = _ShareAll()
11263 self.needed_locks = {
11264 locking.LEVEL_INSTANCE: [],
11265 locking.LEVEL_NODEGROUP: [],
11266 locking.LEVEL_NODE: [],
11267 }
11269 # Determine nodes (via group) optimistically, needs verification once locks
11270 # have been acquired
11271 self.lock_nodes = self._DetermineNodes()
11273 def _DetermineNodes(self):
11274 """Gets the list of nodes to operate on.
11277 if self.op.remote_node is None:
11278 # Iallocator will choose any node(s) in the same group
11279 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11280 else:
11281 group_nodes = frozenset([self.op.remote_node])
11283 # Determine nodes to be locked
11284 return set([self.op.node_name]) | group_nodes
11286 def _DetermineInstances(self):
11287 """Builds list of instances to operate on.
11290 assert self.op.mode in constants.NODE_EVAC_MODES
11292 if self.op.mode == constants.NODE_EVAC_PRI:
11293 # Primary instances only
11294 inst_fn = _GetNodePrimaryInstances
11295 assert self.op.remote_node is None, \
11296 "Evacuating primary instances requires iallocator"
11297 elif self.op.mode == constants.NODE_EVAC_SEC:
11298 # Secondary instances only
11299 inst_fn = _GetNodeSecondaryInstances
11300 else:
11302 assert self.op.mode == constants.NODE_EVAC_ALL
11303 inst_fn = _GetNodeInstances
11304 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11305 if self.op.remote_node is not None:
11306 raise errors.OpPrereqError("Due to an issue with the iallocator"
11307 " interface it is not possible to evacuate"
11308 " all instances at once; specify explicitly"
11309 " whether to evacuate primary or secondary"
11310 " instances",
11311 errors.ECODE_INVAL)
11313 return inst_fn(self.cfg, self.op.node_name)
11315 def DeclareLocks(self, level):
11316 if level == locking.LEVEL_INSTANCE:
11317 # Lock instances optimistically, needs verification once node and group
11318 # locks have been acquired
11319 self.needed_locks[locking.LEVEL_INSTANCE] = \
11320 set(i.name for i in self._DetermineInstances())
11322 elif level == locking.LEVEL_NODEGROUP:
11323 # Lock node groups for all potential target nodes optimistically, needs
11324 # verification once nodes have been acquired
11325 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11326 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11328 elif level == locking.LEVEL_NODE:
11329 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11331 def CheckPrereq(self):
11333 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11334 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11335 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11337 need_nodes = self._DetermineNodes()
11339 if not owned_nodes.issuperset(need_nodes):
11340 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11341 " locks were acquired, current nodes are"
11342 " '%s', used to be '%s'; retry the"
11343 " operation" %
11344 (self.op.node_name,
11345 utils.CommaJoin(need_nodes),
11346 utils.CommaJoin(owned_nodes)),
11347 errors.ECODE_STATE)
11349 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11350 if owned_groups != wanted_groups:
11351 raise errors.OpExecError("Node groups changed since locks were acquired,"
11352 " current groups are '%s', used to be '%s';"
11353 " retry the operation" %
11354 (utils.CommaJoin(wanted_groups),
11355 utils.CommaJoin(owned_groups)))
11357 # Determine affected instances
11358 self.instances = self._DetermineInstances()
11359 self.instance_names = [i.name for i in self.instances]
11361 if set(self.instance_names) != owned_instances:
11362 raise errors.OpExecError("Instances on node '%s' changed since locks"
11363 " were acquired, current instances are '%s',"
11364 " used to be '%s'; retry the operation" %
11365 (self.op.node_name,
11366 utils.CommaJoin(self.instance_names),
11367 utils.CommaJoin(owned_instances)))
11369 if self.instance_names:
11370 self.LogInfo("Evacuating instances from node '%s': %s",
11371 self.op.node_name,
11372 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11373 else:
11374 self.LogInfo("No instances to evacuate from node '%s'",
11375 self.op.node_name)
11377 if self.op.remote_node is not None:
11378 for i in self.instances:
11379 if i.primary_node == self.op.remote_node:
11380 raise errors.OpPrereqError("Node %s is the primary node of"
11381 " instance %s, cannot use it as"
11383 (self.op.remote_node, i.name),
11384 errors.ECODE_INVAL)
11386 def Exec(self, feedback_fn):
11387 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11389 if not self.instance_names:
11390 # No instances to evacuate
11391 jobs = []
11393 elif self.op.iallocator is not None:
11394 # TODO: Implement relocation to other group
11395 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11396 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11397 instances=list(self.instance_names))
11399 ial.Run(self.op.iallocator)
11401 if not ial.success:
11402 raise errors.OpPrereqError("Can't compute node evacuation using"
11403 " iallocator '%s': %s" %
11404 (self.op.iallocator, ial.info),
11405 errors.ECODE_NORES)
11407 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11409 elif self.op.remote_node is not None:
11410 assert self.op.mode == constants.NODE_EVAC_SEC
11411 jobs = [
11412 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11413 remote_node=self.op.remote_node,
11414 disks=[],
11415 mode=constants.REPLACE_DISK_CHG,
11416 early_release=self.op.early_release)]
11417 for instance_name in self.instance_names
11418 ]
11420 else:
11421 raise errors.ProgrammerError("No iallocator or remote node")
11423 return ResultWithJobs(jobs)
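# Sketch of the value returned above (shape assumed for illustration): "jobs"
# is a list of jobs, each job itself a list of opcodes, e.g. for two instances
# being evacuated via their secondary node:
#   jobs = [[OpInstanceReplaceDisks(instance_name="inst1", ...)],
#           [OpInstanceReplaceDisks(instance_name="inst2", ...)]]
# ResultWithJobs(jobs) lets mcpu submit each inner list as a separate job and
# report the resulting job IDs back to the caller.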
11426 def _SetOpEarlyRelease(early_release, op):
11427 """Sets C{early_release} flag on opcodes if available.
11429 """
11430 try:
11431 op.early_release = early_release
11432 except AttributeError:
11433 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11435 return op
11438 def _NodeEvacDest(use_nodes, group, nodes):
11439 """Returns group or nodes depending on caller's choice.
11441 """
11442 if use_nodes:
11443 return utils.CommaJoin(nodes)
11444 else:
11445 return group
11448 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11449 """Unpacks the result of change-group and node-evacuate iallocator requests.
11451 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11452 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11454 @type lu: L{LogicalUnit}
11455 @param lu: Logical unit instance
11456 @type alloc_result: tuple/list
11457 @param alloc_result: Result from iallocator
11458 @type early_release: bool
11459 @param early_release: Whether to release locks early if possible
11460 @type use_nodes: bool
11461 @param use_nodes: Whether to display node names instead of groups
11464 (moved, failed, jobs) = alloc_result
11466 if failed:
11467 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11468 for (name, reason) in failed)
11469 lu.LogWarning("Unable to evacuate instances %s", failreason)
11470 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11472 if moved:
11473 lu.LogInfo("Instances to be moved: %s",
11474 utils.CommaJoin("%s (to %s)" %
11475 (name, _NodeEvacDest(use_nodes, group, nodes))
11476 for (name, group, nodes) in moved))
11478 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11479 map(opcodes.OpCode.LoadOpCode, ops))
11480 for ops in jobs]
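# Illustrative sketch (shape assumed, not quoted from the iallocator spec):
# the alloc_result unpacked above looks roughly like
#   moved  = [("inst1.example.com", "group-uuid", ["node3.example.com"])]
#   failed = [("inst2.example.com", "insufficient memory on target node")]
#   jobs   = [[opcode_dict, ...], ...]   # serialized opcodes, one list per job
# Each opcode_dict is loaded via opcodes.OpCode.LoadOpCode and, where the
# opcode supports it, gets the early_release flag set by _SetOpEarlyRelease.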
11483 class LUInstanceGrowDisk(LogicalUnit):
11484 """Grow a disk of an instance.
11487 HPATH = "disk-grow"
11488 HTYPE = constants.HTYPE_INSTANCE
11491 def ExpandNames(self):
11492 self._ExpandAndLockInstance()
11493 self.needed_locks[locking.LEVEL_NODE] = []
11494 self.needed_locks[locking.LEVEL_NODE_RES] = []
11495 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11496 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11498 def DeclareLocks(self, level):
11499 if level == locking.LEVEL_NODE:
11500 self._LockInstancesNodes()
11501 elif level == locking.LEVEL_NODE_RES:
11503 self.needed_locks[locking.LEVEL_NODE_RES] = \
11504 self.needed_locks[locking.LEVEL_NODE][:]
11506 def BuildHooksEnv(self):
11507 """Build hooks env.
11509 This runs on the master, the primary and all the secondaries.
11511 """
11512 env = {
11513 "DISK": self.op.disk,
11514 "AMOUNT": self.op.amount,
11515 }
11516 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11517 return env
11519 def BuildHooksNodes(self):
11520 """Build hooks nodes.
11522 """
11523 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11524 return (nl, nl)
11526 def CheckPrereq(self):
11527 """Check prerequisites.
11529 This checks that the instance is in the cluster.
11532 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11533 assert instance is not None, \
11534 "Cannot retrieve locked instance %s" % self.op.instance_name
11535 nodenames = list(instance.all_nodes)
11536 for node in nodenames:
11537 _CheckNodeOnline(self, node)
11539 self.instance = instance
11541 if instance.disk_template not in constants.DTS_GROWABLE:
11542 raise errors.OpPrereqError("Instance's disk layout does not support"
11543 " growing", errors.ECODE_INVAL)
11545 self.disk = instance.FindDisk(self.op.disk)
11547 if instance.disk_template not in (constants.DT_FILE,
11548 constants.DT_SHARED_FILE,
11550 # TODO: check the free disk space for file, when that feature will be
11552 _CheckNodesFreeDiskPerVG(self, nodenames,
11553 self.disk.ComputeGrowth(self.op.amount))
11555 def Exec(self, feedback_fn):
11556 """Execute disk grow.
11558 """
11559 instance = self.instance
11560 disk = self.disk
11562 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11563 assert (self.owned_locks(locking.LEVEL_NODE) ==
11564 self.owned_locks(locking.LEVEL_NODE_RES))
11566 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11567 if not disks_ok:
11568 raise errors.OpExecError("Cannot activate block device to grow")
11570 feedback_fn("Growing disk %s of instance '%s' by %s" %
11571 (self.op.disk, instance.name,
11572 utils.FormatUnit(self.op.amount, "h")))
11574 # First run all grow ops in dry-run mode
11575 for node in instance.all_nodes:
11576 self.cfg.SetDiskID(disk, node)
11577 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11578 result.Raise("Grow request failed to node %s" % node)
11580 # We know that (as far as we can test) operations across different
11581 # nodes will succeed, time to run it for real
11582 for node in instance.all_nodes:
11583 self.cfg.SetDiskID(disk, node)
11584 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11585 result.Raise("Grow request failed to node %s" % node)
11587 # TODO: Rewrite code to work properly
11588 # DRBD goes into sync mode for a short amount of time after executing the
11589 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11590 # calling "resize" in sync mode fails. Sleeping for a short amount of
11591 # time is a work-around.
11594 disk.RecordGrow(self.op.amount)
11595 self.cfg.Update(instance, feedback_fn)
11597 # Changes have been recorded, release node lock
11598 _ReleaseLocks(self, locking.LEVEL_NODE)
11600 # Downgrade lock while waiting for sync
11601 self.glm.downgrade(locking.LEVEL_INSTANCE)
11603 if self.op.wait_for_sync:
11604 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11605 if disk_abort:
11606 self.proc.LogWarning("Disk sync-ing has not returned a good"
11607 " status; please check the instance")
11608 if instance.admin_state != constants.ADMINST_UP:
11609 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11610 elif instance.admin_state != constants.ADMINST_UP:
11611 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11612 " not supposed to be running because no wait for"
11613 " sync mode was requested")
11615 assert self.owned_locks(locking.LEVEL_NODE_RES)
11616 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11619 class LUInstanceQueryData(NoHooksLU):
11620 """Query runtime instance data.
11625 def ExpandNames(self):
11626 self.needed_locks = {}
11628 # Use locking if requested or when non-static information is wanted
11629 if not (self.op.static or self.op.use_locking):
11630 self.LogWarning("Non-static data requested, locks need to be acquired")
11631 self.op.use_locking = True
11633 if self.op.instances or not self.op.use_locking:
11634 # Expand instance names right here
11635 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11636 else:
11637 # Will use acquired locks
11638 self.wanted_names = None
11640 if self.op.use_locking:
11641 self.share_locks = _ShareAll()
11643 if self.wanted_names is None:
11644 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11645 else:
11646 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11648 self.needed_locks[locking.LEVEL_NODE] = []
11649 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11651 def DeclareLocks(self, level):
11652 if self.op.use_locking and level == locking.LEVEL_NODE:
11653 self._LockInstancesNodes()
11655 def CheckPrereq(self):
11656 """Check prerequisites.
11658 This only checks the optional instance list against the existing names.
11661 if self.wanted_names is None:
11662 assert self.op.use_locking, "Locking was not used"
11663 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11665 self.wanted_instances = \
11666 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11668 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11669 """Returns the status of a block device
11671 """
11672 if self.op.static or not node:
11673 return None
11675 self.cfg.SetDiskID(dev, node)
11677 result = self.rpc.call_blockdev_find(node, dev)
11681 result.Raise("Can't compute disk status for %s" % instance_name)
11683 status = result.payload
11684 if status is None:
11685 return None
11687 return (status.dev_path, status.major, status.minor,
11688 status.sync_percent, status.estimated_time,
11689 status.is_degraded, status.ldisk_status)
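# Example of the tuple built above (values assumed): a healthy, fully synced
# DRBD device might be reported as
#   ("/dev/drbd0", 147, 0, None, None, False, constants.LDS_OKAY)
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status), with sync_percent/estimated_time left as None when no resync
# is in progress.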
11691 def _ComputeDiskStatus(self, instance, snode, dev):
11692 """Compute block device status.
11695 if dev.dev_type in constants.LDS_DRBD:
11696 # we change the snode then (otherwise we use the one passed in)
11697 if dev.logical_id[0] == instance.primary_node:
11698 snode = dev.logical_id[1]
11699 else:
11700 snode = dev.logical_id[0]
11702 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11703 instance.name, dev)
11704 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11707 dev_children = map(compat.partial(self._ComputeDiskStatus,
11708 instance, snode),
11709 dev.children)
11713 return {
11714 "iv_name": dev.iv_name,
11715 "dev_type": dev.dev_type,
11716 "logical_id": dev.logical_id,
11717 "physical_id": dev.physical_id,
11718 "pstatus": dev_pstatus,
11719 "sstatus": dev_sstatus,
11720 "children": dev_children,
11721 "mode": dev.mode,
11722 "size": dev.size,
11723 }
11725 def Exec(self, feedback_fn):
11726 """Gather and return data"""
11727 result = {}
11729 cluster = self.cfg.GetClusterInfo()
11731 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11732 for i in self.wanted_instances)
11733 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11734 if self.op.static or pnode.offline:
11735 remote_state = None
11736 if pnode.offline:
11737 self.LogWarning("Primary node %s is marked offline, returning static"
11738 " information only for instance %s" %
11739 (pnode.name, instance.name))
11740 else:
11741 remote_info = self.rpc.call_instance_info(instance.primary_node,
11742 instance.name,
11743 instance.hypervisor)
11744 remote_info.Raise("Error checking node %s" % instance.primary_node)
11745 remote_info = remote_info.payload
11746 if remote_info and "state" in remote_info:
11747 remote_state = "up"
11748 else:
11749 if instance.admin_state == constants.ADMINST_UP:
11750 remote_state = "down"
11751 else:
11752 remote_state = instance.admin_state
11754 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11755 instance.disks)
11757 result[instance.name] = {
11758 "name": instance.name,
11759 "config_state": instance.admin_state,
11760 "run_state": remote_state,
11761 "pnode": instance.primary_node,
11762 "snodes": instance.secondary_nodes,
11764 # this happens to be the same format used for hooks
11765 "nics": _NICListToTuple(self, instance.nics),
11766 "disk_template": instance.disk_template,
11767 "disks": disks,
11768 "hypervisor": instance.hypervisor,
11769 "network_port": instance.network_port,
11770 "hv_instance": instance.hvparams,
11771 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11772 "be_instance": instance.beparams,
11773 "be_actual": cluster.FillBE(instance),
11774 "os_instance": instance.osparams,
11775 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11776 "serial_no": instance.serial_no,
11777 "mtime": instance.mtime,
11778 "ctime": instance.ctime,
11779 "uuid": instance.uuid,
11780 }
11782 return result
11785 def PrepareContainerMods(mods, private_fn):
11786 """Prepares a list of container modifications by adding a private data field.
11788 @type mods: list of tuples; (operation, index, parameters)
11789 @param mods: List of modifications
11790 @type private_fn: callable or None
11791 @param private_fn: Callable for constructing a private data field for a
11796 if private_fn is None:
11797 fn = lambda: None
11798 else:
11799 fn = private_fn
11801 return [(op, idx, params, fn()) for (op, idx, params) in mods]
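# A minimal usage sketch (assumed example, not part of the module): given
#   mods = [(constants.DDM_ADD, -1, {"size": 1024}),
#           (constants.DDM_MODIFY, 0, {"mode": "ro"})]
# PrepareContainerMods(mods, None) returns
#   [(constants.DDM_ADD, -1, {"size": 1024}, None),
#    (constants.DDM_MODIFY, 0, {"mode": "ro"}, None)]
# while passing a class (e.g. _InstNicModPrivate) as private_fn attaches a
# fresh private object to every modification instead of None.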
11804 #: Type description for changes as returned by L{ApplyContainerMods}'s
11806 _TApplyContModsCbChanges = \
11807 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11808 ht.TNonEmptyString,
11809 ht.TAny,
11810 ])))
11813 def ApplyContainerMods(kind, container, chgdesc, mods,
11814 create_fn, modify_fn, remove_fn):
11815 """Applies descriptions in C{mods} to C{container}.
11818 @param kind: One-word item description
11819 @type container: list
11820 @param container: Container to modify
11821 @type chgdesc: None or list
11822 @param chgdesc: List of applied changes
11824 @param mods: Modifications as returned by L{PrepareContainerMods}
11825 @type create_fn: callable
11826 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11827 receives absolute item index, parameters and private data object as added
11828 by L{PrepareContainerMods}, returns tuple containing new item and changes
11830 @type modify_fn: callable
11831 @param modify_fn: Callback for modifying an existing item
11832 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11833 and private data object as added by L{PrepareContainerMods}, returns
11835 @type remove_fn: callable
11836 @param remove_fn: Callback on removing item; receives absolute item index,
11837 item and private data object as added by L{PrepareContainerMods}
11840 for (op, idx, params, private) in mods:
11841 if idx == -1:
11842 # Append
11843 absidx = len(container) - 1
11844 elif idx < 0:
11845 raise IndexError("Not accepting negative indices other than -1")
11846 elif idx > len(container):
11847 raise IndexError("Got %s index %s, but there are only %s" %
11848 (kind, idx, len(container)))
11849 else:
11850 absidx = idx
11852 changes = None
11854 if op == constants.DDM_ADD:
11855 # Calculate where item will be added
11856 if idx == -1:
11857 addidx = len(container)
11858 else:
11859 addidx = idx
11861 if create_fn is None:
11862 item = params
11863 else:
11864 (item, changes) = create_fn(addidx, params, private)
11866 if idx == -1:
11867 container.append(item)
11868 else:
11870 assert idx <= len(container)
11871 # list.insert does so before the specified index
11872 container.insert(idx, item)
11873 else:
11874 # Retrieve existing item
11875 try:
11876 item = container[absidx]
11877 except IndexError:
11878 raise IndexError("Invalid %s index %s" % (kind, idx))
11880 if op == constants.DDM_REMOVE:
11883 if remove_fn is not None:
11884 remove_fn(absidx, item, private)
11886 changes = [("%s/%s" % (kind, absidx), "remove")]
11888 assert container[absidx] == item
11889 del container[absidx]
11890 elif op == constants.DDM_MODIFY:
11891 if modify_fn is not None:
11892 changes = modify_fn(absidx, item, params, private)
11893 else:
11894 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11896 assert _TApplyContModsCbChanges(changes)
11898 if not (chgdesc is None or changes is None):
11899 chgdesc.extend(changes)
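# A toy usage sketch (assumed example, not part of Ganeti): it shows how the
# two helpers above cooperate, using a plain list of dicts as the container.
# The callback signatures mirror the ones documented in ApplyContainerMods;
# the helper name below is hypothetical.
def _ExampleApplyContainerMods():
  container = [{"mode": "rw"}]
  chgdesc = []
  mods = PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
  ApplyContainerMods("disk", container, chgdesc, mods,
                     lambda idx, params, _: (dict(params), []),
                     None, None)
  # container is now [{"mode": "rw"}, {"size": 1024}]; chgdesc collects the
  # change tuples returned by the callbacks (empty in this trivial case)
  return (container, chgdesc)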
11902 def _UpdateIvNames(base_index, disks):
11903 """Updates the C{iv_name} attribute of disks.
11905 @type disks: list of L{objects.Disk}
11908 for (idx, disk) in enumerate(disks):
11909 disk.iv_name = "disk/%s" % (base_index + idx, )
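# Example (assumed values): _UpdateIvNames(0, disks) on a two-disk list sets
# the iv_name attributes to "disk/0" and "disk/1"; with base_index=1 the same
# disks would be named "disk/1" and "disk/2" instead.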
11912 class _InstNicModPrivate:
11913 """Data structure for network interface modifications.
11915 Used by L{LUInstanceSetParams}.
11918 def __init__(self):
11919 self.params = None
11920 self.filled = None
11923 class LUInstanceSetParams(LogicalUnit):
11924 """Modifies an instance's parameters.
11927 HPATH = "instance-modify"
11928 HTYPE = constants.HTYPE_INSTANCE
11932 def _UpgradeDiskNicMods(kind, mods, verify_fn):
11933 assert ht.TList(mods)
11934 assert not mods or len(mods[0]) in (2, 3)
11936 if mods and len(mods[0]) == 2:
11940 for op, params in mods:
11941 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11942 result.append((op, -1, params))
11946 raise errors.OpPrereqError("Only one %s add or remove operation is"
11947 " supported at a time" % kind,
11948 errors.ECODE_INVAL)
11950 result.append((constants.DDM_MODIFY, op, params))
11952 assert verify_fn(result)
11959 def _CheckMods(kind, mods, key_types, item_fn):
11960 """Ensures requested disk/NIC modifications are valid.
11963 for (op, _, params) in mods:
11964 assert ht.TDict(params)
11966 utils.ForceDictType(params, key_types)
11968 if op == constants.DDM_REMOVE:
11969 if params:
11970 raise errors.OpPrereqError("No settings should be passed when"
11971 " removing a %s" % kind,
11972 errors.ECODE_INVAL)
11973 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11974 item_fn(op, params)
11976 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11979 def _VerifyDiskModification(op, params):
11980 """Verifies a disk modification.
11983 if op == constants.DDM_ADD:
11984 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11985 if mode not in constants.DISK_ACCESS_SET:
11986 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11987 errors.ECODE_INVAL)
11989 size = params.get(constants.IDISK_SIZE, None)
11990 if size is None:
11991 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
11992 constants.IDISK_SIZE, errors.ECODE_INVAL)
11994 try:
11995 size = int(size)
11996 except (TypeError, ValueError), err:
11997 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
11998 errors.ECODE_INVAL)
12000 params[constants.IDISK_SIZE] = size
12002 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12003 raise errors.OpPrereqError("Disk size change not possible, use"
12004 " grow-disk", errors.ECODE_INVAL)
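# Example parameter dicts accepted above (values assumed): an add such as
#   {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR}
# passes (the mode defaults to read-write if omitted), while a modify carrying
# constants.IDISK_SIZE is rejected and must go through gnt-instance grow-disk.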
12007 def _VerifyNicModification(op, params):
12008 """Verifies a network interface modification.
12011 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12012 ip = params.get(constants.INIC_IP, None)
12013 if ip is None:
12014 pass
12015 elif ip.lower() == constants.VALUE_NONE:
12016 params[constants.INIC_IP] = None
12017 elif not netutils.IPAddress.IsValid(ip):
12018 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12019 errors.ECODE_INVAL)
12021 bridge = params.get("bridge", None)
12022 link = params.get(constants.INIC_LINK, None)
12023 if bridge and link:
12024 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12025 " at the same time", errors.ECODE_INVAL)
12026 elif bridge and bridge.lower() == constants.VALUE_NONE:
12027 params["bridge"] = None
12028 elif link and link.lower() == constants.VALUE_NONE:
12029 params[constants.INIC_LINK] = None
12031 if op == constants.DDM_ADD:
12032 macaddr = params.get(constants.INIC_MAC, None)
12033 if macaddr is None:
12034 params[constants.INIC_MAC] = constants.VALUE_AUTO
12036 if constants.INIC_MAC in params:
12037 macaddr = params[constants.INIC_MAC]
12038 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12039 macaddr = utils.NormalizeAndValidateMac(macaddr)
12041 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12042 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12043 " modifying an existing NIC",
12044 errors.ECODE_INVAL)
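# Example NIC parameter dicts (values assumed): for an add,
#   {constants.INIC_MAC: constants.VALUE_AUTO, constants.INIC_LINK: "br0"}
# is accepted, and an "ip" of "none" would be normalized to None above;
# passing both "bridge" and "link", or "mac": "auto" on a modify, is rejected.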
12046 def CheckArguments(self):
12047 if not (self.op.nics or self.op.disks or self.op.disk_template or
12048 self.op.hvparams or self.op.beparams or self.op.os_name or
12049 self.op.offline is not None or self.op.runtime_mem):
12050 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12052 if self.op.hvparams:
12053 _CheckGlobalHvParams(self.op.hvparams)
12055 self.op.disks = \
12056 self._UpgradeDiskNicMods("disk", self.op.disks,
12057 opcodes.OpInstanceSetParams.TestDiskModifications)
12058 self.op.nics = \
12059 self._UpgradeDiskNicMods("NIC", self.op.nics,
12060 opcodes.OpInstanceSetParams.TestNicModifications)
12062 # Check disk modifications
12063 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12064 self._VerifyDiskModification)
12066 if self.op.disks and self.op.disk_template is not None:
12067 raise errors.OpPrereqError("Disk template conversion and other disk"
12068 " changes not supported at the same time",
12069 errors.ECODE_INVAL)
12071 if (self.op.disk_template and
12072 self.op.disk_template in constants.DTS_INT_MIRROR and
12073 self.op.remote_node is None):
12074 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12075 " one requires specifying a secondary node",
12076 errors.ECODE_INVAL)
12078 # Check NIC modifications
12079 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12080 self._VerifyNicModification)
12082 def ExpandNames(self):
12083 self._ExpandAndLockInstance()
12084 # Can't even acquire node locks in shared mode as upcoming changes in
12085 # Ganeti 2.6 will start to modify the node object on disk conversion
12086 self.needed_locks[locking.LEVEL_NODE] = []
12087 self.needed_locks[locking.LEVEL_NODE_RES] = []
12088 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12090 def DeclareLocks(self, level):
12091 # TODO: Acquire group lock in shared mode (disk parameters)
12092 if level == locking.LEVEL_NODE:
12093 self._LockInstancesNodes()
12094 if self.op.disk_template and self.op.remote_node:
12095 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12096 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12097 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12099 self.needed_locks[locking.LEVEL_NODE_RES] = \
12100 self.needed_locks[locking.LEVEL_NODE][:]
12102 def BuildHooksEnv(self):
12103 """Build hooks env.
12105 This runs on the master, primary and secondaries.
12107 """
12108 args = {}
12109 if constants.BE_MINMEM in self.be_new:
12110 args["minmem"] = self.be_new[constants.BE_MINMEM]
12111 if constants.BE_MAXMEM in self.be_new:
12112 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12113 if constants.BE_VCPUS in self.be_new:
12114 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12115 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12116 # information at all.
12118 if self._new_nics is not None:
12120 nics = []
12121 for nic in self._new_nics:
12122 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12123 mode = nicparams[constants.NIC_MODE]
12124 link = nicparams[constants.NIC_LINK]
12125 nics.append((nic.ip, nic.mac, mode, link))
12127 args["nics"] = nics
12129 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12130 if self.op.disk_template:
12131 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12132 if self.op.runtime_mem:
12133 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12135 return env
12137 def BuildHooksNodes(self):
12138 """Build hooks nodes.
12140 """
12141 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12142 return (nl, nl)
12144 def _PrepareNicModification(self, params, private, old_ip, old_params,
12145 cluster, pnode):
12146 update_params_dict = dict([(key, params[key])
12147 for key in constants.NICS_PARAMETERS
12148 if key in params])
12150 if "bridge" in params:
12151 update_params_dict[constants.NIC_LINK] = params["bridge"]
12153 new_params = _GetUpdatedParams(old_params, update_params_dict)
12154 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12156 new_filled_params = cluster.SimpleFillNIC(new_params)
12157 objects.NIC.CheckParameterSyntax(new_filled_params)
12159 new_mode = new_filled_params[constants.NIC_MODE]
12160 if new_mode == constants.NIC_MODE_BRIDGED:
12161 bridge = new_filled_params[constants.NIC_LINK]
12162 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12163 if msg:
12164 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12165 if self.op.force:
12166 self.warn.append(msg)
12167 else:
12168 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12170 elif new_mode == constants.NIC_MODE_ROUTED:
12171 ip = params.get(constants.INIC_IP, old_ip)
12172 if ip is None:
12173 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12174 " on a routed NIC", errors.ECODE_INVAL)
12176 if constants.INIC_MAC in params:
12177 mac = params[constants.INIC_MAC]
12178 if mac is None:
12179 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12180 errors.ECODE_INVAL)
12181 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12182 # otherwise generate the MAC address
12183 params[constants.INIC_MAC] = \
12184 self.cfg.GenerateMAC(self.proc.GetECId())
12186 # or validate/reserve the current one
12187 try:
12188 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12189 except errors.ReservationError:
12190 raise errors.OpPrereqError("MAC address '%s' already in use"
12191 " in cluster" % mac,
12192 errors.ECODE_NOTUNIQUE)
12194 private.params = new_params
12195 private.filled = new_filled_params
12197 return (None, None)
12199 def CheckPrereq(self):
12200 """Check prerequisites.
12202 This only checks the instance list against the existing names.
12205 # checking the new params on the primary/secondary nodes
12207 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12208 cluster = self.cluster = self.cfg.GetClusterInfo()
12209 assert self.instance is not None, \
12210 "Cannot retrieve locked instance %s" % self.op.instance_name
12211 pnode = instance.primary_node
12212 nodelist = list(instance.all_nodes)
12213 pnode_info = self.cfg.GetNodeInfo(pnode)
12214 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12216 # Prepare disk/NIC modifications
12217 self.diskmod = PrepareContainerMods(self.op.disks, None)
12218 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12221 if self.op.os_name and not self.op.force:
12222 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12223 self.op.force_variant)
12224 instance_os = self.op.os_name
12225 else:
12226 instance_os = instance.os
12228 assert not (self.op.disk_template and self.op.disks), \
12229 "Can't modify disk template and apply disk changes at the same time"
12231 if self.op.disk_template:
12232 if instance.disk_template == self.op.disk_template:
12233 raise errors.OpPrereqError("Instance already has disk template %s" %
12234 instance.disk_template, errors.ECODE_INVAL)
12236 if (instance.disk_template,
12237 self.op.disk_template) not in self._DISK_CONVERSIONS:
12238 raise errors.OpPrereqError("Unsupported disk template conversion from"
12239 " %s to %s" % (instance.disk_template,
12240 self.op.disk_template),
12241 errors.ECODE_INVAL)
12242 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12243 msg="cannot change disk template")
12244 if self.op.disk_template in constants.DTS_INT_MIRROR:
12245 if self.op.remote_node == pnode:
12246 raise errors.OpPrereqError("Given new secondary node %s is the same"
12247 " as the primary node of the instance" %
12248 self.op.remote_node, errors.ECODE_STATE)
12249 _CheckNodeOnline(self, self.op.remote_node)
12250 _CheckNodeNotDrained(self, self.op.remote_node)
12251 # FIXME: here we assume that the old instance type is DT_PLAIN
12252 assert instance.disk_template == constants.DT_PLAIN
12253 disks = [{constants.IDISK_SIZE: d.size,
12254 constants.IDISK_VG: d.logical_id[0]}
12255 for d in instance.disks]
12256 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12257 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12259 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12260 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12261 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12262 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12263 ignore=self.op.ignore_ipolicy)
12264 if pnode_info.group != snode_info.group:
12265 self.LogWarning("The primary and secondary nodes are in two"
12266 " different node groups; the disk parameters"
12267 " from the first disk's node group will be"
12268 " used")
12270 # hvparams processing
12271 if self.op.hvparams:
12272 hv_type = instance.hypervisor
12273 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12274 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12275 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12278 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12279 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12280 self.hv_proposed = self.hv_new = hv_new # the new actual values
12281 self.hv_inst = i_hvdict # the new dict (without defaults)
12282 else:
12283 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12284 instance.hvparams)
12285 self.hv_new = self.hv_inst = {}
12287 # beparams processing
12288 if self.op.beparams:
12289 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12290 use_none=True)
12291 objects.UpgradeBeParams(i_bedict)
12292 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12293 be_new = cluster.SimpleFillBE(i_bedict)
12294 self.be_proposed = self.be_new = be_new # the new actual values
12295 self.be_inst = i_bedict # the new dict (without defaults)
12296 else:
12297 self.be_new = self.be_inst = {}
12298 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12299 be_old = cluster.FillBE(instance)
12301 # CPU param validation -- checking every time a parameter is
12302 # changed to cover all cases where either CPU mask or vcpus have
12303 # been changed
12304 if (constants.BE_VCPUS in self.be_proposed and
12305 constants.HV_CPU_MASK in self.hv_proposed):
12306 cpu_list = \
12307 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12308 # Verify mask is consistent with number of vCPUs. Can skip this
12309 # test if only 1 entry in the CPU mask, which means same mask
12310 # is applied to all vCPUs.
12311 if (len(cpu_list) > 1 and
12312 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12313 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12314 " CPU mask [%s]" %
12315 (self.be_proposed[constants.BE_VCPUS],
12316 self.hv_proposed[constants.HV_CPU_MASK]),
12317 errors.ECODE_INVAL)
12319 # Only perform this test if a new CPU mask is given
12320 if constants.HV_CPU_MASK in self.hv_new:
12321 # Calculate the largest CPU number requested
12322 max_requested_cpu = max(map(max, cpu_list))
12323 # Check that all of the instance's nodes have enough physical CPUs to
12324 # satisfy the requested CPU mask
12325 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12326 max_requested_cpu + 1, instance.hypervisor)
12328 # osparams processing
12329 if self.op.osparams:
12330 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12331 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12332 self.os_inst = i_osdict # the new dict (without defaults)
12333 else:
12334 self.os_inst = {}
12336 self.warn = []
12338 #TODO(dynmem): do the appropriate check involving MINMEM
12339 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12340 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12341 mem_check_list = [pnode]
12342 if be_new[constants.BE_AUTO_BALANCE]:
12343 # either we changed auto_balance to yes or it was from before
12344 mem_check_list.extend(instance.secondary_nodes)
12345 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12346 instance.hypervisor)
12347 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12348 [instance.hypervisor])
12349 pninfo = nodeinfo[pnode]
12350 msg = pninfo.fail_msg
12351 if msg:
12352 # Assume the primary node is unreachable and go ahead
12353 self.warn.append("Can't get info from primary node %s: %s" %
12354 (pnode, msg))
12355 else:
12356 (_, _, (pnhvinfo, )) = pninfo.payload
12357 if not isinstance(pnhvinfo.get("memory_free", None), int):
12358 self.warn.append("Node data from primary node %s doesn't contain"
12359 " free memory information" % pnode)
12360 elif instance_info.fail_msg:
12361 self.warn.append("Can't get instance runtime information: %s" %
12362 instance_info.fail_msg)
12363 else:
12364 if instance_info.payload:
12365 current_mem = int(instance_info.payload["memory"])
12366 else:
12367 # Assume instance not running
12368 # (there is a slight race condition here, but it's not very
12369 # probable, and we have no other way to check)
12370 # TODO: Describe race condition
12371 current_mem = 0
12372 #TODO(dynmem): do the appropriate check involving MINMEM
12373 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12374 pnhvinfo["memory_free"])
12375 if miss_mem > 0:
12376 raise errors.OpPrereqError("This change will prevent the instance"
12377 " from starting, due to %d MB of memory"
12378 " missing on its primary node" %
12379 miss_mem,
12380 errors.ECODE_NORES)
12382 if be_new[constants.BE_AUTO_BALANCE]:
12383 for node, nres in nodeinfo.items():
12384 if node not in instance.secondary_nodes:
12385 continue
12386 nres.Raise("Can't get info from secondary node %s" % node,
12387 prereq=True, ecode=errors.ECODE_STATE)
12388 (_, _, (nhvinfo, )) = nres.payload
12389 if not isinstance(nhvinfo.get("memory_free", None), int):
12390 raise errors.OpPrereqError("Secondary node %s didn't return free"
12391 " memory information" % node,
12392 errors.ECODE_STATE)
12393 #TODO(dynmem): do the appropriate check involving MINMEM
12394 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12395 raise errors.OpPrereqError("This change will prevent the instance"
12396 " from failover to its secondary node"
12397 " %s, due to not enough memory" % node,
12398 errors.ECODE_STATE)
12400 if self.op.runtime_mem:
12401 remote_info = self.rpc.call_instance_info(instance.primary_node,
12402 instance.name,
12403 instance.hypervisor)
12404 remote_info.Raise("Error checking node %s" % instance.primary_node)
12405 if not remote_info.payload: # not running already
12406 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12407 errors.ECODE_STATE)
12409 current_memory = remote_info.payload["memory"]
12410 if (not self.op.force and
12411 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12412 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12413 raise errors.OpPrereqError("Instance %s must have memory between %d"
12414 " and %d MB of memory unless --force is"
12415 " given" % (instance.name,
12416 self.be_proposed[constants.BE_MINMEM],
12417 self.be_proposed[constants.BE_MAXMEM]),
12418 errors.ECODE_INVAL)
12420 if self.op.runtime_mem > current_memory:
12421 _CheckNodeFreeMemory(self, instance.primary_node,
12422 "ballooning memory for instance %s" %
12423 instance.name,
12424 self.op.runtime_mem - current_memory,
12425 instance.hypervisor)
12427 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12428 raise errors.OpPrereqError("Disk operations not supported for"
12429 " diskless instances",
12430 errors.ECODE_INVAL)
12432 def _PrepareNicCreate(_, params, private):
12433 return self._PrepareNicModification(params, private, None, {},
12434 cluster, pnode)
12436 def _PrepareNicMod(_, nic, params, private):
12437 return self._PrepareNicModification(params, private, nic.ip,
12438 nic.nicparams, cluster, pnode)
12440 # Verify NIC changes (operating on copy)
12441 nics = instance.nics[:]
12442 ApplyContainerMods("NIC", nics, None, self.nicmod,
12443 _PrepareNicCreate, _PrepareNicMod, None)
12444 if len(nics) > constants.MAX_NICS:
12445 raise errors.OpPrereqError("Instance has too many network interfaces"
12446 " (%d), cannot add more" % constants.MAX_NICS,
12447 errors.ECODE_STATE)
12449 # Verify disk changes (operating on a copy)
12450 disks = instance.disks[:]
12451 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12452 if len(disks) > constants.MAX_DISKS:
12453 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12454 " more" % constants.MAX_DISKS,
12455 errors.ECODE_STATE)
12457 if self.op.offline is not None:
12458 if self.op.offline:
12459 msg = "can't change to offline"
12461 msg = "can't change to online"
12462 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12464 # Pre-compute NIC changes (necessary to use result in hooks)
12465 self._nic_chgdesc = []
12466 if self.nicmod:
12467 # Operate on copies as this is still in prereq
12468 nics = [nic.Copy() for nic in instance.nics]
12469 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12470 self._CreateNewNic, self._ApplyNicMods, None)
12471 self._new_nics = nics
12472 else:
12473 self._new_nics = None
12475 def _ConvertPlainToDrbd(self, feedback_fn):
12476 """Converts an instance from plain to drbd.
12479 feedback_fn("Converting template to drbd")
12480 instance = self.instance
12481 pnode = instance.primary_node
12482 snode = self.op.remote_node
12484 assert instance.disk_template == constants.DT_PLAIN
12486 # create a fake disk info for _GenerateDiskTemplate
12487 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12488 constants.IDISK_VG: d.logical_id[0]}
12489 for d in instance.disks]
12490 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12491 instance.name, pnode, [snode],
12492 disk_info, None, None, 0, feedback_fn,
12493 self.diskparams)
12494 info = _GetInstanceInfoText(instance)
12495 feedback_fn("Creating additional volumes...")
12496 # first, create the missing data and meta devices
12497 for disk in new_disks:
12498 # unfortunately this is... not too nice
12499 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12500 info, True)
12501 for child in disk.children:
12502 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12503 # at this stage, all new LVs have been created, we can rename the
12504 # old ones
12505 feedback_fn("Renaming original volumes...")
12506 rename_list = [(o, n.children[0].logical_id)
12507 for (o, n) in zip(instance.disks, new_disks)]
12508 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12509 result.Raise("Failed to rename original LVs")
12511 feedback_fn("Initializing DRBD devices...")
12512 # all child devices are in place, we can now create the DRBD devices
12513 for disk in new_disks:
12514 for node in [pnode, snode]:
12515 f_create = node == pnode
12516 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12518 # at this point, the instance has been modified
12519 instance.disk_template = constants.DT_DRBD8
12520 instance.disks = new_disks
12521 self.cfg.Update(instance, feedback_fn)
12523 # Release node locks while waiting for sync
12524 _ReleaseLocks(self, locking.LEVEL_NODE)
12526 # disks are created, waiting for sync
12527 disk_abort = not _WaitForSync(self, instance,
12528 oneshot=not self.op.wait_for_sync)
12529 if disk_abort:
12530 raise errors.OpExecError("There are some degraded disks for"
12531 " this instance, please cleanup manually")
12533 # Node resource locks will be released by caller
12535 def _ConvertDrbdToPlain(self, feedback_fn):
12536 """Converts an instance from drbd to plain.
12539 instance = self.instance
12541 assert len(instance.secondary_nodes) == 1
12542 assert instance.disk_template == constants.DT_DRBD8
12544 pnode = instance.primary_node
12545 snode = instance.secondary_nodes[0]
12546 feedback_fn("Converting template to plain")
12548 old_disks = instance.disks
12549 new_disks = [d.children[0] for d in old_disks]
12551 # copy over size and mode
12552 for parent, child in zip(old_disks, new_disks):
12553 child.size = parent.size
12554 child.mode = parent.mode
12556 # update instance structure
12557 instance.disks = new_disks
12558 instance.disk_template = constants.DT_PLAIN
12559 self.cfg.Update(instance, feedback_fn)
12561 # Release locks in case removing disks takes a while
12562 _ReleaseLocks(self, locking.LEVEL_NODE)
12564 feedback_fn("Removing volumes on the secondary node...")
12565 for disk in old_disks:
12566 self.cfg.SetDiskID(disk, snode)
12567 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12568 if msg:
12569 self.LogWarning("Could not remove block device %s on node %s,"
12570 " continuing anyway: %s", disk.iv_name, snode, msg)
12572 feedback_fn("Removing unneeded volumes on the primary node...")
12573 for idx, disk in enumerate(old_disks):
12574 meta = disk.children[1]
12575 self.cfg.SetDiskID(meta, pnode)
12576 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12577 if msg:
12578 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12579 " continuing anyway: %s", idx, pnode, msg)
12581 # this is a DRBD disk, return its port to the pool
12582 for disk in old_disks:
12583 tcp_port = disk.logical_id[2]
12584 self.cfg.AddTcpUdpPort(tcp_port)
12586 # Node resource locks will be released by caller
12588 def _CreateNewDisk(self, idx, params, _):
12589 """Creates a new disk.
12592 instance = self.instance
12595 if instance.disk_template in constants.DTS_FILEBASED:
12596 (file_driver, file_path) = instance.disks[0].logical_id
12597 file_path = os.path.dirname(file_path)
12598 else:
12599 file_driver = file_path = None
12601 disk = \
12602 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12603 instance.primary_node, instance.secondary_nodes,
12604 [params], file_path, file_driver, idx,
12605 self.Log, self.diskparams)[0]
12607 info = _GetInstanceInfoText(instance)
12609 logging.info("Creating volume %s for instance %s",
12610 disk.iv_name, instance.name)
12611 # Note: this needs to be kept in sync with _CreateDisks
12613 for node in instance.all_nodes:
12614 f_create = (node == instance.primary_node)
12616 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12617 except errors.OpExecError, err:
12618 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12619 disk.iv_name, disk, node, err)
12621 return (disk, [
12622 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12623 ])
12626 def _ModifyDisk(idx, disk, params, _):
12627 """Modifies a disk.
12630 disk.mode = params[constants.IDISK_MODE]
12632 return [
12633 ("disk.mode/%d" % idx, disk.mode),
12634 ]
12636 def _RemoveDisk(self, idx, root, _):
12640 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12641 self.cfg.SetDiskID(disk, node)
12642 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12644 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12645 " continuing anyway", idx, node, msg)
12647 # if this is a DRBD disk, return its port to the pool
12648 if root.dev_type in constants.LDS_DRBD:
12649 self.cfg.AddTcpUdpPort(root.logical_id[2])
12652 def _CreateNewNic(idx, params, private):
12653 """Creates data structure for a new network interface.
12656 mac = params[constants.INIC_MAC]
12657 ip = params.get(constants.INIC_IP, None)
12658 nicparams = private.params
12660 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12662 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12663 (mac, ip, private.filled[constants.NIC_MODE],
12664 private.filled[constants.NIC_LINK])),
12668 def _ApplyNicMods(idx, nic, params, private):
12669 """Modifies a network interface.
12672 changes = []
12674 for key in [constants.INIC_MAC, constants.INIC_IP]:
12675 if key in params:
12676 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12677 setattr(nic, key, params[key])
12679 if private.params:
12680 nic.nicparams = private.params
12682 for (key, val) in params.items():
12683 changes.append(("nic.%s/%d" % (key, idx), val))
12685 return changes
12687 def Exec(self, feedback_fn):
12688 """Modifies an instance.
12690 All parameters take effect only at the next restart of the instance.
12693 # Process here the warnings from CheckPrereq, as we don't have a
12694 # feedback_fn there.
12695 # TODO: Replace with self.LogWarning
12696 for warn in self.warn:
12697 feedback_fn("WARNING: %s" % warn)
12699 assert ((self.op.disk_template is None) ^
12700 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12701 "Not owning any node resource locks"
12704 instance = self.instance
12705 result = []
12707 if self.op.runtime_mem:
12708 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12709 instance.name,
12710 self.op.runtime_mem)
12711 rpcres.Raise("Cannot modify instance runtime memory")
12712 result.append(("runtime_memory", self.op.runtime_mem))
12714 # Apply disk changes
12715 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12716 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12717 _UpdateIvNames(0, instance.disks)
12719 if self.op.disk_template:
12721 check_nodes = set(instance.all_nodes)
12722 if self.op.remote_node:
12723 check_nodes.add(self.op.remote_node)
12724 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12725 owned = self.owned_locks(level)
12726 assert not (check_nodes - owned), \
12727 ("Not owning the correct locks, owning %r, expected at least %r" %
12728 (owned, check_nodes))
12730 r_shut = _ShutdownInstanceDisks(self, instance)
12732 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12733 " proceed with disk template conversion")
12734 mode = (instance.disk_template, self.op.disk_template)
12736 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12738 self.cfg.ReleaseDRBDMinors(instance.name)
12740 result.append(("disk_template", self.op.disk_template))
12742 assert instance.disk_template == self.op.disk_template, \
12743 ("Expected disk template '%s', found '%s'" %
12744 (self.op.disk_template, instance.disk_template))
12746 # Release node and resource locks if there are any (they might already have
12747 # been released during disk conversion)
12748 _ReleaseLocks(self, locking.LEVEL_NODE)
12749 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12751 # Apply NIC changes
12752 if self._new_nics is not None:
12753 instance.nics = self._new_nics
12754 result.extend(self._nic_chgdesc)
12757 if self.op.hvparams:
12758 instance.hvparams = self.hv_inst
12759 for key, val in self.op.hvparams.iteritems():
12760 result.append(("hv/%s" % key, val))
12763 if self.op.beparams:
12764 instance.beparams = self.be_inst
12765 for key, val in self.op.beparams.iteritems():
12766 result.append(("be/%s" % key, val))
12769 if self.op.os_name:
12770 instance.os = self.op.os_name
12773 if self.op.osparams:
12774 instance.osparams = self.os_inst
12775 for key, val in self.op.osparams.iteritems():
12776 result.append(("os/%s" % key, val))
12778 if self.op.offline is None:
12781 elif self.op.offline:
12782 # Mark instance as offline
12783 self.cfg.MarkInstanceOffline(instance.name)
12784 result.append(("admin_state", constants.ADMINST_OFFLINE))
12786 # Mark instance as online, but stopped
12787 self.cfg.MarkInstanceDown(instance.name)
12788 result.append(("admin_state", constants.ADMINST_DOWN))
12790 self.cfg.Update(instance, feedback_fn)
12792 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12793 self.owned_locks(locking.LEVEL_NODE)), \
12794 "All node locks should have been released by now"
12798 _DISK_CONVERSIONS = {
12799 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12800 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12804 class LUInstanceChangeGroup(LogicalUnit):
12805 HPATH = "instance-change-group"
12806 HTYPE = constants.HTYPE_INSTANCE
12809 def ExpandNames(self):
12810 self.share_locks = _ShareAll()
12811 self.needed_locks = {
12812 locking.LEVEL_NODEGROUP: [],
12813 locking.LEVEL_NODE: [],
12816 self._ExpandAndLockInstance()
12818 if self.op.target_groups:
12819 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12820 self.op.target_groups)
12822 self.req_target_uuids = None
12824 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12826 def DeclareLocks(self, level):
12827 if level == locking.LEVEL_NODEGROUP:
12828 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12830 if self.req_target_uuids:
12831 lock_groups = set(self.req_target_uuids)
12833 # Lock all groups used by instance optimistically; this requires going
12834 # via the node before it's locked, requiring verification later on
12835 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12836 lock_groups.update(instance_groups)
12838 # No target groups, need to lock all of them
12839 lock_groups = locking.ALL_SET
12841 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12843 elif level == locking.LEVEL_NODE:
12844 if self.req_target_uuids:
12845 # Lock all nodes used by instances
12846 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12847 self._LockInstancesNodes()
12849 # Lock all nodes in all potential target groups
12850 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12851 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12852 member_nodes = [node_name
12853 for group in lock_groups
12854 for node_name in self.cfg.GetNodeGroup(group).members]
12855 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12857 # Lock all nodes as all groups are potential targets
12858 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12860 def CheckPrereq(self):
12861 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12862 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12863 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12865 assert (self.req_target_uuids is None or
12866 owned_groups.issuperset(self.req_target_uuids))
12867 assert owned_instances == set([self.op.instance_name])
12869 # Get instance information
12870 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12872 # Check if node groups for locked instance are still correct
12873 assert owned_nodes.issuperset(self.instance.all_nodes), \
12874 ("Instance %s's nodes changed while we kept the lock" %
12875 self.op.instance_name)
12877 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12880 if self.req_target_uuids:
12881 # User requested specific target groups
12882 self.target_uuids = self.req_target_uuids
12884 # All groups except those used by the instance are potential targets
12885 self.target_uuids = owned_groups - inst_groups
12887 conflicting_groups = self.target_uuids & inst_groups
12888 if conflicting_groups:
12889 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12890 " used by the instance '%s'" %
12891 (utils.CommaJoin(conflicting_groups),
12892 self.op.instance_name),
12893 errors.ECODE_INVAL)
12895 if not self.target_uuids:
12896 raise errors.OpPrereqError("There are no possible target groups",
12897 errors.ECODE_INVAL)
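# Illustrative sketch (not part of the original module) of the optimistic
# locking pattern used by this LU: DeclareLocks() guesses the node groups from
# the configuration before the locks are held, and CheckPrereq() re-reads the
# configuration under the locks and verifies the guess still holds, e.g.:
#
#   owned = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
#   current = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
#   if not owned.issuperset(current):
#     raise errors.OpPrereqError("Instance's node groups changed since locks"
#                                " were acquired, please retry the operation",
#                                errors.ECODE_STATE)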
12899 def BuildHooksEnv(self):
12900 """Build hooks env.
12903 assert self.target_uuids
12906 "TARGET_GROUPS": " ".join(self.target_uuids),
12909 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12913 def BuildHooksNodes(self):
12914 """Build hooks nodes.
12917 mn = self.cfg.GetMasterNode()
12918 return ([mn], [mn])
12920 def Exec(self, feedback_fn):
12921 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12923 assert instances == [self.op.instance_name], "Instance not locked"
12925 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12926 instances=instances, target_groups=list(self.target_uuids))
12928 ial.Run(self.op.iallocator)
12930 if not ial.success:
12931 raise errors.OpPrereqError("Can't compute solution for changing group of"
12932 " instance '%s' using iallocator '%s': %s" %
12933 (self.op.instance_name, self.op.iallocator,
12935 errors.ECODE_NORES)
12937 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12939 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12940 " instance '%s'", len(jobs), self.op.instance_name)
12942 return ResultWithJobs(jobs)
12945 class LUBackupQuery(NoHooksLU):
12946 """Query the exports list
12951 def ExpandNames(self):
12952 self.needed_locks = {}
12953 self.share_locks[locking.LEVEL_NODE] = 1
12954 if not self.op.nodes:
12955 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12957 self.needed_locks[locking.LEVEL_NODE] = \
12958 _GetWantedNodes(self, self.op.nodes)
12960 def Exec(self, feedback_fn):
12961 """Compute the list of all the exported system images.
12964 @return: a dictionary with the structure node->(export-list)
12965 where export-list is a list of the instances exported on that node.
12969 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12970 rpcresult = self.rpc.call_export_list(self.nodes)
12972 for node in rpcresult:
12973 if rpcresult[node].fail_msg:
12974 result[node] = False
12976 result[node] = rpcresult[node].payload
12981 class LUBackupPrepare(NoHooksLU):
12982 """Prepares an instance for an export and returns useful information.
12987 def ExpandNames(self):
12988 self._ExpandAndLockInstance()
12990 def CheckPrereq(self):
12991 """Check prerequisites.
12994 instance_name = self.op.instance_name
12996 self.instance = self.cfg.GetInstanceInfo(instance_name)
12997 assert self.instance is not None, \
12998 "Cannot retrieve locked instance %s" % self.op.instance_name
12999 _CheckNodeOnline(self, self.instance.primary_node)
13001 self._cds = _GetClusterDomainSecret()
13003 def Exec(self, feedback_fn):
13004 """Prepares an instance for an export.
13007 instance = self.instance
13009 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13010 salt = utils.GenerateSecret(8)
13012 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13013 result = self.rpc.call_x509_cert_create(instance.primary_node,
13014 constants.RIE_CERT_VALIDITY)
13015 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13017 (name, cert_pem) = result.payload
13019 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13023 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13024 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13026 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13032 class LUBackupExport(LogicalUnit):
13033 """Export an instance to an image in the cluster.
13036 HPATH = "instance-export"
13037 HTYPE = constants.HTYPE_INSTANCE
13040 def CheckArguments(self):
13041 """Check the arguments.
13044 self.x509_key_name = self.op.x509_key_name
13045 self.dest_x509_ca_pem = self.op.destination_x509_ca
13047 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13048 if not self.x509_key_name:
13049 raise errors.OpPrereqError("Missing X509 key name for encryption",
13050 errors.ECODE_INVAL)
13052 if not self.dest_x509_ca_pem:
13053 raise errors.OpPrereqError("Missing destination X509 CA",
13054 errors.ECODE_INVAL)
13056 def ExpandNames(self):
13057 self._ExpandAndLockInstance()
13059 # Lock all nodes for local exports
13060 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13061 # FIXME: lock only instance primary and destination node
13063 # Sad but true, for now we have to lock all nodes, as we don't know where
13064 # the previous export might be, and in this LU we search for it and
13065 # remove it from its current node. In the future we could fix this by:
13066 # - making a tasklet to search (share-lock all), then create the
13067 # new one, then one to remove, after
13068 # - removing the removal operation altogether
13069 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
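# Illustrative sketch (not part of the original module): the FIXME above would
# narrow the lock set to the instance's primary node plus the export target
# node once stale exports no longer have to be hunted down on arbitrary nodes.
# Purely hypothetical, under that assumption:
#
#   self.needed_locks[locking.LEVEL_NODE] = [
#     self.cfg.GetInstanceInfo(self.op.instance_name).primary_node,
#     self.op.target_node,
#   ]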
13071 def DeclareLocks(self, level):
13072 """Last minute lock declaration."""
13073 # All nodes are locked anyway, so nothing to do here.
13075 def BuildHooksEnv(self):
13076 """Build hooks env.
13078 This will run on the master, primary node and target node.
13082 "EXPORT_MODE": self.op.mode,
13083 "EXPORT_NODE": self.op.target_node,
13084 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13085 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13086 # TODO: Generic function for boolean env variables
13087 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13090 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13094 def BuildHooksNodes(self):
13095 """Build hooks nodes.
13098 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13100 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13101 nl.append(self.op.target_node)
13105 def CheckPrereq(self):
13106 """Check prerequisites.
13108 This checks that the instance and node names are valid.
13111 instance_name = self.op.instance_name
13113 self.instance = self.cfg.GetInstanceInfo(instance_name)
13114 assert self.instance is not None, \
13115 "Cannot retrieve locked instance %s" % self.op.instance_name
13116 _CheckNodeOnline(self, self.instance.primary_node)
13118 if (self.op.remove_instance and
13119 self.instance.admin_state == constants.ADMINST_UP and
13120 not self.op.shutdown):
13121 raise errors.OpPrereqError("Can not remove instance without shutting it"
13124 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13125 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13126 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13127 assert self.dst_node is not None
13129 _CheckNodeOnline(self, self.dst_node.name)
13130 _CheckNodeNotDrained(self, self.dst_node.name)
13133 self.dest_disk_info = None
13134 self.dest_x509_ca = None
13136 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13137 self.dst_node = None
13139 if len(self.op.target_node) != len(self.instance.disks):
13140 raise errors.OpPrereqError(("Received destination information for %s"
13141 " disks, but instance %s has %s disks") %
13142 (len(self.op.target_node), instance_name,
13143 len(self.instance.disks)),
13144 errors.ECODE_INVAL)
13146 cds = _GetClusterDomainSecret()
13148 # Check X509 key name
13150 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13151 except (TypeError, ValueError), err:
13152 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13154 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13155 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13156 errors.ECODE_INVAL)
13158 # Load and verify CA
13160 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13161 except OpenSSL.crypto.Error, err:
13162 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13163 (err, ), errors.ECODE_INVAL)
13165 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13166 if errcode is not None:
13167 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13168 (msg, ), errors.ECODE_INVAL)
13170 self.dest_x509_ca = cert
13172 # Verify target information
13174 for idx, disk_data in enumerate(self.op.target_node):
13176 (host, port, magic) = \
13177 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13178 except errors.GenericError, err:
13179 raise errors.OpPrereqError("Target info for disk %s: %s" %
13180 (idx, err), errors.ECODE_INVAL)
13182 disk_info.append((host, port, magic))
13184 assert len(disk_info) == len(self.op.target_node)
13185 self.dest_disk_info = disk_info
13188 raise errors.ProgrammerError("Unhandled export mode %r" %
13191 # instance disk type verification
13192 # TODO: Implement export support for file-based disks
13193 for disk in self.instance.disks:
13194 if disk.dev_type == constants.LD_FILE:
13195 raise errors.OpPrereqError("Export not supported for instances with"
13196 " file-based disks", errors.ECODE_INVAL)
13198 def _CleanupExports(self, feedback_fn):
13199 """Removes exports of current instance from all other nodes.
13201 If an instance in a cluster with nodes A..D was exported to node C, its
13202 exports will be removed from the nodes A, B and D.
13205 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13207 nodelist = self.cfg.GetNodeList()
13208 nodelist.remove(self.dst_node.name)
13210 # on one-node clusters nodelist will be empty after the removal
13211 # if we proceeded, the backup would be removed because OpBackupQuery
13212 # substitutes an empty list with the full cluster node list.
13213 iname = self.instance.name
13215 feedback_fn("Removing old exports for instance %s" % iname)
13216 exportlist = self.rpc.call_export_list(nodelist)
13217 for node in exportlist:
13218 if exportlist[node].fail_msg:
13220 if iname in exportlist[node].payload:
13221 msg = self.rpc.call_export_remove(node, iname).fail_msg
13223 self.LogWarning("Could not remove older export for instance %s"
13224 " on node %s: %s", iname, node, msg)
13226 def Exec(self, feedback_fn):
13227 """Export an instance to an image in the cluster.
13230 assert self.op.mode in constants.EXPORT_MODES
13232 instance = self.instance
13233 src_node = instance.primary_node
13235 if self.op.shutdown:
13236 # shutdown the instance, but not the disks
13237 feedback_fn("Shutting down instance %s" % instance.name)
13238 result = self.rpc.call_instance_shutdown(src_node, instance,
13239 self.op.shutdown_timeout)
13240 # TODO: Maybe ignore failures if ignore_remove_failures is set
13241 result.Raise("Could not shutdown instance %s on"
13242 " node %s" % (instance.name, src_node))
13244 # set the disks ID correctly since call_instance_start needs the
13245 # correct drbd minor to create the symlinks
13246 for disk in instance.disks:
13247 self.cfg.SetDiskID(disk, src_node)
13249 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13252 # Activate the instance disks if we're exporting a stopped instance
13253 feedback_fn("Activating disks for %s" % instance.name)
13254 _StartInstanceDisks(self, instance, None)
13257 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13260 helper.CreateSnapshots()
13262 if (self.op.shutdown and
13263 instance.admin_state == constants.ADMINST_UP and
13264 not self.op.remove_instance):
13265 assert not activate_disks
13266 feedback_fn("Starting instance %s" % instance.name)
13267 result = self.rpc.call_instance_start(src_node,
13268 (instance, None, None), False)
13269 msg = result.fail_msg
13271 feedback_fn("Failed to start instance: %s" % msg)
13272 _ShutdownInstanceDisks(self, instance)
13273 raise errors.OpExecError("Could not start instance: %s" % msg)
13275 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13276 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13277 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13278 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13279 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13281 (key_name, _, _) = self.x509_key_name
13284 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13287 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13288 key_name, dest_ca_pem,
13293 # Check for backwards compatibility
13294 assert len(dresults) == len(instance.disks)
13295 assert compat.all(isinstance(i, bool) for i in dresults), \
13296 "Not all results are boolean: %r" % dresults
13300 feedback_fn("Deactivating disks for %s" % instance.name)
13301 _ShutdownInstanceDisks(self, instance)
13303 if not (compat.all(dresults) and fin_resu):
13306 failures.append("export finalization")
13307 if not compat.all(dresults):
13308 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13310 failures.append("disk export: disk(s) %s" % fdsk)
13312 raise errors.OpExecError("Export failed, errors in %s" %
13313 utils.CommaJoin(failures))
13315 # At this point, the export was successful, we can cleanup/finish
13317 # Remove instance if requested
13318 if self.op.remove_instance:
13319 feedback_fn("Removing instance %s" % instance.name)
13320 _RemoveInstance(self, feedback_fn, instance,
13321 self.op.ignore_remove_failures)
13323 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13324 self._CleanupExports(feedback_fn)
13326 return fin_resu, dresults
13329 class LUBackupRemove(NoHooksLU):
13330 """Remove exports related to the named instance.
13335 def ExpandNames(self):
13336 self.needed_locks = {}
13337 # We need all nodes to be locked in order for RemoveExport to work, but we
13338 # don't need to lock the instance itself, as nothing will happen to it (and
13339 # we can remove exports also for a removed instance)
13340 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13342 def Exec(self, feedback_fn):
13343 """Remove any export.
13346 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13347 # If the instance was not found we'll try with the name that was passed in.
13348 # This will only work if it was an FQDN, though.
13350 if not instance_name:
13352 instance_name = self.op.instance_name
13354 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13355 exportlist = self.rpc.call_export_list(locked_nodes)
13357 for node in exportlist:
13358 msg = exportlist[node].fail_msg
13360 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13362 if instance_name in exportlist[node].payload:
13364 result = self.rpc.call_export_remove(node, instance_name)
13365 msg = result.fail_msg
13367 logging.error("Could not remove export for instance %s"
13368 " on node %s: %s", instance_name, node, msg)
13370 if fqdn_warn and not found:
13371 feedback_fn("Export not found. If trying to remove an export belonging"
13372 " to a deleted instance please use its Fully Qualified"
13376 class LUGroupAdd(LogicalUnit):
13377 """Logical unit for creating node groups.
13380 HPATH = "group-add"
13381 HTYPE = constants.HTYPE_GROUP
13384 def ExpandNames(self):
13385 # We need the new group's UUID here so that we can create and acquire the
13386 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13387 # that it should not check whether the UUID exists in the configuration.
13388 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13389 self.needed_locks = {}
13390 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13392 def CheckPrereq(self):
13393 """Check prerequisites.
13395 This checks that the given group name is not an existing node group
13400 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13401 except errors.OpPrereqError:
13404 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13405 " node group (UUID: %s)" %
13406 (self.op.group_name, existing_uuid),
13407 errors.ECODE_EXISTS)
13409 if self.op.ndparams:
13410 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13412 if self.op.hv_state:
13413 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13415 self.new_hv_state = None
13417 if self.op.disk_state:
13418 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13420 self.new_disk_state = None
13422 if self.op.diskparams:
13423 for templ in constants.DISK_TEMPLATES:
13424 if templ not in self.op.diskparams:
13425 self.op.diskparams[templ] = {}
13426 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13428 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13430 if self.op.ipolicy:
13431 cluster = self.cfg.GetClusterInfo()
13432 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13434 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13435 except errors.ConfigurationError, err:
13436 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13437 errors.ECODE_INVAL)
13439 def BuildHooksEnv(self):
13440 """Build hooks env.
13444 "GROUP_NAME": self.op.group_name,
13447 def BuildHooksNodes(self):
13448 """Build hooks nodes.
13451 mn = self.cfg.GetMasterNode()
13452 return ([mn], [mn])
13454 def Exec(self, feedback_fn):
13455 """Add the node group to the cluster.
13458 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13459 uuid=self.group_uuid,
13460 alloc_policy=self.op.alloc_policy,
13461 ndparams=self.op.ndparams,
13462 diskparams=self.op.diskparams,
13463 ipolicy=self.op.ipolicy,
13464 hv_state_static=self.new_hv_state,
13465 disk_state_static=self.new_disk_state)
13467 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13468 del self.remove_locks[locking.LEVEL_NODEGROUP]
13471 class LUGroupAssignNodes(NoHooksLU):
13472 """Logical unit for assigning nodes to groups.
13477 def ExpandNames(self):
13478 # These raise errors.OpPrereqError on their own:
13479 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13480 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13482 # We want to lock all the affected nodes and groups. We have readily
13483 # available the list of nodes, and the *destination* group. To gather the
13484 # list of "source" groups, we need to fetch node information later on.
13485 self.needed_locks = {
13486 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13487 locking.LEVEL_NODE: self.op.nodes,
13490 def DeclareLocks(self, level):
13491 if level == locking.LEVEL_NODEGROUP:
13492 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13494 # Try to get all affected nodes' groups without having the group or node
13495 # lock yet. Needs verification later in the code flow.
13496 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13498 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13500 def CheckPrereq(self):
13501 """Check prerequisites.
13504 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13505 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13506 frozenset(self.op.nodes))
13508 expected_locks = (set([self.group_uuid]) |
13509 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13510 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13511 if actual_locks != expected_locks:
13512 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13513 " current groups are '%s', used to be '%s'" %
13514 (utils.CommaJoin(expected_locks),
13515 utils.CommaJoin(actual_locks)))
13517 self.node_data = self.cfg.GetAllNodesInfo()
13518 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13519 instance_data = self.cfg.GetAllInstancesInfo()
13521 if self.group is None:
13522 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13523 (self.op.group_name, self.group_uuid))
13525 (new_splits, previous_splits) = \
13526 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13527 for node in self.op.nodes],
13528 self.node_data, instance_data)
13531 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13533 if not self.op.force:
13534 raise errors.OpExecError("The following instances get split by this"
13535 " change and --force was not given: %s" %
13538 self.LogWarning("This operation will split the following instances: %s",
13541 if previous_splits:
13542 self.LogWarning("In addition, these already-split instances continue"
13543 " to be split across groups: %s",
13544 utils.CommaJoin(utils.NiceSort(previous_splits)))
13546 def Exec(self, feedback_fn):
13547 """Assign nodes to a new group.
13550 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13552 self.cfg.AssignGroupNodes(mods)
13555 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13556 """Check for split instances after a node assignment.
13558 This method considers a series of node assignments as an atomic operation,
12559 and returns information about split instances after applying the set of changes.
13562 In particular, it returns information about newly split instances, and
13563 instances that were already split, and remain so after the change.
12565 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
13568 @type changes: list of (node_name, new_group_uuid) pairs.
13569 @param changes: list of node assignments to consider.
13570 @param node_data: a dict with data for all nodes
13571 @param instance_data: a dict with all instances to consider
13572 @rtype: a two-tuple
12573 @return: a list of instances that were previously okay and end up split as a
12574 consequence of this change, and a list of instances that were previously
12575 split and this change does not fix.
13578 changed_nodes = dict((node, group) for node, group in changes
13579 if node_data[node].group != group)
13581 all_split_instances = set()
13582 previously_split_instances = set()
13584 def InstanceNodes(instance):
13585 return [instance.primary_node] + list(instance.secondary_nodes)
13587 for inst in instance_data.values():
13588 if inst.disk_template not in constants.DTS_INT_MIRROR:
13591 instance_nodes = InstanceNodes(inst)
13593 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13594 previously_split_instances.add(inst.name)
13596 if len(set(changed_nodes.get(node, node_data[node].group)
13597 for node in instance_nodes)) > 1:
13598 all_split_instances.add(inst.name)
13600 return (list(all_split_instances - previously_split_instances),
13601 list(previously_split_instances & all_split_instances))
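# Illustrative sketch (not part of the original module) of how the helper
# above can be exercised with minimal stand-in objects; "FakeNode" and
# "FakeInst" are assumptions for this example only:
#
#   import collections
#   FakeNode = collections.namedtuple("FakeNode", ["group"])
#   FakeInst = collections.namedtuple(
#     "FakeInst", ["name", "primary_node", "secondary_nodes", "disk_template"])
#   node_data = {"node1": FakeNode("g1"), "node2": FakeNode("g1")}
#   instance_data = {"inst1": FakeInst("inst1", "node1", ["node2"],
#                                      constants.DT_DRBD8)}
#   # Moving only node2 to group "g2" splits the DRBD instance:
#   LUGroupAssignNodes.CheckAssignmentForSplitInstances(
#     [("node2", "g2")], node_data, instance_data)
#   # -> (["inst1"], [])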
13604 class _GroupQuery(_QueryBase):
13605 FIELDS = query.GROUP_FIELDS
13607 def ExpandNames(self, lu):
13608 lu.needed_locks = {}
13610 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13611 self._cluster = lu.cfg.GetClusterInfo()
13612 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13615 self.wanted = [name_to_uuid[name]
13616 for name in utils.NiceSort(name_to_uuid.keys())]
13618 # Accept names to be either names or UUIDs.
13621 all_uuid = frozenset(self._all_groups.keys())
13623 for name in self.names:
13624 if name in all_uuid:
13625 self.wanted.append(name)
13626 elif name in name_to_uuid:
13627 self.wanted.append(name_to_uuid[name])
13629 missing.append(name)
13632 raise errors.OpPrereqError("Some groups do not exist: %s" %
13633 utils.CommaJoin(missing),
13634 errors.ECODE_NOENT)
13636 def DeclareLocks(self, lu, level):
13639 def _GetQueryData(self, lu):
13640 """Computes the list of node groups and their attributes.
13643 do_nodes = query.GQ_NODE in self.requested_data
13644 do_instances = query.GQ_INST in self.requested_data
13646 group_to_nodes = None
13647 group_to_instances = None
13649 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13650 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13651 # latter GetAllInstancesInfo() is not enough, for we have to go through
13652 # instance->node. Hence, we will need to process nodes even if we only need
13653 # instance information.
13654 if do_nodes or do_instances:
13655 all_nodes = lu.cfg.GetAllNodesInfo()
13656 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13659 for node in all_nodes.values():
13660 if node.group in group_to_nodes:
13661 group_to_nodes[node.group].append(node.name)
13662 node_to_group[node.name] = node.group
13665 all_instances = lu.cfg.GetAllInstancesInfo()
13666 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13668 for instance in all_instances.values():
13669 node = instance.primary_node
13670 if node in node_to_group:
13671 group_to_instances[node_to_group[node]].append(instance.name)
13674 # Do not pass on node information if it was not requested.
13675 group_to_nodes = None
13677 return query.GroupQueryData(self._cluster,
13678 [self._all_groups[uuid]
13679 for uuid in self.wanted],
13680 group_to_nodes, group_to_instances)
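# Illustrative sketch (not part of the original module) of the mapping the
# comment above describes: instances only know their nodes, so the node ->
# group relation has to be built first even when only GQ_INST is requested.
#
#   node_to_group = {"node1": "g1", "node2": "g2"}    # from GetAllNodesInfo()
#   group_to_instances = {"g1": [], "g2": []}
#   for name, pnode in [("inst1", "node1"), ("inst2", "node1")]:
#     group_to_instances[node_to_group[pnode]].append(name)
#   # -> {"g1": ["inst1", "inst2"], "g2": []}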
13683 class LUGroupQuery(NoHooksLU):
13684 """Logical unit for querying node groups.
13689 def CheckArguments(self):
13690 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13691 self.op.output_fields, False)
13693 def ExpandNames(self):
13694 self.gq.ExpandNames(self)
13696 def DeclareLocks(self, level):
13697 self.gq.DeclareLocks(self, level)
13699 def Exec(self, feedback_fn):
13700 return self.gq.OldStyleQuery(self)
13703 class LUGroupSetParams(LogicalUnit):
13704 """Modifies the parameters of a node group.
13707 HPATH = "group-modify"
13708 HTYPE = constants.HTYPE_GROUP
13711 def CheckArguments(self):
13714 self.op.diskparams,
13715 self.op.alloc_policy,
13717 self.op.disk_state,
13721 if all_changes.count(None) == len(all_changes):
13722 raise errors.OpPrereqError("Please pass at least one modification",
13723 errors.ECODE_INVAL)
13725 def ExpandNames(self):
13726 # This raises errors.OpPrereqError on its own:
13727 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13729 self.needed_locks = {
13730 locking.LEVEL_INSTANCE: [],
13731 locking.LEVEL_NODEGROUP: [self.group_uuid],
13734 self.share_locks[locking.LEVEL_INSTANCE] = 1
13736 def DeclareLocks(self, level):
13737 if level == locking.LEVEL_INSTANCE:
13738 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13740 # Lock instances optimistically, needs verification once group lock has
13742 self.needed_locks[locking.LEVEL_INSTANCE] = \
13743 self.cfg.GetNodeGroupInstances(self.group_uuid)
13745 def CheckPrereq(self):
13746 """Check prerequisites.
13749 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13751 # Check if locked instances are still correct
13752 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13754 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13755 cluster = self.cfg.GetClusterInfo()
13757 if self.group is None:
13758 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13759 (self.op.group_name, self.group_uuid))
13761 if self.op.ndparams:
13762 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13763 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13764 self.new_ndparams = new_ndparams
13766 if self.op.diskparams:
13767 self.new_diskparams = dict()
13768 for templ in constants.DISK_TEMPLATES:
13769 if templ not in self.op.diskparams:
13770 self.op.diskparams[templ] = {}
13771 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13772 self.op.diskparams[templ])
13773 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13774 self.new_diskparams[templ] = new_templ_params
13776 if self.op.hv_state:
13777 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13778 self.group.hv_state_static)
13780 if self.op.disk_state:
13781 self.new_disk_state = \
13782 _MergeAndVerifyDiskState(self.op.disk_state,
13783 self.group.disk_state_static)
13785 if self.op.ipolicy:
13786 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13790 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13791 inst_filter = lambda inst: inst.name in owned_instances
13792 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13794 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13796 new_ipolicy, instances)
13799 self.LogWarning("After the ipolicy change the following instances"
13800 " violate them: %s",
13801 utils.CommaJoin(violations))
13803 def BuildHooksEnv(self):
13804 """Build hooks env.
13808 "GROUP_NAME": self.op.group_name,
13809 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13812 def BuildHooksNodes(self):
13813 """Build hooks nodes.
13816 mn = self.cfg.GetMasterNode()
13817 return ([mn], [mn])
13819 def Exec(self, feedback_fn):
13820 """Modifies the node group.
13825 if self.op.ndparams:
13826 self.group.ndparams = self.new_ndparams
13827 result.append(("ndparams", str(self.group.ndparams)))
13829 if self.op.diskparams:
13830 self.group.diskparams = self.new_diskparams
13831 result.append(("diskparams", str(self.group.diskparams)))
13833 if self.op.alloc_policy:
13834 self.group.alloc_policy = self.op.alloc_policy
13836 if self.op.hv_state:
13837 self.group.hv_state_static = self.new_hv_state
13839 if self.op.disk_state:
13840 self.group.disk_state_static = self.new_disk_state
13842 if self.op.ipolicy:
13843 self.group.ipolicy = self.new_ipolicy
13845 self.cfg.Update(self.group, feedback_fn)
13849 class LUGroupRemove(LogicalUnit):
13850 HPATH = "group-remove"
13851 HTYPE = constants.HTYPE_GROUP
13854 def ExpandNames(self):
13855 # This raises errors.OpPrereqError on its own:
13856 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13857 self.needed_locks = {
13858 locking.LEVEL_NODEGROUP: [self.group_uuid],
13861 def CheckPrereq(self):
13862 """Check prerequisites.
13864 This checks that the given group name exists as a node group, that it is
13865 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
13869 # Verify that the group is empty.
13870 group_nodes = [node.name
13871 for node in self.cfg.GetAllNodesInfo().values()
13872 if node.group == self.group_uuid]
13875 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13877 (self.op.group_name,
13878 utils.CommaJoin(utils.NiceSort(group_nodes))),
13879 errors.ECODE_STATE)
13881 # Verify the cluster would not be left group-less.
13882 if len(self.cfg.GetNodeGroupList()) == 1:
13883 raise errors.OpPrereqError("Group '%s' is the only group,"
13884 " cannot be removed" %
13885 self.op.group_name,
13886 errors.ECODE_STATE)
13888 def BuildHooksEnv(self):
13889 """Build hooks env.
13893 "GROUP_NAME": self.op.group_name,
13896 def BuildHooksNodes(self):
13897 """Build hooks nodes.
13900 mn = self.cfg.GetMasterNode()
13901 return ([mn], [mn])
13903 def Exec(self, feedback_fn):
13904 """Remove the node group.
13908 self.cfg.RemoveNodeGroup(self.group_uuid)
13909 except errors.ConfigurationError:
13910 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13911 (self.op.group_name, self.group_uuid))
13913 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13916 class LUGroupRename(LogicalUnit):
13917 HPATH = "group-rename"
13918 HTYPE = constants.HTYPE_GROUP
13921 def ExpandNames(self):
13922 # This raises errors.OpPrereqError on its own:
13923 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13925 self.needed_locks = {
13926 locking.LEVEL_NODEGROUP: [self.group_uuid],
13929 def CheckPrereq(self):
13930 """Check prerequisites.
13932 Ensures requested new name is not yet used.
13936 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13937 except errors.OpPrereqError:
13940 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13941 " node group (UUID: %s)" %
13942 (self.op.new_name, new_name_uuid),
13943 errors.ECODE_EXISTS)
13945 def BuildHooksEnv(self):
13946 """Build hooks env.
13950 "OLD_NAME": self.op.group_name,
13951 "NEW_NAME": self.op.new_name,
13954 def BuildHooksNodes(self):
13955 """Build hooks nodes.
13958 mn = self.cfg.GetMasterNode()
13960 all_nodes = self.cfg.GetAllNodesInfo()
13961 all_nodes.pop(mn, None)
13964 run_nodes.extend(node.name for node in all_nodes.values()
13965 if node.group == self.group_uuid)
13967 return (run_nodes, run_nodes)
13969 def Exec(self, feedback_fn):
13970 """Rename the node group.
13973 group = self.cfg.GetNodeGroup(self.group_uuid)
13976 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13977 (self.op.group_name, self.group_uuid))
13979 group.name = self.op.new_name
13980 self.cfg.Update(group, feedback_fn)
13982 return self.op.new_name
13985 class LUGroupEvacuate(LogicalUnit):
13986 HPATH = "group-evacuate"
13987 HTYPE = constants.HTYPE_GROUP
13990 def ExpandNames(self):
13991 # This raises errors.OpPrereqError on its own:
13992 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13994 if self.op.target_groups:
13995 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13996 self.op.target_groups)
13998 self.req_target_uuids = []
14000 if self.group_uuid in self.req_target_uuids:
14001 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14002 " as a target group (targets are %s)" %
14004 utils.CommaJoin(self.req_target_uuids)),
14005 errors.ECODE_INVAL)
14007 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14009 self.share_locks = _ShareAll()
14010 self.needed_locks = {
14011 locking.LEVEL_INSTANCE: [],
14012 locking.LEVEL_NODEGROUP: [],
14013 locking.LEVEL_NODE: [],
14016 def DeclareLocks(self, level):
14017 if level == locking.LEVEL_INSTANCE:
14018 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14020 # Lock instances optimistically, needs verification once node and group
14021 # locks have been acquired
14022 self.needed_locks[locking.LEVEL_INSTANCE] = \
14023 self.cfg.GetNodeGroupInstances(self.group_uuid)
14025 elif level == locking.LEVEL_NODEGROUP:
14026 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14028 if self.req_target_uuids:
14029 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14031 # Lock all groups used by instances optimistically; this requires going
14032 # via the node before it's locked, requiring verification later on
14033 lock_groups.update(group_uuid
14034 for instance_name in
14035 self.owned_locks(locking.LEVEL_INSTANCE)
14037 self.cfg.GetInstanceNodeGroups(instance_name))
14039 # No target groups, need to lock all of them
14040 lock_groups = locking.ALL_SET
14042 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14044 elif level == locking.LEVEL_NODE:
14045 # This will only lock the nodes in the group to be evacuated which
14046 # contain actual instances
14047 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14048 self._LockInstancesNodes()
14050 # Lock all nodes in group to be evacuated and target groups
14051 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14052 assert self.group_uuid in owned_groups
14053 member_nodes = [node_name
14054 for group in owned_groups
14055 for node_name in self.cfg.GetNodeGroup(group).members]
14056 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14058 def CheckPrereq(self):
14059 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14060 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14061 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14063 assert owned_groups.issuperset(self.req_target_uuids)
14064 assert self.group_uuid in owned_groups
14066 # Check if locked instances are still correct
14067 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14069 # Get instance information
14070 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14072 # Check if node groups for locked instances are still correct
14073 for instance_name in owned_instances:
14074 inst = self.instances[instance_name]
14075 assert owned_nodes.issuperset(inst.all_nodes), \
14076 "Instance %s's nodes changed while we kept the lock" % instance_name
14078 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
14081 assert self.group_uuid in inst_groups, \
14082 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
14084 if self.req_target_uuids:
14085 # User requested specific target groups
14086 self.target_uuids = self.req_target_uuids
14088 # All groups except the one to be evacuated are potential targets
14089 self.target_uuids = [group_uuid for group_uuid in owned_groups
14090 if group_uuid != self.group_uuid]
14092 if not self.target_uuids:
14093 raise errors.OpPrereqError("There are no possible target groups",
14094 errors.ECODE_INVAL)
14096 def BuildHooksEnv(self):
14097 """Build hooks env.
14101 "GROUP_NAME": self.op.group_name,
14102 "TARGET_GROUPS": " ".join(self.target_uuids),
14105 def BuildHooksNodes(self):
14106 """Build hooks nodes.
14109 mn = self.cfg.GetMasterNode()
14111 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14113 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14115 return (run_nodes, run_nodes)
14117 def Exec(self, feedback_fn):
14118 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14120 assert self.group_uuid not in self.target_uuids
14122 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14123 instances=instances, target_groups=self.target_uuids)
14125 ial.Run(self.op.iallocator)
14127 if not ial.success:
14128 raise errors.OpPrereqError("Can't compute group evacuation using"
14129 " iallocator '%s': %s" %
14130 (self.op.iallocator, ial.info),
14131 errors.ECODE_NORES)
14133 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14135 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14136 len(jobs), self.op.group_name)
14138 return ResultWithJobs(jobs)
14141 class TagsLU(NoHooksLU): # pylint: disable=W0223
14142 """Generic tags LU.
14144 This is an abstract class which is the parent of all the other tags LUs.
14147 def ExpandNames(self):
14148 self.group_uuid = None
14149 self.needed_locks = {}
14150 if self.op.kind == constants.TAG_NODE:
14151 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14152 self.needed_locks[locking.LEVEL_NODE] = self.op.name
14153 elif self.op.kind == constants.TAG_INSTANCE:
14154 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14155 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14156 elif self.op.kind == constants.TAG_NODEGROUP:
14157 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14159 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14160 # not possible to acquire the BGL based on opcode parameters)
14162 def CheckPrereq(self):
14163 """Check prerequisites.
14166 if self.op.kind == constants.TAG_CLUSTER:
14167 self.target = self.cfg.GetClusterInfo()
14168 elif self.op.kind == constants.TAG_NODE:
14169 self.target = self.cfg.GetNodeInfo(self.op.name)
14170 elif self.op.kind == constants.TAG_INSTANCE:
14171 self.target = self.cfg.GetInstanceInfo(self.op.name)
14172 elif self.op.kind == constants.TAG_NODEGROUP:
14173 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14175 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14176 str(self.op.kind), errors.ECODE_INVAL)
14179 class LUTagsGet(TagsLU):
14180 """Returns the tags of a given object.
14185 def ExpandNames(self):
14186 TagsLU.ExpandNames(self)
14188 # Share locks as this is only a read operation
14189 self.share_locks = _ShareAll()
14191 def Exec(self, feedback_fn):
14192 """Returns the tag list.
14195 return list(self.target.GetTags())
14198 class LUTagsSearch(NoHooksLU):
14199 """Searches the tags for a given pattern.
14204 def ExpandNames(self):
14205 self.needed_locks = {}
14207 def CheckPrereq(self):
14208 """Check prerequisites.
14210 This checks the pattern passed for validity by compiling it.
14214 self.re = re.compile(self.op.pattern)
14215 except re.error, err:
14216 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14217 (self.op.pattern, err), errors.ECODE_INVAL)
14219 def Exec(self, feedback_fn):
14220 """Returns the tag list.
14224 tgts = [("/cluster", cfg.GetClusterInfo())]
14225 ilist = cfg.GetAllInstancesInfo().values()
14226 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14227 nlist = cfg.GetAllNodesInfo().values()
14228 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14229 tgts.extend(("/nodegroup/%s" % n.name, n)
14230 for n in cfg.GetAllNodeGroupsInfo().values())
14232 for path, target in tgts:
14233 for tag in target.GetTags():
14234 if self.re.search(tag):
14235 results.append((path, tag))
14239 class LUTagsSet(TagsLU):
14240 """Sets a tag on a given object.
14245 def CheckPrereq(self):
14246 """Check prerequisites.
14248 This checks the type and length of the tag name and value.
14251 TagsLU.CheckPrereq(self)
14252 for tag in self.op.tags:
14253 objects.TaggableObject.ValidateTag(tag)
14255 def Exec(self, feedback_fn):
14260 for tag in self.op.tags:
14261 self.target.AddTag(tag)
14262 except errors.TagError, err:
14263 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14264 self.cfg.Update(self.target, feedback_fn)
14267 class LUTagsDel(TagsLU):
14268 """Delete a list of tags from a given object.
14273 def CheckPrereq(self):
14274 """Check prerequisites.
14276 This checks that we have the given tag.
14279 TagsLU.CheckPrereq(self)
14280 for tag in self.op.tags:
14281 objects.TaggableObject.ValidateTag(tag)
14282 del_tags = frozenset(self.op.tags)
14283 cur_tags = self.target.GetTags()
14285 diff_tags = del_tags - cur_tags
14287 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14288 raise errors.OpPrereqError("Tag(s) %s not found" %
14289 (utils.CommaJoin(diff_names), ),
14290 errors.ECODE_NOENT)
14292 def Exec(self, feedback_fn):
14293 """Remove the tag from the object.
14296 for tag in self.op.tags:
14297 self.target.RemoveTag(tag)
14298 self.cfg.Update(self.target, feedback_fn)
14301 class LUTestDelay(NoHooksLU):
14302 """Sleep for a specified amount of time.
14304 This LU sleeps on the master and/or nodes for a specified amount of time.
14310 def ExpandNames(self):
14311 """Expand names and set required locks.
14313 This expands the node list, if any.
14316 self.needed_locks = {}
14317 if self.op.on_nodes:
14318 # _GetWantedNodes can be used here, but is not always appropriate to use
14319 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14320 # more information.
14321 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14322 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14324 def _TestDelay(self):
14325 """Do the actual sleep.
14328 if self.op.on_master:
14329 if not utils.TestDelay(self.op.duration):
14330 raise errors.OpExecError("Error during master delay test")
14331 if self.op.on_nodes:
14332 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14333 for node, node_result in result.items():
14334 node_result.Raise("Failure during rpc call to node %s" % node)
14336 def Exec(self, feedback_fn):
14337 """Execute the test delay opcode, with the wanted repetitions.
14340 if self.op.repeat == 0:
14343 top_value = self.op.repeat - 1
14344 for i in range(self.op.repeat):
14345 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14349 class LUTestJqueue(NoHooksLU):
14350 """Utility LU to test some aspects of the job queue.
14355 # Must be lower than default timeout for WaitForJobChange to see whether it
14356 # notices changed jobs
14357 _CLIENT_CONNECT_TIMEOUT = 20.0
14358 _CLIENT_CONFIRM_TIMEOUT = 60.0
14361 def _NotifyUsingSocket(cls, cb, errcls):
14362 """Opens a Unix socket and waits for another program to connect.
14365 @param cb: Callback to send socket name to client
14366 @type errcls: class
14367 @param errcls: Exception class to use for errors
14370 # Using a temporary directory as there's no easy way to create temporary
14371 # sockets without writing a custom loop around tempfile.mktemp and
14373 tmpdir = tempfile.mkdtemp()
14375 tmpsock = utils.PathJoin(tmpdir, "sock")
14377 logging.debug("Creating temporary socket at %s", tmpsock)
14378 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14383 # Send details to client
14386 # Wait for client to connect before continuing
14387 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14389 (conn, _) = sock.accept()
14390 except socket.error, err:
14391 raise errcls("Client didn't connect in time (%s)" % err)
14395 # Remove as soon as client is connected
14396 shutil.rmtree(tmpdir)
14398 # Wait for client to close
14401 # pylint: disable=E1101
14402 # Instance of '_socketobject' has no ... member
14403 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14405 except socket.error, err:
14406 raise errcls("Client failed to confirm notification (%s)" % err)
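# Illustrative sketch (not part of the original module) of the temporary
# Unix socket technique used above; a throw-away directory sidesteps the race
# of tempfile.mktemp while still giving the socket a fresh path:
#
#   import os, shutil, socket, tempfile
#   tmpdir = tempfile.mkdtemp()
#   try:
#     sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#     sock.bind(os.path.join(tmpdir, "sock"))
#     sock.listen(1)
#     sock.settimeout(20.0)      # fail instead of waiting forever
#     (conn, _) = sock.accept()
#     conn.close()
#   finally:
#     shutil.rmtree(tmpdir)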
14410 def _SendNotification(self, test, arg, sockname):
14411 """Sends a notification to the client.
14414 @param test: Test name
14415 @param arg: Test argument (depends on test)
14416 @type sockname: string
14417 @param sockname: Socket path
14420 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14422 def _Notify(self, prereq, test, arg):
14423 """Notifies the client of a test.
14426 @param prereq: Whether this is a prereq-phase test
14428 @param test: Test name
14429 @param arg: Test argument (depends on test)
14433 errcls = errors.OpPrereqError
14435 errcls = errors.OpExecError
14437 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14441 def CheckArguments(self):
14442 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14443 self.expandnames_calls = 0
14445 def ExpandNames(self):
14446 checkargs_calls = getattr(self, "checkargs_calls", 0)
14447 if checkargs_calls < 1:
14448 raise errors.ProgrammerError("CheckArguments was not called")
14450 self.expandnames_calls += 1
14452 if self.op.notify_waitlock:
14453 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14455 self.LogInfo("Expanding names")
14457 # Get lock on master node (just to get a lock, not for a particular reason)
14458 self.needed_locks = {
14459 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14462 def Exec(self, feedback_fn):
14463 if self.expandnames_calls < 1:
14464 raise errors.ProgrammerError("ExpandNames was not called")
14466 if self.op.notify_exec:
14467 self._Notify(False, constants.JQT_EXEC, None)
14469 self.LogInfo("Executing")
14471 if self.op.log_messages:
14472 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14473 for idx, msg in enumerate(self.op.log_messages):
14474 self.LogInfo("Sending log message %s", idx + 1)
14475 feedback_fn(constants.JQT_MSGPREFIX + msg)
14476 # Report how many test messages have been sent
14477 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14480 raise errors.OpExecError("Opcode failure was requested")
14485 class IAllocator(object):
14486 """IAllocator framework.
14488 An IAllocator instance has four sets of attributes:
14489 - cfg that is needed to query the cluster
14490 - input data (all members of the _KEYS class attribute are required)
14491 - four buffer attributes (in|out_data|text), that represent the
14492 input (to the external script) in text and data structure format,
14493 and the output from it, again in two formats
14494 - the result variables from the script (success, info, nodes) for easy usage
14498 # pylint: disable=R0902
14499 # lots of instance attributes
14501 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14503 self.rpc = rpc_runner
14504 # init buffer variables
14505 self.in_text = self.out_text = self.in_data = self.out_data = None
14506 # init all input fields so that pylint is happy
14508 self.memory = self.disks = self.disk_template = None
14509 self.os = self.tags = self.nics = self.vcpus = None
14510 self.hypervisor = None
14511 self.relocate_from = None
14513 self.instances = None
14514 self.evac_mode = None
14515 self.target_groups = []
14517 self.required_nodes = None
14518 # init result fields
14519 self.success = self.info = self.result = None
14522 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14524 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14525 " IAllocator" % self.mode)
14527 keyset = [n for (n, _) in keydata]
14530 if key not in keyset:
14531 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14532 " IAllocator" % key)
14533 setattr(self, key, kwargs[key])
14536 if key not in kwargs:
14537 raise errors.ProgrammerError("Missing input parameter '%s' to"
14538 " IAllocator" % key)
14539 self._BuildInputData(compat.partial(fn, self), keydata)
14541 def _ComputeClusterData(self):
14542 """Compute the generic allocator input data.
14544 This is the data that is independent of the actual operation.
14548 cluster_info = cfg.GetClusterInfo()
14551 "version": constants.IALLOCATOR_VERSION,
14552 "cluster_name": cfg.GetClusterName(),
14553 "cluster_tags": list(cluster_info.GetTags()),
14554 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14555 "ipolicy": cluster_info.ipolicy,
14557 ninfo = cfg.GetAllNodesInfo()
14558 iinfo = cfg.GetAllInstancesInfo().values()
14559 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14562 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14564 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14565 hypervisor_name = self.hypervisor
14566 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14567 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14569 hypervisor_name = cluster_info.primary_hypervisor
14571 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14574 self.rpc.call_all_instances_info(node_list,
14575 cluster_info.enabled_hypervisors)
14577 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14579 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14580 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14581 i_list, config_ndata)
14582 assert len(data["nodes"]) == len(ninfo), \
14583 "Incomplete node data computed"
14585 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14587 self.in_data = data
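# Illustrative sketch (not part of the original module) of the rough shape of
# self.in_data as assembled above; the concrete values are invented for this
# example:
#
#   {
#     "version": constants.IALLOCATOR_VERSION,
#     "cluster_name": "cluster.example.com",
#     "cluster_tags": [],
#     "enabled_hypervisors": ["xen-pvm"],
#     "ipolicy": {...},
#     "nodegroups": {"<group-uuid>": {"name": ..., "alloc_policy": ...,
#                                     "ipolicy": ...}},
#     "nodes": {"node1.example.com": {"total_memory": ..., "free_memory": ...,
#                                     "group": "<group-uuid>", ...}},
#     "instances": {"inst1.example.com": {"memory": ..., "vcpus": ...,
#                                         "disk_template": ..., ...}},
#   }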
14590 def _ComputeNodeGroupData(cfg):
14591 """Compute node groups data.
14594 cluster = cfg.GetClusterInfo()
14595 ng = dict((guuid, {
14596 "name": gdata.name,
14597 "alloc_policy": gdata.alloc_policy,
14598 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14600 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14605 def _ComputeBasicNodeData(cfg, node_cfg):
14606 """Compute global node data.
14609 @returns: a dict of name: (node dict, node config)
14612 # fill in static (config-based) values
14613 node_results = dict((ninfo.name, {
14614 "tags": list(ninfo.GetTags()),
14615 "primary_ip": ninfo.primary_ip,
14616 "secondary_ip": ninfo.secondary_ip,
14617 "offline": ninfo.offline,
14618 "drained": ninfo.drained,
14619 "master_candidate": ninfo.master_candidate,
14620 "group": ninfo.group,
14621 "master_capable": ninfo.master_capable,
14622 "vm_capable": ninfo.vm_capable,
14623 "ndparams": cfg.GetNdParams(ninfo),
14625 for ninfo in node_cfg.values())
14627 return node_results
14630 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14632 """Compute global node data.
14634 @param node_results: the basic node structures as filled from the config
14637 #TODO(dynmem): compute the right data on MAX and MIN memory
14638 # make a copy of the current dict
14639 node_results = dict(node_results)
14640 for nname, nresult in node_data.items():
14641 assert nname in node_results, "Missing basic data for node %s" % nname
14642 ninfo = node_cfg[nname]
14644 if not (ninfo.offline or ninfo.drained):
14645 nresult.Raise("Can't get data for node %s" % nname)
14646 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14648 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14650 for attr in ["memory_total", "memory_free", "memory_dom0",
14651 "vg_size", "vg_free", "cpu_total"]:
14652 if attr not in remote_info:
14653 raise errors.OpExecError("Node '%s' didn't return attribute"
14654 " '%s'" % (nname, attr))
14655 if not isinstance(remote_info[attr], int):
14656 raise errors.OpExecError("Node '%s' returned invalid value"
14658 (nname, attr, remote_info[attr]))
14659 # compute memory used by primary instances
14660 i_p_mem = i_p_up_mem = 0
14661 for iinfo, beinfo in i_list:
14662 if iinfo.primary_node == nname:
14663 i_p_mem += beinfo[constants.BE_MAXMEM]
14664 if iinfo.name not in node_iinfo[nname].payload:
14667 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14668 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14669 remote_info["memory_free"] -= max(0, i_mem_diff)
14671 if iinfo.admin_state == constants.ADMINST_UP:
14672 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14674 # compute memory used by instances
14676 "total_memory": remote_info["memory_total"],
14677 "reserved_memory": remote_info["memory_dom0"],
14678 "free_memory": remote_info["memory_free"],
14679 "total_disk": remote_info["vg_size"],
14680 "free_disk": remote_info["vg_free"],
14681 "total_cpus": remote_info["cpu_total"],
14682 "i_pri_memory": i_p_mem,
14683 "i_pri_up_memory": i_p_up_mem,
14685 pnr_dyn.update(node_results[nname])
14686 node_results[nname] = pnr_dyn
14688 return node_results
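# Worked example of the memory accounting above (hypothetical numbers): if a
# primary instance has BE_MAXMEM = 4096 MiB but the hypervisor reports only
# 3072 MiB actually in use, then i_mem_diff = 4096 - 3072 = 1024 and
# "memory_free" is reduced by max(0, 1024); the memory the instance could
# still claim is therefore reported to the allocator as used, not free.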
14691 def _ComputeInstanceData(cluster_info, i_list):
14692 """Compute global instance data.
14696 for iinfo, beinfo in i_list:
14698 for nic in iinfo.nics:
14699 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14703 "mode": filled_params[constants.NIC_MODE],
14704 "link": filled_params[constants.NIC_LINK],
14706 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14707 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14708 nic_data.append(nic_dict)
14710 "tags": list(iinfo.GetTags()),
14711 "admin_state": iinfo.admin_state,
14712 "vcpus": beinfo[constants.BE_VCPUS],
14713 "memory": beinfo[constants.BE_MAXMEM],
14715 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14717 "disks": [{constants.IDISK_SIZE: dsk.size,
14718 constants.IDISK_MODE: dsk.mode}
14719 for dsk in iinfo.disks],
14720 "disk_template": iinfo.disk_template,
14721 "hypervisor": iinfo.hypervisor,
14723 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14725 instance_data[iinfo.name] = pir
14727 return instance_data
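# Sketch of one entry in instance_data (keys as built above, values purely
# illustrative; the NIC dicts are the nic_data entries assembled earlier):
#
#   "inst1.example.com": {
#     "tags": [],
#     "admin_state": "up",
#     "vcpus": 2,
#     "memory": 2048,
#     "nics": [{"mode": "bridged", "link": "xen-br0", "bridge": "xen-br0"}],
#     "nodes": ["node1.example.com", "node2.example.com"],
#     "disks": [{"size": 10240, "mode": "rw"}],
#     "disk_template": "drbd",
#     "disk_space_total": ...,
#     "hypervisor": "xen-pvm",
#   }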
14729 def _AddNewInstance(self):
14730 """Add new instance data to allocator structure.
14732 This in combination with _ComputeClusterData will create the
14733 correct structure needed as input for the allocator.
14735 The checks for the completeness of the opcode must have already been
14739 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14741 if self.disk_template in constants.DTS_INT_MIRROR:
14742 self.required_nodes = 2
14744 self.required_nodes = 1
14748 "disk_template": self.disk_template,
14751 "vcpus": self.vcpus,
14752 "memory": self.memory,
14753 "disks": self.disks,
14754 "disk_space_total": disk_space,
14756 "required_nodes": self.required_nodes,
14757 "hypervisor": self.hypervisor,
14762 def _AddRelocateInstance(self):
14763 """Add relocate instance data to allocator structure.
14765 This in combination with _ComputeClusterData will create the
14766 correct structure needed as input for the allocator.
14768 The checks for the completeness of the opcode must have already been
14772 instance = self.cfg.GetInstanceInfo(self.name)
14773 if instance is None:
14774 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14775 " IAllocator" % self.name)
14777 if instance.disk_template not in constants.DTS_MIRRORED:
14778 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14779 errors.ECODE_INVAL)
14781 if instance.disk_template in constants.DTS_INT_MIRROR and \
14782 len(instance.secondary_nodes) != 1:
14783 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
14784 errors.ECODE_STATE)
14786 self.required_nodes = 1
14787 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14788 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14792 "disk_space_total": disk_space,
14793 "required_nodes": self.required_nodes,
14794 "relocate_from": self.relocate_from,
14798 def _AddNodeEvacuate(self):
14799 """Get data for node-evacuate requests.
14803 "instances": self.instances,
14804 "evac_mode": self.evac_mode,
14807 def _AddChangeGroup(self):
14808 """Get data for change-group requests.
14812 "instances": self.instances,
14813 "target_groups": self.target_groups,
14816 def _BuildInputData(self, fn, keydata):
14817 """Build input data structures.
14820 self._ComputeClusterData()
14823 request["type"] = self.mode
14824 for keyname, keytype in keydata:
14825 if keyname not in request:
14826 raise errors.ProgrammerError("Request parameter %s is missing" %
14828 val = request[keyname]
14829 if not keytype(val):
14830 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14831 " validation, value %s, expected"
14832 " type %s" % (keyname, val, keytype))
14833 self.in_data["request"] = request
14835 self.in_text = serializer.Dump(self.in_data)
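# Hedged example (not taken from the upstream sources) of what self.in_text
# might look like for an allocation request, once the cluster data and the
# per-mode "request" section built above are serialized together:
#
#   {
#     "version": ...,
#     "cluster_name": ...,
#     "nodegroups": {...},
#     "nodes": {...},
#     "instances": {...},
#     "request": {
#       "type": "allocate",
#       "name": "inst1.example.com",
#       "memory": 2048,
#       "disks": [{"size": 10240, "mode": "rw"}],
#       "disk_space_total": ...,
#       "disk_template": "drbd",
#       "os": "debian-image",
#       "tags": [],
#       "nics": [...],
#       "vcpus": 2,
#       "required_nodes": 2,
#       "hypervisor": "xen-pvm",
#     },
#   }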
14837 _STRING_LIST = ht.TListOf(ht.TString)
14838 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14839 # pylint: disable=E1101
14840 # Class '...' has no 'OP_ID' member
14841 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14842 opcodes.OpInstanceMigrate.OP_ID,
14843 opcodes.OpInstanceReplaceDisks.OP_ID])
14847 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14848 ht.TItems([ht.TNonEmptyString,
14849 ht.TNonEmptyString,
14850 ht.TListOf(ht.TNonEmptyString),
14853 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14854 ht.TItems([ht.TNonEmptyString,
14857 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14858 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
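# Hedged illustration of a value accepted by _NEVAC_RESULT (the data is
# invented): a triple of successfully moved instances, failed instances and
# the job sets to submit, e.g.
#
#   [
#     [["inst1.example.com", "target-group", ["node3", "node4"]]],   # moved
#     [["inst2.example.com", "some failure reason"]],                # failed
#     [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]],                     # jobs
#   ]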
14861 constants.IALLOCATOR_MODE_ALLOC:
14864 ("name", ht.TString),
14865 ("memory", ht.TInt),
14866 ("disks", ht.TListOf(ht.TDict)),
14867 ("disk_template", ht.TString),
14868 ("os", ht.TString),
14869 ("tags", _STRING_LIST),
14870 ("nics", ht.TListOf(ht.TDict)),
14871 ("vcpus", ht.TInt),
14872 ("hypervisor", ht.TString),
14874 constants.IALLOCATOR_MODE_RELOC:
14875 (_AddRelocateInstance,
14876 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14878 constants.IALLOCATOR_MODE_NODE_EVAC:
14879 (_AddNodeEvacuate, [
14880 ("instances", _STRING_LIST),
14881 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14883 constants.IALLOCATOR_MODE_CHG_GROUP:
14884 (_AddChangeGroup, [
14885 ("instances", _STRING_LIST),
14886 ("target_groups", _STRING_LIST),
14890 def Run(self, name, validate=True, call_fn=None):
14891 """Run an instance allocator and return the results.
14894 if call_fn is None:
14895 call_fn = self.rpc.call_iallocator_runner
14897 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14898 result.Raise("Failure while running the iallocator script")
14900 self.out_text = result.payload
14902 self._ValidateResult()
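# Minimal usage sketch (hypothetical caller code): build the IAllocator for
# a given mode, run the named script and read the validated result:
#
#   ial = IAllocator(cfg, rpc_runner, mode=constants.IALLOCATOR_MODE_ALLOC,
#                    name="inst1.example.com", memory=2048, disks=[...],
#                    disk_template="drbd", os="debian-image", tags=[],
#                    nics=[...], vcpus=2, hypervisor="xen-pvm")
#   ial.Run("hail")
#   if not ial.success:
#     raise errors.OpExecError("iallocator failed: %s" % ial.info)
#   chosen_nodes = ial.result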
14904 def _ValidateResult(self):
14905 """Process the allocator results.
14907 This will process and, if successful, save the result in
14908 self.out_data and the other output attributes.
14912 rdict = serializer.Load(self.out_text)
14913 except Exception, err:
14914 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14916 if not isinstance(rdict, dict):
14917 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14919 # TODO: remove backwards compatibility in later versions
14920 if "nodes" in rdict and "result" not in rdict:
14921 rdict["result"] = rdict["nodes"]
14924 for key in "success", "info", "result":
14925 if key not in rdict:
14926 raise errors.OpExecError("Can't parse iallocator results:"
14927 " missing key '%s'" % key)
14928 setattr(self, key, rdict[key])
14930 if not self._result_check(self.result):
14931 raise errors.OpExecError("Iallocator returned invalid result,"
14932 " expected %s, got %s" %
14933 (self._result_check, self.result),
14934 errors.ECODE_INVAL)
14936 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14937 assert self.relocate_from is not None
14938 assert self.required_nodes == 1
14940 node2group = dict((name, ndata["group"])
14941 for (name, ndata) in self.in_data["nodes"].items())
14943 fn = compat.partial(self._NodesToGroups, node2group,
14944 self.in_data["nodegroups"])
14946 instance = self.cfg.GetInstanceInfo(self.name)
14947 request_groups = fn(self.relocate_from + [instance.primary_node])
14948 result_groups = fn(rdict["result"] + [instance.primary_node])
14950 if self.success and not set(result_groups).issubset(request_groups):
14951 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14952 " differ from original groups (%s)" %
14953 (utils.CommaJoin(result_groups),
14954 utils.CommaJoin(request_groups)))
14956 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14957 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14959 self.out_data = rdict
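# Hedged example of the document an iallocator script is expected to return
# (values invented): the three mandatory keys checked above, with "result"
# in whatever shape the per-mode result check requires, e.g. for an
# allocation a list of node names:
#
#   {
#     "success": true,
#     "info": "allocation successful",
#     "result": ["node2.example.com", "node3.example.com"]
#   }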
14962 def _NodesToGroups(node2group, groups, nodes):
14963 """Returns a list of unique group names for a list of nodes.
14965 @type node2group: dict
14966 @param node2group: Map from node name to group UUID
14968 @param groups: Group information
14970 @param nodes: Node names
14977 group_uuid = node2group[node]
14979 # Ignore unknown node
14983 group = groups[group_uuid]
14985 # Can't find group, let's use UUID
14986 group_name = group_uuid
14988 group_name = group["name"]
14990 result.add(group_name)
14992 return sorted(result)
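# Usage sketch (hypothetical data): with node2group = {"node1": "uuid-a",
# "node2": "uuid-b"} and groups = {"uuid-a": {"name": "group-a"}}, calling
# _NodesToGroups(node2group, groups, ["node1", "node2", "unknown-node"])
# returns ["group-a", "uuid-b"]: unknown nodes are skipped and a group that
# cannot be resolved falls back to its UUID.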
14995 class LUTestAllocator(NoHooksLU):
14996 """Run allocator tests.
14998 This LU runs the allocator tests.
15001 def CheckPrereq(self):
15002 """Check prerequisites.
15004 This checks the opcode parameters depending on the direction and mode of the test.
15007 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15008 for attr in ["memory", "disks", "disk_template",
15009 "os", "tags", "nics", "vcpus"]:
15010 if not hasattr(self.op, attr):
15011 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15012 attr, errors.ECODE_INVAL)
15013 iname = self.cfg.ExpandInstanceName(self.op.name)
15014 if iname is not None:
15015 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15016 iname, errors.ECODE_EXISTS)
15017 if not isinstance(self.op.nics, list):
15018 raise errors.OpPrereqError("Invalid parameter 'nics'",
15019 errors.ECODE_INVAL)
15020 if not isinstance(self.op.disks, list):
15021 raise errors.OpPrereqError("Invalid parameter 'disks'",
15022 errors.ECODE_INVAL)
15023 for row in self.op.disks:
15024 if (not isinstance(row, dict) or
15025 constants.IDISK_SIZE not in row or
15026 not isinstance(row[constants.IDISK_SIZE], int) or
15027 constants.IDISK_MODE not in row or
15028 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15029 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15030 " parameter", errors.ECODE_INVAL)
15031 if self.op.hypervisor is None:
15032 self.op.hypervisor = self.cfg.GetHypervisorType()
15033 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15034 fname = _ExpandInstanceName(self.cfg, self.op.name)
15035 self.op.name = fname
15036 self.relocate_from = \
15037 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15038 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15039 constants.IALLOCATOR_MODE_NODE_EVAC):
15040 if not self.op.instances:
15041 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15042 self.op.instances = _GetWantedInstances(self, self.op.instances)
15044 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15045 self.op.mode, errors.ECODE_INVAL)
15047 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15048 if self.op.allocator is None:
15049 raise errors.OpPrereqError("Missing allocator name",
15050 errors.ECODE_INVAL)
15051 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15052 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15053 self.op.direction, errors.ECODE_INVAL)
15055 def Exec(self, feedback_fn):
15056 """Run the allocator test.
15059 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15060 ial = IAllocator(self.cfg, self.rpc,
15063 memory=self.op.memory,
15064 disks=self.op.disks,
15065 disk_template=self.op.disk_template,
15069 vcpus=self.op.vcpus,
15070 hypervisor=self.op.hypervisor,
15072 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15073 ial = IAllocator(self.cfg, self.rpc,
15076 relocate_from=list(self.relocate_from),
15078 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15079 ial = IAllocator(self.cfg, self.rpc,
15081 instances=self.op.instances,
15082 target_groups=self.op.target_groups)
15083 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15084 ial = IAllocator(self.cfg, self.rpc,
15086 instances=self.op.instances,
15087 evac_mode=self.op.evac_mode)
15089 raise errors.ProgrammerError("Unhandled mode %s in"
15090 " LUTestAllocator.Exec" % self.op.mode)
15092 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15093 result = ial.in_text
15095 ial.Run(self.op.allocator, validate=False)
15096 result = ial.out_text
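# Hedged example (the opcode fields are only sketched here) of driving this
# LU from a test: an OpTestAllocator opcode with direction "in" merely
# returns the generated input text, while direction "out" also runs the
# named allocator script, e.g.
#
#   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
#                                mode=constants.IALLOCATOR_MODE_RELOC,
#                                name="inst1.example.com", allocator="hail",
#                                ...)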
15100 #: Query type implementations
15102 constants.QR_INSTANCE: _InstanceQuery,
15103 constants.QR_NODE: _NodeQuery,
15104 constants.QR_GROUP: _GroupQuery,
15105 constants.QR_OS: _OsQuery,
15108 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15111 def _GetQueryImplementation(name):
15112 """Returns the implementation for a query type.
15114 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15118 return _QUERY_IMPL[name]
15120 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15121 errors.ECODE_INVAL)
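# Usage sketch: query LUs presumably look up their backend with, e.g.,
# _GetQueryImplementation(constants.QR_INSTANCE), which returns the
# _InstanceQuery class registered in _QUERY_IMPL above; an unknown resource
# name raises OpPrereqError rather than leaking a KeyError.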