4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
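
A rough usage sketch (the opcode used here is only an illustration; any
valid opcodes can be submitted, and extra keyword arguments become
additional return values)::

  # From some LU's Exec(): submit one follow-up job with a single opcode
  # and also pass an extra value back to the caller
  return ResultWithJobs([[opcodes.OpTestDelay(duration=0)]],
                        custom_result="extra value")
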
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
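
A minimal, purely illustrative skeleton following the rules above (not an
actual LU from this module) could look like::

  class LUExampleNoop(LogicalUnit):
    HPATH = None
    HTYPE = None
    REQ_BGL = False

    def ExpandNames(self):
      self.needed_locks = {}

    def BuildHooksEnv(self):
      return {}

    def BuildHooksNodes(self):
      return ([], [])

    def CheckPrereq(self):
      pass

    def Exec(self, feedback_fn):
      feedback_fn("Nothing to do")

In practice, LUs that run no hooks would derive from L{NoHooksLU} below.
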
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing it separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods no longer need to worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
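
For example (illustrative only), an LU that only reads node data could
acquire the node locks in shared mode::

  self.share_locks[locking.LEVEL_NODE] = 1
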
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
223 }
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
227 }
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
277 tl.CheckPrereq()
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If there are no nodes, an empty
318 list should be returned (not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused-argument and
345 # could-be-a-function warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
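  # Illustrative use from a derived LU's ExpandNames (a sketch, not taken
  # from a specific LU in this module):
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE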
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
401 wanted_nodes = []
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
405 if not primary_only:
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
412 else:
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU;
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
496 def __init__(self, qfilter, fields, use_locking):
497 """Initializes this class.
500 self.use_locking = use_locking
502 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
504 self.requested_data = self.query.RequestedData()
505 self.names = self.query.RequestedNames()
507 # Sort only if no names were requested
508 self.sort_by_name = not self.names
510 self.do_locking = None
513 def _GetNames(self, lu, all_names, lock_level):
514 """Helper function to determine names asked for in the query.
518 names = lu.owned_locks(lock_level)
522 if self.wanted == locking.ALL_SET:
523 assert not self.names
524 # caller didn't specify names, so ordering is not important
525 return utils.NiceSort(names)
527 # caller specified names and we must keep the same order
529 assert not self.do_locking or lu.glm.is_owned(lock_level)
531 missing = set(self.wanted).difference(names)
533 raise errors.OpExecError("Some items were removed before retrieving"
534 " their data: %s" % missing)
536 # Return expanded names
539 def ExpandNames(self, lu):
540 """Expand names for this query.
542 See L{LogicalUnit.ExpandNames}.
545 raise NotImplementedError()
547 def DeclareLocks(self, lu, level):
548 """Declare locks for this query.
550 See L{LogicalUnit.DeclareLocks}.
553 raise NotImplementedError()
555 def _GetQueryData(self, lu):
556 """Collects all data for this query.
558 @return: Query data object
561 raise NotImplementedError()
563 def NewStyleQuery(self, lu):
564 """Collect data and execute query.
567 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
568 sort_by_name=self.sort_by_name)
570 def OldStyleQuery(self, lu):
571 """Collect data and execute query.
574 return self.query.OldStyleQuery(self._GetQueryData(lu),
575 sort_by_name=self.sort_by_name)
579 """Returns a dict declaring all lock levels shared.
582 return dict.fromkeys(locking.LEVELS, 1)
585 def _MakeLegacyNodeInfo(data):
586 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
588 Converts the data into a single dictionary. This is fine for most use cases,
589 but some require information from more than one volume group or hypervisor.
592 (bootid, (vg_info, ), (hv_info, )) = data
594 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
599 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
600 """Checks if the owned node groups are still correct for an instance.
602 @type cfg: L{config.ConfigWriter}
603 @param cfg: The cluster configuration
604 @type instance_name: string
605 @param instance_name: Instance name
606 @type owned_groups: set or frozenset
607 @param owned_groups: List of currently owned node groups
610 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
612 if not owned_groups.issuperset(inst_groups):
613 raise errors.OpPrereqError("Instance %s's node groups changed since"
614 " locks were acquired, current groups are"
615 " are '%s', owning groups '%s'; retry the"
618 utils.CommaJoin(inst_groups),
619 utils.CommaJoin(owned_groups)),
625 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
626 """Checks if the instances in a node group are still correct.
628 @type cfg: L{config.ConfigWriter}
629 @param cfg: The cluster configuration
630 @type group_uuid: string
631 @param group_uuid: Node group UUID
632 @type owned_instances: set or frozenset
633 @param owned_instances: List of currently owned instances
636 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
637 if owned_instances != wanted_instances:
638 raise errors.OpPrereqError("Instances in node group '%s' changed since"
639 " locks were acquired, wanted '%s', have '%s';"
640 " retry the operation" %
642 utils.CommaJoin(wanted_instances),
643 utils.CommaJoin(owned_instances)),
646 return wanted_instances
649 def _SupportsOob(cfg, node):
650 """Tells if node supports OOB.
652 @type cfg: L{config.ConfigWriter}
653 @param cfg: The cluster configuration
654 @type node: L{objects.Node}
655 @param node: The node
656 @return: The OOB script if supported or an empty string otherwise
659 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
662 def _GetWantedNodes(lu, nodes):
663 """Returns list of checked and expanded node names.
665 @type lu: L{LogicalUnit}
666 @param lu: the logical unit on whose behalf we execute
668 @param nodes: list of node names or None for all nodes
670 @return: the list of nodes, sorted
671 @raise errors.ProgrammerError: if the nodes parameter is wrong type
674 if nodes:
675 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
677 return utils.NiceSort(lu.cfg.GetNodeList())
680 def _GetWantedInstances(lu, instances):
681 """Returns list of checked and expanded instance names.
683 @type lu: L{LogicalUnit}
684 @param lu: the logical unit on whose behalf we execute
685 @type instances: list
686 @param instances: list of instance names or None for all instances
688 @return: the list of instances, sorted
689 @raise errors.OpPrereqError: if the instances parameter is wrong type
690 @raise errors.OpPrereqError: if any of the passed instances is not found
693 if instances:
694 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
695 else:
696 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
700 def _GetUpdatedParams(old_params, update_dict,
701 use_default=True, use_none=False):
702 """Return the new version of a parameter dictionary.
704 @type old_params: dict
705 @param old_params: old parameters
706 @type update_dict: dict
707 @param update_dict: dict containing new parameter values, or
708 constants.VALUE_DEFAULT to reset the parameter to its default
710 @type use_default: boolean
711 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
712 values as 'to be deleted' values
713 @type use_none: boolean
714 @param use_none: whether to recognise C{None} values as 'to be
717 @return: the new parameter dictionary
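
Example with made-up values::

  # old_params  = {"a": 1, "b": 2}
  # update_dict = {"b": constants.VALUE_DEFAULT, "c": 3}
  # result (with use_default=True): {"a": 1, "c": 3}
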
720 params_copy = copy.deepcopy(old_params)
721 for key, val in update_dict.iteritems():
722 if ((use_default and val == constants.VALUE_DEFAULT) or
723 (use_none and val is None)):
729 params_copy[key] = val
733 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
734 """Return the new version of a instance policy.
736 @param group_policy: whether this policy applies to a group and thus
737 we should support removal of policy entries
740 use_none = use_default = group_policy
741 ipolicy = copy.deepcopy(old_ipolicy)
742 for key, value in new_ipolicy.items():
743 if key not in constants.IPOLICY_ALL_KEYS:
744 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
746 if key in constants.IPOLICY_ISPECS:
747 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
748 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
750 use_default=use_default)
752 if not value or value == [constants.VALUE_DEFAULT]:
756 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
757 " on the cluster'" % key,
760 if key in constants.IPOLICY_PARAMETERS:
761 # FIXME: we assume all such values are float
763 ipolicy[key] = float(value)
764 except (TypeError, ValueError), err:
765 raise errors.OpPrereqError("Invalid value for attribute"
766 " '%s': '%s', error: %s" %
767 (key, value, err), errors.ECODE_INVAL)
769 # FIXME: we assume all others are lists; this should be redone
771 ipolicy[key] = list(value)
772 try:
773 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
774 except errors.ConfigurationError, err:
775 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
780 def _UpdateAndVerifySubDict(base, updates, type_check):
781 """Updates and verifies a dict with sub dicts of the same type.
783 @param base: The dict with the old data
784 @param updates: The dict with the new data
785 @param type_check: Dict suitable to ForceDictType to verify correct types
786 @return: A new dict with updated and verified values
789 def fn(old, value):
790 new = _GetUpdatedParams(old, value)
791 utils.ForceDictType(new, type_check)
792 return new
794 ret = copy.deepcopy(base)
795 ret.update(dict((key, fn(base.get(key, {}), value))
796 for key, value in updates.items()))
800 def _MergeAndVerifyHvState(op_input, obj_input):
801 """Combines the hv state from an opcode with the one of the object
803 @param op_input: The input dict from the opcode
804 @param obj_input: The input dict from the objects
805 @return: The verified and updated dict
809 invalid_hvs = set(op_input) - constants.HYPER_TYPES
811 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
812 " %s" % utils.CommaJoin(invalid_hvs),
814 if obj_input is None:
816 type_check = constants.HVSTS_PARAMETER_TYPES
817 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
822 def _MergeAndVerifyDiskState(op_input, obj_input):
823 """Combines the disk state from an opcode with the one of the object
825 @param op_input: The input dict from the opcode
826 @param obj_input: The input dict from the objects
827 @return: The verified and updated dict
830 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
832 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
833 utils.CommaJoin(invalid_dst),
835 type_check = constants.DSS_PARAMETER_TYPES
836 if obj_input is None:
838 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
840 for key, value in op_input.items())
845 def _ReleaseLocks(lu, level, names=None, keep=None):
846 """Releases locks owned by an LU.
848 @type lu: L{LogicalUnit}
849 @param level: Lock level
850 @type names: list or None
851 @param names: Names of locks to release
852 @type keep: list or None
853 @param keep: Names of locks to retain
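
Illustrative call from an LU (the attribute used for the lock names is
only an example)::

  # keep only the locks on the instance's nodes, release all other node
  # locks held by this LU
  _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.instance.all_nodes)
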
856 assert not (keep is not None and names is not None), \
857 "Only one of the 'names' and the 'keep' parameters can be given"
859 if names is not None:
860 should_release = names.__contains__
861 elif keep:
862 should_release = lambda name: name not in keep
863 else:
864 should_release = None
866 owned = lu.owned_locks(level)
868 # Not owning any lock at this level, do nothing
875 # Determine which locks to release
877 if should_release(name):
882 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
884 # Release just some locks
885 lu.glm.release(level, names=release)
887 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
890 lu.glm.release(level)
892 assert not lu.glm.is_owned(level), "No locks should be owned"
895 def _MapInstanceDisksToNodes(instances):
896 """Creates a map from (node, volume) to instance name.
898 @type instances: list of L{objects.Instance}
899 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
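
Illustrative result shape (names made up)::

  # {("node1.example.com", "xenvg/disk0"): "instance1.example.com",
  #  ("node2.example.com", "xenvg/disk0"): "instance1.example.com"}
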
902 return dict(((node, vol), inst.name)
903 for inst in instances
904 for (node, vols) in inst.MapLVsByNode().items()
905 for vol in vols)
908 def _RunPostHook(lu, node_name):
909 """Runs the post-hook for an opcode on a single node.
912 hm = lu.proc.BuildHooksManager(lu)
913 try:
914 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
915 except:
916 # pylint: disable=W0702
917 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
920 def _CheckOutputFields(static, dynamic, selected):
921 """Checks whether all selected fields are valid.
923 @type static: L{utils.FieldSet}
924 @param static: static fields set
925 @type dynamic: L{utils.FieldSet}
926 @param dynamic: dynamic fields set
933 delta = f.NonMatching(selected)
935 raise errors.OpPrereqError("Unknown output fields selected: %s"
936 % ",".join(delta), errors.ECODE_INVAL)
939 def _CheckGlobalHvParams(params):
940 """Validates that given hypervisor params are not global ones.
942 This will ensure that instances don't get customised versions of
943 global parameters.
946 used_globals = constants.HVC_GLOBALS.intersection(params)
948 msg = ("The following hypervisor parameters are global and cannot"
949 " be customized at instance level, please modify them at"
950 " cluster level: %s" % utils.CommaJoin(used_globals))
951 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
954 def _CheckNodeOnline(lu, node, msg=None):
955 """Ensure that a given node is online.
957 @param lu: the LU on behalf of which we make the check
958 @param node: the node to check
959 @param msg: if passed, should be a message to replace the default one
960 @raise errors.OpPrereqError: if the node is offline
964 msg = "Can't use offline node"
965 if lu.cfg.GetNodeInfo(node).offline:
966 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
969 def _CheckNodeNotDrained(lu, node):
970 """Ensure that a given node is not drained.
972 @param lu: the LU on behalf of which we make the check
973 @param node: the node to check
974 @raise errors.OpPrereqError: if the node is drained
977 if lu.cfg.GetNodeInfo(node).drained:
978 raise errors.OpPrereqError("Can't use drained node %s" % node,
982 def _CheckNodeVmCapable(lu, node):
983 """Ensure that a given node is vm capable.
985 @param lu: the LU on behalf of which we make the check
986 @param node: the node to check
987 @raise errors.OpPrereqError: if the node is not vm capable
990 if not lu.cfg.GetNodeInfo(node).vm_capable:
991 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
995 def _CheckNodeHasOS(lu, node, os_name, force_variant):
996 """Ensure that a node supports a given OS.
998 @param lu: the LU on behalf of which we make the check
999 @param node: the node to check
1000 @param os_name: the OS to query about
1001 @param force_variant: whether to ignore variant errors
1002 @raise errors.OpPrereqError: if the node is not supporting the OS
1005 result = lu.rpc.call_os_get(node, os_name)
1006 result.Raise("OS '%s' not in supported OS list for node %s" %
1008 prereq=True, ecode=errors.ECODE_INVAL)
1009 if not force_variant:
1010 _CheckOSVariant(result.payload, os_name)
1013 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1014 """Ensure that a node has the given secondary ip.
1016 @type lu: L{LogicalUnit}
1017 @param lu: the LU on behalf of which we make the check
1019 @param node: the node to check
1020 @type secondary_ip: string
1021 @param secondary_ip: the ip to check
1022 @type prereq: boolean
1023 @param prereq: whether to throw a prerequisite or an execute error
1024 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1025 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1028 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1029 result.Raise("Failure checking secondary ip on node %s" % node,
1030 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1031 if not result.payload:
1032 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1033 " please fix and re-run this command" % secondary_ip)
1034 if prereq:
1035 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1036 else:
1037 raise errors.OpExecError(msg)
1040 def _GetClusterDomainSecret():
1041 """Reads the cluster domain secret.
1044 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1048 def _CheckInstanceState(lu, instance, req_states, msg=None):
1049 """Ensure that an instance is in one of the required states.
1051 @param lu: the LU on behalf of which we make the check
1052 @param instance: the instance to check
1053 @param msg: if passed, should be a message to replace the default one
1054 @raise errors.OpPrereqError: if the instance is not in the required state
1058 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1059 if instance.admin_state not in req_states:
1060 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1061 (instance.name, instance.admin_state, msg),
1064 if constants.ADMINST_UP not in req_states:
1065 pnode = instance.primary_node
1066 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1067 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1068 prereq=True, ecode=errors.ECODE_ENVIRON)
1070 if instance.name in ins_l.payload:
1071 raise errors.OpPrereqError("Instance %s is running, %s" %
1072 (instance.name, msg), errors.ECODE_STATE)
1075 def _ComputeMinMaxSpec(name, ipolicy, value):
1076 """Computes if value is in the desired range.
1078 @param name: name of the parameter for which we perform the check
1079 @param ipolicy: dictionary containing min, max and std values
1080 @param value: actual value that we want to use
1081 @return: None or element not meeting the criteria
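
For illustration, assuming the policy allows memory sizes between 128 and
4096::

  # _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 512)  -> None
  # _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 8192) -> error text
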
1085 if value in [None, constants.VALUE_AUTO]:
1086 return None
1087 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1088 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1089 if value > max_v or min_v > value:
1090 return ("%s value %s is not in range [%s, %s]" %
1091 (name, value, min_v, max_v))
1095 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1096 nic_count, disk_sizes,
1097 _compute_fn=_ComputeMinMaxSpec):
1098 """Verifies ipolicy against provided specs.
1101 @param ipolicy: The ipolicy
1103 @param mem_size: The memory size
1104 @type cpu_count: int
1105 @param cpu_count: Used cpu cores
1106 @type disk_count: int
1107 @param disk_count: Number of disks used
1108 @type nic_count: int
1109 @param nic_count: Number of nics used
1110 @type disk_sizes: list of ints
1111 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1112 @param _compute_fn: The compute function (unittest only)
1113 @return: A list of violations, or an empty list if no violations are found
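
A call might look like this (values are illustrative)::

  # res = _ComputeIPolicySpecViolation(ipolicy, mem_size=512, cpu_count=2,
  #                                    disk_count=1, nic_count=1,
  #                                    disk_sizes=[10240])
  # res == []  ->  the given spec fits the policy
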
1116 assert disk_count == len(disk_sizes)
1119 (constants.ISPEC_MEM_SIZE, mem_size),
1120 (constants.ISPEC_CPU_COUNT, cpu_count),
1121 (constants.ISPEC_DISK_COUNT, disk_count),
1122 (constants.ISPEC_NIC_COUNT, nic_count),
1123 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1126 (_compute_fn(name, ipolicy, value)
1127 for (name, value) in test_settings))
1130 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1131 _compute_fn=_ComputeIPolicySpecViolation):
1132 """Compute if instance meets the specs of ipolicy.
1135 @param ipolicy: The ipolicy to verify against
1136 @type instance: L{objects.Instance}
1137 @param instance: The instance to verify
1138 @param _compute_fn: The function to verify ipolicy (unittest only)
1139 @see: L{_ComputeIPolicySpecViolation}
1142 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1143 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1144 disk_count = len(instance.disks)
1145 disk_sizes = [disk.size for disk in instance.disks]
1146 nic_count = len(instance.nics)
1148 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1152 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1153 _compute_fn=_ComputeIPolicySpecViolation):
1154 """Compute if instance specs meets the specs of ipolicy.
1157 @param ipolicy: The ipolicy to verify against
1158 @type instance_spec: dict
1159 @param instance_spec: The instance spec to verify
1160 @param _compute_fn: The function to verify ipolicy (unittest only)
1161 @see: L{_ComputeIPolicySpecViolation}
1164 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1165 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1166 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1167 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1168 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1170 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1174 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1176 _compute_fn=_ComputeIPolicyInstanceViolation):
1177 """Compute if instance meets the specs of the new target group.
1179 @param ipolicy: The ipolicy to verify
1180 @param instance: The instance object to verify
1181 @param current_group: The current group of the instance
1182 @param target_group: The new group of the instance
1183 @param _compute_fn: The function to verify ipolicy (unittest only)
1184 @see: L{_ComputeIPolicySpecViolation}
1187 if current_group == target_group:
1188 return []
1190 return _compute_fn(ipolicy, instance)
1193 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1194 _compute_fn=_ComputeIPolicyNodeViolation):
1195 """Checks that the target node is correct in terms of instance policy.
1197 @param ipolicy: The ipolicy to verify
1198 @param instance: The instance object to verify
1199 @param node: The new node to relocate
1200 @param ignore: Ignore violations of the ipolicy
1201 @param _compute_fn: The function to verify ipolicy (unittest only)
1202 @see: L{_ComputeIPolicySpecViolation}
1205 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1206 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1209 msg = ("Instance does not meet target node group's (%s) instance"
1210 " policy: %s") % (node.group, utils.CommaJoin(res))
1214 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1217 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1218 """Computes a set of any instances that would violate the new ipolicy.
1220 @param old_ipolicy: The current (still in-place) ipolicy
1221 @param new_ipolicy: The new (to become) ipolicy
1222 @param instances: List of instances to verify
1223 @return: A list of instances that violate the new ipolicy but did not before
1226 return (_ComputeViolatingInstances(old_ipolicy, instances) -
1227 _ComputeViolatingInstances(new_ipolicy, instances))
1230 def _ExpandItemName(fn, name, kind):
1231 """Expand an item name.
1233 @param fn: the function to use for expansion
1234 @param name: requested item name
1235 @param kind: text description ('Node' or 'Instance')
1236 @return: the resolved (full) name
1237 @raise errors.OpPrereqError: if the item is not found
1240 full_name = fn(name)
1241 if full_name is None:
1242 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1247 def _ExpandNodeName(cfg, name):
1248 """Wrapper over L{_ExpandItemName} for nodes."""
1249 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1252 def _ExpandInstanceName(cfg, name):
1253 """Wrapper over L{_ExpandItemName} for instance."""
1254 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1257 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1258 minmem, maxmem, vcpus, nics, disk_template, disks,
1259 bep, hvp, hypervisor_name, tags):
1260 """Builds instance related env variables for hooks
1262 This builds the hook environment from individual variables.
1265 @param name: the name of the instance
1266 @type primary_node: string
1267 @param primary_node: the name of the instance's primary node
1268 @type secondary_nodes: list
1269 @param secondary_nodes: list of secondary nodes as strings
1270 @type os_type: string
1271 @param os_type: the name of the instance's OS
1272 @type status: string
1273 @param status: the desired status of the instance
1274 @type minmem: string
1275 @param minmem: the minimum memory size of the instance
1276 @type maxmem: string
1277 @param maxmem: the maximum memory size of the instance
1279 @param vcpus: the count of VCPUs the instance has
1281 @param nics: list of tuples (ip, mac, mode, link) representing
1282 the NICs the instance has
1283 @type disk_template: string
1284 @param disk_template: the disk template of the instance
1286 @param disks: the list of (size, mode) pairs
1288 @param bep: the backend parameters for the instance
1290 @param hvp: the hypervisor parameters for the instance
1291 @type hypervisor_name: string
1292 @param hypervisor_name: the hypervisor for the instance
1294 @param tags: list of instance tags as strings
1296 @return: the hook environment for this instance
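
For illustration, the resulting dictionary contains entries such as (values
made up)::

  # "INSTANCE_NAME": "instance1.example.com"
  # "INSTANCE_PRIMARY": "node1.example.com"
  # "INSTANCE_NIC_COUNT": 1
  # "INSTANCE_NIC0_MAC": "aa:00:00:35:ac:11"
  # "INSTANCE_DISK0_SIZE": 10240
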
1301 "INSTANCE_NAME": name,
1302 "INSTANCE_PRIMARY": primary_node,
1303 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1304 "INSTANCE_OS_TYPE": os_type,
1305 "INSTANCE_STATUS": status,
1306 "INSTANCE_MINMEM": minmem,
1307 "INSTANCE_MAXMEM": maxmem,
1308 # TODO(2.7) remove deprecated "memory" value
1309 "INSTANCE_MEMORY": maxmem,
1310 "INSTANCE_VCPUS": vcpus,
1311 "INSTANCE_DISK_TEMPLATE": disk_template,
1312 "INSTANCE_HYPERVISOR": hypervisor_name,
1315 nic_count = len(nics)
1316 for idx, (ip, mac, mode, link) in enumerate(nics):
1319 env["INSTANCE_NIC%d_IP" % idx] = ip
1320 env["INSTANCE_NIC%d_MAC" % idx] = mac
1321 env["INSTANCE_NIC%d_MODE" % idx] = mode
1322 env["INSTANCE_NIC%d_LINK" % idx] = link
1323 if mode == constants.NIC_MODE_BRIDGED:
1324 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1328 env["INSTANCE_NIC_COUNT"] = nic_count
1331 disk_count = len(disks)
1332 for idx, (size, mode) in enumerate(disks):
1333 env["INSTANCE_DISK%d_SIZE" % idx] = size
1334 env["INSTANCE_DISK%d_MODE" % idx] = mode
1338 env["INSTANCE_DISK_COUNT"] = disk_count
1343 env["INSTANCE_TAGS"] = " ".join(tags)
1345 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1346 for key, value in source.items():
1347 env["INSTANCE_%s_%s" % (kind, key)] = value
1352 def _NICListToTuple(lu, nics):
1353 """Build a list of nic information tuples.
1355 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1356 value in LUInstanceQueryData.
1358 @type lu: L{LogicalUnit}
1359 @param lu: the logical unit on whose behalf we execute
1360 @type nics: list of L{objects.NIC}
1361 @param nics: list of nics to convert to hooks tuples
1365 cluster = lu.cfg.GetClusterInfo()
1369 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1370 mode = filled_params[constants.NIC_MODE]
1371 link = filled_params[constants.NIC_LINK]
1372 hooks_nics.append((ip, mac, mode, link))
1376 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1377 """Builds instance related env variables for hooks from an object.
1379 @type lu: L{LogicalUnit}
1380 @param lu: the logical unit on whose behalf we execute
1381 @type instance: L{objects.Instance}
1382 @param instance: the instance for which we should build the
1384 @type override: dict
1385 @param override: dictionary with key/values that will override
1388 @return: the hook environment dictionary
1391 cluster = lu.cfg.GetClusterInfo()
1392 bep = cluster.FillBE(instance)
1393 hvp = cluster.FillHV(instance)
1395 "name": instance.name,
1396 "primary_node": instance.primary_node,
1397 "secondary_nodes": instance.secondary_nodes,
1398 "os_type": instance.os,
1399 "status": instance.admin_state,
1400 "maxmem": bep[constants.BE_MAXMEM],
1401 "minmem": bep[constants.BE_MINMEM],
1402 "vcpus": bep[constants.BE_VCPUS],
1403 "nics": _NICListToTuple(lu, instance.nics),
1404 "disk_template": instance.disk_template,
1405 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1408 "hypervisor_name": instance.hypervisor,
1409 "tags": instance.tags,
1412 args.update(override)
1413 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1416 def _AdjustCandidatePool(lu, exceptions):
1417 """Adjust the candidate pool after node operations.
1420 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1422 lu.LogInfo("Promoted nodes to master candidate role: %s",
1423 utils.CommaJoin(node.name for node in mod_list))
1424 for name in mod_list:
1425 lu.context.ReaddNode(name)
1426 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1428 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1432 def _DecideSelfPromotion(lu, exceptions=None):
1433 """Decide whether I should promote myself as a master candidate.
1436 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1437 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1438 # the new node will increase mc_max with one, so:
1439 mc_should = min(mc_should + 1, cp_size)
1440 return mc_now < mc_should
1443 def _CalculateGroupIPolicy(cluster, group):
1444 """Calculate instance policy for group.
1447 return cluster.SimpleFillIPolicy(group.ipolicy)
1450 def _ComputeViolatingInstances(ipolicy, instances):
1451 """Computes a set of instances who violates given ipolicy.
1453 @param ipolicy: The ipolicy to verify
1454 @type instances: list of L{objects.Instance}
1455 @param instances: List of instances to verify
1456 @return: A frozenset of instance names violating the ipolicy
1459 return frozenset([inst.name for inst in instances
1460 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1463 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1464 """Check that the brigdes needed by a list of nics exist.
1467 cluster = lu.cfg.GetClusterInfo()
1468 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1469 brlist = [params[constants.NIC_LINK] for params in paramslist
1470 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1472 result = lu.rpc.call_bridges_exist(target_node, brlist)
1473 result.Raise("Error checking bridges on destination node '%s'" %
1474 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1477 def _CheckInstanceBridgesExist(lu, instance, node=None):
1478 """Check that the brigdes needed by an instance exist.
1482 node = instance.primary_node
1483 _CheckNicsBridgesExist(lu, instance.nics, node)
1486 def _CheckOSVariant(os_obj, name):
1487 """Check whether an OS name conforms to the os variants specification.
1489 @type os_obj: L{objects.OS}
1490 @param os_obj: OS object to check
1492 @param name: OS name passed by the user, to check for validity
1495 variant = objects.OS.GetVariant(name)
1496 if not os_obj.supported_variants:
1498 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1499 " passed)" % (os_obj.name, variant),
1503 raise errors.OpPrereqError("OS name must include a variant",
1506 if variant not in os_obj.supported_variants:
1507 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1510 def _GetNodeInstancesInner(cfg, fn):
1511 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1514 def _GetNodeInstances(cfg, node_name):
1515 """Returns a list of all primary and secondary instances on a node.
1519 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1522 def _GetNodePrimaryInstances(cfg, node_name):
1523 """Returns primary instances on a node.
1526 return _GetNodeInstancesInner(cfg,
1527 lambda inst: node_name == inst.primary_node)
1530 def _GetNodeSecondaryInstances(cfg, node_name):
1531 """Returns secondary instances on a node.
1534 return _GetNodeInstancesInner(cfg,
1535 lambda inst: node_name in inst.secondary_nodes)
1538 def _GetStorageTypeArgs(cfg, storage_type):
1539 """Returns the arguments for a storage type.
1542 # Special case for file storage
1543 if storage_type == constants.ST_FILE:
1544 # storage.FileStorage wants a list of storage directories
1545 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1550 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1553 for dev in instance.disks:
1554 cfg.SetDiskID(dev, node_name)
1556 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1557 result.Raise("Failed to get disk status from node %s" % node_name,
1558 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1560 for idx, bdev_status in enumerate(result.payload):
1561 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1567 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1568 """Check the sanity of iallocator and node arguments and use the
1569 cluster-wide iallocator if appropriate.
1571 Check that at most one of (iallocator, node) is specified. If none is
1572 specified, then the LU's opcode's iallocator slot is filled with the
1573 cluster-wide default iallocator.
1575 @type iallocator_slot: string
1576 @param iallocator_slot: the name of the opcode iallocator slot
1577 @type node_slot: string
1578 @param node_slot: the name of the opcode target node slot
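
Typically called from an LU's CheckArguments; a sketch (the slot names are
only illustrative)::

  # _CheckIAllocatorOrNode(self, "iallocator", "target_node")
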
1581 node = getattr(lu.op, node_slot, None)
1582 iallocator = getattr(lu.op, iallocator_slot, None)
1584 if node is not None and iallocator is not None:
1585 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1587 elif node is None and iallocator is None:
1588 default_iallocator = lu.cfg.GetDefaultIAllocator()
1589 if default_iallocator:
1590 setattr(lu.op, iallocator_slot, default_iallocator)
1592 raise errors.OpPrereqError("No iallocator or node given and no"
1593 " cluster-wide default iallocator found;"
1594 " please specify either an iallocator or a"
1595 " node, or set a cluster-wide default"
1599 def _GetDefaultIAllocator(cfg, iallocator):
1600 """Decides on which iallocator to use.
1602 @type cfg: L{config.ConfigWriter}
1603 @param cfg: Cluster configuration object
1604 @type iallocator: string or None
1605 @param iallocator: Iallocator specified in opcode
1607 @return: Iallocator name
1611 # Use default iallocator
1612 iallocator = cfg.GetDefaultIAllocator()
1615 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1616 " opcode nor as a cluster-wide default",
1622 class LUClusterPostInit(LogicalUnit):
1623 """Logical unit for running hooks after cluster initialization.
1626 HPATH = "cluster-init"
1627 HTYPE = constants.HTYPE_CLUSTER
1629 def BuildHooksEnv(self):
1634 "OP_TARGET": self.cfg.GetClusterName(),
1637 def BuildHooksNodes(self):
1638 """Build hooks nodes.
1641 return ([], [self.cfg.GetMasterNode()])
1643 def Exec(self, feedback_fn):
1650 class LUClusterDestroy(LogicalUnit):
1651 """Logical unit for destroying the cluster.
1654 HPATH = "cluster-destroy"
1655 HTYPE = constants.HTYPE_CLUSTER
1657 def BuildHooksEnv(self):
1662 "OP_TARGET": self.cfg.GetClusterName(),
1665 def BuildHooksNodes(self):
1666 """Build hooks nodes.
1671 def CheckPrereq(self):
1672 """Check prerequisites.
1674 This checks whether the cluster is empty.
1676 Any errors are signaled by raising errors.OpPrereqError.
1679 master = self.cfg.GetMasterNode()
1681 nodelist = self.cfg.GetNodeList()
1682 if len(nodelist) != 1 or nodelist[0] != master:
1683 raise errors.OpPrereqError("There are still %d node(s) in"
1684 " this cluster." % (len(nodelist) - 1),
1686 instancelist = self.cfg.GetInstanceList()
1687 if instancelist:
1688 raise errors.OpPrereqError("There are still %d instance(s) in"
1689 " this cluster." % len(instancelist),
1692 def Exec(self, feedback_fn):
1693 """Destroys the cluster.
1696 master_params = self.cfg.GetMasterNetworkParameters()
1698 # Run post hooks on master node before it's removed
1699 _RunPostHook(self, master_params.name)
1701 ems = self.cfg.GetUseExternalMipScript()
1702 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1705 self.LogWarning("Error disabling the master IP address: %s",
1708 return master_params.name
1711 def _VerifyCertificate(filename):
1712 """Verifies a certificate for L{LUClusterVerifyConfig}.
1714 @type filename: string
1715 @param filename: Path to PEM file
1719 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1720 utils.ReadFile(filename))
1721 except Exception, err: # pylint: disable=W0703
1722 return (LUClusterVerifyConfig.ETYPE_ERROR,
1723 "Failed to load X509 certificate %s: %s" % (filename, err))
1725 (errcode, msg) = \
1726 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1727 constants.SSL_CERT_EXPIRATION_ERROR)
1730 fnamemsg = "While verifying %s: %s" % (filename, msg)
1735 return (None, fnamemsg)
1736 elif errcode == utils.CERT_WARNING:
1737 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1738 elif errcode == utils.CERT_ERROR:
1739 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1741 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1744 def _GetAllHypervisorParameters(cluster, instances):
1745 """Compute the set of all hypervisor parameters.
1747 @type cluster: L{objects.Cluster}
1748 @param cluster: the cluster object
1749 @type instances: list of L{objects.Instance}
1750 @param instances: additional instances from which to obtain parameters
1751 @rtype: list of (origin, hypervisor, parameters)
1752 @return: a list with all parameters found, indicating the hypervisor they
1753 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1758 for hv_name in cluster.enabled_hypervisors:
1759 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1761 for os_name, os_hvp in cluster.os_hvp.items():
1762 for hv_name, hv_params in os_hvp.items():
1764 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1765 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1767 # TODO: collapse identical parameter values in a single one
1768 for instance in instances:
1769 if instance.hvparams:
1770 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1771 cluster.FillHV(instance)))
1776 class _VerifyErrors(object):
1777 """Mix-in for cluster/group verify LUs.
1779 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1780 self.op and self._feedback_fn to be available.)
1784 ETYPE_FIELD = "code"
1785 ETYPE_ERROR = "ERROR"
1786 ETYPE_WARNING = "WARNING"
1788 def _Error(self, ecode, item, msg, *args, **kwargs):
1789 """Format an error message.
1791 Based on the opcode's error_codes parameter, either format a
1792 parseable error code, or a simpler error string.
1794 This must be called only from Exec and functions called from Exec.
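
For illustration, with the opcode's error_codes parameter set the message is
rendered in the machine-parseable form (fields are placeholders)::

  # <severity>:<error code>:<object type>:<object name>:<message>
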
1797 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1798 itype, etxt, _ = ecode
1799 # first complete the msg
1802 # then format the whole message
1803 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1804 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1810 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1811 # and finally report it via the feedback_fn
1812 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1814 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1815 """Log an error message if the passed condition is True.
1819 or self.op.debug_simulate_errors) # pylint: disable=E1101
1821 # If the error code is in the list of ignored errors, demote the error to a
1823 (_, etxt, _) = ecode
1824 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1825 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1828 self._Error(ecode, *args, **kwargs)
1830 # do not mark the operation as failed for WARN cases only
1831 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1832 self.bad = self.bad or cond
1835 class LUClusterVerify(NoHooksLU):
1836 """Submits all jobs necessary to verify the cluster.
1841 def ExpandNames(self):
1842 self.needed_locks = {}
1844 def Exec(self, feedback_fn):
1845 jobs = []
1847 if self.op.group_name:
1848 groups = [self.op.group_name]
1849 depends_fn = lambda: None
1851 groups = self.cfg.GetNodeGroupList()
1853 # Verify global configuration
1855 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1858 # Always depend on global verification
1859 depends_fn = lambda: [(-len(jobs), [])]
1861 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1862 ignore_errors=self.op.ignore_errors,
1863 depends=depends_fn())]
1864 for group in groups)
1866 # Fix up all parameters
1867 for op in itertools.chain(*jobs): # pylint: disable=W0142
1868 op.debug_simulate_errors = self.op.debug_simulate_errors
1869 op.verbose = self.op.verbose
1870 op.error_codes = self.op.error_codes
1872 op.skip_checks = self.op.skip_checks
1873 except AttributeError:
1874 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1876 return ResultWithJobs(jobs)
1879 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1880 """Verifies the cluster config.
1885 def _VerifyHVP(self, hvp_data):
1886 """Verifies locally the syntax of the hypervisor parameters.
1889 for item, hv_name, hv_params in hvp_data:
1890 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1893 hv_class = hypervisor.GetHypervisor(hv_name)
1894 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1895 hv_class.CheckParameterSyntax(hv_params)
1896 except errors.GenericError, err:
1897 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1899 def ExpandNames(self):
1900 # Information can be safely retrieved as the BGL is acquired in exclusive
1902 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1903 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1904 self.all_node_info = self.cfg.GetAllNodesInfo()
1905 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1906 self.needed_locks = {}
1908 def Exec(self, feedback_fn):
1909 """Verify integrity of cluster, performing various test on nodes.
1913 self._feedback_fn = feedback_fn
1915 feedback_fn("* Verifying cluster config")
1917 for msg in self.cfg.VerifyConfig():
1918 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1920 feedback_fn("* Verifying cluster certificate files")
1922 for cert_filename in constants.ALL_CERT_FILES:
1923 (errcode, msg) = _VerifyCertificate(cert_filename)
1924 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1926 feedback_fn("* Verifying hypervisor parameters")
1928 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1929 self.all_inst_info.values()))
1931 feedback_fn("* Verifying all nodes belong to an existing group")
1933 # We do this verification here because, should this bogus circumstance
1934 # occur, it would never be caught by VerifyGroup, which only acts on
1935 # nodes/instances reachable from existing node groups.
1937 dangling_nodes = set(node.name for node in self.all_node_info.values()
1938 if node.group not in self.all_group_info)
1940 dangling_instances = {}
1941 no_node_instances = []
1943 for inst in self.all_inst_info.values():
1944 if inst.primary_node in dangling_nodes:
1945 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1946 elif inst.primary_node not in self.all_node_info:
1947 no_node_instances.append(inst.name)
1952 utils.CommaJoin(dangling_instances.get(node.name,
1954 for node in dangling_nodes]
1956 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1958 "the following nodes (and their instances) belong to a non"
1959 " existing group: %s", utils.CommaJoin(pretty_dangling))
1961 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1963 "the following instances have a non-existing primary-node:"
1964 " %s", utils.CommaJoin(no_node_instances))
1969 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1970 """Verifies the status of a node group.
1973 HPATH = "cluster-verify"
1974 HTYPE = constants.HTYPE_CLUSTER
1977 _HOOKS_INDENT_RE = re.compile("^", re.M)
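1978 # (matches the start of every line; HooksCallBack uses it to re-indent hook output)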
1979 class NodeImage(object):
1980 """A class representing the logical and physical status of a node.
1983 @ivar name: the node name to which this object refers
1984 @ivar volumes: a structure as returned from
1985 L{ganeti.backend.GetVolumeList} (runtime)
1986 @ivar instances: a list of running instances (runtime)
1987 @ivar pinst: list of configured primary instances (config)
1988 @ivar sinst: list of configured secondary instances (config)
1989 @ivar sbp: dictionary of {primary-node: list of instances} for all
1990 instances for which this node is secondary (config)
1991 @ivar mfree: free memory, as reported by hypervisor (runtime)
1992 @ivar dfree: free disk, as reported by the node (runtime)
1993 @ivar offline: the offline status (config)
1994 @type rpc_fail: boolean
1995 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1996 not whether the individual keys were correct) (runtime)
1997 @type lvm_fail: boolean
1998 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1999 @type hyp_fail: boolean
2000 @ivar hyp_fail: whether the RPC call didn't return the instance list
2001 @type ghost: boolean
2002 @ivar ghost: whether this is a known node or not (config)
2003 @type os_fail: boolean
2004 @ivar os_fail: whether the RPC call didn't return valid OS data
2005 @type oslist: list
2006 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2007 @type vm_capable: boolean
2008 @ivar vm_capable: whether the node can host instances
2010 """
2011 def __init__(self, offline=False, name=None, vm_capable=True):
2012 self.name = name
2013 self.volumes = {}
2014 self.instances = []
2015 self.pinst = []
2016 self.sinst = []
2017 self.sbp = {}
2018 self.mfree = 0
2019 self.dfree = 0
2020 self.offline = offline
2021 self.vm_capable = vm_capable
2022 self.rpc_fail = False
2023 self.lvm_fail = False
2024 self.hyp_fail = False
2025 self.ghost = False
2026 self.os_fail = False
2027 self.oslist = {}
2029 def ExpandNames(self):
2030 # This raises errors.OpPrereqError on its own:
2031 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2033 # Get instances in node group; this is unsafe and needs verification later
2034 inst_names = \
2035 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2037 self.needed_locks = {
2038 locking.LEVEL_INSTANCE: inst_names,
2039 locking.LEVEL_NODEGROUP: [self.group_uuid],
2040 locking.LEVEL_NODE: [],
2041 }
2043 self.share_locks = _ShareAll()
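2044 # verification only reads the configuration, so every lock can be shared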
2045 def DeclareLocks(self, level):
2046 if level == locking.LEVEL_NODE:
2047 # Get members of node group; this is unsafe and needs verification later
2048 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2050 all_inst_info = self.cfg.GetAllInstancesInfo()
2052 # In Exec(), we warn about mirrored instances that have primary and
2053 # secondary living in separate node groups. To fully verify that
2054 # volumes for these instances are healthy, we will need to do an
2055 # extra call to their secondaries. We ensure here those nodes will
2056 # be locked.
2057 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2058 # Important: access only the instances whose lock is owned
2059 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2060 nodes.update(all_inst_info[inst].secondary_nodes)
2062 self.needed_locks[locking.LEVEL_NODE] = nodes
2064 def CheckPrereq(self):
2065 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2066 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2068 group_nodes = set(self.group_info.members)
2069 group_instances = \
2070 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2072 unlocked_nodes = \
2073 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2075 unlocked_instances = \
2076 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2078 if unlocked_nodes:
2079 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2080 utils.CommaJoin(unlocked_nodes),
2081 errors.ECODE_STATE)
2083 if unlocked_instances:
2084 raise errors.OpPrereqError("Missing lock for instances: %s" %
2085 utils.CommaJoin(unlocked_instances),
2086 errors.ECODE_STATE)
2088 self.all_node_info = self.cfg.GetAllNodesInfo()
2089 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2091 self.my_node_names = utils.NiceSort(group_nodes)
2092 self.my_inst_names = utils.NiceSort(group_instances)
2094 self.my_node_info = dict((name, self.all_node_info[name])
2095 for name in self.my_node_names)
2097 self.my_inst_info = dict((name, self.all_inst_info[name])
2098 for name in self.my_inst_names)
2100 # We detect here the nodes that will need the extra RPC calls for verifying
2101 # split LV volumes; they should be locked.
2102 extra_lv_nodes = set()
2104 for inst in self.my_inst_info.values():
2105 if inst.disk_template in constants.DTS_INT_MIRROR:
2106 for nname in inst.all_nodes:
2107 if self.all_node_info[nname].group != self.group_uuid:
2108 extra_lv_nodes.add(nname)
2110 unlocked_lv_nodes = \
2111 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2113 if unlocked_lv_nodes:
2114 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2115 utils.CommaJoin(unlocked_lv_nodes),
2117 self.extra_lv_nodes = list(extra_lv_nodes)
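2118 # (Exec sends these nodes a separate, LV-list-only verify call)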
2119 def _VerifyNode(self, ninfo, nresult):
2120 """Perform some basic validation on data returned from a node.
2122 - check the result data structure is well formed and has all the
2123 mandatory fields
2124 - check ganeti version
2126 @type ninfo: L{objects.Node}
2127 @param ninfo: the node to check
2128 @param nresult: the results from the node
2130 @return: whether overall this call was successful (and we can expect
2131 reasonable values in the response)
2133 """
2134 node = ninfo.name
2135 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2137 # main result, nresult should be a non-empty dict
2138 test = not nresult or not isinstance(nresult, dict)
2139 _ErrorIf(test, constants.CV_ENODERPC, node,
2140 "unable to verify node: no data returned")
2144 # compares ganeti version
2145 local_version = constants.PROTOCOL_VERSION
2146 remote_version = nresult.get("version", None)
2147 test = not (remote_version and
2148 isinstance(remote_version, (list, tuple)) and
2149 len(remote_version) == 2)
2150 _ErrorIf(test, constants.CV_ENODERPC, node,
2151 "connection to node returned invalid data")
2155 test = local_version != remote_version[0]
2156 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2157 "incompatible protocol versions: master %s,"
2158 " node %s", local_version, remote_version[0])
2162 # node seems compatible, we can actually try to look into its results
2164 # full package version
2165 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2166 constants.CV_ENODEVERSION, node,
2167 "software version mismatch: master %s, node %s",
2168 constants.RELEASE_VERSION, remote_version[1],
2169 code=self.ETYPE_WARNING)
2171 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2172 if ninfo.vm_capable and isinstance(hyp_result, dict):
2173 for hv_name, hv_result in hyp_result.iteritems():
2174 test = hv_result is not None
2175 _ErrorIf(test, constants.CV_ENODEHV, node,
2176 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2178 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2179 if ninfo.vm_capable and isinstance(hvp_result, list):
2180 for item, hv_name, hv_result in hvp_result:
2181 _ErrorIf(True, constants.CV_ENODEHV, node,
2182 "hypervisor %s parameter verify failure (source %s): %s",
2183 hv_name, item, hv_result)
2185 test = nresult.get(constants.NV_NODESETUP,
2186 ["Missing NODESETUP results"])
2187 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2188 "; ".join(test))
2190 return True
2192 def _VerifyNodeTime(self, ninfo, nresult,
2193 nvinfo_starttime, nvinfo_endtime):
2194 """Check the node time.
2196 @type ninfo: L{objects.Node}
2197 @param ninfo: the node to check
2198 @param nresult: the remote results for the node
2199 @param nvinfo_starttime: the start time of the RPC call
2200 @param nvinfo_endtime: the end time of the RPC call
2202 """
2203 node = ninfo.name
2204 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2206 ntime = nresult.get(constants.NV_TIME, None)
2207 try:
2208 ntime_merged = utils.MergeTime(ntime)
2209 except (ValueError, TypeError):
2210 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2211 return
2213 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2214 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2215 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2216 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2217 else:
2218 ntime_diff = None
2220 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2221 "Node time diverges by at least %s from master node time",
2224 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2225 """Check the node LVM results.
2227 @type ninfo: L{objects.Node}
2228 @param ninfo: the node to check
2229 @param nresult: the remote results for the node
2230 @param vg_name: the configured VG name
2232 """
2233 if vg_name is None:
2234 return
2236 node = ninfo.name
2237 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2239 # checks vg existence and size > 20G
2240 vglist = nresult.get(constants.NV_VGLIST, None)
2241 test = vglist is None
2242 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2243 if not test:
2244 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2245 constants.MIN_VG_SIZE)
2246 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2248 # check pv names
2249 pvlist = nresult.get(constants.NV_PVLIST, None)
2250 test = pvlist is None
2251 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2253 # check that ':' is not present in PV names, since it's a
2254 # special character for lvcreate (denotes the range of PEs to
2256 for _, pvname, owner_vg in pvlist:
2257 test = ":" in pvname
2258 _ErrorIf(test, constants.CV_ENODELVM, node,
2259 "Invalid character ':' in PV '%s' of VG '%s'",
2262 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2263 """Check the node bridges.
2265 @type ninfo: L{objects.Node}
2266 @param ninfo: the node to check
2267 @param nresult: the remote results for the node
2268 @param bridges: the expected list of bridges
2270 """
2271 if not bridges:
2272 return
2274 node = ninfo.name
2275 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2277 missing = nresult.get(constants.NV_BRIDGES, None)
2278 test = not isinstance(missing, list)
2279 _ErrorIf(test, constants.CV_ENODENET, node,
2280 "did not return valid bridge information")
2282 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2283 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2285 def _VerifyNodeUserScripts(self, ninfo, nresult):
2286 """Check the results of user scripts presence and executability on the node
2288 @type ninfo: L{objects.Node}
2289 @param ninfo: the node to check
2290 @param nresult: the remote results for the node
2292 """
2293 node = ninfo.name
2295 test = not constants.NV_USERSCRIPTS in nresult
2296 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2297 "did not return user scripts information")
2299 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2300 if not test:
2301 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2302 "user scripts not present or not executable: %s" %
2303 utils.CommaJoin(sorted(broken_scripts)))
2305 def _VerifyNodeNetwork(self, ninfo, nresult):
2306 """Check the node network connectivity results.
2308 @type ninfo: L{objects.Node}
2309 @param ninfo: the node to check
2310 @param nresult: the remote results for the node
2312 """
2313 node = ninfo.name
2314 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2316 test = constants.NV_NODELIST not in nresult
2317 _ErrorIf(test, constants.CV_ENODESSH, node,
2318 "node hasn't returned node ssh connectivity data")
2320 if nresult[constants.NV_NODELIST]:
2321 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2322 _ErrorIf(True, constants.CV_ENODESSH, node,
2323 "ssh communication with node '%s': %s", a_node, a_msg)
2325 test = constants.NV_NODENETTEST not in nresult
2326 _ErrorIf(test, constants.CV_ENODENET, node,
2327 "node hasn't returned node tcp connectivity data")
2329 if nresult[constants.NV_NODENETTEST]:
2330 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2331 for anode in nlist:
2332 _ErrorIf(True, constants.CV_ENODENET, node,
2333 "tcp communication with node '%s': %s",
2334 anode, nresult[constants.NV_NODENETTEST][anode])
2336 test = constants.NV_MASTERIP not in nresult
2337 _ErrorIf(test, constants.CV_ENODENET, node,
2338 "node hasn't returned node master IP reachability data")
2340 if not nresult[constants.NV_MASTERIP]:
2341 if node == self.master_node:
2342 msg = "the master node cannot reach the master IP (not configured?)"
2344 msg = "cannot reach the master IP"
2345 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2347 def _VerifyInstance(self, instance, instanceconfig, node_image,
2348 diskstatus):
2349 """Verify an instance.
2351 This function checks to see if the required block devices are
2352 available on the instance's node.
2354 """
2355 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2356 node_current = instanceconfig.primary_node
2358 node_vol_should = {}
2359 instanceconfig.MapLVsByNode(node_vol_should)
2361 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2362 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2363 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2365 for node in node_vol_should:
2366 n_img = node_image[node]
2367 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2368 # ignore missing volumes on offline or broken nodes
2369 continue
2370 for volume in node_vol_should[node]:
2371 test = volume not in n_img.volumes
2372 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2373 "volume %s missing on node %s", volume, node)
2375 if instanceconfig.admin_state == constants.ADMINST_UP:
2376 pri_img = node_image[node_current]
2377 test = instance not in pri_img.instances and not pri_img.offline
2378 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2379 "instance not running on its primary node %s",
2382 diskdata = [(nname, success, status, idx)
2383 for (nname, disks) in diskstatus.items()
2384 for idx, (success, status) in enumerate(disks)]
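2385 # flattens {node: [(success, status), ...]} into (node, success, status, disk index) tuples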
2386 for nname, success, bdev_status, idx in diskdata:
2387 # the 'ghost node' construction in Exec() ensures that we have a
2388 # node here
2389 snode = node_image[nname]
2390 bad_snode = snode.ghost or snode.offline
2391 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2392 not success and not bad_snode,
2393 constants.CV_EINSTANCEFAULTYDISK, instance,
2394 "couldn't retrieve status for disk/%s on %s: %s",
2395 idx, nname, bdev_status)
2396 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2397 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2398 constants.CV_EINSTANCEFAULTYDISK, instance,
2399 "disk/%s on %s is faulty", idx, nname)
2401 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2402 """Verify if there are any unknown volumes in the cluster.
2404 The .os, .swap and backup volumes are ignored. All other volumes are
2405 reported as unknown.
2407 @type reserved: L{ganeti.utils.FieldSet}
2408 @param reserved: a FieldSet of reserved volume names
2410 """
2411 for node, n_img in node_image.items():
2412 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2413 self.all_node_info[node].group != self.group_uuid):
2414 # skip non-healthy nodes
2415 continue
2416 for volume in n_img.volumes:
2417 test = ((node not in node_vol_should or
2418 volume not in node_vol_should[node]) and
2419 not reserved.Matches(volume))
2420 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2421 "volume %s is unknown", volume)
2423 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2424 """Verify N+1 Memory Resilience.
2426 Check that if one single node dies we can still start all the
2427 instances it was primary for.
2429 """
2430 cluster_info = self.cfg.GetClusterInfo()
2431 for node, n_img in node_image.items():
2432 # This code checks that every node which is now listed as
2433 # secondary has enough memory to host all instances it is
2434 # supposed to should a single other node in the cluster fail.
2435 # FIXME: not ready for failover to an arbitrary node
2436 # FIXME: does not support file-backed instances
2437 # WARNING: we currently take into account down instances as well
2438 # as up ones, considering that even if they're down someone
2439 # might want to start them even in the event of a node failure.
2440 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2441 # we're skipping nodes marked offline and nodes in other groups from
2442 # the N+1 warning, since most likely we don't have good memory
2443 # information from them; we already list instances living on such
2444 # nodes, and that's enough warning
2445 continue
2446 #TODO(dynmem): also consider ballooning out other instances
2447 for prinode, instances in n_img.sbp.items():
2448 needed_mem = 0
2449 for instance in instances:
2450 bep = cluster_info.FillBE(instance_cfg[instance])
2451 if bep[constants.BE_AUTO_BALANCE]:
2452 needed_mem += bep[constants.BE_MINMEM]
2453 test = n_img.mfree < needed_mem
2454 self._ErrorIf(test, constants.CV_ENODEN1, node,
2455 "not enough memory to accomodate instance failovers"
2456 " should node %s fail (%dMiB needed, %dMiB available)",
2457 prinode, needed_mem, n_img.mfree)
2459 @classmethod
2460 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2461 (files_all, files_opt, files_mc, files_vm)):
2462 """Verifies file checksums collected from all nodes.
2464 @param errorif: Callback for reporting errors
2465 @param nodeinfo: List of L{objects.Node} objects
2466 @param master_node: Name of master node
2467 @param all_nvinfo: RPC results
2469 """
2470 # Define functions determining which nodes to consider for a file
2471 files2nodefn = [
2472 (files_all, None),
2473 (files_mc, lambda node: (node.master_candidate or
2474 node.name == master_node)),
2475 (files_vm, lambda node: node.vm_capable),
2476 ]
2478 # Build mapping from filename to list of nodes which should have the file
2479 nodefiles = {}
2480 for (files, fn) in files2nodefn:
2481 if fn is None:
2482 filenodes = nodeinfo
2483 else:
2484 filenodes = filter(fn, nodeinfo)
2485 nodefiles.update((filename,
2486 frozenset(map(operator.attrgetter("name"), filenodes)))
2487 for filename in files)
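2488 # nodefiles now maps each filename to the frozenset of node names expected to have it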
2489 assert set(nodefiles) == (files_all | files_mc | files_vm)
2491 fileinfo = dict((filename, {}) for filename in nodefiles)
2492 ignore_nodes = set()
2494 for node in nodeinfo:
2495 if node.offline:
2496 ignore_nodes.add(node.name)
2497 continue
2499 nresult = all_nvinfo[node.name]
2501 if nresult.fail_msg or not nresult.payload:
2502 node_files = None
2503 else:
2504 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2506 test = not (node_files and isinstance(node_files, dict))
2507 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2508 "Node did not return file checksum data")
2510 ignore_nodes.add(node.name)
2513 # Build per-checksum mapping from filename to nodes having it
2514 for (filename, checksum) in node_files.items():
2515 assert filename in nodefiles
2516 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2518 for (filename, checksums) in fileinfo.items():
2519 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2521 # Nodes having the file
2522 with_file = frozenset(node_name
2523 for nodes in fileinfo[filename].values()
2524 for node_name in nodes) - ignore_nodes
2526 expected_nodes = nodefiles[filename] - ignore_nodes
2528 # Nodes missing file
2529 missing_file = expected_nodes - with_file
2531 if filename in files_opt:
2532 # All or no nodes
2533 errorif(missing_file and missing_file != expected_nodes,
2534 constants.CV_ECLUSTERFILECHECK, None,
2535 "File %s is optional, but it must exist on all or no"
2536 " nodes (not found on %s)",
2537 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2538 else:
2539 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2540 "File %s is missing from node(s) %s", filename,
2541 utils.CommaJoin(utils.NiceSort(missing_file)))
2543 # Warn if a node has a file it shouldn't
2544 unexpected = with_file - expected_nodes
2545 errorif(unexpected,
2546 constants.CV_ECLUSTERFILECHECK, None,
2547 "File %s should not exist on node(s) %s",
2548 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2550 # See if there are multiple versions of the file
2551 test = len(checksums) > 1
2552 if test:
2553 variants = ["variant %s on %s" %
2554 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2555 for (idx, (checksum, nodes)) in
2556 enumerate(sorted(checksums.items()))]
2557 else:
2558 variants = []
2560 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2561 "File %s found with %s different checksums (%s)",
2562 filename, len(checksums), "; ".join(variants))
2564 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2565 drbd_map):
2566 """Verifies the node DRBD status.
2568 @type ninfo: L{objects.Node}
2569 @param ninfo: the node to check
2570 @param nresult: the remote results for the node
2571 @param instanceinfo: the dict of instances
2572 @param drbd_helper: the configured DRBD usermode helper
2573 @param drbd_map: the DRBD map as returned by
2574 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2576 """
2577 node = ninfo.name
2578 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2580 if drbd_helper:
2581 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2582 test = helper_result is None
2583 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2584 "no drbd usermode helper returned")
2586 status, payload = helper_result
2588 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2589 "drbd usermode helper check unsuccessful: %s", payload)
2590 test = status and (payload != drbd_helper)
2591 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2592 "wrong drbd usermode helper: %s", payload)
2594 # compute the DRBD minors
2595 node_drbd = {}
2596 for minor, instance in drbd_map[node].items():
2597 test = instance not in instanceinfo
2598 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2599 "ghost instance '%s' in temporary DRBD map", instance)
2600 # ghost instance should not be running, but otherwise we
2601 # don't give double warnings (both ghost instance and
2602 # unallocated minor in use)
2603 if test:
2604 node_drbd[minor] = (instance, False)
2605 else:
2606 instance = instanceinfo[instance]
2607 node_drbd[minor] = (instance.name,
2608 instance.admin_state == constants.ADMINST_UP)
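2609 # node_drbd maps each minor to (instance name, whether the instance should be running)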
2610 # and now check them
2611 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2612 test = not isinstance(used_minors, (tuple, list))
2613 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2614 "cannot parse drbd status file: %s", str(used_minors))
2616 # we cannot check drbd status
2619 for minor, (iname, must_exist) in node_drbd.items():
2620 test = minor not in used_minors and must_exist
2621 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2622 "drbd minor %d of instance %s is not active", minor, iname)
2623 for minor in used_minors:
2624 test = minor not in node_drbd
2625 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2626 "unallocated drbd minor %d is in use", minor)
2628 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2629 """Builds the node OS structures.
2631 @type ninfo: L{objects.Node}
2632 @param ninfo: the node to check
2633 @param nresult: the remote results for the node
2634 @param nimg: the node image object
2636 """
2637 node = ninfo.name
2638 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2640 remote_os = nresult.get(constants.NV_OSLIST, None)
2641 test = (not isinstance(remote_os, list) or
2642 not compat.all(isinstance(v, list) and len(v) == 7
2643 for v in remote_os))
2645 _ErrorIf(test, constants.CV_ENODEOS, node,
2646 "node hasn't returned valid OS data")
2655 for (name, os_path, status, diagnose,
2656 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2658 if name not in os_dict:
2659 os_dict[name] = []
2661 # parameters is a list of lists instead of list of tuples due to
2662 # JSON lacking a real tuple type, fix it:
2663 parameters = [tuple(v) for v in parameters]
2664 os_dict[name].append((os_path, status, diagnose,
2665 set(variants), set(parameters), set(api_ver)))
2667 nimg.oslist = os_dict
2669 def _VerifyNodeOS(self, ninfo, nimg, base):
2670 """Verifies the node OS list.
2672 @type ninfo: L{objects.Node}
2673 @param ninfo: the node to check
2674 @param nimg: the node image object
2675 @param base: the 'template' node we match against (e.g. from the master)
2677 """
2678 node = ninfo.name
2679 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2681 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2683 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2684 for os_name, os_data in nimg.oslist.items():
2685 assert os_data, "Empty OS status for OS %s?!" % os_name
2686 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2687 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2688 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2689 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2690 "OS '%s' has multiple entries (first one shadows the rest): %s",
2691 os_name, utils.CommaJoin([v[0] for v in os_data]))
2692 # comparisons with the 'base' image
2693 test = os_name not in base.oslist
2694 _ErrorIf(test, constants.CV_ENODEOS, node,
2695 "Extra OS %s not present on reference node (%s)",
2699 assert base.oslist[os_name], "Base node has empty OS status?"
2700 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2701 if not b_status:
2702 # base OS is invalid, skipping
2703 continue
2704 for kind, a, b in [("API version", f_api, b_api),
2705 ("variants list", f_var, b_var),
2706 ("parameters", beautify_params(f_param),
2707 beautify_params(b_param))]:
2708 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2709 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2710 kind, os_name, base.name,
2711 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2713 # check any missing OSes
2714 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2715 _ErrorIf(missing, constants.CV_ENODEOS, node,
2716 "OSes present on reference node %s but missing on this node: %s",
2717 base.name, utils.CommaJoin(missing))
2719 def _VerifyOob(self, ninfo, nresult):
2720 """Verifies out of band functionality of a node.
2722 @type ninfo: L{objects.Node}
2723 @param ninfo: the node to check
2724 @param nresult: the remote results for the node
2726 """
2727 node = ninfo.name
2728 # We just have to verify the paths on master and/or master candidates
2729 # as the oob helper is invoked on the master
2730 if ((ninfo.master_candidate or ninfo.master_capable) and
2731 constants.NV_OOB_PATHS in nresult):
2732 for path_result in nresult[constants.NV_OOB_PATHS]:
2733 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2735 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2736 """Verifies and updates the node volume data.
2738 This function will update a L{NodeImage}'s internal structures
2739 with data from the remote call.
2741 @type ninfo: L{objects.Node}
2742 @param ninfo: the node to check
2743 @param nresult: the remote results for the node
2744 @param nimg: the node image object
2745 @param vg_name: the configured VG name
2747 """
2748 node = ninfo.name
2749 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2751 nimg.lvm_fail = True
2752 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2753 if vg_name is None:
2754 pass
2755 elif isinstance(lvdata, basestring):
2756 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2757 utils.SafeEncode(lvdata))
2758 elif not isinstance(lvdata, dict):
2759 _ErrorIf(True, constants.CV_ENODELVM, node,
2760 "rpc call to node failed (lvlist)")
2762 nimg.volumes = lvdata
2763 nimg.lvm_fail = False
2765 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2766 """Verifies and updates the node instance list.
2768 If the listing was successful, then updates this node's instance
2769 list. Otherwise, it marks the RPC call as failed for the instance
2770 hypervisor.
2772 @type ninfo: L{objects.Node}
2773 @param ninfo: the node to check
2774 @param nresult: the remote results for the node
2775 @param nimg: the node image object
2777 """
2778 idata = nresult.get(constants.NV_INSTANCELIST, None)
2779 test = not isinstance(idata, list)
2780 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2781 "rpc call to node failed (instancelist): %s",
2782 utils.SafeEncode(str(idata)))
2783 if test:
2784 nimg.hyp_fail = True
2785 else:
2786 nimg.instances = idata
2788 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2789 """Verifies and computes a node information map
2791 @type ninfo: L{objects.Node}
2792 @param ninfo: the node to check
2793 @param nresult: the remote results for the node
2794 @param nimg: the node image object
2795 @param vg_name: the configured VG name
2797 """
2798 node = ninfo.name
2799 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2801 # try to read free memory (from the hypervisor)
2802 hv_info = nresult.get(constants.NV_HVINFO, None)
2803 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2804 _ErrorIf(test, constants.CV_ENODEHV, node,
2805 "rpc call to node failed (hvinfo)")
2808 nimg.mfree = int(hv_info["memory_free"])
2809 except (ValueError, TypeError):
2810 _ErrorIf(True, constants.CV_ENODERPC, node,
2811 "node returned invalid nodeinfo, check hypervisor")
2813 # FIXME: devise a free space model for file based instances as well
2814 if vg_name is not None:
2815 test = (constants.NV_VGLIST not in nresult or
2816 vg_name not in nresult[constants.NV_VGLIST])
2817 _ErrorIf(test, constants.CV_ENODELVM, node,
2818 "node didn't return data for the volume group '%s'"
2819 " - it is either missing or broken", vg_name)
2822 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2823 except (ValueError, TypeError):
2824 _ErrorIf(True, constants.CV_ENODERPC, node,
2825 "node returned invalid LVM info, check LVM status")
2827 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2828 """Gets per-disk status information for all instances.
2830 @type nodelist: list of strings
2831 @param nodelist: Node names
2832 @type node_image: dict of (name, L{objects.Node})
2833 @param node_image: Node objects
2834 @type instanceinfo: dict of (name, L{objects.Instance})
2835 @param instanceinfo: Instance objects
2836 @rtype: {instance: {node: [(success, payload)]}}
2837 @return: a dictionary of per-instance dictionaries with nodes as
2838 keys and disk information as values; the disk information is a
2839 list of tuples (success, payload)
2841 """
2842 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2844 node_disks = {}
2845 node_disks_devonly = {}
2846 diskless_instances = set()
2847 diskless = constants.DT_DISKLESS
2849 for nname in nodelist:
2850 node_instances = list(itertools.chain(node_image[nname].pinst,
2851 node_image[nname].sinst))
2852 diskless_instances.update(inst for inst in node_instances
2853 if instanceinfo[inst].disk_template == diskless)
2854 disks = [(inst, disk)
2855 for inst in node_instances
2856 for disk in instanceinfo[inst].disks]
2858 if not disks:
2859 # No need to collect data
2860 continue
2862 node_disks[nname] = disks
2864 # Creating copies as SetDiskID below will modify the objects and that can
2865 # lead to incorrect data returned from nodes
2866 devonly = [dev.Copy() for (_, dev) in disks]
2868 for dev in devonly:
2869 self.cfg.SetDiskID(dev, nname)
2871 node_disks_devonly[nname] = devonly
2873 assert len(node_disks) == len(node_disks_devonly)
2875 # Collect data from all nodes with disks
2876 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2877 node_disks_devonly)
2879 assert len(result) == len(node_disks)
2881 instdisk = {}
2883 for (nname, nres) in result.items():
2884 disks = node_disks[nname]
2886 if nres.offline:
2887 # No data from this node
2888 data = len(disks) * [(False, "node offline")]
2891 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2892 "while getting disk information: %s", msg)
2894 # No data from this node
2895 data = len(disks) * [(False, msg)]
2896 else:
2897 data = []
2898 for idx, i in enumerate(nres.payload):
2899 if isinstance(i, (tuple, list)) and len(i) == 2:
2900 data.append(i)
2901 else:
2902 logging.warning("Invalid result from node %s, entry %d: %s",
2903 nname, idx, i)
2904 data.append((False, "Invalid result from the remote node"))
2906 for ((inst, _), status) in zip(disks, data):
2907 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2909 # Add empty entries for diskless instances.
2910 for inst in diskless_instances:
2911 assert inst not in instdisk
2912 instdisk[inst] = {}
2914 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2915 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2916 compat.all(isinstance(s, (tuple, list)) and
2917 len(s) == 2 for s in statuses)
2918 for inst, nnames in instdisk.items()
2919 for nname, statuses in nnames.items())
2920 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2922 return instdisk
2924 @staticmethod
2925 def _SshNodeSelector(group_uuid, all_nodes):
2926 """Create endless iterators for all potential SSH check hosts.
2929 nodes = [node for node in all_nodes
2930 if (node.group != group_uuid and
2931 not node.offline)]
2932 keyfunc = operator.attrgetter("group")
2934 return map(itertools.cycle,
2935 [sorted(map(operator.attrgetter("name"), names))
2936 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2937 keyfunc)])
2939 @classmethod
2940 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2941 """Choose which nodes should talk to which other nodes.
2943 We will make nodes contact all nodes in their group, and one node from
2944 every other group.
2946 @warning: This algorithm has a known issue if one node group is much
2947 smaller than others (e.g. just one node). In such a case all other
2948 nodes will talk to the single node.
2950 """
2951 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2952 sel = cls._SshNodeSelector(group_uuid, all_nodes)
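2953 # each iterator in sel endlessly cycles through the nodes of one other group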
2954 return (online_nodes,
2955 dict((name, sorted([i.next() for i in sel]))
2956 for name in online_nodes))
2958 def BuildHooksEnv(self):
2959 """Build hooks env.
2961 Cluster-Verify hooks just run in the post phase; if they fail, their
2962 output is logged in the verify output and the verification fails.
2964 """
2965 env = {
2966 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2969 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2970 for node in self.my_node_info.values())
2972 return env
2974 def BuildHooksNodes(self):
2975 """Build hooks nodes.
2978 return ([], self.my_node_names)
2980 def Exec(self, feedback_fn):
2981 """Verify integrity of the node group, performing various test on nodes.
2984 # This method has too many local variables. pylint: disable=R0914
2985 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2987 if not self.my_node_names:
2988 # empty node group
2989 feedback_fn("* Empty node group, skipping verification")
2990 return True
2993 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2994 verbose = self.op.verbose
2995 self._feedback_fn = feedback_fn
2997 vg_name = self.cfg.GetVGName()
2998 drbd_helper = self.cfg.GetDRBDHelper()
2999 cluster = self.cfg.GetClusterInfo()
3000 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3001 hypervisors = cluster.enabled_hypervisors
3002 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3004 i_non_redundant = [] # Non redundant instances
3005 i_non_a_balanced = [] # Non auto-balanced instances
3006 i_offline = 0 # Count of offline instances
3007 n_offline = 0 # Count of offline nodes
3008 n_drained = 0 # Count of nodes being drained
3009 node_vol_should = {}
3011 # FIXME: verify OS list
3013 # File verification
3014 filemap = _ComputeAncillaryFiles(cluster, False)
3016 # do local checksums
3017 master_node = self.master_node = self.cfg.GetMasterNode()
3018 master_ip = self.cfg.GetMasterIP()
3020 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3023 if self.cfg.GetUseExternalMipScript():
3024 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
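3025 # node_verify_param maps NV_* keys to the checks each node runs in a single verify RPC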
3026 node_verify_param = {
3027 constants.NV_FILELIST:
3028 utils.UniqueSequence(filename
3029 for files in filemap
3030 for filename in files),
3031 constants.NV_NODELIST:
3032 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3033 self.all_node_info.values()),
3034 constants.NV_HYPERVISOR: hypervisors,
3035 constants.NV_HVPARAMS:
3036 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3037 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3038 for node in node_data_list
3039 if not node.offline],
3040 constants.NV_INSTANCELIST: hypervisors,
3041 constants.NV_VERSION: None,
3042 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3043 constants.NV_NODESETUP: None,
3044 constants.NV_TIME: None,
3045 constants.NV_MASTERIP: (master_node, master_ip),
3046 constants.NV_OSLIST: None,
3047 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3048 constants.NV_USERSCRIPTS: user_scripts,
3049 }
3051 if vg_name is not None:
3052 node_verify_param[constants.NV_VGLIST] = None
3053 node_verify_param[constants.NV_LVLIST] = vg_name
3054 node_verify_param[constants.NV_PVLIST] = [vg_name]
3055 node_verify_param[constants.NV_DRBDLIST] = None
3057 if drbd_helper:
3058 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3061 # FIXME: this needs to be changed per node-group, not cluster-wide
3062 bridges = set()
3063 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3064 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3065 bridges.add(default_nicpp[constants.NIC_LINK])
3066 for instance in self.my_inst_info.values():
3067 for nic in instance.nics:
3068 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3069 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3070 bridges.add(full_nic[constants.NIC_LINK])
3072 if bridges:
3073 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3075 # Build our expected cluster state
3076 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3077 name=node.name,
3078 vm_capable=node.vm_capable))
3079 for node in node_data_list)
3081 oob_paths = []
3083 for node in self.all_node_info.values():
3084 path = _SupportsOob(self.cfg, node)
3085 if path and path not in oob_paths:
3086 oob_paths.append(path)
3088 if oob_paths:
3089 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3091 for instance in self.my_inst_names:
3092 inst_config = self.my_inst_info[instance]
3094 for nname in inst_config.all_nodes:
3095 if nname not in node_image:
3096 gnode = self.NodeImage(name=nname)
3097 gnode.ghost = (nname not in self.all_node_info)
3098 node_image[nname] = gnode
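3099 # "ghost" images stand in for nodes referenced by instances but absent from the config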
3100 inst_config.MapLVsByNode(node_vol_should)
3102 pnode = inst_config.primary_node
3103 node_image[pnode].pinst.append(instance)
3105 for snode in inst_config.secondary_nodes:
3106 nimg = node_image[snode]
3107 nimg.sinst.append(instance)
3108 if pnode not in nimg.sbp:
3109 nimg.sbp[pnode] = []
3110 nimg.sbp[pnode].append(instance)
3112 # At this point, we have the in-memory data structures complete,
3113 # except for the runtime information, which we'll gather next
3115 # Due to the way our RPC system works, exact response times cannot be
3116 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3117 # time before and after executing the request, we can at least have a time
3118 # window.
3119 nvinfo_starttime = time.time()
3120 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3121 node_verify_param,
3122 self.cfg.GetClusterName())
3123 nvinfo_endtime = time.time()
3125 if self.extra_lv_nodes and vg_name is not None:
3126 extra_lv_nvinfo = \
3127 self.rpc.call_node_verify(self.extra_lv_nodes,
3128 {constants.NV_LVLIST: vg_name},
3129 self.cfg.GetClusterName())
3130 else:
3131 extra_lv_nvinfo = {}
3133 all_drbd_map = self.cfg.ComputeDRBDMap()
3135 feedback_fn("* Gathering disk information (%s nodes)" %
3136 len(self.my_node_names))
3137 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3138 self.all_inst_info)
3140 feedback_fn("* Verifying configuration file consistency")
3142 # If not all nodes are being checked, we need to make sure the master node
3143 # and a non-checked vm_capable node are in the list.
3144 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3145 if absent_nodes:
3146 vf_nvinfo = all_nvinfo.copy()
3147 vf_node_info = list(self.my_node_info.values())
3148 additional_nodes = []
3149 if master_node not in self.my_node_info:
3150 additional_nodes.append(master_node)
3151 vf_node_info.append(self.all_node_info[master_node])
3152 # Add the first vm_capable node we find which is not included
3153 for node in absent_nodes:
3154 nodeinfo = self.all_node_info[node]
3155 if nodeinfo.vm_capable and not nodeinfo.offline:
3156 additional_nodes.append(node)
3157 vf_node_info.append(self.all_node_info[node])
3158 break
3159 key = constants.NV_FILELIST
3160 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3161 {key: node_verify_param[key]},
3162 self.cfg.GetClusterName()))
3163 else:
3164 vf_nvinfo = all_nvinfo
3165 vf_node_info = self.my_node_info.values()
3167 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3169 feedback_fn("* Verifying node status")
3173 for node_i in node_data_list:
3175 nimg = node_image[node]
3179 feedback_fn("* Skipping offline node %s" % (node,))
3183 if node == master_node:
3185 elif node_i.master_candidate:
3186 ntype = "master candidate"
3187 elif node_i.drained:
3188 ntype = "drained"
3189 n_drained += 1
3190 else:
3191 ntype = "regular"
3192 if verbose:
3193 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3195 msg = all_nvinfo[node].fail_msg
3196 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3197 msg)
3198 if msg:
3199 nimg.rpc_fail = True
3200 continue
3202 nresult = all_nvinfo[node].payload
3204 nimg.call_ok = self._VerifyNode(node_i, nresult)
3205 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3206 self._VerifyNodeNetwork(node_i, nresult)
3207 self._VerifyNodeUserScripts(node_i, nresult)
3208 self._VerifyOob(node_i, nresult)
3210 if nimg.vm_capable:
3211 self._VerifyNodeLVM(node_i, nresult, vg_name)
3212 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3213 all_drbd_map)
3215 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3216 self._UpdateNodeInstances(node_i, nresult, nimg)
3217 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3218 self._UpdateNodeOS(node_i, nresult, nimg)
3220 if not nimg.os_fail:
3221 if refos_img is None:
3222 refos_img = nimg
3223 self._VerifyNodeOS(node_i, nimg, refos_img)
3224 self._VerifyNodeBridges(node_i, nresult, bridges)
3226 # Check whether all running instances are primary for the node. (This
3227 # can no longer be done from _VerifyInstance below, since some of the
3228 # wrong instances could be from other node groups.)
3229 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3231 for inst in non_primary_inst:
3232 # FIXME: investigate best way to handle offline insts
3233 if inst.admin_state == constants.ADMINST_OFFLINE:
3234 if verbose:
3235 feedback_fn("* Skipping offline instance %s" % inst.name)
3236 i_offline += 1
3237 continue
3238 test = inst in self.all_inst_info
3239 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3240 "instance should not run on node %s", node_i.name)
3241 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3242 "node is running unknown instance %s", inst)
3244 for node, result in extra_lv_nvinfo.items():
3245 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3246 node_image[node], vg_name)
3248 feedback_fn("* Verifying instance status")
3249 for instance in self.my_inst_names:
3250 if verbose:
3251 feedback_fn("* Verifying instance %s" % instance)
3252 inst_config = self.my_inst_info[instance]
3253 self._VerifyInstance(instance, inst_config, node_image,
3254 instdisk[instance])
3255 inst_nodes_offline = []
3257 pnode = inst_config.primary_node
3258 pnode_img = node_image[pnode]
3259 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3260 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3261 " primary node failed", instance)
3263 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3264 pnode_img.offline,
3265 constants.CV_EINSTANCEBADNODE, instance,
3266 "instance is marked as running and lives on offline node %s",
3267 inst_config.primary_node)
3269 # If the instance is non-redundant we cannot survive losing its primary
3270 # node, so we are not N+1 compliant. On the other hand we have no disk
3271 # templates with more than one secondary so that situation is not well
3272 # supported either.
3273 # FIXME: does not support file-backed instances
3274 if not inst_config.secondary_nodes:
3275 i_non_redundant.append(instance)
3277 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3278 constants.CV_EINSTANCELAYOUT,
3279 instance, "instance has multiple secondary nodes: %s",
3280 utils.CommaJoin(inst_config.secondary_nodes),
3281 code=self.ETYPE_WARNING)
3283 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3284 pnode = inst_config.primary_node
3285 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3286 instance_groups = {}
3288 for node in instance_nodes:
3289 instance_groups.setdefault(self.all_node_info[node].group,
3290 []).append(node)
3292 pretty_list = [
3293 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3294 # Sort so that we always list the primary node first.
3295 for group, nodes in sorted(instance_groups.items(),
3296 key=lambda (_, nodes): pnode in nodes,
3297 reverse=True)]
3299 self._ErrorIf(len(instance_groups) > 1,
3300 constants.CV_EINSTANCESPLITGROUPS,
3301 instance, "instance has primary and secondary nodes in"
3302 " different groups: %s", utils.CommaJoin(pretty_list),
3303 code=self.ETYPE_WARNING)
3305 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3306 i_non_a_balanced.append(instance)
3308 for snode in inst_config.secondary_nodes:
3309 s_img = node_image[snode]
3310 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3311 snode, "instance %s, connection to secondary node failed",
3315 inst_nodes_offline.append(snode)
3317 # warn that the instance lives on offline nodes
3318 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3319 "instance has offline secondary node(s) %s",
3320 utils.CommaJoin(inst_nodes_offline))
3321 # ... or ghost/non-vm_capable nodes
3322 for node in inst_config.all_nodes:
3323 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3324 instance, "instance lives on ghost node %s", node)
3325 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3326 instance, "instance lives on non-vm_capable node %s", node)
3328 feedback_fn("* Verifying orphan volumes")
3329 reserved = utils.FieldSet(*cluster.reserved_lvs)
3331 # We will get spurious "unknown volume" warnings if any node of this group
3332 # is secondary for an instance whose primary is in another group. To avoid
3333 # them, we find these instances and add their volumes to node_vol_should.
3334 for inst in self.all_inst_info.values():
3335 for secondary in inst.secondary_nodes:
3336 if (secondary in self.my_node_info
3337 and inst.name not in self.my_inst_info):
3338 inst.MapLVsByNode(node_vol_should)
3339 break
3341 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3343 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3344 feedback_fn("* Verifying N+1 Memory redundancy")
3345 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3347 feedback_fn("* Other Notes")
3349 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3350 % len(i_non_redundant))
3352 if i_non_a_balanced:
3353 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3354 % len(i_non_a_balanced))
3356 if i_offline:
3357 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3360 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3363 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3367 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3368 """Analyze the post-hooks' result
3370 This method analyzes the hook result, handles it, and sends some
3371 nicely-formatted feedback back to the user.
3373 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3374 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3375 @param hooks_results: the results of the multi-node hooks rpc call
3376 @param feedback_fn: function used send feedback back to the caller
3377 @param lu_result: previous Exec result
3378 @return: the new Exec result, based on the previous result
3379 and hook results
3381 """
3382 # We only really run POST phase hooks, only for non-empty groups,
3383 # and are only interested in their results
3384 if not self.my_node_names:
3385 # empty node group
3386 pass
3387 elif phase == constants.HOOKS_PHASE_POST:
3388 # Used to change hooks' output to proper indentation
3389 feedback_fn("* Hooks Results")
3390 assert hooks_results, "invalid result from hooks"
3392 for node_name in hooks_results:
3393 res = hooks_results[node_name]
3394 msg = res.fail_msg
3395 test = msg and not res.offline
3396 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3397 "Communication failure in hooks execution: %s", msg)
3398 if res.offline or msg:
3399 # No need to investigate payload if node is offline or gave
3400 # an error message
3401 continue
3402 for script, hkr, output in res.payload:
3403 test = hkr == constants.HKR_FAIL
3404 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3405 "Script %s failed, output:", script)
3407 output = self._HOOKS_INDENT_RE.sub(" ", output)
3408 feedback_fn("%s" % output)
3414 class LUClusterVerifyDisks(NoHooksLU):
3415 """Verifies the cluster disks status.
3420 def ExpandNames(self):
3421 self.share_locks = _ShareAll()
3422 self.needed_locks = {
3423 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3424 }
3426 def Exec(self, feedback_fn):
3427 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3429 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3430 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3431 for group in group_names])
3434 class LUGroupVerifyDisks(NoHooksLU):
3435 """Verifies the status of all disks in a node group.
3440 def ExpandNames(self):
3441 # Raises errors.OpPrereqError on its own if group can't be found
3442 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3444 self.share_locks = _ShareAll()
3445 self.needed_locks = {
3446 locking.LEVEL_INSTANCE: [],
3447 locking.LEVEL_NODEGROUP: [],
3448 locking.LEVEL_NODE: [],
3449 }
3451 def DeclareLocks(self, level):
3452 if level == locking.LEVEL_INSTANCE:
3453 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3455 # Lock instances optimistically, needs verification once node and group
3456 # locks have been acquired
3457 self.needed_locks[locking.LEVEL_INSTANCE] = \
3458 self.cfg.GetNodeGroupInstances(self.group_uuid)
3460 elif level == locking.LEVEL_NODEGROUP:
3461 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3463 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3464 set([self.group_uuid] +
3465 # Lock all groups used by instances optimistically; this requires
3466 # going via the node before it's locked, requiring verification
3467 # later on
3468 [group_uuid
3469 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3470 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3472 elif level == locking.LEVEL_NODE:
3473 # This will only lock the nodes in the group to be verified which contain
3474 # actual instances
3475 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3476 self._LockInstancesNodes()
3478 # Lock all nodes in group to be verified
3479 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3480 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3481 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3483 def CheckPrereq(self):
3484 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3485 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3486 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3488 assert self.group_uuid in owned_groups
3490 # Check if locked instances are still correct
3491 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3493 # Get instance information
3494 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3496 # Check if node groups for locked instances are still correct
3497 for (instance_name, inst) in self.instances.items():
3498 assert owned_nodes.issuperset(inst.all_nodes), \
3499 "Instance %s's nodes changed while we kept the lock" % instance_name
3501 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3502 owned_groups)
3504 assert self.group_uuid in inst_groups, \
3505 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3507 def Exec(self, feedback_fn):
3508 """Verify integrity of cluster disks.
3510 @rtype: tuple of three items
3511 @return: a tuple of (dict of node-to-node_error, list of instances
3512 which need activate-disks, dict of instance: (node, volume) for
3513 missing volumes
3515 """
3516 res_nodes = {}
3517 res_instances = set()
3518 res_missing = {}
3520 nv_dict = _MapInstanceDisksToNodes([inst
3521 for inst in self.instances.values()
3522 if inst.admin_state == constants.ADMINST_UP])
3524 if nv_dict:
3525 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3526 set(self.cfg.GetVmCapableNodeList()))
3528 node_lvs = self.rpc.call_lv_list(nodes, [])
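3529 # each node's payload maps LV names to tuples whose last element is the online flag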
3530 for (node, node_res) in node_lvs.items():
3531 if node_res.offline:
3532 continue
3534 msg = node_res.fail_msg
3535 if msg:
3536 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3537 res_nodes[node] = msg
3538 continue
3540 for lv_name, (_, _, lv_online) in node_res.payload.items():
3541 inst = nv_dict.pop((node, lv_name), None)
3542 if not (lv_online or inst is None):
3543 res_instances.add(inst)
3545 # any leftover items in nv_dict are missing LVs, let's arrange the data
3546 # better
3547 for key, inst in nv_dict.iteritems():
3548 res_missing.setdefault(inst, []).append(list(key))
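3549 # key is the (node, lv_name) pair on which the volume was expected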
3550 return (res_nodes, list(res_instances), res_missing)
3553 class LUClusterRepairDiskSizes(NoHooksLU):
3554 """Verifies the cluster disks sizes.
3559 def ExpandNames(self):
3560 if self.op.instances:
3561 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3562 self.needed_locks = {
3563 locking.LEVEL_NODE_RES: [],
3564 locking.LEVEL_INSTANCE: self.wanted_names,
3565 }
3566 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3567 else:
3568 self.wanted_names = None
3569 self.needed_locks = {
3570 locking.LEVEL_NODE_RES: locking.ALL_SET,
3571 locking.LEVEL_INSTANCE: locking.ALL_SET,
3572 }
3573 self.share_locks = {
3574 locking.LEVEL_NODE_RES: 1,
3575 locking.LEVEL_INSTANCE: 0,
3576 }
3578 def DeclareLocks(self, level):
3579 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3580 self._LockInstancesNodes(primary_only=True, level=level)
3582 def CheckPrereq(self):
3583 """Check prerequisites.
3585 This only checks the optional instance list against the existing names.
3587 """
3588 if self.wanted_names is None:
3589 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3591 self.wanted_instances = \
3592 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3594 def _EnsureChildSizes(self, disk):
3595 """Ensure children of the disk have the needed disk size.
3597 This is valid mainly for DRBD8 and fixes an issue where the
3598 children have smaller disk size.
3600 @param disk: an L{ganeti.objects.Disk} object
3602 """
3603 if disk.dev_type == constants.LD_DRBD8:
3604 assert disk.children, "Empty children for DRBD8?"
3605 fchild = disk.children[0]
3606 mismatch = fchild.size < disk.size
3607 if mismatch:
3608 self.LogInfo("Child disk has size %d, parent %d, fixing",
3609 fchild.size, disk.size)
3610 fchild.size = disk.size
3612 # and we recurse on this child only, not on the metadev
3613 return self._EnsureChildSizes(fchild) or mismatch
3614 else:
3615 return False
3617 def Exec(self, feedback_fn):
3618 """Verify the size of cluster disks.
3621 # TODO: check child disks too
3622 # TODO: check differences in size between primary/secondary nodes
3623 per_node_disks = {}
3624 for instance in self.wanted_instances:
3625 pnode = instance.primary_node
3626 if pnode not in per_node_disks:
3627 per_node_disks[pnode] = []
3628 for idx, disk in enumerate(instance.disks):
3629 per_node_disks[pnode].append((instance, idx, disk))
3631 assert not (frozenset(per_node_disks.keys()) -
3632 self.owned_locks(locking.LEVEL_NODE_RES)), \
3633 "Not owning correct locks"
3634 assert not self.owned_locks(locking.LEVEL_NODE)
3636 changed = []
3637 for node, dskl in per_node_disks.items():
3638 newl = [v[2].Copy() for v in dskl]
3639 for dsk in newl:
3640 self.cfg.SetDiskID(dsk, node)
3641 result = self.rpc.call_blockdev_getsize(node, newl)
3642 if result.fail_msg:
3643 self.LogWarning("Failure in blockdev_getsize call to node"
3644 " %s, ignoring", node)
3646 if len(result.payload) != len(dskl):
3647 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3648 " result.payload=%s", node, len(dskl), result.payload)
3649 self.LogWarning("Invalid result from node %s, ignoring node results",
3652 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3653 if size is None:
3654 self.LogWarning("Disk %d of instance %s did not return size"
3655 " information, ignoring", idx, instance.name)
3657 if not isinstance(size, (int, long)):
3658 self.LogWarning("Disk %d of instance %s did not return valid"
3659 " size information, ignoring", idx, instance.name)
3662 if size != disk.size:
3663 self.LogInfo("Disk %d of instance %s has mismatched size,"
3664 " correcting: recorded %d, actual %d", idx,
3665 instance.name, disk.size, size)
3666 disk.size = size
3667 self.cfg.Update(instance, feedback_fn)
3668 changed.append((instance.name, idx, size))
3669 if self._EnsureChildSizes(disk):
3670 self.cfg.Update(instance, feedback_fn)
3671 changed.append((instance.name, idx, disk.size))
3673 return changed
3675 class LUClusterRename(LogicalUnit):
3676 """Rename the cluster.
3679 HPATH = "cluster-rename"
3680 HTYPE = constants.HTYPE_CLUSTER
3682 def BuildHooksEnv(self):
3683 """Build hooks env.
3685 """
3686 return {
3687 "OP_TARGET": self.cfg.GetClusterName(),
3688 "NEW_NAME": self.op.name,
3691 def BuildHooksNodes(self):
3692 """Build hooks nodes.
3695 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3697 def CheckPrereq(self):
3698 """Verify that the passed name is a valid one.
3701 hostname = netutils.GetHostname(name=self.op.name,
3702 family=self.cfg.GetPrimaryIPFamily())
3704 new_name = hostname.name
3705 self.ip = new_ip = hostname.ip
3706 old_name = self.cfg.GetClusterName()
3707 old_ip = self.cfg.GetMasterIP()
3708 if new_name == old_name and new_ip == old_ip:
3709 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3710 " cluster has changed",
3712 if new_ip != old_ip:
3713 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3714 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3715 " reachable on the network" %
3716 new_ip, errors.ECODE_NOTUNIQUE)
3718 self.op.name = new_name
3720 def Exec(self, feedback_fn):
3721 """Rename the cluster.
3724 clustername = self.op.name
3727 # shutdown the master IP
3728 master_params = self.cfg.GetMasterNetworkParameters()
3729 ems = self.cfg.GetUseExternalMipScript()
3730 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3732 result.Raise("Could not disable the master role")
3735 cluster = self.cfg.GetClusterInfo()
3736 cluster.cluster_name = clustername
3737 cluster.master_ip = new_ip
3738 self.cfg.Update(cluster, feedback_fn)
3740 # update the known hosts file
3741 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3742 node_list = self.cfg.GetOnlineNodeList()
3744 node_list.remove(master_params.name)
3747 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3749 master_params.ip = new_ip
3750 result = self.rpc.call_node_activate_master_ip(master_params.name,
3752 msg = result.fail_msg
3754 self.LogWarning("Could not re-enable the master role on"
3755 " the master, please restart manually: %s", msg)
3760 def _ValidateNetmask(cfg, netmask):
3761 """Checks if a netmask is valid.
3763 @type cfg: L{config.ConfigWriter}
3764 @param cfg: The cluster configuration
3766 @param netmask: the netmask to be verified
3767 @raise errors.OpPrereqError: if the validation fails
3770 ip_family = cfg.GetPrimaryIPFamily()
3772 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3773 except errors.ProgrammerError:
3774 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3776 if not ipcls.ValidateNetmask(netmask):
3777 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3781 class LUClusterSetParams(LogicalUnit):
3782 """Change the parameters of the cluster.
3785 HPATH = "cluster-modify"
3786 HTYPE = constants.HTYPE_CLUSTER
3789 def CheckArguments(self):
3793 if self.op.uid_pool:
3794 uidpool.CheckUidPool(self.op.uid_pool)
3796 if self.op.add_uids:
3797 uidpool.CheckUidPool(self.op.add_uids)
3799 if self.op.remove_uids:
3800 uidpool.CheckUidPool(self.op.remove_uids)
3802 if self.op.master_netmask is not None:
3803 _ValidateNetmask(self.cfg, self.op.master_netmask)
3805 if self.op.diskparams:
3806 for dt_params in self.op.diskparams.values():
3807 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3809 def ExpandNames(self):
3810 # FIXME: in the future maybe other cluster params won't require checking on
3811 # all nodes to be modified.
3812 self.needed_locks = {
3813 locking.LEVEL_NODE: locking.ALL_SET,
3814 locking.LEVEL_INSTANCE: locking.ALL_SET,
3815 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3817 self.share_locks = {
3818 locking.LEVEL_NODE: 1,
3819 locking.LEVEL_INSTANCE: 1,
3820 locking.LEVEL_NODEGROUP: 1,
3823 def BuildHooksEnv(self):
3828 "OP_TARGET": self.cfg.GetClusterName(),
3829 "NEW_VG_NAME": self.op.vg_name,
3832 def BuildHooksNodes(self):
3833 """Build hooks nodes.
3836 mn = self.cfg.GetMasterNode()
3839 def CheckPrereq(self):
3840 """Check prerequisites.
3842 This checks whether the given params don't conflict and
3843 if the given volume group is valid.
3846 if self.op.vg_name is not None and not self.op.vg_name:
3847 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3848 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3849 " instances exist", errors.ECODE_INVAL)
3851 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3852 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3853 raise errors.OpPrereqError("Cannot disable drbd helper while"
3854 " drbd-based instances exist",
3857 node_list = self.owned_locks(locking.LEVEL_NODE)
3859 # if vg_name not None, checks given volume group on all nodes
3861 vglist = self.rpc.call_vg_list(node_list)
3862 for node in node_list:
3863 msg = vglist[node].fail_msg
3865 # ignoring down node
3866 self.LogWarning("Error while gathering data on node %s"
3867 " (ignoring node): %s", node, msg)
3869 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3871 constants.MIN_VG_SIZE)
3873 raise errors.OpPrereqError("Error on node '%s': %s" %
3874 (node, vgstatus), errors.ECODE_ENVIRON)
3876 if self.op.drbd_helper:
3877 # checks given drbd helper on all nodes
3878 helpers = self.rpc.call_drbd_helper(node_list)
3879 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3881 self.LogInfo("Not checking drbd helper on offline node %s", node)
3883 msg = helpers[node].fail_msg
3885 raise errors.OpPrereqError("Error checking drbd helper on node"
3886 " '%s': %s" % (node, msg),
3887 errors.ECODE_ENVIRON)
3888 node_helper = helpers[node].payload
3889 if node_helper != self.op.drbd_helper:
3890 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3891 (node, node_helper), errors.ECODE_ENVIRON)
3893 self.cluster = cluster = self.cfg.GetClusterInfo()
3894 # validate params changes
3895 if self.op.beparams:
3896 objects.UpgradeBeParams(self.op.beparams)
3897 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3898 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3900 if self.op.ndparams:
3901 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3902 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3904 # TODO: we need a more general way to handle resetting
3905 # cluster-level parameters to default values
3906 if self.new_ndparams["oob_program"] == "":
3907 self.new_ndparams["oob_program"] = \
3908 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3910 if self.op.hv_state:
3911 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3912 self.cluster.hv_state_static)
3913 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3914 for hv, values in new_hv_state.items())
3916 if self.op.disk_state:
3917 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3918 self.cluster.disk_state_static)
3919 self.new_disk_state = \
3920 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3921 for name, values in svalues.items()))
3922 for storage, svalues in new_disk_state.items())
3925 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3928 all_instances = self.cfg.GetAllInstancesInfo().values()
3930 for group in self.cfg.GetAllNodeGroupsInfo().values():
3931 instances = frozenset([inst for inst in all_instances
3932 if compat.any(node in group.members
3933 for node in inst.all_nodes)])
3934 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3935 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3937 new_ipolicy, instances)
3939 violations.update(new)
3942 self.LogWarning("After the ipolicy change the following instances"
3943 " violate them: %s",
3944 utils.CommaJoin(violations))
3946 if self.op.nicparams:
3947 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3948 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3949 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3952 # check all instances for consistency
3953 for instance in self.cfg.GetAllInstancesInfo().values():
3954 for nic_idx, nic in enumerate(instance.nics):
3955 params_copy = copy.deepcopy(nic.nicparams)
3956 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3958 # check parameter syntax
3960 objects.NIC.CheckParameterSyntax(params_filled)
3961 except errors.ConfigurationError, err:
3962 nic_errors.append("Instance %s, nic/%d: %s" %
3963 (instance.name, nic_idx, err))
3965 # if we're moving instances to routed, check that they have an ip
3966 target_mode = params_filled[constants.NIC_MODE]
3967 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3968 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3969 " address" % (instance.name, nic_idx))
3971 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3972 "\n".join(nic_errors))
3974 # hypervisor list/parameters
3975 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3976 if self.op.hvparams:
3977 for hv_name, hv_dict in self.op.hvparams.items():
3978 if hv_name not in self.new_hvparams:
3979 self.new_hvparams[hv_name] = hv_dict
3981 self.new_hvparams[hv_name].update(hv_dict)
3983 # disk template parameters
3984 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3985 if self.op.diskparams:
3986 for dt_name, dt_params in self.op.diskparams.items():
3987 if dt_name not in self.new_diskparams:
3988 self.new_diskparams[dt_name] = dt_params
3990 self.new_diskparams[dt_name].update(dt_params)
3992 # os hypervisor parameters
3993 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3995 for os_name, hvs in self.op.os_hvp.items():
3996 if os_name not in self.new_os_hvp:
3997 self.new_os_hvp[os_name] = hvs
3999 for hv_name, hv_dict in hvs.items():
4000 if hv_name not in self.new_os_hvp[os_name]:
4001 self.new_os_hvp[os_name][hv_name] = hv_dict
4003 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4006 self.new_osp = objects.FillDict(cluster.osparams, {})
4007 if self.op.osparams:
4008 for os_name, osp in self.op.osparams.items():
4009 if os_name not in self.new_osp:
4010 self.new_osp[os_name] = {}
4012 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4015 if not self.new_osp[os_name]:
4016 # we removed all parameters
4017 del self.new_osp[os_name]
4019 # check the parameter validity (remote check)
4020 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4021 os_name, self.new_osp[os_name])
4023 # changes to the hypervisor list
4024 if self.op.enabled_hypervisors is not None:
4025 self.hv_list = self.op.enabled_hypervisors
4026 for hv in self.hv_list:
4027 # if the hypervisor doesn't already exist in the cluster
4028 # hvparams, we initialize it to empty, and then (in both
4029 # cases) we make sure to fill the defaults, as we might not
4030 # have a complete defaults list if the hypervisor wasn't enabled before
4032 if hv not in new_hvp:
4034 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4035 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4037 self.hv_list = cluster.enabled_hypervisors
4039 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4040 # either the enabled list has changed, or the parameters have, validate
4041 for hv_name, hv_params in self.new_hvparams.items():
4042 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4043 (self.op.enabled_hypervisors and
4044 hv_name in self.op.enabled_hypervisors)):
4045 # either this is a new hypervisor, or its parameters have changed
4046 hv_class = hypervisor.GetHypervisor(hv_name)
4047 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4048 hv_class.CheckParameterSyntax(hv_params)
4049 _CheckHVParams(self, node_list, hv_name, hv_params)
4052 # no need to check any newly-enabled hypervisors, since the
4053 # defaults have already been checked in the above code-block
4054 for os_name, os_hvp in self.new_os_hvp.items():
4055 for hv_name, hv_params in os_hvp.items():
4056 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4057 # we need to fill in the new os_hvp on top of the actual hv_p
4058 cluster_defaults = self.new_hvparams.get(hv_name, {})
4059 new_osp = objects.FillDict(cluster_defaults, hv_params)
4060 hv_class = hypervisor.GetHypervisor(hv_name)
4061 hv_class.CheckParameterSyntax(new_osp)
4062 _CheckHVParams(self, node_list, hv_name, new_osp)
4064 if self.op.default_iallocator:
4065 alloc_script = utils.FindFile(self.op.default_iallocator,
4066 constants.IALLOCATOR_SEARCH_PATH,
4068 if alloc_script is None:
4069 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4070 " specified" % self.op.default_iallocator,
4073 def Exec(self, feedback_fn):
4074 """Change the parameters of the cluster.
4077 if self.op.vg_name is not None:
4078 new_volume = self.op.vg_name
4081 if new_volume != self.cfg.GetVGName():
4082 self.cfg.SetVGName(new_volume)
4084 feedback_fn("Cluster LVM configuration already in desired"
4085 " state, not changing")
4086 if self.op.drbd_helper is not None:
4087 new_helper = self.op.drbd_helper
4090 if new_helper != self.cfg.GetDRBDHelper():
4091 self.cfg.SetDRBDHelper(new_helper)
4093 feedback_fn("Cluster DRBD helper already in desired state,"
4095 if self.op.hvparams:
4096 self.cluster.hvparams = self.new_hvparams
4098 self.cluster.os_hvp = self.new_os_hvp
4099 if self.op.enabled_hypervisors is not None:
4100 self.cluster.hvparams = self.new_hvparams
4101 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4102 if self.op.beparams:
4103 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4104 if self.op.nicparams:
4105 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4107 self.cluster.ipolicy = self.new_ipolicy
4108 if self.op.osparams:
4109 self.cluster.osparams = self.new_osp
4110 if self.op.ndparams:
4111 self.cluster.ndparams = self.new_ndparams
4112 if self.op.diskparams:
4113 self.cluster.diskparams = self.new_diskparams
4114 if self.op.hv_state:
4115 self.cluster.hv_state_static = self.new_hv_state
4116 if self.op.disk_state:
4117 self.cluster.disk_state_static = self.new_disk_state
4119 if self.op.candidate_pool_size is not None:
4120 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4121 # we need to update the pool size here, otherwise the save will fail
4122 _AdjustCandidatePool(self, [])
4124 if self.op.maintain_node_health is not None:
4125 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4126 feedback_fn("Note: CONFD was disabled at build time, node health"
4127 " maintenance is not useful (still enabling it)")
4128 self.cluster.maintain_node_health = self.op.maintain_node_health
4130 if self.op.prealloc_wipe_disks is not None:
4131 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4133 if self.op.add_uids is not None:
4134 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4136 if self.op.remove_uids is not None:
4137 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4139 if self.op.uid_pool is not None:
4140 self.cluster.uid_pool = self.op.uid_pool
4142 if self.op.default_iallocator is not None:
4143 self.cluster.default_iallocator = self.op.default_iallocator
4145 if self.op.reserved_lvs is not None:
4146 self.cluster.reserved_lvs = self.op.reserved_lvs
4148 if self.op.use_external_mip_script is not None:
4149 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4151 def helper_os(aname, mods, desc):
4153 lst = getattr(self.cluster, aname)
4154 for key, val in mods:
4155 if key == constants.DDM_ADD:
4157 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4160 elif key == constants.DDM_REMOVE:
4164 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4166 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4168 if self.op.hidden_os:
4169 helper_os("hidden_os", self.op.hidden_os, "hidden")
4171 if self.op.blacklisted_os:
4172 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4174 if self.op.master_netdev:
4175 master_params = self.cfg.GetMasterNetworkParameters()
4176 ems = self.cfg.GetUseExternalMipScript()
4177 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4178 self.cluster.master_netdev)
4179 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4181 result.Raise("Could not disable the master ip")
4182 feedback_fn("Changing master_netdev from %s to %s" %
4183 (master_params.netdev, self.op.master_netdev))
4184 self.cluster.master_netdev = self.op.master_netdev
4186 if self.op.master_netmask:
4187 master_params = self.cfg.GetMasterNetworkParameters()
4188 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4189 result = self.rpc.call_node_change_master_netmask(master_params.name,
4190 master_params.netmask,
4191 self.op.master_netmask,
4193 master_params.netdev)
4195 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4198 self.cluster.master_netmask = self.op.master_netmask
4200 self.cfg.Update(self.cluster, feedback_fn)
4202 if self.op.master_netdev:
4203 master_params = self.cfg.GetMasterNetworkParameters()
4204 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4205 self.op.master_netdev)
4206 ems = self.cfg.GetUseExternalMipScript()
4207 result = self.rpc.call_node_activate_master_ip(master_params.name,
4210 self.LogWarning("Could not re-enable the master ip on"
4211 " the master, please restart manually: %s",
4215 def _UploadHelper(lu, nodes, fname):
4216 """Helper for uploading a file and showing warnings.
4219 if os.path.exists(fname):
4220 result = lu.rpc.call_upload_file(nodes, fname)
4221 for to_node, to_result in result.items():
4222 msg = to_result.fail_msg
4224 msg = ("Copy of file %s to node %s failed: %s" %
4225 (fname, to_node, msg))
4226 lu.proc.LogWarning(msg)
4229 def _ComputeAncillaryFiles(cluster, redist):
4230 """Compute files external to Ganeti which need to be consistent.
4232 @type redist: boolean
4233 @param redist: Whether to include files which need to be redistributed
4236 # Compute files for all nodes
4238 constants.SSH_KNOWN_HOSTS_FILE,
4239 constants.CONFD_HMAC_KEY,
4240 constants.CLUSTER_DOMAIN_SECRET_FILE,
4241 constants.SPICE_CERT_FILE,
4242 constants.SPICE_CACERT_FILE,
4243 constants.RAPI_USERS_FILE,
4247 files_all.update(constants.ALL_CERT_FILES)
4248 files_all.update(ssconf.SimpleStore().GetFileList())
4250 # we need to ship at least the RAPI certificate
4251 files_all.add(constants.RAPI_CERT_FILE)
4253 if cluster.modify_etc_hosts:
4254 files_all.add(constants.ETC_HOSTS)
4256 # Files which are optional; these must:
4257 # - be present in one other category as well
4258 # - either exist or not exist on all nodes of that category (mc, vm all)
4260 constants.RAPI_USERS_FILE,
4263 # Files which should only be on master candidates
4267 files_mc.add(constants.CLUSTER_CONF_FILE)
4269 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4271 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4273 # Files which should only be on VM-capable nodes
4274 files_vm = set(filename
4275 for hv_name in cluster.enabled_hypervisors
4276 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4278 files_opt |= set(filename
4279 for hv_name in cluster.enabled_hypervisors
4280 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4282 # Filenames in each category must be unique
4283 all_files_set = files_all | files_mc | files_vm
4284 assert (len(all_files_set) ==
4285 sum(map(len, [files_all, files_mc, files_vm]))), \
4286 "Found file listed in more than one file list"
4288 # Optional files must be present in one other category
4289 assert all_files_set.issuperset(files_opt), \
4290 "Optional file not in a different required list"
4292 return (files_all, files_opt, files_mc, files_vm)
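# Illustrative sketch (not part of the original module): the assertions at the
# end of _ComputeAncillaryFiles enforce two set invariants which can be
# checked in isolation: no file may be listed in more than one of the
# "all"/"mc"/"vm" categories, and every optional file must also appear in one
# of them.  A standalone check over plain sets (hypothetical helper):
def _ExampleCheckFileCategories(files_all, files_mc, files_vm, files_opt):
  categories = [files_all, files_mc, files_vm]
  union = set().union(*categories)
  if len(union) != sum(len(cat) for cat in categories):
    raise ValueError("File listed in more than one category")
  if not union.issuperset(files_opt):
    raise ValueError("Optional file not in any required category")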
4295 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4296 """Distribute additional files which are part of the cluster configuration.
4298 ConfigWriter takes care of distributing the config and ssconf files, but
4299 there are more files which should be distributed to all nodes. This function
4300 makes sure those are copied.
4302 @param lu: calling logical unit
4303 @param additional_nodes: list of nodes not in the config to distribute to
4304 @type additional_vm: boolean
4305 @param additional_vm: whether the additional nodes are vm-capable or not
4308 # Gather target nodes
4309 cluster = lu.cfg.GetClusterInfo()
4310 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4312 online_nodes = lu.cfg.GetOnlineNodeList()
4313 vm_nodes = lu.cfg.GetVmCapableNodeList()
4315 if additional_nodes is not None:
4316 online_nodes.extend(additional_nodes)
4318 vm_nodes.extend(additional_nodes)
4320 # Never distribute to master node
4321 for nodelist in [online_nodes, vm_nodes]:
4322 if master_info.name in nodelist:
4323 nodelist.remove(master_info.name)
4326 (files_all, _, files_mc, files_vm) = \
4327 _ComputeAncillaryFiles(cluster, True)
4329 # Never re-distribute configuration file from here
4330 assert not (constants.CLUSTER_CONF_FILE in files_all or
4331 constants.CLUSTER_CONF_FILE in files_vm)
4332 assert not files_mc, "Master candidates not handled in this function"
4335 (online_nodes, files_all),
4336 (vm_nodes, files_vm),
4340 for (node_list, files) in filemap:
4342 _UploadHelper(lu, node_list, fname)
4345 class LUClusterRedistConf(NoHooksLU):
4346 """Force the redistribution of cluster configuration.
4348 This is a very simple LU.
4353 def ExpandNames(self):
4354 self.needed_locks = {
4355 locking.LEVEL_NODE: locking.ALL_SET,
4357 self.share_locks[locking.LEVEL_NODE] = 1
4359 def Exec(self, feedback_fn):
4360 """Redistribute the configuration.
4363 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4364 _RedistributeAncillaryFiles(self)
4367 class LUClusterActivateMasterIp(NoHooksLU):
4368 """Activate the master IP on the master node.
4371 def Exec(self, feedback_fn):
4372 """Activate the master IP.
4375 master_params = self.cfg.GetMasterNetworkParameters()
4376 ems = self.cfg.GetUseExternalMipScript()
4377 result = self.rpc.call_node_activate_master_ip(master_params.name,
4379 result.Raise("Could not activate the master IP")
4382 class LUClusterDeactivateMasterIp(NoHooksLU):
4383 """Deactivate the master IP on the master node.
4386 def Exec(self, feedback_fn):
4387 """Deactivate the master IP.
4390 master_params = self.cfg.GetMasterNetworkParameters()
4391 ems = self.cfg.GetUseExternalMipScript()
4392 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4394 result.Raise("Could not deactivate the master IP")
4397 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4398 """Sleep and poll for an instance's disk to sync.
4401 if not instance.disks or disks is not None and not disks:
4404 disks = _ExpandCheckDisks(instance, disks)
4407 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4409 node = instance.primary_node
4412 lu.cfg.SetDiskID(dev, node)
4414 # TODO: Convert to utils.Retry
4417 degr_retries = 10 # in seconds, as we sleep 1 second each time
4421 cumul_degraded = False
4422 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4423 msg = rstats.fail_msg
4425 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4428 raise errors.RemoteError("Can't contact node %s for mirror data,"
4429 " aborting." % node)
4432 rstats = rstats.payload
4434 for i, mstat in enumerate(rstats):
4436 lu.LogWarning("Can't compute data for node %s/%s",
4437 node, disks[i].iv_name)
4440 cumul_degraded = (cumul_degraded or
4441 (mstat.is_degraded and mstat.sync_percent is None))
4442 if mstat.sync_percent is not None:
4444 if mstat.estimated_time is not None:
4445 rem_time = ("%s remaining (estimated)" %
4446 utils.FormatSeconds(mstat.estimated_time))
4447 max_time = mstat.estimated_time
4449 rem_time = "no time estimate"
4450 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4451 (disks[i].iv_name, mstat.sync_percent, rem_time))
4453 # if we're done but degraded, let's do a few small retries, to
4454 # make sure we see a stable and not transient situation; therefore
4455 # we force restart of the loop
4456 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4457 logging.info("Degraded disks found, %d retries left", degr_retries)
4465 time.sleep(min(60, max_time))
4468 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4469 return not cumul_degraded
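# Illustrative sketch (not part of the original module): the loop in
# _WaitForSync keeps polling until the disks report completion, and once they
# are "done but degraded" it retries a few more times so that a transient
# degraded state does not abort the operation.  A stripped-down standalone
# version of that control flow, where poll_fn is a hypothetical callable
# returning (done, degraded, suggested_sleep):
def _ExampleWaitForSync(poll_fn, degr_retries=10, max_sleep=60):
  import time  # local import to keep the sketch self-contained
  while True:
    (done, degraded, suggested_sleep) = poll_fn()
    if done and degraded and degr_retries > 0:
      # done but degraded: retry briefly to rule out a transient state
      degr_retries -= 1
      time.sleep(1)
      continue
    if done:
      return not degraded
    time.sleep(min(max_sleep, suggested_sleep))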
4472 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4473 """Check that mirrors are not degraded.
4475 The ldisk parameter, if True, will change the test from the
4476 is_degraded attribute (which represents overall non-ok status for
4477 the device(s)) to the ldisk (representing the local storage status).
4480 lu.cfg.SetDiskID(dev, node)
4484 if on_primary or dev.AssembleOnSecondary():
4485 rstats = lu.rpc.call_blockdev_find(node, dev)
4486 msg = rstats.fail_msg
4488 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4490 elif not rstats.payload:
4491 lu.LogWarning("Can't find disk on node %s", node)
4495 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4497 result = result and not rstats.payload.is_degraded
4500 for child in dev.children:
4501 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
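# Illustrative sketch (not part of the original module): _CheckDiskConsistency
# only reports success when the device itself and, recursively, all of its
# children are healthy.  The same shape of recursion on a plain tree of
# dictionaries (hypothetical structure with "ok" and "children" keys):
def _ExampleTreeConsistent(dev):
  if not dev.get("ok", False):
    return False
  return all(_ExampleTreeConsistent(child)
             for child in dev.get("children", []))

# e.g. _ExampleTreeConsistent({"ok": True, "children": [{"ok": True}]})
# returns True, while any failing child makes the whole tree inconsistent.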
4506 class LUOobCommand(NoHooksLU):
4507 """Logical unit for OOB handling.
4511 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4513 def ExpandNames(self):
4514 """Gather locks we need.
4517 if self.op.node_names:
4518 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4519 lock_names = self.op.node_names
4521 lock_names = locking.ALL_SET
4523 self.needed_locks = {
4524 locking.LEVEL_NODE: lock_names,
4527 def CheckPrereq(self):
4528 """Check prerequisites.
4531 - the node exists in the configuration
4534 Any errors are signaled by raising errors.OpPrereqError.
4538 self.master_node = self.cfg.GetMasterNode()
4540 assert self.op.power_delay >= 0.0
4542 if self.op.node_names:
4543 if (self.op.command in self._SKIP_MASTER and
4544 self.master_node in self.op.node_names):
4545 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4546 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4548 if master_oob_handler:
4549 additional_text = ("run '%s %s %s' if you want to operate on the"
4550 " master regardless") % (master_oob_handler,
4554 additional_text = "it does not support out-of-band operations"
4556 raise errors.OpPrereqError(("Operating on the master node %s is not"
4557 " allowed for %s; %s") %
4558 (self.master_node, self.op.command,
4559 additional_text), errors.ECODE_INVAL)
4561 self.op.node_names = self.cfg.GetNodeList()
4562 if self.op.command in self._SKIP_MASTER:
4563 self.op.node_names.remove(self.master_node)
4565 if self.op.command in self._SKIP_MASTER:
4566 assert self.master_node not in self.op.node_names
4568 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4570 raise errors.OpPrereqError("Node %s not found" % node_name,
4573 self.nodes.append(node)
4575 if (not self.op.ignore_status and
4576 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4577 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4578 " not marked offline") % node_name,
4581 def Exec(self, feedback_fn):
4582 """Execute OOB and return result if we expect any.
4585 master_node = self.master_node
4588 for idx, node in enumerate(utils.NiceSort(self.nodes,
4589 key=lambda node: node.name)):
4590 node_entry = [(constants.RS_NORMAL, node.name)]
4591 ret.append(node_entry)
4593 oob_program = _SupportsOob(self.cfg, node)
4596 node_entry.append((constants.RS_UNAVAIL, None))
4599 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4600 self.op.command, oob_program, node.name)
4601 result = self.rpc.call_run_oob(master_node, oob_program,
4602 self.op.command, node.name,
4606 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4607 node.name, result.fail_msg)
4608 node_entry.append((constants.RS_NODATA, None))
4611 self._CheckPayload(result)
4612 except errors.OpExecError, err:
4613 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4615 node_entry.append((constants.RS_NODATA, None))
4617 if self.op.command == constants.OOB_HEALTH:
4618 # For health we should log important events
4619 for item, status in result.payload:
4620 if status in [constants.OOB_STATUS_WARNING,
4621 constants.OOB_STATUS_CRITICAL]:
4622 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4623 item, node.name, status)
4625 if self.op.command == constants.OOB_POWER_ON:
4627 elif self.op.command == constants.OOB_POWER_OFF:
4628 node.powered = False
4629 elif self.op.command == constants.OOB_POWER_STATUS:
4630 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4631 if powered != node.powered:
4632 logging.warning(("Recorded power state (%s) of node '%s' does not"
4633 " match actual power state (%s)"), node.powered,
4636 # For configuration changing commands we should update the node
4637 if self.op.command in (constants.OOB_POWER_ON,
4638 constants.OOB_POWER_OFF):
4639 self.cfg.Update(node, feedback_fn)
4641 node_entry.append((constants.RS_NORMAL, result.payload))
4643 if (self.op.command == constants.OOB_POWER_ON and
4644 idx < len(self.nodes) - 1):
4645 time.sleep(self.op.power_delay)
4649 def _CheckPayload(self, result):
4650 """Checks if the payload is valid.
4652 @param result: RPC result
4653 @raises errors.OpExecError: If payload is not valid
4657 if self.op.command == constants.OOB_HEALTH:
4658 if not isinstance(result.payload, list):
4659 errs.append("command 'health' is expected to return a list but got %s" %
4660 type(result.payload))
4662 for item, status in result.payload:
4663 if status not in constants.OOB_STATUSES:
4664 errs.append("health item '%s' has invalid status '%s'" %
4667 if self.op.command == constants.OOB_POWER_STATUS:
4668 if not isinstance(result.payload, dict):
4669 errs.append("power-status is expected to return a dict but got %s" %
4670 type(result.payload))
4672 if self.op.command in [
4673 constants.OOB_POWER_ON,
4674 constants.OOB_POWER_OFF,
4675 constants.OOB_POWER_CYCLE,
4677 if result.payload is not None:
4678 errs.append("%s is expected to not return payload but got '%s'" %
4679 (self.op.command, result.payload))
4682 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4683 utils.CommaJoin(errs))
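# Illustrative sketch (not part of the original module): _CheckPayload
# validates the shape of the OOB result per command.  The expectations can
# also be written as a small lookup; the literal command names below are
# illustrative stand-ins for the constants used above:
def _ExampleExpectedOobPayloadType(command):
  """Return the expected payload type for an OOB command."""
  if command == "health":
    return list        # list of (item, status) pairs
  if command == "power-status":
    return dict        # mapping with the "powered" flag
  # power-on, power-off and power-cycle are expected to return no payload
  return type(None)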
4686 class _OsQuery(_QueryBase):
4687 FIELDS = query.OS_FIELDS
4689 def ExpandNames(self, lu):
4690 # Lock all nodes in shared mode
4691 # Temporary removal of locks, should be reverted later
4692 # TODO: reintroduce locks when they are lighter-weight
4693 lu.needed_locks = {}
4694 #self.share_locks[locking.LEVEL_NODE] = 1
4695 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4697 # The following variables interact with _QueryBase._GetNames
4699 self.wanted = self.names
4701 self.wanted = locking.ALL_SET
4703 self.do_locking = self.use_locking
4705 def DeclareLocks(self, lu, level):
4709 def _DiagnoseByOS(rlist):
4710 """Remaps a per-node return list into an a per-os per-node dictionary
4712 @param rlist: a map with node names as keys and OS objects as values
4715 @return: a dictionary with osnames as keys and as value another
4716 map, with nodes as keys and tuples of (path, status, diagnose,
4717 variants, parameters, api_versions) as values, eg::
4719 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4720 (/srv/..., False, "invalid api")],
4721 "node2": [(/srv/..., True, "", [], [])]}
4726 # we build here the list of nodes that didn't fail the RPC (at RPC
4727 # level), so that nodes with a non-responding node daemon don't
4728 # make all OSes invalid
4729 good_nodes = [node_name for node_name in rlist
4730 if not rlist[node_name].fail_msg]
4731 for node_name, nr in rlist.items():
4732 if nr.fail_msg or not nr.payload:
4734 for (name, path, status, diagnose, variants,
4735 params, api_versions) in nr.payload:
4736 if name not in all_os:
4737 # build a list of nodes for this os containing empty lists
4738 # for each node in node_list
4740 for nname in good_nodes:
4741 all_os[name][nname] = []
4742 # convert params from [name, help] to (name, help)
4743 params = [tuple(v) for v in params]
4744 all_os[name][node_name].append((path, status, diagnose,
4745 variants, params, api_versions))
4748 def _GetQueryData(self, lu):
4749 """Computes the list of nodes and their attributes.
4752 # Locking is not used
4753 assert not (compat.any(lu.glm.is_owned(level)
4754 for level in locking.LEVELS
4755 if level != locking.LEVEL_CLUSTER) or
4756 self.do_locking or self.use_locking)
4758 valid_nodes = [node.name
4759 for node in lu.cfg.GetAllNodesInfo().values()
4760 if not node.offline and node.vm_capable]
4761 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4762 cluster = lu.cfg.GetClusterInfo()
4766 for (os_name, os_data) in pol.items():
4767 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4768 hidden=(os_name in cluster.hidden_os),
4769 blacklisted=(os_name in cluster.blacklisted_os))
4773 api_versions = set()
4775 for idx, osl in enumerate(os_data.values()):
4776 info.valid = bool(info.valid and osl and osl[0][1])
4780 (node_variants, node_params, node_api) = osl[0][3:6]
4783 variants.update(node_variants)
4784 parameters.update(node_params)
4785 api_versions.update(node_api)
4787 # Filter out inconsistent values
4788 variants.intersection_update(node_variants)
4789 parameters.intersection_update(node_params)
4790 api_versions.intersection_update(node_api)
4792 info.variants = list(variants)
4793 info.parameters = list(parameters)
4794 info.api_versions = list(api_versions)
4796 data[os_name] = info
4798 # Prepare data in requested order
4799 return [data[name] for name in self._GetNames(lu, pol.keys(), None) if name in data]
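# Illustrative sketch (not part of the original module): the loop above keeps
# only the OS variants, parameters and API versions that every node agrees
# on, seeding the sets from the first node and intersecting with each later
# one.  The same idea as a standalone helper over plain value lists
# (hypothetical name):
def _ExampleCommonValues(per_node_values):
  common = None
  for values in per_node_values:
    if common is None:
      common = set(values)
    else:
      common.intersection_update(values)
  return common or set()

# e.g. _ExampleCommonValues([["a", "b"], ["b", "c"]]) yields set(["b"]).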
4803 class LUOsDiagnose(NoHooksLU):
4804 """Logical unit for OS diagnose/query.
4810 def _BuildFilter(fields, names):
4811 """Builds a filter for querying OSes.
4814 name_filter = qlang.MakeSimpleFilter("name", names)
4816 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4817 # respective field is not requested
4818 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4819 for fname in ["hidden", "blacklisted"]
4820 if fname not in fields]
4821 if "valid" not in fields:
4822 status_filter.append([qlang.OP_TRUE, "valid"])
4825 status_filter.insert(0, qlang.OP_AND)
4827 status_filter = None
4829 if name_filter and status_filter:
4830 return [qlang.OP_AND, name_filter, status_filter]
4834 return status_filter
4836 def CheckArguments(self):
4837 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4838 self.op.output_fields, False)
4840 def ExpandNames(self):
4841 self.oq.ExpandNames(self)
4843 def Exec(self, feedback_fn):
4844 return self.oq.OldStyleQuery(self)
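# Illustrative sketch (not part of the original module): _BuildFilter above
# assembles a nested-list query filter that hides hidden, blacklisted and
# invalid OSes unless the caller asked for those fields explicitly.  The same
# construction of the status part, with the operators written as plain
# strings instead of the qlang constants (hypothetical stand-ins):
def _ExampleBuildOsStatusFilter(fields):
  clauses = [["!", ["?", fname]]
             for fname in ("hidden", "blacklisted")
             if fname not in fields]
  if "valid" not in fields:
    clauses.append(["?", "valid"])
  if clauses:
    clauses.insert(0, "&")
    return clauses
  return None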
4847 class LUNodeRemove(LogicalUnit):
4848 """Logical unit for removing a node.
4851 HPATH = "node-remove"
4852 HTYPE = constants.HTYPE_NODE
4854 def BuildHooksEnv(self):
4859 "OP_TARGET": self.op.node_name,
4860 "NODE_NAME": self.op.node_name,
4863 def BuildHooksNodes(self):
4864 """Build hooks nodes.
4866 This doesn't run on the target node in the pre phase as a failed
4867 node would then be impossible to remove.
4870 all_nodes = self.cfg.GetNodeList()
4872 all_nodes.remove(self.op.node_name)
4875 return (all_nodes, all_nodes)
4877 def CheckPrereq(self):
4878 """Check prerequisites.
4881 - the node exists in the configuration
4882 - it does not have primary or secondary instances
4883 - it's not the master
4885 Any errors are signaled by raising errors.OpPrereqError.
4888 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4889 node = self.cfg.GetNodeInfo(self.op.node_name)
4890 assert node is not None
4892 masternode = self.cfg.GetMasterNode()
4893 if node.name == masternode:
4894 raise errors.OpPrereqError("Node is the master node, failover to another"
4895 " node is required", errors.ECODE_INVAL)
4897 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4898 if node.name in instance.all_nodes:
4899 raise errors.OpPrereqError("Instance %s is still running on the node,"
4900 " please remove first" % instance_name,
4902 self.op.node_name = node.name
4905 def Exec(self, feedback_fn):
4906 """Removes the node from the cluster.
4910 logging.info("Stopping the node daemon and removing configs from node %s",
4913 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4915 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4918 # Promote nodes to master candidate as needed
4919 _AdjustCandidatePool(self, exceptions=[node.name])
4920 self.context.RemoveNode(node.name)
4922 # Run post hooks on the node before it's removed
4923 _RunPostHook(self, node.name)
4925 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4926 msg = result.fail_msg
4928 self.LogWarning("Errors encountered on the remote node while leaving"
4929 " the cluster: %s", msg)
4931 # Remove node from our /etc/hosts
4932 if self.cfg.GetClusterInfo().modify_etc_hosts:
4933 master_node = self.cfg.GetMasterNode()
4934 result = self.rpc.call_etc_hosts_modify(master_node,
4935 constants.ETC_HOSTS_REMOVE,
4937 result.Raise("Can't update hosts file with new host data")
4938 _RedistributeAncillaryFiles(self)
4941 class _NodeQuery(_QueryBase):
4942 FIELDS = query.NODE_FIELDS
4944 def ExpandNames(self, lu):
4945 lu.needed_locks = {}
4946 lu.share_locks = _ShareAll()
4949 self.wanted = _GetWantedNodes(lu, self.names)
4951 self.wanted = locking.ALL_SET
4953 self.do_locking = (self.use_locking and
4954 query.NQ_LIVE in self.requested_data)
4957 # If any non-static field is requested we need to lock the nodes
4958 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4960 def DeclareLocks(self, lu, level):
4963 def _GetQueryData(self, lu):
4964 """Computes the list of nodes and their attributes.
4967 all_info = lu.cfg.GetAllNodesInfo()
4969 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4971 # Gather data as requested
4972 if query.NQ_LIVE in self.requested_data:
4973 # filter out non-vm_capable nodes
4974 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4976 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4977 [lu.cfg.GetHypervisorType()])
4978 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4979 for (name, nresult) in node_data.items()
4980 if not nresult.fail_msg and nresult.payload)
4984 if query.NQ_INST in self.requested_data:
4985 node_to_primary = dict([(name, set()) for name in nodenames])
4986 node_to_secondary = dict([(name, set()) for name in nodenames])
4988 inst_data = lu.cfg.GetAllInstancesInfo()
4990 for inst in inst_data.values():
4991 if inst.primary_node in node_to_primary:
4992 node_to_primary[inst.primary_node].add(inst.name)
4993 for secnode in inst.secondary_nodes:
4994 if secnode in node_to_secondary:
4995 node_to_secondary[secnode].add(inst.name)
4997 node_to_primary = None
4998 node_to_secondary = None
5000 if query.NQ_OOB in self.requested_data:
5001 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5002 for name, node in all_info.iteritems())
5006 if query.NQ_GROUP in self.requested_data:
5007 groups = lu.cfg.GetAllNodeGroupsInfo()
5011 return query.NodeQueryData([all_info[name] for name in nodenames],
5012 live_data, lu.cfg.GetMasterNode(),
5013 node_to_primary, node_to_secondary, groups,
5014 oob_support, lu.cfg.GetClusterInfo())
5017 class LUNodeQuery(NoHooksLU):
5018 """Logical unit for querying nodes.
5021 # pylint: disable=W0142
5024 def CheckArguments(self):
5025 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5026 self.op.output_fields, self.op.use_locking)
5028 def ExpandNames(self):
5029 self.nq.ExpandNames(self)
5031 def DeclareLocks(self, level):
5032 self.nq.DeclareLocks(self, level)
5034 def Exec(self, feedback_fn):
5035 return self.nq.OldStyleQuery(self)
5038 class LUNodeQueryvols(NoHooksLU):
5039 """Logical unit for getting volumes on node(s).
5043 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5044 _FIELDS_STATIC = utils.FieldSet("node")
5046 def CheckArguments(self):
5047 _CheckOutputFields(static=self._FIELDS_STATIC,
5048 dynamic=self._FIELDS_DYNAMIC,
5049 selected=self.op.output_fields)
5051 def ExpandNames(self):
5052 self.share_locks = _ShareAll()
5053 self.needed_locks = {}
5055 if not self.op.nodes:
5056 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5058 self.needed_locks[locking.LEVEL_NODE] = \
5059 _GetWantedNodes(self, self.op.nodes)
5061 def Exec(self, feedback_fn):
5062 """Computes the list of nodes and their attributes.
5065 nodenames = self.owned_locks(locking.LEVEL_NODE)
5066 volumes = self.rpc.call_node_volumes(nodenames)
5068 ilist = self.cfg.GetAllInstancesInfo()
5069 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5072 for node in nodenames:
5073 nresult = volumes[node]
5076 msg = nresult.fail_msg
5078 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5081 node_vols = sorted(nresult.payload,
5082 key=operator.itemgetter("dev"))
5084 for vol in node_vols:
5086 for field in self.op.output_fields:
5089 elif field == "phys":
5093 elif field == "name":
5095 elif field == "size":
5096 val = int(float(vol["size"]))
5097 elif field == "instance":
5098 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5100 raise errors.ParameterError(field)
5101 node_output.append(str(val))
5103 output.append(node_output)
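# Illustrative sketch (not part of the original module): the volume listing
# above resolves the owning instance of each logical volume through a map
# keyed by (node, "vg/lv").  Building and querying such a map from plain
# tuples (hypothetical input format; the real code derives it from the
# instances' disk objects):
def _ExampleMapVolumesToInstances(instance_disks):
  """instance_disks: iterable of (instance_name, node, vg, lv) tuples."""
  vol2inst = {}
  for (inst_name, node, vg, lv) in instance_disks:
    vol2inst[(node, "%s/%s" % (vg, lv))] = inst_name
  return vol2inst

# e.g. looking up ("node1", "xenvg/disk0") in the result of
# _ExampleMapVolumesToInstances([("inst1", "node1", "xenvg", "disk0")])
# yields "inst1"; unknown volumes fall back to "-" via dict.get().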
5108 class LUNodeQueryStorage(NoHooksLU):
5109 """Logical unit for getting information on storage units on node(s).
5112 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5115 def CheckArguments(self):
5116 _CheckOutputFields(static=self._FIELDS_STATIC,
5117 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5118 selected=self.op.output_fields)
5120 def ExpandNames(self):
5121 self.share_locks = _ShareAll()
5122 self.needed_locks = {}
5125 self.needed_locks[locking.LEVEL_NODE] = \
5126 _GetWantedNodes(self, self.op.nodes)
5128 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5130 def Exec(self, feedback_fn):
5131 """Computes the list of nodes and their attributes.
5134 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5136 # Always get name to sort by
5137 if constants.SF_NAME in self.op.output_fields:
5138 fields = self.op.output_fields[:]
5140 fields = [constants.SF_NAME] + self.op.output_fields
5142 # Never ask for node or type as it's only known to the LU
5143 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5144 while extra in fields:
5145 fields.remove(extra)
5147 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5148 name_idx = field_idx[constants.SF_NAME]
5150 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5151 data = self.rpc.call_storage_list(self.nodes,
5152 self.op.storage_type, st_args,
5153 self.op.name, fields)
5157 for node in utils.NiceSort(self.nodes):
5158 nresult = data[node]
5162 msg = nresult.fail_msg
5164 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5167 rows = dict([(row[name_idx], row) for row in nresult.payload])
5169 for name in utils.NiceSort(rows.keys()):
5174 for field in self.op.output_fields:
5175 if field == constants.SF_NODE:
5177 elif field == constants.SF_TYPE:
5178 val = self.op.storage_type
5179 elif field in field_idx:
5180 val = row[field_idx[field]]
5182 raise errors.ParameterError(field)
5191 class _InstanceQuery(_QueryBase):
5192 FIELDS = query.INSTANCE_FIELDS
5194 def ExpandNames(self, lu):
5195 lu.needed_locks = {}
5196 lu.share_locks = _ShareAll()
5199 self.wanted = _GetWantedInstances(lu, self.names)
5201 self.wanted = locking.ALL_SET
5203 self.do_locking = (self.use_locking and
5204 query.IQ_LIVE in self.requested_data)
5206 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5207 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5208 lu.needed_locks[locking.LEVEL_NODE] = []
5209 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5211 self.do_grouplocks = (self.do_locking and
5212 query.IQ_NODES in self.requested_data)
5214 def DeclareLocks(self, lu, level):
5216 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5217 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5219 # Lock all groups used by instances optimistically; this requires going
5220 # via the node before it's locked, requiring verification later on
5221 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5223 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5224 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5225 elif level == locking.LEVEL_NODE:
5226 lu._LockInstancesNodes() # pylint: disable=W0212
5229 def _CheckGroupLocks(lu):
5230 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5231 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5233 # Check if node groups for locked instances are still correct
5234 for instance_name in owned_instances:
5235 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5237 def _GetQueryData(self, lu):
5238 """Computes the list of instances and their attributes.
5241 if self.do_grouplocks:
5242 self._CheckGroupLocks(lu)
5244 cluster = lu.cfg.GetClusterInfo()
5245 all_info = lu.cfg.GetAllInstancesInfo()
5247 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5249 instance_list = [all_info[name] for name in instance_names]
5250 nodes = frozenset(itertools.chain(*(inst.all_nodes
5251 for inst in instance_list)))
5252 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5255 wrongnode_inst = set()
5257 # Gather data as requested
5258 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5260 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5262 result = node_data[name]
5264 # offline nodes will be in both lists
5265 assert result.fail_msg
5266 offline_nodes.append(name)
5268 bad_nodes.append(name)
5269 elif result.payload:
5270 for inst in result.payload:
5271 if inst in all_info:
5272 if all_info[inst].primary_node == name:
5273 live_data.update(result.payload)
5275 wrongnode_inst.add(inst)
5277 # orphan instance; we don't list it here as we don't
5278 # handle this case yet in the output of instance listing
5279 logging.warning("Orphan instance '%s' found on node %s",
5281 # else no instance is alive
5285 if query.IQ_DISKUSAGE in self.requested_data:
5286 disk_usage = dict((inst.name,
5287 _ComputeDiskSize(inst.disk_template,
5288 [{constants.IDISK_SIZE: disk.size}
5289 for disk in inst.disks]))
5290 for inst in instance_list)
5294 if query.IQ_CONSOLE in self.requested_data:
5296 for inst in instance_list:
5297 if inst.name in live_data:
5298 # Instance is running
5299 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5301 consinfo[inst.name] = None
5302 assert set(consinfo.keys()) == set(instance_names)
5306 if query.IQ_NODES in self.requested_data:
5307 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5309 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5310 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5311 for uuid in set(map(operator.attrgetter("group"),
5317 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5318 disk_usage, offline_nodes, bad_nodes,
5319 live_data, wrongnode_inst, consinfo, nodes, groups)
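# Illustrative sketch (not part of the original module): the live-data
# gathering above sorts the per-node RPC answers into offline nodes, nodes
# that returned an error, and instances that are actually running.  A
# simplified standalone classification over plain result records
# (hypothetical structure with "offline", "failed" and "instances" keys;
# note that the LU above also counts offline nodes among the bad nodes):
def _ExampleClassifyNodeResults(node_results):
  offline, bad, live = [], [], {}
  for (node, res) in node_results.items():
    if res.get("offline"):
      offline.append(node)
    elif res.get("failed"):
      bad.append(node)
    else:
      live.update(res.get("instances", {}))
  return (offline, bad, live)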
5323 class LUQuery(NoHooksLU):
5324 """Query for resources/items of a certain kind.
5327 # pylint: disable=W0142
5330 def CheckArguments(self):
5331 qcls = _GetQueryImplementation(self.op.what)
5333 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5335 def ExpandNames(self):
5336 self.impl.ExpandNames(self)
5338 def DeclareLocks(self, level):
5339 self.impl.DeclareLocks(self, level)
5341 def Exec(self, feedback_fn):
5342 return self.impl.NewStyleQuery(self)
5345 class LUQueryFields(NoHooksLU):
5346 """Query for resources/items of a certain kind.
5349 # pylint: disable=W0142
5352 def CheckArguments(self):
5353 self.qcls = _GetQueryImplementation(self.op.what)
5355 def ExpandNames(self):
5356 self.needed_locks = {}
5358 def Exec(self, feedback_fn):
5359 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5362 class LUNodeModifyStorage(NoHooksLU):
5363 """Logical unit for modifying a storage volume on a node.
5368 def CheckArguments(self):
5369 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5371 storage_type = self.op.storage_type
5374 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5376 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5377 " modified" % storage_type,
5380 diff = set(self.op.changes.keys()) - modifiable
5382 raise errors.OpPrereqError("The following fields can not be modified for"
5383 " storage units of type '%s': %r" %
5384 (storage_type, list(diff)),
5387 def ExpandNames(self):
5388 self.needed_locks = {
5389 locking.LEVEL_NODE: self.op.node_name,
5392 def Exec(self, feedback_fn):
5393 """Computes the list of nodes and their attributes.
5396 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5397 result = self.rpc.call_storage_modify(self.op.node_name,
5398 self.op.storage_type, st_args,
5399 self.op.name, self.op.changes)
5400 result.Raise("Failed to modify storage unit '%s' on %s" %
5401 (self.op.name, self.op.node_name))
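# Illustrative sketch (not part of the original module): CheckArguments above
# rejects changes touching fields that are not modifiable for the given
# storage type; the underlying test is a plain set difference (hypothetical
# helper and field names):
def _ExampleCheckModifiableFields(changes, modifiable):
  diff = set(changes) - set(modifiable)
  if diff:
    raise ValueError("Fields not modifiable: %s" % ", ".join(sorted(diff)))

# e.g. _ExampleCheckModifiableFields({"allocatable": True}, ["allocatable"])
# passes silently, while any unknown field raises ValueError.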
5404 class LUNodeAdd(LogicalUnit):
5405 """Logical unit for adding node to the cluster.
5409 HTYPE = constants.HTYPE_NODE
5410 _NFLAGS = ["master_capable", "vm_capable"]
5412 def CheckArguments(self):
5413 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5414 # validate/normalize the node name
5415 self.hostname = netutils.GetHostname(name=self.op.node_name,
5416 family=self.primary_ip_family)
5417 self.op.node_name = self.hostname.name
5419 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5420 raise errors.OpPrereqError("Cannot readd the master node",
5423 if self.op.readd and self.op.group:
5424 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5425 " being readded", errors.ECODE_INVAL)
5427 def BuildHooksEnv(self):
5430 This will run on all nodes before, and on all nodes + the new node after.
5434 "OP_TARGET": self.op.node_name,
5435 "NODE_NAME": self.op.node_name,
5436 "NODE_PIP": self.op.primary_ip,
5437 "NODE_SIP": self.op.secondary_ip,
5438 "MASTER_CAPABLE": str(self.op.master_capable),
5439 "VM_CAPABLE": str(self.op.vm_capable),
5442 def BuildHooksNodes(self):
5443 """Build hooks nodes.
5446 # Exclude added node
5447 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5448 post_nodes = pre_nodes + [self.op.node_name, ]
5450 return (pre_nodes, post_nodes)
5452 def CheckPrereq(self):
5453 """Check prerequisites.
5456 - the new node is not already in the config
5458 - its parameters (single/dual homed) matches the cluster
5460 Any errors are signaled by raising errors.OpPrereqError.
5464 hostname = self.hostname
5465 node = hostname.name
5466 primary_ip = self.op.primary_ip = hostname.ip
5467 if self.op.secondary_ip is None:
5468 if self.primary_ip_family == netutils.IP6Address.family:
5469 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5470 " IPv4 address must be given as secondary",
5472 self.op.secondary_ip = primary_ip
5474 secondary_ip = self.op.secondary_ip
5475 if not netutils.IP4Address.IsValid(secondary_ip):
5476 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5477 " address" % secondary_ip, errors.ECODE_INVAL)
5479 node_list = cfg.GetNodeList()
5480 if not self.op.readd and node in node_list:
5481 raise errors.OpPrereqError("Node %s is already in the configuration" %
5482 node, errors.ECODE_EXISTS)
5483 elif self.op.readd and node not in node_list:
5484 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5487 self.changed_primary_ip = False
5489 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5490 if self.op.readd and node == existing_node_name:
5491 if existing_node.secondary_ip != secondary_ip:
5492 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5493 " address configuration as before",
5495 if existing_node.primary_ip != primary_ip:
5496 self.changed_primary_ip = True
5500 if (existing_node.primary_ip == primary_ip or
5501 existing_node.secondary_ip == primary_ip or
5502 existing_node.primary_ip == secondary_ip or
5503 existing_node.secondary_ip == secondary_ip):
5504 raise errors.OpPrereqError("New node ip address(es) conflict with"
5505 " existing node %s" % existing_node.name,
5506 errors.ECODE_NOTUNIQUE)
5508 # After this 'if' block, None is no longer a valid value for the
5509 # _capable op attributes
5511 old_node = self.cfg.GetNodeInfo(node)
5512 assert old_node is not None, "Can't retrieve locked node %s" % node
5513 for attr in self._NFLAGS:
5514 if getattr(self.op, attr) is None:
5515 setattr(self.op, attr, getattr(old_node, attr))
5517 for attr in self._NFLAGS:
5518 if getattr(self.op, attr) is None:
5519 setattr(self.op, attr, True)
5521 if self.op.readd and not self.op.vm_capable:
5522 pri, sec = cfg.GetNodeInstances(node)
5524 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5525 " flag set to false, but it already holds"
5526 " instances" % node,
5529 # check that the type of the node (single versus dual homed) is the
5530 # same as for the master
5531 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5532 master_singlehomed = myself.secondary_ip == myself.primary_ip
5533 newbie_singlehomed = secondary_ip == primary_ip
5534 if master_singlehomed != newbie_singlehomed:
5535 if master_singlehomed:
5536 raise errors.OpPrereqError("The master has no secondary ip but the"
5537 " new node has one",
5540 raise errors.OpPrereqError("The master has a secondary ip but the"
5541 " new node doesn't have one",
5544 # checks reachability
5545 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5546 raise errors.OpPrereqError("Node not reachable by ping",
5547 errors.ECODE_ENVIRON)
5549 if not newbie_singlehomed:
5550 # check reachability from my secondary ip to newbie's secondary ip
5551 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5552 source=myself.secondary_ip):
5553 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5554 " based ping to node daemon port",
5555 errors.ECODE_ENVIRON)
5562 if self.op.master_capable:
5563 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5565 self.master_candidate = False
5568 self.new_node = old_node
5570 node_group = cfg.LookupNodeGroup(self.op.group)
5571 self.new_node = objects.Node(name=node,
5572 primary_ip=primary_ip,
5573 secondary_ip=secondary_ip,
5574 master_candidate=self.master_candidate,
5575 offline=False, drained=False,
5578 if self.op.ndparams:
5579 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5581 if self.op.hv_state:
5582 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5584 if self.op.disk_state:
5585 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5587 def Exec(self, feedback_fn):
5588 """Adds the new node to the cluster.
5591 new_node = self.new_node
5592 node = new_node.name
5594 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5597 # We are adding a new node, so we assume it's powered
5598 new_node.powered = True
5600 # for re-adds, reset the offline/drained/master-candidate flags;
5601 # we need to reset here, otherwise offline would prevent RPC calls
5602 # later in the procedure; this also means that if the re-add
5603 # fails, we are left with a non-offlined, broken node
5605 new_node.drained = new_node.offline = False # pylint: disable=W0201
5606 self.LogInfo("Readding a node, the offline/drained flags were reset")
5607 # if we demote the node, we do cleanup later in the procedure
5608 new_node.master_candidate = self.master_candidate
5609 if self.changed_primary_ip:
5610 new_node.primary_ip = self.op.primary_ip
5612 # copy the master/vm_capable flags
5613 for attr in self._NFLAGS:
5614 setattr(new_node, attr, getattr(self.op, attr))
5616 # notify the user about any possible mc promotion
5617 if new_node.master_candidate:
5618 self.LogInfo("Node will be a master candidate")
5620 if self.op.ndparams:
5621 new_node.ndparams = self.op.ndparams
5623 new_node.ndparams = {}
5625 if self.op.hv_state:
5626 new_node.hv_state_static = self.new_hv_state
5628 if self.op.disk_state:
5629 new_node.disk_state_static = self.new_disk_state
5631 # check connectivity
5632 result = self.rpc.call_version([node])[node]
5633 result.Raise("Can't get version information from node %s" % node)
5634 if constants.PROTOCOL_VERSION == result.payload:
5635 logging.info("Communication to node %s fine, sw version %s match",
5636 node, result.payload)
5638 raise errors.OpExecError("Version mismatch master version %s,"
5639 " node version %s" %
5640 (constants.PROTOCOL_VERSION, result.payload))
5642 # Add node to our /etc/hosts, and add key to known_hosts
5643 if self.cfg.GetClusterInfo().modify_etc_hosts:
5644 master_node = self.cfg.GetMasterNode()
5645 result = self.rpc.call_etc_hosts_modify(master_node,
5646 constants.ETC_HOSTS_ADD,
5649 result.Raise("Can't update hosts file with new host data")
5651 if new_node.secondary_ip != new_node.primary_ip:
5652 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5655 node_verify_list = [self.cfg.GetMasterNode()]
5656 node_verify_param = {
5657 constants.NV_NODELIST: ([node], {}),
5658 # TODO: do a node-net-test as well?
5661 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5662 self.cfg.GetClusterName())
5663 for verifier in node_verify_list:
5664 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5665 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5667 for failed in nl_payload:
5668 feedback_fn("ssh/hostname verification failed"
5669 " (checking from %s): %s" %
5670 (verifier, nl_payload[failed]))
5671 raise errors.OpExecError("ssh/hostname verification failed")
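# From here on, re-adding an existing node and adding a brand new one
# diverge: re-adds go through ReaddNode and may demote the node from
# master candidate, while fresh adds also push the ancillary files to the
# new node itself and register it via AddNode under the current
# entity-creation id.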
5674 _RedistributeAncillaryFiles(self)
5675 self.context.ReaddNode(new_node)
5676 # make sure we redistribute the config
5677 self.cfg.Update(new_node, feedback_fn)
5678 # and make sure the new node will not have old files around
5679 if not new_node.master_candidate:
5680 result = self.rpc.call_node_demote_from_mc(new_node.name)
5681 msg = result.fail_msg
5683 self.LogWarning("Node failed to demote itself from master"
5684 " candidate status: %s" % msg)
5686 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5687 additional_vm=self.op.vm_capable)
5688 self.context.AddNode(new_node, self.proc.GetECId())
5691 class LUNodeSetParams(LogicalUnit):
5692 """Modifies the parameters of a node.
5694 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5695 to the node role (as _ROLE_*)
5696 @cvar _R2F: a dictionary from node role to tuples of flags
5697 @cvar _FLAGS: a list of attribute names corresponding to the flags
5700 HPATH = "node-modify"
5701 HTYPE = constants.HTYPE_NODE
5703 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5705 (True, False, False): _ROLE_CANDIDATE,
5706 (False, True, False): _ROLE_DRAINED,
5707 (False, False, True): _ROLE_OFFLINE,
5708 (False, False, False): _ROLE_REGULAR,
5710 _R2F = dict((v, k) for k, v in _F2R.items())
5711 _FLAGS = ["master_candidate", "drained", "offline"]
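# Example: a node with flags (master_candidate=True, drained=False,
# offline=False) maps to _ROLE_CANDIDATE via _F2R, and _R2F inverts the
# mapping so Exec can turn the chosen role back into a flag triple.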
5713 def CheckArguments(self):
5714 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5715 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5716 self.op.master_capable, self.op.vm_capable,
5717 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5719 if all_mods.count(None) == len(all_mods):
5720 raise errors.OpPrereqError("Please pass at least one modification",
5722 if all_mods.count(True) > 1:
5723 raise errors.OpPrereqError("Can't set the node into more than one"
5724 " state at the same time",
5727 # Boolean value that tells us whether we might be demoting from MC
5728 self.might_demote = (self.op.master_candidate == False or
5729 self.op.offline == True or
5730 self.op.drained == True or
5731 self.op.master_capable == False)
5733 if self.op.secondary_ip:
5734 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5735 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5736 " address" % self.op.secondary_ip,
5739 self.lock_all = self.op.auto_promote and self.might_demote
5740 self.lock_instances = self.op.secondary_ip is not None
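# lock_all: auto-promotion of other nodes may be needed when this change
# can demote a master candidate; lock_instances: changing the secondary IP
# requires looking at the instances mirrored on this node.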
5742 def _InstanceFilter(self, instance):
5743 """Filter for getting affected instances.
5746 return (instance.disk_template in constants.DTS_INT_MIRROR and
5747 self.op.node_name in instance.all_nodes)
5749 def ExpandNames(self):
5751 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5753 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5755 # Since modifying a node can have severe effects on currently running
5756 # operations, the resource lock is acquired at least in shared mode
5757 self.needed_locks[locking.LEVEL_NODE_RES] = \
5758 self.needed_locks[locking.LEVEL_NODE]
5760 # Get node resource and instance locks in shared mode; they are not used
5761 # for anything but read-only access
5762 self.share_locks[locking.LEVEL_NODE_RES] = 1
5763 self.share_locks[locking.LEVEL_INSTANCE] = 1
5765 if self.lock_instances:
5766 self.needed_locks[locking.LEVEL_INSTANCE] = \
5767 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5769 def BuildHooksEnv(self):
5772 This runs on the master node.
5776 "OP_TARGET": self.op.node_name,
5777 "MASTER_CANDIDATE": str(self.op.master_candidate),
5778 "OFFLINE": str(self.op.offline),
5779 "DRAINED": str(self.op.drained),
5780 "MASTER_CAPABLE": str(self.op.master_capable),
5781 "VM_CAPABLE": str(self.op.vm_capable),
5784 def BuildHooksNodes(self):
5785 """Build hooks nodes.
5788 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5791 def CheckPrereq(self):
5792 """Check prerequisites.
5794 This only checks the instance list against the existing names.
5797 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5799 if self.lock_instances:
5800 affected_instances = \
5801 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5803 # Verify instance locks
5804 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5805 wanted_instances = frozenset(affected_instances.keys())
5806 if wanted_instances - owned_instances:
5807 raise errors.OpPrereqError("Instances affected by changing node %s's"
5808 " secondary IP address have changed since"
5809 " locks were acquired, wanted '%s', have"
5810 " '%s'; retry the operation" %
5812 utils.CommaJoin(wanted_instances),
5813 utils.CommaJoin(owned_instances)),
5816 affected_instances = None
5818 if (self.op.master_candidate is not None or
5819 self.op.drained is not None or
5820 self.op.offline is not None):
5821 # we can't change the master's node flags
5822 if self.op.node_name == self.cfg.GetMasterNode():
5823 raise errors.OpPrereqError("The master role can be changed"
5824 " only via master-failover",
5827 if self.op.master_candidate and not node.master_capable:
5828 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5829 " it a master candidate" % node.name,
5832 if self.op.vm_capable == False:
5833 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5835 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5836 " the vm_capable flag" % node.name,
5839 if node.master_candidate and self.might_demote and not self.lock_all:
5840 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5841 # check if, after removing the current node, we're missing master candidates
5843 (mc_remaining, mc_should, _) = \
5844 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5845 if mc_remaining < mc_should:
5846 raise errors.OpPrereqError("Not enough master candidates, please"
5847 " pass auto promote option to allow"
5848 " promotion", errors.ECODE_STATE)
5850 self.old_flags = old_flags = (node.master_candidate,
5851 node.drained, node.offline)
5852 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5853 self.old_role = old_role = self._F2R[old_flags]
5855 # Check for ineffective changes
5856 for attr in self._FLAGS:
5857 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5858 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5859 setattr(self.op, attr, None)
5861 # Past this point, any flag change to False means a transition
5862 # away from the respective state, as only real changes are kept
5864 # TODO: We might query the real power state if it supports OOB
5865 if _SupportsOob(self.cfg, node):
5866 if self.op.offline is False and not (node.powered or
5867 self.op.powered == True):
5868 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5869 " offline status can be reset") %
5871 elif self.op.powered is not None:
5872 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5873 " as it does not support out-of-band"
5874 " handling") % self.op.node_name)
5876 # If we're being de-offlined or un-drained (or gaining master capability), we promote ourselves to master candidate if needed
5877 if (self.op.drained == False or self.op.offline == False or
5878 (self.op.master_capable and not node.master_capable)):
5879 if _DecideSelfPromotion(self):
5880 self.op.master_candidate = True
5881 self.LogInfo("Auto-promoting node to master candidate")
5883 # If we're no longer master capable, we'll demote ourselves from MC
5884 if self.op.master_capable == False and node.master_candidate:
5885 self.LogInfo("Demoting from master candidate")
5886 self.op.master_candidate = False
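# Derive the new role from the (at most one) flag being set to True; a
# flag explicitly set to False returns the node to the regular role, and
# with no flag changes at all the old role is kept.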
5889 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5890 if self.op.master_candidate:
5891 new_role = self._ROLE_CANDIDATE
5892 elif self.op.drained:
5893 new_role = self._ROLE_DRAINED
5894 elif self.op.offline:
5895 new_role = self._ROLE_OFFLINE
5896 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5896 # False is still in the new flags, which means we're un-setting the current flags
5899 new_role = self._ROLE_REGULAR
5900 else: # no new flags given, keep the old role
new_role = old_role
5903 self.new_role = new_role
5905 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5906 # Trying to transition out of offline status
5907 # TODO: Use standard RPC runner, but make sure it works when the node is
5908 # still marked offline
5909 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5911 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5912 " to report its version: %s" %
5913 (node.name, result.fail_msg),
5916 self.LogWarning("Transitioning node from offline to online state"
5917 " without using re-add. Please make sure the node"
5920 if self.op.secondary_ip:
5921 # Ok even without locking, because this can't be changed by any LU
5922 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5923 master_singlehomed = master.secondary_ip == master.primary_ip
5924 if master_singlehomed and self.op.secondary_ip:
5925 raise errors.OpPrereqError("Cannot change the secondary IP on a"
5926 " single-homed cluster", errors.ECODE_INVAL)
5928 assert not (frozenset(affected_instances) -
5929 self.owned_locks(locking.LEVEL_INSTANCE))
5932 if affected_instances:
5933 raise errors.OpPrereqError("Cannot change secondary IP address:"
5934 " offline node has instances (%s)"
5935 " configured to use it" %
5936 utils.CommaJoin(affected_instances.keys()))
5938 # On online nodes, check that no instances are running, and that
5939 # the node has the new IP address and that we can reach it.
5940 for instance in affected_instances.values():
5941 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5942 msg="cannot change secondary ip")
5944 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5945 if master.name != node.name:
5946 # check reachability from master secondary ip to new secondary ip
5947 if not netutils.TcpPing(self.op.secondary_ip,
5948 constants.DEFAULT_NODED_PORT,
5949 source=master.secondary_ip):
5950 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5951 " based ping to node daemon port",
5952 errors.ECODE_ENVIRON)
5954 if self.op.ndparams:
5955 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5956 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5957 self.new_ndparams = new_ndparams
5959 if self.op.hv_state:
5960 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5961 self.node.hv_state_static)
5963 if self.op.disk_state:
5964 self.new_disk_state = \
5965 _MergeAndVerifyDiskState(self.op.disk_state,
5966 self.node.disk_state_static)
5968 def Exec(self, feedback_fn):
5973 old_role = self.old_role
5974 new_role = self.new_role
5978 if self.op.ndparams:
5979 node.ndparams = self.new_ndparams
5981 if self.op.powered is not None:
5982 node.powered = self.op.powered
5984 if self.op.hv_state:
5985 node.hv_state_static = self.new_hv_state
5987 if self.op.disk_state:
5988 node.disk_state_static = self.new_disk_state
5990 for attr in ["master_capable", "vm_capable"]:
5991 val = getattr(self.op, attr)
5993 setattr(node, attr, val)
5994 result.append((attr, str(val)))
5996 if new_role != old_role:
5997 # Tell the node to demote itself, if no longer MC and not offline
5998 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5999 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6001 self.LogWarning("Node failed to demote itself: %s", msg)
6003 new_flags = self._R2F[new_role]
6004 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6006 result.append((desc, str(nf)))
6007 (node.master_candidate, node.drained, node.offline) = new_flags
6009 # if we locked all the nodes, adjust the candidate pool before updating this node
if self.lock_all:
6011 _AdjustCandidatePool(self, [node.name])
6013 if self.op.secondary_ip:
6014 node.secondary_ip = self.op.secondary_ip
6015 result.append(("secondary_ip", self.op.secondary_ip))
6017 # this will trigger configuration file update, if needed
6018 self.cfg.Update(node, feedback_fn)
6020 # this will trigger job queue propagation or cleanup if the mc flag changed
6022 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6023 self.context.ReaddNode(node)
6028 class LUNodePowercycle(NoHooksLU):
6029 """Powercycles a node.
6034 def CheckArguments(self):
6035 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6036 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6037 raise errors.OpPrereqError("The node is the master and the force"
6038 " parameter was not set",
6041 def ExpandNames(self):
6042 """Locking for PowercycleNode.
6044 This is a last-resort option and shouldn't block on other
6045 jobs. Therefore, we grab no locks.
6048 self.needed_locks = {}
6050 def Exec(self, feedback_fn):
6054 result = self.rpc.call_node_powercycle(self.op.node_name,
6055 self.cfg.GetHypervisorType())
6056 result.Raise("Failed to schedule the reboot")
6057 return result.payload
6060 class LUClusterQuery(NoHooksLU):
6061 """Query cluster configuration.
6066 def ExpandNames(self):
6067 self.needed_locks = {}
6069 def Exec(self, feedback_fn):
6070 """Return cluster config.
6073 cluster = self.cfg.GetClusterInfo()
6076 # Filter just for enabled hypervisors
6077 for os_name, hv_dict in cluster.os_hvp.items():
6078 os_hvp[os_name] = {}
6079 for hv_name, hv_params in hv_dict.items():
6080 if hv_name in cluster.enabled_hypervisors:
6081 os_hvp[os_name][hv_name] = hv_params
6083 # Convert ip_family to ip_version
6084 primary_ip_version = constants.IP4_VERSION
6085 if cluster.primary_ip_family == netutils.IP6Address.family:
6086 primary_ip_version = constants.IP6_VERSION
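# Assemble the dictionary describing the cluster configuration that is
# returned to the client (versions, parameter defaults, storage and
# networking settings, and so on).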
6089 "software_version": constants.RELEASE_VERSION,
6090 "protocol_version": constants.PROTOCOL_VERSION,
6091 "config_version": constants.CONFIG_VERSION,
6092 "os_api_version": max(constants.OS_API_VERSIONS),
6093 "export_version": constants.EXPORT_VERSION,
6094 "architecture": (platform.architecture()[0], platform.machine()),
6095 "name": cluster.cluster_name,
6096 "master": cluster.master_node,
6097 "default_hypervisor": cluster.primary_hypervisor,
6098 "enabled_hypervisors": cluster.enabled_hypervisors,
6099 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6100 for hypervisor_name in cluster.enabled_hypervisors]),
6102 "beparams": cluster.beparams,
6103 "osparams": cluster.osparams,
6104 "ipolicy": cluster.ipolicy,
6105 "nicparams": cluster.nicparams,
6106 "ndparams": cluster.ndparams,
6107 "candidate_pool_size": cluster.candidate_pool_size,
6108 "master_netdev": cluster.master_netdev,
6109 "master_netmask": cluster.master_netmask,
6110 "use_external_mip_script": cluster.use_external_mip_script,
6111 "volume_group_name": cluster.volume_group_name,
6112 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6113 "file_storage_dir": cluster.file_storage_dir,
6114 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6115 "maintain_node_health": cluster.maintain_node_health,
6116 "ctime": cluster.ctime,
6117 "mtime": cluster.mtime,
6118 "uuid": cluster.uuid,
6119 "tags": list(cluster.GetTags()),
6120 "uid_pool": cluster.uid_pool,
6121 "default_iallocator": cluster.default_iallocator,
6122 "reserved_lvs": cluster.reserved_lvs,
6123 "primary_ip_version": primary_ip_version,
6124 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6125 "hidden_os": cluster.hidden_os,
6126 "blacklisted_os": cluster.blacklisted_os,
6132 class LUClusterConfigQuery(NoHooksLU):
6133 """Return configuration values.
6137 _FIELDS_DYNAMIC = utils.FieldSet()
6138 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6139 "watcher_pause", "volume_group_name")
6141 def CheckArguments(self):
6142 _CheckOutputFields(static=self._FIELDS_STATIC,
6143 dynamic=self._FIELDS_DYNAMIC,
6144 selected=self.op.output_fields)
6146 def ExpandNames(self):
6147 self.needed_locks = {}
6149 def Exec(self, feedback_fn):
6150 """Dump a representation of the cluster config to the standard output.
6154 for field in self.op.output_fields:
6155 if field == "cluster_name":
6156 entry = self.cfg.GetClusterName()
6157 elif field == "master_node":
6158 entry = self.cfg.GetMasterNode()
6159 elif field == "drain_flag":
6160 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6161 elif field == "watcher_pause":
6162 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6163 elif field == "volume_group_name":
6164 entry = self.cfg.GetVGName()
6166 raise errors.ParameterError(field)
6167 values.append(entry)
6171 class LUInstanceActivateDisks(NoHooksLU):
6172 """Bring up an instance's disks.
6177 def ExpandNames(self):
6178 self._ExpandAndLockInstance()
6179 self.needed_locks[locking.LEVEL_NODE] = []
6180 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6182 def DeclareLocks(self, level):
6183 if level == locking.LEVEL_NODE:
6184 self._LockInstancesNodes()
6186 def CheckPrereq(self):
6187 """Check prerequisites.
6189 This checks that the instance is in the cluster.
6192 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6193 assert self.instance is not None, \
6194 "Cannot retrieve locked instance %s" % self.op.instance_name
6195 _CheckNodeOnline(self, self.instance.primary_node)
6197 def Exec(self, feedback_fn):
6198 """Activate the disks.
6201 disks_ok, disks_info = \
6202 _AssembleInstanceDisks(self, self.instance,
6203 ignore_size=self.op.ignore_size)
6205 raise errors.OpExecError("Cannot activate block devices")
6210 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6212 """Prepare the block devices for an instance.
6214 This sets up the block devices on all nodes.
6216 @type lu: L{LogicalUnit}
6217 @param lu: the logical unit on whose behalf we execute
6218 @type instance: L{objects.Instance}
6219 @param instance: the instance for whose disks we assemble
6220 @type disks: list of L{objects.Disk} or None
6221 @param disks: which disks to assemble (or all, if None)
6222 @type ignore_secondaries: boolean
6223 @param ignore_secondaries: if true, errors on secondary nodes
6224 won't result in an error return from the function
6225 @type ignore_size: boolean
6226 @param ignore_size: if true, the current known size of the disk
6227 will not be used during the disk activation, useful for cases
6228 when the size is wrong
6229 @return: False if the operation failed, otherwise a list of
6230 (host, instance_visible_name, node_visible_name)
6231 with the mapping from node devices to instance devices
6236 iname = instance.name
6237 disks = _ExpandCheckDisks(instance, disks)
6239 # With the two-pass mechanism we try to reduce the window of
6240 # opportunity for the race condition of switching DRBD to primary
6241 # before the handshake has occurred, but we do not eliminate it
6243 # The proper fix would be to wait (with some limits) until the
6244 # connection has been made and drbd transitions from WFConnection
6245 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6248 # 1st pass, assemble on all nodes in secondary mode
6249 for idx, inst_disk in enumerate(disks):
6250 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6252 node_disk = node_disk.Copy()
6253 node_disk.UnsetSize()
6254 lu.cfg.SetDiskID(node_disk, node)
6255 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6256 msg = result.fail_msg
6258 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6259 " (is_primary=False, pass=1): %s",
6260 inst_disk.iv_name, node, msg)
6261 if not ignore_secondaries:
6264 # FIXME: race condition on drbd migration to primary
6266 # 2nd pass, do only the primary node
6267 for idx, inst_disk in enumerate(disks):
6270 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6271 if node != instance.primary_node:
6274 node_disk = node_disk.Copy()
6275 node_disk.UnsetSize()
6276 lu.cfg.SetDiskID(node_disk, node)
6277 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6278 msg = result.fail_msg
6280 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6281 " (is_primary=True, pass=2): %s",
6282 inst_disk.iv_name, node, msg)
6285 dev_path = result.payload
6287 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6289 # leave the disks configured for the primary node; this is a
6290 # workaround that would be better fixed by improving the
6291 # logical/physical id handling
6293 lu.cfg.SetDiskID(disk, instance.primary_node)
6295 return disks_ok, device_info
6298 def _StartInstanceDisks(lu, instance, force):
6299 """Start the disks of an instance.
6302 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6303 ignore_secondaries=force)
6305 _ShutdownInstanceDisks(lu, instance)
6306 if force is not None and not force:
6307 lu.proc.LogWarning("", hint="If the message above refers to a secondary node,"
6309 " you can retry the operation using '--force'.")
6310 raise errors.OpExecError("Disk consistency error")
6313 class LUInstanceDeactivateDisks(NoHooksLU):
6314 """Shutdown an instance's disks.
6319 def ExpandNames(self):
6320 self._ExpandAndLockInstance()
6321 self.needed_locks[locking.LEVEL_NODE] = []
6322 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6324 def DeclareLocks(self, level):
6325 if level == locking.LEVEL_NODE:
6326 self._LockInstancesNodes()
6328 def CheckPrereq(self):
6329 """Check prerequisites.
6331 This checks that the instance is in the cluster.
6334 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6335 assert self.instance is not None, \
6336 "Cannot retrieve locked instance %s" % self.op.instance_name
6338 def Exec(self, feedback_fn):
6339 """Deactivate the disks
6342 instance = self.instance
6344 _ShutdownInstanceDisks(self, instance)
6346 _SafeShutdownInstanceDisks(self, instance)
6349 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6350 """Shutdown block devices of an instance.
6352 This function checks if an instance is running, before calling
6353 _ShutdownInstanceDisks.
6356 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6357 _ShutdownInstanceDisks(lu, instance, disks=disks)
6360 def _ExpandCheckDisks(instance, disks):
6361 """Return the instance disks selected by the disks list
6363 @type disks: list of L{objects.Disk} or None
6364 @param disks: selected disks
6365 @rtype: list of L{objects.Disk}
6366 @return: selected instance disks to act on
if disks is None:
6370 return instance.disks
6372 if not set(disks).issubset(instance.disks):
6373 raise errors.ProgrammerError("Can only act on disks belonging to the"
" target instance")
return disks
6378 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6379 """Shutdown block devices of an instance.
6381 This does the shutdown on all nodes of the instance.
6383 If ignore_primary is false, errors on the primary node are not ignored.
6388 disks = _ExpandCheckDisks(instance, disks)
6391 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6392 lu.cfg.SetDiskID(top_disk, node)
6393 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6394 msg = result.fail_msg
6396 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6397 disk.iv_name, node, msg)
6398 if ((node == instance.primary_node and not ignore_primary) or
6399 (node != instance.primary_node and not result.offline)):
6404 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6405 """Checks if a node has enough free memory.
6407 This function checks if a given node has the needed amount of free
6408 memory. In case the node has less memory or we cannot get the
6409 information from the node, this function raises an OpPrereqError exception.
6412 @type lu: C{LogicalUnit}
6413 @param lu: a logical unit from which we get configuration data
6415 @param node: the node to check
6416 @type reason: C{str}
6417 @param reason: string to use in the error message
6418 @type requested: C{int}
6419 @param requested: the amount of memory in MiB to check for
6420 @type hypervisor_name: C{str}
6421 @param hypervisor_name: the hypervisor to ask for memory stats
6423 @return: node current free memory
6424 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6425 we cannot check the node
6428 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6429 nodeinfo[node].Raise("Can't get data from node %s" % node,
6430 prereq=True, ecode=errors.ECODE_ENVIRON)
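# The node_info payload is a 3-tuple; its second and third elements hold
# per-VG and per-hypervisor information. Only a single hypervisor (and no
# VGs) was requested above, hence the single-element unpacking below.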
6431 (_, _, (hv_info, )) = nodeinfo[node].payload
6433 free_mem = hv_info.get("memory_free", None)
6434 if not isinstance(free_mem, int):
6435 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6436 " was '%s'" % (node, free_mem),
6437 errors.ECODE_ENVIRON)
6438 if requested > free_mem:
6439 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6440 " needed %s MiB, available %s MiB" %
6441 (node, reason, requested, free_mem),
6446 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6447 """Checks if nodes have enough free disk space in the all VGs.
6449 This function check if all given nodes have the needed amount of
6450 free disk. In case any node has less disk or we cannot get the
6451 information from the node, this function raise an OpPrereqError
6454 @type lu: C{LogicalUnit}
6455 @param lu: a logical unit from which we get configuration data
6456 @type nodenames: C{list}
6457 @param nodenames: the list of node names to check
6458 @type req_sizes: C{dict}
6459 @param req_sizes: the hash of vg and corresponding amount of disk in MiB required for the given VGs
6461 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6462 or we cannot check the node
6465 for vg, req_size in req_sizes.items():
6466 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6469 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6470 """Checks if nodes have enough free disk space in the specified VG.
6472 This function checks if all given nodes have the needed amount of
6473 free disk. In case any node has less disk or we cannot get the
6474 information from the node, this function raises an OpPrereqError exception.
6477 @type lu: C{LogicalUnit}
6478 @param lu: a logical unit from which we get configuration data
6479 @type nodenames: C{list}
6480 @param nodenames: the list of node names to check
6482 @param vg: the volume group to check
6483 @type requested: C{int}
6484 @param requested: the amount of disk in MiB to check for
6485 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6486 or we cannot check the node
6489 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6490 for node in nodenames:
6491 info = nodeinfo[node]
6492 info.Raise("Cannot get current information from node %s" % node,
6493 prereq=True, ecode=errors.ECODE_ENVIRON)
6494 (_, (vg_info, ), _) = info.payload
6495 vg_free = vg_info.get("vg_free", None)
6496 if not isinstance(vg_free, int):
6497 raise errors.OpPrereqError("Can't compute free disk space on node"
6498 " %s for vg %s, result was '%s'" %
6499 (node, vg, vg_free), errors.ECODE_ENVIRON)
6500 if requested > vg_free:
6501 raise errors.OpPrereqError("Not enough disk space on target node %s"
6502 " vg %s: required %d MiB, available %d MiB" %
6503 (node, vg, requested, vg_free),
6507 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6508 """Checks if nodes have enough physical CPUs
6510 This function checks if all given nodes have the needed number of
6511 physical CPUs. In case any node has less CPUs or we cannot get the
6512 information from the node, this function raises an OpPrereqError
6515 @type lu: C{LogicalUnit}
6516 @param lu: a logical unit from which we get configuration data
6517 @type nodenames: C{list}
6518 @param nodenames: the list of node names to check
6519 @type requested: C{int}
6520 @param requested: the minimum acceptable number of physical CPUs
6521 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6522 or we cannot check the node
6525 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6526 for node in nodenames:
6527 info = nodeinfo[node]
6528 info.Raise("Cannot get current information from node %s" % node,
6529 prereq=True, ecode=errors.ECODE_ENVIRON)
6530 (_, _, (hv_info, )) = info.payload
6531 num_cpus = hv_info.get("cpu_total", None)
6532 if not isinstance(num_cpus, int):
6533 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6534 " on node %s, result was '%s'" %
6535 (node, num_cpus), errors.ECODE_ENVIRON)
6536 if requested > num_cpus:
6537 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6538 "required" % (node, num_cpus, requested),
6542 class LUInstanceStartup(LogicalUnit):
6543 """Starts an instance.
6546 HPATH = "instance-start"
6547 HTYPE = constants.HTYPE_INSTANCE
6550 def CheckArguments(self):
6552 if self.op.beparams:
6553 # fill the beparams dict
6554 objects.UpgradeBeParams(self.op.beparams)
6555 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6557 def ExpandNames(self):
6558 self._ExpandAndLockInstance()
6559 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6561 def DeclareLocks(self, level):
6562 if level == locking.LEVEL_NODE_RES:
6563 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6565 def BuildHooksEnv(self):
6568 This runs on master, primary and secondary nodes of the instance.
6572 "FORCE": self.op.force,
6575 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6579 def BuildHooksNodes(self):
6580 """Build hooks nodes.
6583 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6586 def CheckPrereq(self):
6587 """Check prerequisites.
6589 This checks that the instance is in the cluster.
6592 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6593 assert self.instance is not None, \
6594 "Cannot retrieve locked instance %s" % self.op.instance_name
6597 if self.op.hvparams:
6598 # check hypervisor parameter syntax (locally)
6599 cluster = self.cfg.GetClusterInfo()
6600 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6601 filled_hvp = cluster.FillHV(instance)
6602 filled_hvp.update(self.op.hvparams)
6603 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6604 hv_type.CheckParameterSyntax(filled_hvp)
6605 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6607 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6609 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6611 if self.primary_offline and self.op.ignore_offline_nodes:
6612 self.proc.LogWarning("Ignoring offline primary node")
6614 if self.op.hvparams or self.op.beparams:
6615 self.proc.LogWarning("Overridden parameters are ignored")
6617 _CheckNodeOnline(self, instance.primary_node)
6619 bep = self.cfg.GetClusterInfo().FillBE(instance)
6620 bep.update(self.op.beparams)
6622 # check bridges existence
6623 _CheckInstanceBridgesExist(self, instance)
6625 remote_info = self.rpc.call_instance_info(instance.primary_node,
6627 instance.hypervisor)
6628 remote_info.Raise("Error checking node %s" % instance.primary_node,
6629 prereq=True, ecode=errors.ECODE_ENVIRON)
6630 if not remote_info.payload: # not running already
6631 _CheckNodeFreeMemory(self, instance.primary_node,
6632 "starting instance %s" % instance.name,
6633 bep[constants.BE_MINMEM], instance.hypervisor)
6635 def Exec(self, feedback_fn):
6636 """Start the instance.
6639 instance = self.instance
6640 force = self.op.force
6642 if not self.op.no_remember:
6643 self.cfg.MarkInstanceUp(instance.name)
6645 if self.primary_offline:
6646 assert self.op.ignore_offline_nodes
6647 self.proc.LogInfo("Primary node offline, marked instance as started")
6649 node_current = instance.primary_node
6651 _StartInstanceDisks(self, instance, force)
6654 self.rpc.call_instance_start(node_current,
6655 (instance, self.op.hvparams,
6657 self.op.startup_paused)
6658 msg = result.fail_msg
6660 _ShutdownInstanceDisks(self, instance)
6661 raise errors.OpExecError("Could not start instance: %s" % msg)
6664 class LUInstanceReboot(LogicalUnit):
6665 """Reboot an instance.
6668 HPATH = "instance-reboot"
6669 HTYPE = constants.HTYPE_INSTANCE
6672 def ExpandNames(self):
6673 self._ExpandAndLockInstance()
6675 def BuildHooksEnv(self):
6678 This runs on master, primary and secondary nodes of the instance.
6682 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6683 "REBOOT_TYPE": self.op.reboot_type,
6684 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6687 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6691 def BuildHooksNodes(self):
6692 """Build hooks nodes.
6695 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6698 def CheckPrereq(self):
6699 """Check prerequisites.
6701 This checks that the instance is in the cluster.
6704 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6705 assert self.instance is not None, \
6706 "Cannot retrieve locked instance %s" % self.op.instance_name
6707 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6708 _CheckNodeOnline(self, instance.primary_node)
6710 # check bridges existence
6711 _CheckInstanceBridgesExist(self, instance)
6713 def Exec(self, feedback_fn):
6714 """Reboot the instance.
6717 instance = self.instance
6718 ignore_secondaries = self.op.ignore_secondaries
6719 reboot_type = self.op.reboot_type
6721 remote_info = self.rpc.call_instance_info(instance.primary_node,
6723 instance.hypervisor)
6724 remote_info.Raise("Error checking node %s" % instance.primary_node)
6725 instance_running = bool(remote_info.payload)
6727 node_current = instance.primary_node
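# A soft/hard reboot of a running instance is delegated to the hypervisor;
# a full reboot (or rebooting an instance that is not running) is done as a
# shutdown followed by a fresh start below.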
6729 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6730 constants.INSTANCE_REBOOT_HARD]:
6731 for disk in instance.disks:
6732 self.cfg.SetDiskID(disk, node_current)
6733 result = self.rpc.call_instance_reboot(node_current, instance,
6735 self.op.shutdown_timeout)
6736 result.Raise("Could not reboot instance")
6738 if instance_running:
6739 result = self.rpc.call_instance_shutdown(node_current, instance,
6740 self.op.shutdown_timeout)
6741 result.Raise("Could not shutdown instance for full reboot")
6742 _ShutdownInstanceDisks(self, instance)
6744 self.LogInfo("Instance %s was already stopped, starting now",
6746 _StartInstanceDisks(self, instance, ignore_secondaries)
6747 result = self.rpc.call_instance_start(node_current,
6748 (instance, None, None), False)
6749 msg = result.fail_msg
6751 _ShutdownInstanceDisks(self, instance)
6752 raise errors.OpExecError("Could not start instance for"
6753 " full reboot: %s" % msg)
6755 self.cfg.MarkInstanceUp(instance.name)
6758 class LUInstanceShutdown(LogicalUnit):
6759 """Shutdown an instance.
6762 HPATH = "instance-stop"
6763 HTYPE = constants.HTYPE_INSTANCE
6766 def ExpandNames(self):
6767 self._ExpandAndLockInstance()
6769 def BuildHooksEnv(self):
6772 This runs on master, primary and secondary nodes of the instance.
6775 env = _BuildInstanceHookEnvByObject(self, self.instance)
6776 env["TIMEOUT"] = self.op.timeout
6779 def BuildHooksNodes(self):
6780 """Build hooks nodes.
6783 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6786 def CheckPrereq(self):
6787 """Check prerequisites.
6789 This checks that the instance is in the cluster.
6792 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6793 assert self.instance is not None, \
6794 "Cannot retrieve locked instance %s" % self.op.instance_name
6796 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6798 self.primary_offline = \
6799 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6801 if self.primary_offline and self.op.ignore_offline_nodes:
6802 self.proc.LogWarning("Ignoring offline primary node")
6804 _CheckNodeOnline(self, self.instance.primary_node)
6806 def Exec(self, feedback_fn):
6807 """Shutdown the instance.
6810 instance = self.instance
6811 node_current = instance.primary_node
6812 timeout = self.op.timeout
6814 if not self.op.no_remember:
6815 self.cfg.MarkInstanceDown(instance.name)
6817 if self.primary_offline:
6818 assert self.op.ignore_offline_nodes
6819 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6821 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6822 msg = result.fail_msg
6824 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6826 _ShutdownInstanceDisks(self, instance)
6829 class LUInstanceReinstall(LogicalUnit):
6830 """Reinstall an instance.
6833 HPATH = "instance-reinstall"
6834 HTYPE = constants.HTYPE_INSTANCE
6837 def ExpandNames(self):
6838 self._ExpandAndLockInstance()
6840 def BuildHooksEnv(self):
6843 This runs on master, primary and secondary nodes of the instance.
6846 return _BuildInstanceHookEnvByObject(self, self.instance)
6848 def BuildHooksNodes(self):
6849 """Build hooks nodes.
6852 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6855 def CheckPrereq(self):
6856 """Check prerequisites.
6858 This checks that the instance is in the cluster and is not running.
6861 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6862 assert instance is not None, \
6863 "Cannot retrieve locked instance %s" % self.op.instance_name
6864 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6865 " offline, cannot reinstall")
6866 for node in instance.secondary_nodes:
6867 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6868 " cannot reinstall")
6870 if instance.disk_template == constants.DT_DISKLESS:
6871 raise errors.OpPrereqError("Instance '%s' has no disks" %
6872 self.op.instance_name,
6874 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6876 if self.op.os_type is not None:
6878 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6879 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6880 instance_os = self.op.os_type
6882 instance_os = instance.os
6884 nodelist = list(instance.all_nodes)
6886 if self.op.osparams:
6887 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6888 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6889 self.os_inst = i_osdict # the new dict (without defaults)
6893 self.instance = instance
6895 def Exec(self, feedback_fn):
6896 """Reinstall the instance.
6899 inst = self.instance
6901 if self.op.os_type is not None:
6902 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6903 inst.os = self.op.os_type
6904 # Write to configuration
6905 self.cfg.Update(inst, feedback_fn)
6907 _StartInstanceDisks(self, inst, None)
6909 feedback_fn("Running the instance OS create scripts...")
6910 # FIXME: pass debug option from opcode to backend
6911 result = self.rpc.call_instance_os_add(inst.primary_node,
6912 (inst, self.os_inst), True,
6913 self.op.debug_level)
6914 result.Raise("Could not install OS for instance %s on node %s" %
6915 (inst.name, inst.primary_node))
6917 _ShutdownInstanceDisks(self, inst)
6920 class LUInstanceRecreateDisks(LogicalUnit):
6921 """Recreate an instance's missing disks.
6924 HPATH = "instance-recreate-disks"
6925 HTYPE = constants.HTYPE_INSTANCE
6928 _MODIFYABLE = frozenset([
6929 constants.IDISK_SIZE,
6930 constants.IDISK_MODE,
6933 # New or changed disk parameters may have different semantics
6934 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6935 constants.IDISK_ADOPT,
6937 # TODO: Implement support for changing the VG while recreating
6939 constants.IDISK_METAVG,
6942 def CheckArguments(self):
6943 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6944 # Normalize and convert deprecated list of disk indices
6945 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
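# e.g. the deprecated form disks=[2, 0] becomes [(0, {}), (2, {})], i.e.
# recreate disks 0 and 2 with no parameter changes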
6947 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6949 raise errors.OpPrereqError("Some disks have been specified more than"
6950 " once: %s" % utils.CommaJoin(duplicates),
6953 for (idx, params) in self.op.disks:
6954 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6955 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6957 raise errors.OpPrereqError("Parameters for disk %s try to change"
6958 " unmodifyable parameter(s): %s" %
6959 (idx, utils.CommaJoin(unsupported)),
6962 def ExpandNames(self):
6963 self._ExpandAndLockInstance()
6964 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6966 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6967 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6969 self.needed_locks[locking.LEVEL_NODE] = []
6970 self.needed_locks[locking.LEVEL_NODE_RES] = []
6972 def DeclareLocks(self, level):
6973 if level == locking.LEVEL_NODE:
6974 # if we replace the nodes, we only need to lock the old primary,
6975 # otherwise we need to lock all nodes for disk re-creation
6976 primary_only = bool(self.op.nodes)
6977 self._LockInstancesNodes(primary_only=primary_only)
6978 elif level == locking.LEVEL_NODE_RES:
6980 self.needed_locks[locking.LEVEL_NODE_RES] = \
6981 self.needed_locks[locking.LEVEL_NODE][:]
6983 def BuildHooksEnv(self):
6986 This runs on master, primary and secondary nodes of the instance.
6989 return _BuildInstanceHookEnvByObject(self, self.instance)
6991 def BuildHooksNodes(self):
6992 """Build hooks nodes.
6995 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6998 def CheckPrereq(self):
6999 """Check prerequisites.
7001 This checks that the instance is in the cluster and is not running.
7004 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7005 assert instance is not None, \
7006 "Cannot retrieve locked instance %s" % self.op.instance_name
7008 if len(self.op.nodes) != len(instance.all_nodes):
7009 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7010 " %d replacement nodes were specified" %
7011 (instance.name, len(instance.all_nodes),
7012 len(self.op.nodes)),
7014 assert instance.disk_template != constants.DT_DRBD8 or \
7015 len(self.op.nodes) == 2
7016 assert instance.disk_template != constants.DT_PLAIN or \
7017 len(self.op.nodes) == 1
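# When replacement nodes were given, the first one becomes the primary
# node to check; otherwise the instance's current primary node is used.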
7018 primary_node = self.op.nodes[0]
7020 primary_node = instance.primary_node
7021 _CheckNodeOnline(self, primary_node)
7023 if instance.disk_template == constants.DT_DISKLESS:
7024 raise errors.OpPrereqError("Instance '%s' has no disks" %
7025 self.op.instance_name, errors.ECODE_INVAL)
7027 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7029 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7030 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7031 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7032 if not (self.op.nodes and old_pnode.offline):
7033 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7034 msg="cannot recreate disks")
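# Build a map of disk index -> requested parameter changes; an empty disk
# list in the opcode means every disk is recreated with unchanged
# parameters.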
7037 self.disks = dict(self.op.disks)
7039 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7041 maxidx = max(self.disks.keys())
7042 if maxidx >= len(instance.disks):
7043 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7046 if (self.op.nodes and
7047 sorted(self.disks.keys()) != range(len(instance.disks))):
7048 raise errors.OpPrereqError("Can't recreate disks partially and"
7049 " change the nodes at the same time",
7052 self.instance = instance
7054 def Exec(self, feedback_fn):
7055 """Recreate the disks.
7058 instance = self.instance
7060 assert (self.owned_locks(locking.LEVEL_NODE) ==
7061 self.owned_locks(locking.LEVEL_NODE_RES))
7064 mods = [] # keeps track of needed changes
7066 for idx, disk in enumerate(instance.disks):
7068 changes = self.disks[idx]
7070 # Disk should not be recreated
7074 # update secondaries for disks, if needed
7075 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7076 # need to update the nodes and minors
7077 assert len(self.op.nodes) == 2
7078 assert len(disk.logical_id) == 6 # otherwise the disk internals are broken
7080 (_, _, old_port, _, _, old_secret) = disk.logical_id
7081 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7082 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7083 new_minors[0], new_minors[1], old_secret)
7084 assert len(disk.logical_id) == len(new_id)
7088 mods.append((idx, new_id, changes))
7090 # now that we have passed all asserts above, we can apply the mods
7091 # in a single run (to avoid partial changes)
7092 for idx, new_id, changes in mods:
7093 disk = instance.disks[idx]
7094 if new_id is not None:
7095 assert disk.dev_type == constants.LD_DRBD8
7096 disk.logical_id = new_id
7098 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7099 mode=changes.get(constants.IDISK_MODE, None))
7101 # change primary node, if needed
7103 instance.primary_node = self.op.nodes[0]
7104 self.LogWarning("Changing the instance's nodes, you will have to"
7105 " remove any disks left on the older nodes manually")
7108 self.cfg.Update(instance, feedback_fn)
7110 _CreateDisks(self, instance, to_skip=to_skip)
7113 class LUInstanceRename(LogicalUnit):
7114 """Rename an instance.
7117 HPATH = "instance-rename"
7118 HTYPE = constants.HTYPE_INSTANCE
7120 def CheckArguments(self):
7124 if self.op.ip_check and not self.op.name_check:
7125 # TODO: make the ip check more flexible and not depend on the name check
7126 raise errors.OpPrereqError("IP address check requires a name check",
7129 def BuildHooksEnv(self):
7132 This runs on master, primary and secondary nodes of the instance.
7135 env = _BuildInstanceHookEnvByObject(self, self.instance)
7136 env["INSTANCE_NEW_NAME"] = self.op.new_name
7139 def BuildHooksNodes(self):
7140 """Build hooks nodes.
7143 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7146 def CheckPrereq(self):
7147 """Check prerequisites.
7149 This checks that the instance is in the cluster and is not running.
7152 self.op.instance_name = _ExpandInstanceName(self.cfg,
7153 self.op.instance_name)
7154 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7155 assert instance is not None
7156 _CheckNodeOnline(self, instance.primary_node)
7157 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7158 msg="cannot rename")
7159 self.instance = instance
7161 new_name = self.op.new_name
7162 if self.op.name_check:
7163 hostname = netutils.GetHostname(name=new_name)
7164 if hostname.name != new_name:
7165 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7167 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7168 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7169 " same as given hostname '%s'") %
7170 (hostname.name, self.op.new_name),
7172 new_name = self.op.new_name = hostname.name
7173 if (self.op.ip_check and
7174 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7175 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7176 (hostname.ip, new_name),
7177 errors.ECODE_NOTUNIQUE)
7179 instance_list = self.cfg.GetInstanceList()
7180 if new_name in instance_list and new_name != instance.name:
7181 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7182 new_name, errors.ECODE_EXISTS)
7184 def Exec(self, feedback_fn):
7185 """Rename the instance.
7188 inst = self.instance
7189 old_name = inst.name
7191 rename_file_storage = False
7192 if (inst.disk_template in constants.DTS_FILEBASED and
7193 self.op.new_name != inst.name):
7194 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7195 rename_file_storage = True
7197 self.cfg.RenameInstance(inst.name, self.op.new_name)
7198 # Change the instance lock. This is definitely safe while we hold the BGL.
7199 # Otherwise the new lock would have to be added in acquired mode.
7201 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7202 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7204 # re-read the instance from the configuration after rename
7205 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7207 if rename_file_storage:
7208 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7209 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7210 old_file_storage_dir,
7211 new_file_storage_dir)
7212 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7213 " (but the instance has been renamed in Ganeti)" %
7214 (inst.primary_node, old_file_storage_dir,
7215 new_file_storage_dir))
7217 _StartInstanceDisks(self, inst, None)
7219 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7220 old_name, self.op.debug_level)
7221 msg = result.fail_msg
7223 msg = ("Could not run OS rename script for instance %s on node %s"
7224 " (but the instance has been renamed in Ganeti): %s" %
7225 (inst.name, inst.primary_node, msg))
7226 self.proc.LogWarning(msg)
7228 _ShutdownInstanceDisks(self, inst)
7233 class LUInstanceRemove(LogicalUnit):
7234 """Remove an instance.
7237 HPATH = "instance-remove"
7238 HTYPE = constants.HTYPE_INSTANCE
7241 def ExpandNames(self):
7242 self._ExpandAndLockInstance()
7243 self.needed_locks[locking.LEVEL_NODE] = []
7244 self.needed_locks[locking.LEVEL_NODE_RES] = []
7245 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7247 def DeclareLocks(self, level):
7248 if level == locking.LEVEL_NODE:
7249 self._LockInstancesNodes()
7250 elif level == locking.LEVEL_NODE_RES:
7252 self.needed_locks[locking.LEVEL_NODE_RES] = \
7253 self.needed_locks[locking.LEVEL_NODE][:]
7255 def BuildHooksEnv(self):
7258 This runs on master, primary and secondary nodes of the instance.
7261 env = _BuildInstanceHookEnvByObject(self, self.instance)
7262 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7265 def BuildHooksNodes(self):
7266 """Build hooks nodes.
7269 nl = [self.cfg.GetMasterNode()]
7270 nl_post = list(self.instance.all_nodes) + nl
7271 return (nl, nl_post)
7273 def CheckPrereq(self):
7274 """Check prerequisites.
7276 This checks that the instance is in the cluster.
7279 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7280 assert self.instance is not None, \
7281 "Cannot retrieve locked instance %s" % self.op.instance_name
7283 def Exec(self, feedback_fn):
7284 """Remove the instance.
7287 instance = self.instance
7288 logging.info("Shutting down instance %s on node %s",
7289 instance.name, instance.primary_node)
7291 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7292 self.op.shutdown_timeout)
7293 msg = result.fail_msg
7295 if self.op.ignore_failures:
7296 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7298 raise errors.OpExecError("Could not shutdown instance %s on"
7300 (instance.name, instance.primary_node, msg))
7302 assert (self.owned_locks(locking.LEVEL_NODE) ==
7303 self.owned_locks(locking.LEVEL_NODE_RES))
7304 assert not (set(instance.all_nodes) -
7305 self.owned_locks(locking.LEVEL_NODE)), \
7306 "Not owning correct locks"
7308 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7311 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7312 """Utility function to remove an instance.
7315 logging.info("Removing block devices for instance %s", instance.name)
7317 if not _RemoveDisks(lu, instance):
7318 if not ignore_failures:
7319 raise errors.OpExecError("Can't remove instance's disks")
7320 feedback_fn("Warning: can't remove instance's disks")
7322 logging.info("Removing instance %s out of cluster config", instance.name)
7324 lu.cfg.RemoveInstance(instance.name)
7326 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7327 "Instance lock removal conflict"
7329 # Remove lock for the instance
7330 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7333 class LUInstanceQuery(NoHooksLU):
7334 """Logical unit for querying instances.
7337 # pylint: disable=W0142
7340 def CheckArguments(self):
7341 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7342 self.op.output_fields, self.op.use_locking)
7344 def ExpandNames(self):
7345 self.iq.ExpandNames(self)
7347 def DeclareLocks(self, level):
7348 self.iq.DeclareLocks(self, level)
7350 def Exec(self, feedback_fn):
7351 return self.iq.OldStyleQuery(self)
7354 class LUInstanceFailover(LogicalUnit):
7355 """Failover an instance.
7358 HPATH = "instance-failover"
7359 HTYPE = constants.HTYPE_INSTANCE
7362 def CheckArguments(self):
7363 """Check the arguments.
7366 self.iallocator = getattr(self.op, "iallocator", None)
7367 self.target_node = getattr(self.op, "target_node", None)
7369 def ExpandNames(self):
7370 self._ExpandAndLockInstance()
7372 if self.op.target_node is not None:
7373 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7375 self.needed_locks[locking.LEVEL_NODE] = []
7376 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7378 self.needed_locks[locking.LEVEL_NODE_RES] = []
7379 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7381 ignore_consistency = self.op.ignore_consistency
7382 shutdown_timeout = self.op.shutdown_timeout
7383 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7386 ignore_consistency=ignore_consistency,
7387 shutdown_timeout=shutdown_timeout,
7388 ignore_ipolicy=self.op.ignore_ipolicy)
7389 self.tasklets = [self._migrater]
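# The actual failover work is delegated to the TLMigrateInstance tasklet;
# this LU only handles argument expansion, locking and the hooks
# environment.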
7391 def DeclareLocks(self, level):
7392 if level == locking.LEVEL_NODE:
7393 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7394 if instance.disk_template in constants.DTS_EXT_MIRROR:
7395 if self.op.target_node is None:
7396 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7398 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7399 self.op.target_node]
7400 del self.recalculate_locks[locking.LEVEL_NODE]
7402 self._LockInstancesNodes()
7403 elif level == locking.LEVEL_NODE_RES:
7405 self.needed_locks[locking.LEVEL_NODE_RES] = \
7406 self.needed_locks[locking.LEVEL_NODE][:]
7408 def BuildHooksEnv(self):
7411 This runs on master, primary and secondary nodes of the instance.
7414 instance = self._migrater.instance
7415 source_node = instance.primary_node
7416 target_node = self.op.target_node
7418 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7419 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7420 "OLD_PRIMARY": source_node,
7421 "NEW_PRIMARY": target_node,
7424 if instance.disk_template in constants.DTS_INT_MIRROR:
7425 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7426 env["NEW_SECONDARY"] = source_node
7428 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7430 env.update(_BuildInstanceHookEnvByObject(self, instance))
7434 def BuildHooksNodes(self):
7435 """Build hooks nodes.
7438 instance = self._migrater.instance
7439 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7440 return (nl, nl + [instance.primary_node])
7443 class LUInstanceMigrate(LogicalUnit):
7444 """Migrate an instance.
7446 This is migration without shutting down, compared to the failover,
7447 which is done with shutdown.
7450 HPATH = "instance-migrate"
7451 HTYPE = constants.HTYPE_INSTANCE
7454 def ExpandNames(self):
7455 self._ExpandAndLockInstance()
7457 if self.op.target_node is not None:
7458 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7460 self.needed_locks[locking.LEVEL_NODE] = []
7461 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7463 self.needed_locks[locking.LEVEL_NODE] = []
7464 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7467 TLMigrateInstance(self, self.op.instance_name,
7468 cleanup=self.op.cleanup,
7470 fallback=self.op.allow_failover,
7471 allow_runtime_changes=self.op.allow_runtime_changes,
7472 ignore_ipolicy=self.op.ignore_ipolicy)
7473 self.tasklets = [self._migrater]
7475 def DeclareLocks(self, level):
7476 if level == locking.LEVEL_NODE:
7477 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7478 if instance.disk_template in constants.DTS_EXT_MIRROR:
7479 if self.op.target_node is None:
7480 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7482 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7483 self.op.target_node]
7484 del self.recalculate_locks[locking.LEVEL_NODE]
7486 self._LockInstancesNodes()
7487 elif level == locking.LEVEL_NODE_RES:
7489 self.needed_locks[locking.LEVEL_NODE_RES] = \
7490 self.needed_locks[locking.LEVEL_NODE][:]
7492 def BuildHooksEnv(self):
7495 This runs on master, primary and secondary nodes of the instance.
7498 instance = self._migrater.instance
7499 source_node = instance.primary_node
7500 target_node = self.op.target_node
7501 env = _BuildInstanceHookEnvByObject(self, instance)
7503 "MIGRATE_LIVE": self._migrater.live,
7504 "MIGRATE_CLEANUP": self.op.cleanup,
7505 "OLD_PRIMARY": source_node,
7506 "NEW_PRIMARY": target_node,
7507 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7510 if instance.disk_template in constants.DTS_INT_MIRROR:
7511 env["OLD_SECONDARY"] = target_node
7512 env["NEW_SECONDARY"] = source_node
7514 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7518 def BuildHooksNodes(self):
7519 """Build hooks nodes.
7522 instance = self._migrater.instance
7523 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7524 return (nl, nl + [instance.primary_node])
7527 class LUInstanceMove(LogicalUnit):
7528 """Move an instance by data-copying.
7531 HPATH = "instance-move"
7532 HTYPE = constants.HTYPE_INSTANCE
7535 def ExpandNames(self):
7536 self._ExpandAndLockInstance()
7537 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7538 self.op.target_node = target_node
7539 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7540 self.needed_locks[locking.LEVEL_NODE_RES] = []
7541 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7543 def DeclareLocks(self, level):
7544 if level == locking.LEVEL_NODE:
7545 self._LockInstancesNodes(primary_only=True)
7546 elif level == locking.LEVEL_NODE_RES:
7548 self.needed_locks[locking.LEVEL_NODE_RES] = \
7549 self.needed_locks[locking.LEVEL_NODE][:]
7551 def BuildHooksEnv(self):
7554 This runs on master, primary and secondary nodes of the instance.
7558 "TARGET_NODE": self.op.target_node,
7559 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7561 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7564 def BuildHooksNodes(self):
7565 """Build hooks nodes.
7569 self.cfg.GetMasterNode(),
7570 self.instance.primary_node,
7571 self.op.target_node,
7575 def CheckPrereq(self):
7576 """Check prerequisites.
7578 This checks that the instance is in the cluster.
7581 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7582 assert self.instance is not None, \
7583 "Cannot retrieve locked instance %s" % self.op.instance_name
7585 node = self.cfg.GetNodeInfo(self.op.target_node)
7586 assert node is not None, \
7587 "Cannot retrieve locked node %s" % self.op.target_node
7589 self.target_node = target_node = node.name
7591 if target_node == instance.primary_node:
7592 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7593 (instance.name, target_node),
7596 bep = self.cfg.GetClusterInfo().FillBE(instance)
7598 for idx, dsk in enumerate(instance.disks):
7599 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7600 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7601 " cannot copy" % idx, errors.ECODE_STATE)
7603 _CheckNodeOnline(self, target_node)
7604 _CheckNodeNotDrained(self, target_node)
7605 _CheckNodeVmCapable(self, target_node)
7606 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7607 self.cfg.GetNodeGroup(node.group))
7608 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7609 ignore=self.op.ignore_ipolicy)
7611 if instance.admin_state == constants.ADMINST_UP:
7612 # check memory requirements on the target node
7613 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7614 instance.name, bep[constants.BE_MAXMEM],
7615 instance.hypervisor)
7617 self.LogInfo("Not checking memory on the secondary node as"
7618 " instance will not be started")
7620 # check bridge existence
7621 _CheckInstanceBridgesExist(self, instance, node=target_node)
7623 def Exec(self, feedback_fn):
7624 """Move an instance.
7626 The move is done by shutting it down on its present node, copying
7627 the data over (slow) and starting it on the new node.
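In outline: shut the instance down on the source node, create and assemble
the disks on the target node, copy each disk over with blockdev_export,
update the configuration, remove the old disks and, if the instance was
marked up, start it again on the target node.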
7630 instance = self.instance
7632 source_node = instance.primary_node
7633 target_node = self.target_node
7635 self.LogInfo("Shutting down instance %s on source node %s",
7636 instance.name, source_node)
7638 assert (self.owned_locks(locking.LEVEL_NODE) ==
7639 self.owned_locks(locking.LEVEL_NODE_RES))
7641 result = self.rpc.call_instance_shutdown(source_node, instance,
7642 self.op.shutdown_timeout)
7643 msg = result.fail_msg
7645 if self.op.ignore_consistency:
7646 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7647 " Proceeding anyway. Please make sure node"
7648 " %s is down. Error details: %s",
7649 instance.name, source_node, source_node, msg)
7651 raise errors.OpExecError("Could not shutdown instance %s on node %s: %s" %
7653 (instance.name, source_node, msg))
7655 # create the target disks
7657 _CreateDisks(self, instance, target_node=target_node)
7658 except errors.OpExecError:
7659 self.LogWarning("Device creation failed, reverting...")
7661 _RemoveDisks(self, instance, target_node=target_node)
7663 self.cfg.ReleaseDRBDMinors(instance.name)
7666 cluster_name = self.cfg.GetClusterInfo().cluster_name
7669 # activate, get path, copy the data over
7670 for idx, disk in enumerate(instance.disks):
7671 self.LogInfo("Copying data for disk %d", idx)
7672 result = self.rpc.call_blockdev_assemble(target_node, disk,
7673 instance.name, True, idx)
7675 self.LogWarning("Can't assemble newly created disk %d: %s",
7676 idx, result.fail_msg)
7677 errs.append(result.fail_msg)
7679 dev_path = result.payload
7680 result = self.rpc.call_blockdev_export(source_node, disk,
7681 target_node, dev_path,
7684 self.LogWarning("Can't copy data over for disk %d: %s",
7685 idx, result.fail_msg)
7686 errs.append(result.fail_msg)
7690 self.LogWarning("Some disks failed to copy, aborting")
7692 _RemoveDisks(self, instance, target_node=target_node)
7694 self.cfg.ReleaseDRBDMinors(instance.name)
7695 raise errors.OpExecError("Errors during disk copy: %s" %
7698 instance.primary_node = target_node
7699 self.cfg.Update(instance, feedback_fn)
7701 self.LogInfo("Removing the disks on the original node")
7702 _RemoveDisks(self, instance, target_node=source_node)
7704 # Only start the instance if it's marked as up
7705 if instance.admin_state == constants.ADMINST_UP:
7706 self.LogInfo("Starting instance %s on node %s",
7707 instance.name, target_node)
7709 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7710 ignore_secondaries=True)
7712 _ShutdownInstanceDisks(self, instance)
7713 raise errors.OpExecError("Can't activate the instance's disks")
7715 result = self.rpc.call_instance_start(target_node,
7716 (instance, None, None), False)
7717 msg = result.fail_msg
7719 _ShutdownInstanceDisks(self, instance)
7720 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7721 (instance.name, target_node, msg))
7724 class LUNodeMigrate(LogicalUnit):
7725 """Migrate all instances from a node.
7728 HPATH = "node-migrate"
7729 HTYPE = constants.HTYPE_NODE
7732 def CheckArguments(self):
7735 def ExpandNames(self):
7736 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7738 self.share_locks = _ShareAll()
7739 self.needed_locks = {
7740 locking.LEVEL_NODE: [self.op.node_name],
7743 def BuildHooksEnv(self):
7746 This runs on the master, the primary and all the secondaries.
7750 "NODE_NAME": self.op.node_name,
7751 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7754 def BuildHooksNodes(self):
7755 """Build hooks nodes.
7758 nl = [self.cfg.GetMasterNode()]
7761 def CheckPrereq(self):
7764 def Exec(self, feedback_fn):
7765 # Prepare jobs for migrating the node's primary instances
7766 allow_runtime_changes = self.op.allow_runtime_changes
7768 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7771 iallocator=self.op.iallocator,
7772 target_node=self.op.target_node,
7773 allow_runtime_changes=allow_runtime_changes,
7774 ignore_ipolicy=self.op.ignore_ipolicy)]
7775 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7778 # TODO: Run iallocator in this opcode and pass correct placement options to
7779 # OpInstanceMigrate. Since other jobs can modify the cluster between
7780 # running the iallocator and the actual migration, a good consistency model
7781 # will have to be found.
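# Each primary instance of the node is migrated through its own
# single-opcode job, so the submitted jobs are independent of each other.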
7783 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7784 frozenset([self.op.node_name]))
7786 return ResultWithJobs(jobs)
7789 class TLMigrateInstance(Tasklet):
7790 """Tasklet class for instance migration.
7793 @ivar live: whether the migration will be done live or non-live;
7794 this variable is initialized only after CheckPrereq has run
7795 @type cleanup: boolean
7796 @ivar cleanup: Whether we clean up after a failed migration
7797 @type iallocator: string
7798 @ivar iallocator: The iallocator used to determine target_node
7799 @type target_node: string
7800 @ivar target_node: If given, the target_node to reallocate the instance to
7801 @type failover: boolean
7802 @ivar failover: Whether the operation results in a failover or a migration
7803 @type fallback: boolean
7804 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7806 @type ignore_consistency: boolean
7807 @ivar ignore_consistency: Whether we should ignore consistency between the source and the target node
7809 @type shutdown_timeout: int
7810 @ivar shutdown_timeout: Timeout of the shutdown in case of failover
7811 @type ignore_ipolicy: bool
7812 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7817 _MIGRATION_POLL_INTERVAL = 1 # seconds
7818 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7820 def __init__(self, lu, instance_name, cleanup=False,
7821 failover=False, fallback=False,
7822 ignore_consistency=False,
7823 allow_runtime_changes=True,
7824 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7825 ignore_ipolicy=False):
7826 """Initializes this class.
7829 Tasklet.__init__(self, lu)
7832 self.instance_name = instance_name
7833 self.cleanup = cleanup
7834 self.live = False # will be overridden later
7835 self.failover = failover
7836 self.fallback = fallback
7837 self.ignore_consistency = ignore_consistency
7838 self.shutdown_timeout = shutdown_timeout
7839 self.ignore_ipolicy = ignore_ipolicy
7840 self.allow_runtime_changes = allow_runtime_changes
7842 def CheckPrereq(self):
7843 """Check prerequisites.
7845 This checks that the instance is in the cluster.
7848 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7849 instance = self.cfg.GetInstanceInfo(instance_name)
7850 assert instance is not None
7851 self.instance = instance
7852 cluster = self.cfg.GetClusterInfo()
7854 if (not self.cleanup and
7855 not instance.admin_state == constants.ADMINST_UP and
7856 not self.failover and self.fallback):
7857 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7858 " switching to failover")
7859 self.failover = True
7861 if instance.disk_template not in constants.DTS_MIRRORED:
7866 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7867 " %s" % (instance.disk_template, text),
7870 if instance.disk_template in constants.DTS_EXT_MIRROR:
7871 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7873 if self.lu.op.iallocator:
7874 self._RunAllocator()
7876 # We set self.target_node here, as it is required by the checks below
7878 self.target_node = self.lu.op.target_node
7880 # Check that the target node is correct in terms of instance policy
7881 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7882 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7883 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7884 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7885 ignore=self.ignore_ipolicy)
7887 # self.target_node is already populated, either directly or by the iallocator run
7889 target_node = self.target_node
7890 if self.target_node == instance.primary_node:
7891 raise errors.OpPrereqError("Cannot migrate instance %s"
7892 " to its primary (%s)" %
7893 (instance.name, instance.primary_node))
7895 if len(self.lu.tasklets) == 1:
7896 # It is safe to release locks only when we're the only tasklet
7898 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7899 keep=[instance.primary_node, self.target_node])
7902 secondary_nodes = instance.secondary_nodes
7903 if not secondary_nodes:
7904 raise errors.ConfigurationError("No secondary node but using"
7905 " %s disk template" %
7906 instance.disk_template)
7907 target_node = secondary_nodes[0]
7908 if self.lu.op.iallocator or (self.lu.op.target_node and
7909 self.lu.op.target_node != target_node):
7911 text = "failed over"
7914 raise errors.OpPrereqError("Instances with disk template %s cannot"
7915 " be %s to arbitrary nodes"
7916 " (neither an iallocator nor a target"
7917 " node can be passed)" %
7918 (instance.disk_template, text),
7920 nodeinfo = self.cfg.GetNodeInfo(target_node)
7921 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7922 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7923 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7924 ignore=self.ignore_ipolicy)
7926 i_be = cluster.FillBE(instance)
7928 # check memory requirements on the target node
7929 if (not self.cleanup and
7930 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7931 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7932 "migrating instance %s" %
7934 i_be[constants.BE_MINMEM],
7935 instance.hypervisor)
7937 self.lu.LogInfo("Not checking memory on the secondary node as"
7938 " instance will not be started")
7940 # check if failover must be forced instead of migration
7941 if (not self.cleanup and not self.failover and
7942 i_be[constants.BE_ALWAYS_FAILOVER]):
7944 self.lu.LogInfo("Instance configured to always failover; fallback"
7946 self.failover = True
7948 raise errors.OpPrereqError("This instance has been configured to"
7949 " always failover, please allow failover",
7952 # check bridge existence
7953 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7955 if not self.cleanup:
7956 _CheckNodeNotDrained(self.lu, target_node)
7957 if not self.failover:
7958 result = self.rpc.call_instance_migratable(instance.primary_node,
7960 if result.fail_msg and self.fallback:
7961 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7963 self.failover = True
7965 result.Raise("Can't migrate, please use failover",
7966 prereq=True, ecode=errors.ECODE_STATE)
7968 assert not (self.failover and self.cleanup)
7970 if not self.failover:
7971 if self.lu.op.live is not None and self.lu.op.mode is not None:
7972 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7973 " parameters are accepted",
7975 if self.lu.op.live is not None:
7977 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7979 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7980 # reset the 'live' parameter to None so that repeated
7981 # invocations of CheckPrereq do not raise an exception
7982 self.lu.op.live = None
7983 elif self.lu.op.mode is None:
7984 # read the default value from the hypervisor
7985 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7986 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7988 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7990 # Failover is never live
7993 if not (self.failover or self.cleanup):
7994 remote_info = self.rpc.call_instance_info(instance.primary_node,
7996 instance.hypervisor)
7997 remote_info.Raise("Error checking instance on node %s" %
7998 instance.primary_node)
7999 instance_running = bool(remote_info.payload)
8000 if instance_running:
8001 self.current_mem = int(remote_info.payload["memory"])
8003 def _RunAllocator(self):
8004 """Run the allocator based on input opcode.
8007 # FIXME: add a self.ignore_ipolicy option
8008 ial = IAllocator(self.cfg, self.rpc,
8009 mode=constants.IALLOCATOR_MODE_RELOC,
8010 name=self.instance_name,
8011 # TODO See why hail breaks with a single node below
8012 relocate_from=[self.instance.primary_node,
8013 self.instance.primary_node],
8016 ial.Run(self.lu.op.iallocator)
8019 raise errors.OpPrereqError("Can't compute nodes using"
8020 " iallocator '%s': %s" %
8021 (self.lu.op.iallocator, ial.info),
8023 if len(ial.result) != ial.required_nodes:
8024 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8025 " of nodes (%s), required %s" %
8026 (self.lu.op.iallocator, len(ial.result),
8027 ial.required_nodes), errors.ECODE_FAULT)
8028 self.target_node = ial.result[0]
8029 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8030 self.instance_name, self.lu.op.iallocator,
8031 utils.CommaJoin(ial.result))
8033 def _WaitUntilSync(self):
8034 """Poll with custom rpc for disk sync.
8036 This uses our own step-based rpc call.
8039 self.feedback_fn("* wait until resync is done")
8043 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8045 self.instance.disks)
8047 for node, nres in result.items():
8048 nres.Raise("Cannot resync disks on node %s" % node)
8049 node_done, node_percent = nres.payload
8050 all_done = all_done and node_done
8051 if node_percent is not None:
8052 min_percent = min(min_percent, node_percent)
8054 if min_percent < 100:
8055 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8058 def _EnsureSecondary(self, node):
8059 """Demote a node to secondary.
8062 self.feedback_fn("* switching node %s to secondary mode" % node)
8064 for dev in self.instance.disks:
8065 self.cfg.SetDiskID(dev, node)
8067 result = self.rpc.call_blockdev_close(node, self.instance.name,
8068 self.instance.disks)
8069 result.Raise("Cannot change disk to secondary on node %s" % node)
8071 def _GoStandalone(self):
8072 """Disconnect from the network.
8075 self.feedback_fn("* changing into standalone mode")
8076 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8077 self.instance.disks)
8078 for node, nres in result.items():
8079 nres.Raise("Cannot disconnect disks node %s" % node)
8081 def _GoReconnect(self, multimaster):
8082 """Reconnect to the network.
8088 msg = "single-master"
8089 self.feedback_fn("* changing disks into %s mode" % msg)
8090 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8091 self.instance.disks,
8092 self.instance.name, multimaster)
8093 for node, nres in result.items():
8094 nres.Raise("Cannot change disks config on node %s" % node)
8096 def _ExecCleanup(self):
8097 """Try to cleanup after a failed migration.
8099 The cleanup is done by:
8100 - check that the instance is running only on one node
8101 (and update the config if needed)
8102 - change disks on its secondary node to secondary
8103 - wait until disks are fully synchronized
8104 - disconnect from the network
8105 - change disks into single-master mode
8106 - wait again until disks are fully synchronized
8109 instance = self.instance
8110 target_node = self.target_node
8111 source_node = self.source_node
8113 # check running on only one node
8114 self.feedback_fn("* checking where the instance actually runs"
8115 " (if this hangs, the hypervisor might be in"
8117 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8118 for node, result in ins_l.items():
8119 result.Raise("Can't contact node %s" % node)
8121 runningon_source = instance.name in ins_l[source_node].payload
8122 runningon_target = instance.name in ins_l[target_node].payload
8124 if runningon_source and runningon_target:
8125 raise errors.OpExecError("Instance seems to be running on two nodes,"
8126 " or the hypervisor is confused; you will have"
8127 " to ensure manually that it runs only on one"
8128 " and restart this operation")
8130 if not (runningon_source or runningon_target):
8131 raise errors.OpExecError("Instance does not seem to be running at all;"
8132 " in this case it's safer to repair by"
8133 " running 'gnt-instance stop' to ensure disk"
8134 " shutdown, and then restarting it")
8136 if runningon_target:
8137 # the migration has actually succeeded, we need to update the config
8138 self.feedback_fn("* instance running on secondary node (%s),"
8139 " updating config" % target_node)
8140 instance.primary_node = target_node
8141 self.cfg.Update(instance, self.feedback_fn)
8142 demoted_node = source_node
8144 self.feedback_fn("* instance confirmed to be running on its"
8145 " primary node (%s)" % source_node)
8146 demoted_node = target_node
8148 if instance.disk_template in constants.DTS_INT_MIRROR:
8149 self._EnsureSecondary(demoted_node)
8151 self._WaitUntilSync()
8152 except errors.OpExecError:
8153 # we ignore errors here, since if the device is standalone, it
8154 # won't be able to sync
8156 self._GoStandalone()
8157 self._GoReconnect(False)
8158 self._WaitUntilSync()
8160 self.feedback_fn("* done")
8162 def _RevertDiskStatus(self):
8163 """Try to revert the disk status after a failed migration.
8166 target_node = self.target_node
8167 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8171 self._EnsureSecondary(target_node)
8172 self._GoStandalone()
8173 self._GoReconnect(False)
8174 self._WaitUntilSync()
8175 except errors.OpExecError, err:
8176 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8177 " please try to recover the instance manually;"
8178 " error '%s'" % str(err))
8180 def _AbortMigration(self):
8181 """Call the hypervisor code to abort a started migration.
8184 instance = self.instance
8185 target_node = self.target_node
8186 source_node = self.source_node
8187 migration_info = self.migration_info
8189 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8193 abort_msg = abort_result.fail_msg
8195 logging.error("Aborting migration failed on target node %s: %s",
8196 target_node, abort_msg)
8197 # Don't raise an exception here, as we still have to try to revert the
8198 # disk status, even if this step failed.
8200 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8201 instance, False, self.live)
8202 abort_msg = abort_result.fail_msg
8204 logging.error("Aborting migration failed on source node %s: %s",
8205 source_node, abort_msg)
8207 def _ExecMigration(self):
8208 """Migrate an instance.
8210 The migrate is done by:
8211 - change the disks into dual-master mode
8212 - wait until disks are fully synchronized again
8213 - migrate the instance
8214 - change disks on the new secondary node (the old primary) to secondary
8215 - wait until disks are fully synchronized
8216 - change disks into single-master mode
8219 instance = self.instance
8220 target_node = self.target_node
8221 source_node = self.source_node
8223 # Check for hypervisor version mismatch and warn the user.
8224 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8225 None, [self.instance.hypervisor])
8226 for ninfo in nodeinfo.values():
8227 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8229 (_, _, (src_info, )) = nodeinfo[source_node].payload
8230 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8232 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8233 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8234 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8235 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8236 if src_version != dst_version:
8237 self.feedback_fn("* warning: hypervisor version mismatch between"
8238 " source (%s) and target (%s) node" %
8239 (src_version, dst_version))
8241 self.feedback_fn("* checking disk consistency between source and target")
8242 for (idx, dev) in enumerate(instance.disks):
8243 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8244 raise errors.OpExecError("Disk %s is degraded or not fully"
8245 " synchronized on target node,"
8246 " aborting migration" % idx)
8248 if self.current_mem > self.tgt_free_mem:
8249 if not self.allow_runtime_changes:
8250 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8251 " free memory to fit instance %s on target"
8252 " node %s (have %dMB, need %dMB)" %
8253 (instance.name, target_node,
8254 self.tgt_free_mem, self.current_mem))
8255 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8256 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8259 rpcres.Raise("Cannot modify instance runtime memory")
8261 # First get the migration information from the remote node
8262 result = self.rpc.call_migration_info(source_node, instance)
8263 msg = result.fail_msg
8265 log_err = ("Failed fetching source migration information from %s: %s" % (source_node, msg))
8267 logging.error(log_err)
8268 raise errors.OpExecError(log_err)
8270 self.migration_info = migration_info = result.payload
8272 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8273 # Then switch the disks to master/master mode
8274 self._EnsureSecondary(target_node)
8275 self._GoStandalone()
8276 self._GoReconnect(True)
8277 self._WaitUntilSync()
8279 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8280 result = self.rpc.call_accept_instance(target_node,
8283 self.nodes_ip[target_node])
8285 msg = result.fail_msg
8287 logging.error("Instance pre-migration failed, trying to revert"
8288 " disk status: %s", msg)
8289 self.feedback_fn("Pre-migration failed, aborting")
8290 self._AbortMigration()
8291 self._RevertDiskStatus()
8292 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8293 (instance.name, msg))
8295 self.feedback_fn("* migrating instance to %s" % target_node)
8296 result = self.rpc.call_instance_migrate(source_node, instance,
8297 self.nodes_ip[target_node],
8299 msg = result.fail_msg
8301 logging.error("Instance migration failed, trying to revert"
8302 " disk status: %s", msg)
8303 self.feedback_fn("Migration failed, aborting")
8304 self._AbortMigration()
8305 self._RevertDiskStatus()
8306 raise errors.OpExecError("Could not migrate instance %s: %s" %
8307 (instance.name, msg))
8309 self.feedback_fn("* starting memory transfer")
8310 last_feedback = time.time()
8312 result = self.rpc.call_instance_get_migration_status(source_node,
8314 msg = result.fail_msg
8315 ms = result.payload # MigrationStatus instance
8316 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8317 logging.error("Instance migration failed, trying to revert"
8318 " disk status: %s", msg)
8319 self.feedback_fn("Migration failed, aborting")
8320 self._AbortMigration()
8321 self._RevertDiskStatus()
8322 raise errors.OpExecError("Could not migrate instance %s: %s" %
8323 (instance.name, msg))
8325 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8326 self.feedback_fn("* memory transfer complete")
8329 if (utils.TimeoutExpired(last_feedback,
8330 self._MIGRATION_FEEDBACK_INTERVAL) and
8331 ms.transferred_ram is not None):
8332 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8333 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8334 last_feedback = time.time()
8336 time.sleep(self._MIGRATION_POLL_INTERVAL)
8338 result = self.rpc.call_instance_finalize_migration_src(source_node,
8342 msg = result.fail_msg
8344 logging.error("Instance migration succeeded, but finalization failed"
8345 " on the source node: %s", msg)
8346 raise errors.OpExecError("Could not finalize instance migration: %s" %
8349 instance.primary_node = target_node
8351 # distribute new instance config to the other nodes
8352 self.cfg.Update(instance, self.feedback_fn)
8354 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8358 msg = result.fail_msg
8360 logging.error("Instance migration succeeded, but finalization failed"
8361 " on the target node: %s", msg)
8362 raise errors.OpExecError("Could not finalize instance migration: %s" %
8365 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8366 self._EnsureSecondary(source_node)
8367 self._WaitUntilSync()
8368 self._GoStandalone()
8369 self._GoReconnect(False)
8370 self._WaitUntilSync()
8372 # If the instance's disk template is `rbd' and there was a successful
8373 # migration, unmap the device from the source node.
8374 if self.instance.disk_template == constants.DT_RBD:
8375 disks = _ExpandCheckDisks(instance, instance.disks)
8376 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8378 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8379 msg = result.fail_msg
8381 logging.error("Migration was successful, but couldn't unmap the"
8382 " block device %s on source node %s: %s",
8383 disk.iv_name, source_node, msg)
8384 logging.error("You need to unmap the device %s manually on %s",
8385 disk.iv_name, source_node)
8387 self.feedback_fn("* done")
8389 def _ExecFailover(self):
8390 """Failover an instance.
8392 The failover is done by shutting it down on its present node and
8393 starting it on the secondary.
8396 instance = self.instance
8397 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8399 source_node = instance.primary_node
8400 target_node = self.target_node
8402 if instance.admin_state == constants.ADMINST_UP:
8403 self.feedback_fn("* checking disk consistency between source and target")
8404 for (idx, dev) in enumerate(instance.disks):
8405 # for drbd, these are drbd over lvm
8406 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8407 if primary_node.offline:
8408 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8410 (primary_node.name, idx, target_node))
8411 elif not self.ignore_consistency:
8412 raise errors.OpExecError("Disk %s is degraded on target node,"
8413 " aborting failover" % idx)
8415 self.feedback_fn("* not checking disk consistency as instance is not"
8418 self.feedback_fn("* shutting down instance on source node")
8419 logging.info("Shutting down instance %s on node %s",
8420 instance.name, source_node)
8422 result = self.rpc.call_instance_shutdown(source_node, instance,
8423 self.shutdown_timeout)
8424 msg = result.fail_msg
8426 if self.ignore_consistency or primary_node.offline:
8427 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8428 " proceeding anyway; please make sure node"
8429 " %s is down; error details: %s",
8430 instance.name, source_node, source_node, msg)
8432 raise errors.OpExecError("Could not shutdown instance %s on node %s: %s" %
8434 (instance.name, source_node, msg))
8436 self.feedback_fn("* deactivating the instance's disks on source node")
8437 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8438 raise errors.OpExecError("Can't shut down the instance's disks")
8440 instance.primary_node = target_node
8441 # distribute new instance config to the other nodes
8442 self.cfg.Update(instance, self.feedback_fn)
8444 # Only start the instance if it's marked as up
8445 if instance.admin_state == constants.ADMINST_UP:
8446 self.feedback_fn("* activating the instance's disks on target node %s" %
8448 logging.info("Starting instance %s on node %s",
8449 instance.name, target_node)
8451 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8452 ignore_secondaries=True)
8454 _ShutdownInstanceDisks(self.lu, instance)
8455 raise errors.OpExecError("Can't activate the instance's disks")
8457 self.feedback_fn("* starting the instance on the target node %s" %
8459 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8461 msg = result.fail_msg
8463 _ShutdownInstanceDisks(self.lu, instance)
8464 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8465 (instance.name, target_node, msg))
8467 def Exec(self, feedback_fn):
8468 """Perform the migration.
8471 self.feedback_fn = feedback_fn
8472 self.source_node = self.instance.primary_node
8474 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8475 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8476 self.target_node = self.instance.secondary_nodes[0]
8477 # Otherwise self.target_node has been populated either
8478 # directly, or through an iallocator.
8480 self.all_nodes = [self.source_node, self.target_node]
8481 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8482 in self.cfg.GetMultiNodeInfo(self.all_nodes))
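# nodes_ip maps each involved node to its secondary IP; these addresses are
# used by the DRBD disconnect/attach helpers and as the address the instance
# is migrated to.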
8485 feedback_fn("Failover instance %s" % self.instance.name)
8486 self._ExecFailover()
8488 feedback_fn("Migrating instance %s" % self.instance.name)
8491 return self._ExecCleanup()
8493 return self._ExecMigration()
8496 def _CreateBlockDev(lu, node, instance, device, force_create,
8498 """Create a tree of block devices on a given node.
8500 If this device type has to be created on secondaries, create it and all its children.
8503 If not, just recurse to children keeping the same 'force' value.
8505 @param lu: the lu on whose behalf we execute
8506 @param node: the node on which to create the device
8507 @type instance: L{objects.Instance}
8508 @param instance: the instance which owns the device
8509 @type device: L{objects.Disk}
8510 @param device: the device to create
8511 @type force_create: boolean
8512 @param force_create: whether to force creation of this device; this
8513 will be changed to True whenever we find a device whose
8514 CreateOnSecondary() method returns True
8515 @param info: the extra 'metadata' we should attach to the device
8516 (this will be represented as a LVM tag)
8517 @type force_open: boolean
8518 @param force_open: this parameter will be passed to the
8519 L{backend.BlockdevCreate} function where it specifies
8520 whether we run on primary or not, and it affects both
8521 the child assembly and the device's own Open() execution
8524 if device.CreateOnSecondary():
8528 for child in device.children:
8529 _CreateBlockDev(lu, node, instance, child, force_create,
8532 if not force_create:
8535 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8538 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8539 """Create a single block device on a given node.
8541 This will not recurse over children of the device, so they must be
8544 @param lu: the lu on whose behalf we execute
8545 @param node: the node on which to create the device
8546 @type instance: L{objects.Instance}
8547 @param instance: the instance which owns the device
8548 @type device: L{objects.Disk}
8549 @param device: the device to create
8550 @param info: the extra 'metadata' we should attach to the device
8551 (this will be represented as a LVM tag)
8552 @type force_open: boolean
8553 @param force_open: this parameter will be passed to the
8554 L{backend.BlockdevCreate} function where it specifies
8555 whether we run on primary or not, and it affects both
8556 the child assembly and the device's own Open() execution
8559 lu.cfg.SetDiskID(device, node)
8560 result = lu.rpc.call_blockdev_create(node, device, device.size,
8561 instance.name, force_open, info)
8562 result.Raise("Can't create block device %s on"
8563 " node %s for instance %s" % (device, node, instance.name))
8564 if device.physical_id is None:
8565 device.physical_id = result.payload
8568 def _GenerateUniqueNames(lu, exts):
8569 """Generate a suitable LV name.
8571 This will generate a logical volume name for the given instance.
8576 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8577 results.append("%s%s" % (new_id, val))
8581 def _ComputeLDParams(disk_template, disk_params):
8582 """Computes Logical Disk parameters from Disk Template parameters.
8584 @type disk_template: string
8585 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8586 @type disk_params: dict
8587 @param disk_params: disk template parameters; dict(template_name -> parameters)
8589 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8590 contains the LD parameters of the node. The tree is flattened in-order.
8593 if disk_template not in constants.DISK_TEMPLATES:
8594 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
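# For DRBD8 the returned list is [drbd_params, data_lv_params, meta_lv_params],
# matching the in-order flattening described above; the non-mirrored templates
# generally contribute a single parameter dict.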
8597 dt_params = disk_params[disk_template]
8598 if disk_template == constants.DT_DRBD8:
8600 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8601 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8602 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8603 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8604 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8605 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8606 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8607 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8608 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8609 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8610 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8611 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8615 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8618 result.append(drbd_params)
8622 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8625 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8627 result.append(data_params)
8631 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8634 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8636 result.append(meta_params)
8638 elif (disk_template == constants.DT_FILE or
8639 disk_template == constants.DT_SHARED_FILE):
8640 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8642 elif disk_template == constants.DT_PLAIN:
8644 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8647 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8649 result.append(params)
8651 elif disk_template == constants.DT_BLOCK:
8652 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8654 elif disk_template == constants.DT_RBD:
8656 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8659 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8661 result.append(params)
8666 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8667 iv_name, p_minor, s_minor, drbd_params, data_params,
8669 """Generate a drbd8 device complete with its children.
8672 assert len(vgnames) == len(names) == 2
8673 port = lu.cfg.AllocatePort()
8674 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8676 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8677 logical_id=(vgnames[0], names[0]),
8679 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8680 logical_id=(vgnames[1], names[1]),
8682 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8683 logical_id=(primary, secondary, port,
8686 children=[dev_data, dev_meta],
8687 iv_name=iv_name, params=drbd_params)
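# The resulting DRBD8 disk has two LV children: the data volume of the
# requested size and a metadata volume of DRBD_META_SIZE, each placed on the
# corresponding volume group from vgnames.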
8691 _DISK_TEMPLATE_NAME_PREFIX = {
8692 constants.DT_PLAIN: "",
8693 constants.DT_RBD: ".rbd",
8697 _DISK_TEMPLATE_DEVICE_TYPE = {
8698 constants.DT_PLAIN: constants.LD_LV,
8699 constants.DT_FILE: constants.LD_FILE,
8700 constants.DT_SHARED_FILE: constants.LD_FILE,
8701 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8702 constants.DT_RBD: constants.LD_RBD,
8706 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8707 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8708 feedback_fn, disk_params,
8709 _req_file_storage=opcodes.RequireFileStorage,
8710 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8711 """Generate the entire disk layout for a given template type.
8714 #TODO: compute space requirements
8716 vgname = lu.cfg.GetVGName()
8717 disk_count = len(disk_info)
8719 ld_params = _ComputeLDParams(template_name, disk_params)
8721 if template_name == constants.DT_DISKLESS:
8723 elif template_name == constants.DT_DRBD8:
8724 drbd_params, data_params, meta_params = ld_params
8725 if len(secondary_nodes) != 1:
8726 raise errors.ProgrammerError("Wrong template configuration")
8727 remote_node = secondary_nodes[0]
8728 minors = lu.cfg.AllocateDRBDMinor(
8729 [primary_node, remote_node] * len(disk_info), instance_name)
8732 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8733 for i in range(disk_count)]):
8734 names.append(lv_prefix + "_data")
8735 names.append(lv_prefix + "_meta")
8736 for idx, disk in enumerate(disk_info):
8737 disk_index = idx + base_index
8738 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8739 data_vg = disk.get(constants.IDISK_VG, vgname)
8740 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8741 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8742 disk[constants.IDISK_SIZE],
8744 names[idx * 2:idx * 2 + 2],
8745 "disk/%d" % disk_index,
8746 minors[idx * 2], minors[idx * 2 + 1],
8747 drbd_params, data_params, meta_params)
8748 disk_dev.mode = disk[constants.IDISK_MODE]
8749 disks.append(disk_dev)
8752 raise errors.ProgrammerError("Wrong template configuration")
8754 if template_name == constants.DT_FILE:
8756 elif template_name == constants.DT_SHARED_FILE:
8757 _req_shr_file_storage()
8759 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8760 if name_prefix is None:
8763 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8764 (name_prefix, base_index + i)
8765 for i in range(disk_count)])
8767 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8769 if template_name == constants.DT_PLAIN:
8770 def logical_id_fn(idx, _, disk):
8771 vg = disk.get(constants.IDISK_VG, vgname)
8772 return (vg, names[idx])
8773 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8775 lambda _, disk_index, disk: (file_driver,
8776 "%s/disk%d" % (file_storage_dir,
8778 elif template_name == constants.DT_BLOCK:
8780 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8781 disk[constants.IDISK_ADOPT])
8782 elif template_name == constants.DT_RBD:
8783 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8785 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8787 for idx, disk in enumerate(disk_info):
8788 disk_index = idx + base_index
8789 size = disk[constants.IDISK_SIZE]
8790 feedback_fn("* disk %s, size %s" %
8791 (disk_index, utils.FormatUnit(size, "h")))
8792 disks.append(objects.Disk(dev_type=dev_type, size=size,
8793 logical_id=logical_id_fn(idx, disk_index, disk),
8794 iv_name="disk/%d" % disk_index,
8795 mode=disk[constants.IDISK_MODE],
8796 params=ld_params[0]))
8801 def _GetInstanceInfoText(instance):
8802 """Compute that text that should be added to the disk's metadata.
8805 return "originstname+%s" % instance.name
8808 def _CalcEta(time_taken, written, total_size):
8809 """Calculates the ETA based on size written and total size.
8811 @param time_taken: The time taken so far
8812 @param written: amount written so far
8813 @param total_size: The total size of data to be written
8814 @return: The remaining time in seconds
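For example, if 100 units out of 400 were written in 50 seconds, the average
time per unit is 0.5 seconds and the returned ETA is (400 - 100) * 0.5 = 150
seconds.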
8817 avg_time = time_taken / float(written)
8818 return (total_size - written) * avg_time
8821 def _WipeDisks(lu, instance):
8822 """Wipes instance disks.
8824 @type lu: L{LogicalUnit}
8825 @param lu: the logical unit on whose behalf we execute
8826 @type instance: L{objects.Instance}
8827 @param instance: the instance whose disks we should create
8828 @return: the success of the wipe
8831 node = instance.primary_node
8833 for device in instance.disks:
8834 lu.cfg.SetDiskID(device, node)
8836 logging.info("Pause sync of instance %s disks", instance.name)
8837 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8839 for idx, success in enumerate(result.payload):
8841 logging.warn("pause-sync of instance %s for disks %d failed",
8845 for idx, device in enumerate(instance.disks):
8846 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8847 # but at most MAX_WIPE_CHUNK
8848 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8849 constants.MIN_WIPE_CHUNK_PERCENT)
8850 # we _must_ make this an int, otherwise rounding errors will occur
8852 wipe_chunk_size = int(wipe_chunk_size)
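# Illustrative example (the real constants may differ): with
# MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024, a 512 MiB disk is
# wiped in chunks of int(min(1024, 512 / 100.0 * 10)) = 51 MiB.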
8854 lu.LogInfo("* Wiping disk %d", idx)
8855 logging.info("Wiping disk %d for instance %s, node %s using"
8856 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8861 start_time = time.time()
8863 while offset < size:
8864 wipe_size = min(wipe_chunk_size, size - offset)
8865 logging.debug("Wiping disk %d, offset %s, chunk %s",
8866 idx, offset, wipe_size)
8867 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8868 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8869 (idx, offset, wipe_size))
8872 if now - last_output >= 60:
8873 eta = _CalcEta(now - start_time, offset, size)
8874 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8875 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8878 logging.info("Resume sync of instance %s disks", instance.name)
8880 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8882 for idx, success in enumerate(result.payload):
8884 lu.LogWarning("Resume sync of disk %d failed, please have a"
8885 " look at the status and troubleshoot the issue", idx)
8886 logging.warn("resume-sync of instance %s for disks %d failed",
8890 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8891 """Create all disks for an instance.
8893 This abstracts away some work from AddInstance.
8895 @type lu: L{LogicalUnit}
8896 @param lu: the logical unit on whose behalf we execute
8897 @type instance: L{objects.Instance}
8898 @param instance: the instance whose disks we should create
8900 @param to_skip: list of indices to skip
8901 @type target_node: string
8902 @param target_node: if passed, overrides the target node for creation
8904 @return: the success of the creation
8907 info = _GetInstanceInfoText(instance)
8908 if target_node is None:
8909 pnode = instance.primary_node
8910 all_nodes = instance.all_nodes
8915 if instance.disk_template in constants.DTS_FILEBASED:
8916 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8917 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8919 result.Raise("Failed to create directory '%s' on"
8920 " node %s" % (file_storage_dir, pnode))
8922 # Note: this needs to be kept in sync with adding of disks in
8923 # LUInstanceSetParams
8924 for idx, device in enumerate(instance.disks):
8925 if to_skip and idx in to_skip:
8927 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8929 for node in all_nodes:
8930 f_create = node == pnode
8931 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8934 def _RemoveDisks(lu, instance, target_node=None):
8935 """Remove all disks for an instance.
8937 This abstracts away some work from `AddInstance()` and
8938 `RemoveInstance()`. Note that in case some of the devices couldn't
8939 be removed, the removal will continue with the other ones (compare
8940 with `_CreateDisks()`).
8942 @type lu: L{LogicalUnit}
8943 @param lu: the logical unit on whose behalf we execute
8944 @type instance: L{objects.Instance}
8945 @param instance: the instance whose disks we should remove
8946 @type target_node: string
8947 @param target_node: used to override the node on which to remove the disks
8949 @return: the success of the removal
8952 logging.info("Removing block devices for instance %s", instance.name)
8955 for (idx, device) in enumerate(instance.disks):
8957 edata = [(target_node, device)]
8959 edata = device.ComputeNodeTree(instance.primary_node)
8960 for node, disk in edata:
8961 lu.cfg.SetDiskID(disk, node)
8962 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8964 lu.LogWarning("Could not remove disk %s on node %s,"
8965 " continuing anyway: %s", idx, node, msg)
8968 # if this is a DRBD disk, return its port to the pool
8969 if device.dev_type in constants.LDS_DRBD:
8970 tcp_port = device.logical_id[2]
8971 lu.cfg.AddTcpUdpPort(tcp_port)
8973 if instance.disk_template == constants.DT_FILE:
8974 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8978 tgt = instance.primary_node
8979 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8981 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8982 file_storage_dir, instance.primary_node, result.fail_msg)
8988 def _ComputeDiskSizePerVG(disk_template, disks):
8989 """Compute disk size requirements in the volume group
8992 def _compute(disks, payload):
8993 """Universal algorithm.
8998 vgs[disk[constants.IDISK_VG]] = \
8999 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
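# Example with an illustrative VG name: two 512 MiB disks in VG "xenvg" and a
# payload of 128 MiB each accumulate to {"xenvg": (512 + 128) + (512 + 128)},
# i.e. {"xenvg": 1280}.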
9003 # Required free disk space as a function of disk and swap space
9005 constants.DT_DISKLESS: {},
9006 constants.DT_PLAIN: _compute(disks, 0),
9007 # 128 MB are added for drbd metadata for each disk
9008 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9009 constants.DT_FILE: {},
9010 constants.DT_SHARED_FILE: {},
9013 if disk_template not in req_size_dict:
9014 raise errors.ProgrammerError("Disk template '%s' size requirement"
9015 " is unknown" % disk_template)
9017 return req_size_dict[disk_template]
9020 def _ComputeDiskSize(disk_template, disks):
9021 """Compute disk size requirements in the volume group
9024 # Required free disk space as a function of disk and swap space
9026 constants.DT_DISKLESS: None,
9027 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9028 # 128 MB are added for drbd metadata for each disk
9030 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9031 constants.DT_FILE: None,
9032 constants.DT_SHARED_FILE: 0,
9033 constants.DT_BLOCK: 0,
9034 constants.DT_RBD: 0,
9037 if disk_template not in req_size_dict:
9038 raise errors.ProgrammerError("Disk template '%s' size requirement"
9039 " is unknown" % disk_template)
9041 return req_size_dict[disk_template]
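# Example: for DT_DRBD8 with two disks of 512 MiB and 1024 MiB this returns
# (512 + 128) + (1024 + 128) = 1792 MiB, while DT_PLAIN would return 1536 MiB.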
9044 def _FilterVmNodes(lu, nodenames):
9045 """Filters out non-vm_capable nodes from a list.
9047 @type lu: L{LogicalUnit}
9048 @param lu: the logical unit for which we check
9049 @type nodenames: list
9050 @param nodenames: the list of nodes on which we should check
9052 @return: the list of vm-capable nodes
9055 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9056 return [name for name in nodenames if name not in vm_nodes]
9059 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9060 """Hypervisor parameter validation.
9062 This function abstracts the hypervisor parameter validation to be
9063 used in both instance create and instance modify.
9065 @type lu: L{LogicalUnit}
9066 @param lu: the logical unit for which we check
9067 @type nodenames: list
9068 @param nodenames: the list of nodes on which we should check
9069 @type hvname: string
9070 @param hvname: the name of the hypervisor we should use
9071 @type hvparams: dict
9072 @param hvparams: the parameters which we need to check
9073 @raise errors.OpPrereqError: if the parameters are not valid
9076 nodenames = _FilterVmNodes(lu, nodenames)
9078 cluster = lu.cfg.GetClusterInfo()
9079 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9081 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9082 for node in nodenames:
9086 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9089 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9090 """OS parameters validation.
9092 @type lu: L{LogicalUnit}
9093 @param lu: the logical unit for which we check
9094 @type required: boolean
9095 @param required: whether the validation should fail if the OS is not found
9097 @type nodenames: list
9098 @param nodenames: the list of nodes on which we should check
9099 @type osname: string
9100 @param osname: the name of the OS we should use
9101 @type osparams: dict
9102 @param osparams: the parameters which we need to check
9103 @raise errors.OpPrereqError: if the parameters are not valid
9106 nodenames = _FilterVmNodes(lu, nodenames)
9107 result = lu.rpc.call_os_validate(nodenames, required, osname,
9108 [constants.OS_VALIDATE_PARAMETERS],
9110 for node, nres in result.items():
9111 # we don't check for offline cases since this should be run only
9112 # against the master node and/or an instance's nodes
9113 nres.Raise("OS Parameters validation failed on node %s" % node)
9114 if not nres.payload:
9115 lu.LogInfo("OS %s not found on node %s, validation skipped",
9119 class LUInstanceCreate(LogicalUnit):
9120 """Create an instance.
9123 HPATH = "instance-add"
9124 HTYPE = constants.HTYPE_INSTANCE
9127 def CheckArguments(self):
9131 # do not require name_check to ease forward/backward compatibility
9133 if self.op.no_install and self.op.start:
9134 self.LogInfo("No-installation mode selected, disabling startup")
9135 self.op.start = False
9136 # validate/normalize the instance name
9137 self.op.instance_name = \
9138 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9140 if self.op.ip_check and not self.op.name_check:
9141 # TODO: make the ip check more flexible and not depend on the name check
9142 raise errors.OpPrereqError("Cannot do IP address check without a name"
9143 " check", errors.ECODE_INVAL)
9145 # check nics' parameter names
9146 for nic in self.op.nics:
9147 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9149 # check disks: parameter names and consistent adopt/no-adopt strategy
9150 has_adopt = has_no_adopt = False
9151 for disk in self.op.disks:
9152 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9153 if constants.IDISK_ADOPT in disk:
9157 if has_adopt and has_no_adopt:
9158 raise errors.OpPrereqError("Either all disks are adopted or none is",
9161 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9162 raise errors.OpPrereqError("Disk adoption is not supported for the"
9163 " '%s' disk template" %
9164 self.op.disk_template,
9166 if self.op.iallocator is not None:
9167 raise errors.OpPrereqError("Disk adoption not allowed with an"
9168 " iallocator script", errors.ECODE_INVAL)
9169 if self.op.mode == constants.INSTANCE_IMPORT:
9170 raise errors.OpPrereqError("Disk adoption not allowed for"
9171 " instance import", errors.ECODE_INVAL)
9173 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9174 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9175 " but no 'adopt' parameter given" %
9176 self.op.disk_template,
9179 self.adopt_disks = has_adopt
9181 # instance name verification
9182 if self.op.name_check:
9183 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9184 self.op.instance_name = self.hostname1.name
9185 # used in CheckPrereq for ip ping check
9186 self.check_ip = self.hostname1.ip
9188 self.check_ip = None
9190 # file storage checks
9191 if (self.op.file_driver and
9192 not self.op.file_driver in constants.FILE_DRIVER):
9193 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9194 self.op.file_driver, errors.ECODE_INVAL)
9196 if self.op.disk_template == constants.DT_FILE:
9197 opcodes.RequireFileStorage()
9198 elif self.op.disk_template == constants.DT_SHARED_FILE:
9199 opcodes.RequireSharedFileStorage()
9201 ### Node/iallocator related checks
9202 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9204 if self.op.pnode is not None:
9205 if self.op.disk_template in constants.DTS_INT_MIRROR:
9206 if self.op.snode is None:
9207 raise errors.OpPrereqError("The networked disk templates need"
9208 " a mirror node", errors.ECODE_INVAL)
9210 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9212 self.op.snode = None
9214 self._cds = _GetClusterDomainSecret()
9216 if self.op.mode == constants.INSTANCE_IMPORT:
9217 # On import force_variant must be True, because if we forced it at
9218 # initial install, our only chance when importing it back is that it
9220 self.op.force_variant = True
9222 if self.op.no_install:
9223 self.LogInfo("No-installation mode has no effect during import")
9225 elif self.op.mode == constants.INSTANCE_CREATE:
9226 if self.op.os_type is None:
9227 raise errors.OpPrereqError("No guest OS specified",
9229 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9230 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9231 " installation" % self.op.os_type,
9233 if self.op.disk_template is None:
9234 raise errors.OpPrereqError("No disk template specified",
9237 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9238 # Check handshake to ensure both clusters have the same domain secret
9239 src_handshake = self.op.source_handshake
9240 if not src_handshake:
9241 raise errors.OpPrereqError("Missing source handshake",
9244 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9247 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9250 # Load and check source CA
9251 self.source_x509_ca_pem = self.op.source_x509_ca
9252 if not self.source_x509_ca_pem:
9253 raise errors.OpPrereqError("Missing source X509 CA",
9257 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9259 except OpenSSL.crypto.Error, err:
9260 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9261 (err, ), errors.ECODE_INVAL)
9263 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9264 if errcode is not None:
9265 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9268 self.source_x509_ca = cert
9270 src_instance_name = self.op.source_instance_name
9271 if not src_instance_name:
9272 raise errors.OpPrereqError("Missing source instance name",
9275 self.source_instance_name = \
9276 netutils.GetHostname(name=src_instance_name).name
9279 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9280 self.op.mode, errors.ECODE_INVAL)
9282 def ExpandNames(self):
9283 """ExpandNames for CreateInstance.
9285 Figure out the right locks for instance creation.
9288 self.needed_locks = {}
9290 instance_name = self.op.instance_name
9291 # this is just a preventive check, but someone might still add this
9292 # instance in the meantime, and creation will fail at lock-add time
9293 if instance_name in self.cfg.GetInstanceList():
9294 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9295 instance_name, errors.ECODE_EXISTS)
9297 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9299 if self.op.iallocator:
9300 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9301 # specifying a group on instance creation and then selecting nodes from
9303 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9304 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9306 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9307 nodelist = [self.op.pnode]
9308 if self.op.snode is not None:
9309 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9310 nodelist.append(self.op.snode)
9311 self.needed_locks[locking.LEVEL_NODE] = nodelist
9312 # Lock resources of instance's primary and secondary nodes (copy to
9313 # prevent accidental modification)
9314 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9316 # in case of import lock the source node too
9317 if self.op.mode == constants.INSTANCE_IMPORT:
9318 src_node = self.op.src_node
9319 src_path = self.op.src_path
9321 if src_path is None:
9322 self.op.src_path = src_path = self.op.instance_name
9324 if src_node is None:
9325 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9326 self.op.src_node = None
9327 if os.path.isabs(src_path):
9328 raise errors.OpPrereqError("Importing an instance from a path"
9329 " requires a source node option",
9332 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9333 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9334 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9335 if not os.path.isabs(src_path):
9336 self.op.src_path = src_path = \
9337 utils.PathJoin(constants.EXPORT_DIR, src_path)
9339 def _RunAllocator(self):
9340 """Run the allocator based on input opcode.
9343 nics = [n.ToDict() for n in self.nics]
9344 ial = IAllocator(self.cfg, self.rpc,
9345 mode=constants.IALLOCATOR_MODE_ALLOC,
9346 name=self.op.instance_name,
9347 disk_template=self.op.disk_template,
9350 vcpus=self.be_full[constants.BE_VCPUS],
9351 memory=self.be_full[constants.BE_MAXMEM],
9354 hypervisor=self.op.hypervisor,
9357 ial.Run(self.op.iallocator)
9360 raise errors.OpPrereqError("Can't compute nodes using"
9361 " iallocator '%s': %s" %
9362 (self.op.iallocator, ial.info),
9364 if len(ial.result) != ial.required_nodes:
9365 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9366 " of nodes (%s), required %s" %
9367 (self.op.iallocator, len(ial.result),
9368 ial.required_nodes), errors.ECODE_FAULT)
9369 self.op.pnode = ial.result[0]
9370 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9371 self.op.instance_name, self.op.iallocator,
9372 utils.CommaJoin(ial.result))
9373 if ial.required_nodes == 2:
9374 self.op.snode = ial.result[1]
9376 def BuildHooksEnv(self):
9379 This runs on the master, primary and secondary nodes of the instance.
9383 "ADD_MODE": self.op.mode,
9385 if self.op.mode == constants.INSTANCE_IMPORT:
9386 env["SRC_NODE"] = self.op.src_node
9387 env["SRC_PATH"] = self.op.src_path
9388 env["SRC_IMAGES"] = self.src_images
9390 env.update(_BuildInstanceHookEnv(
9391 name=self.op.instance_name,
9392 primary_node=self.op.pnode,
9393 secondary_nodes=self.secondaries,
9394 status=self.op.start,
9395 os_type=self.op.os_type,
9396 minmem=self.be_full[constants.BE_MINMEM],
9397 maxmem=self.be_full[constants.BE_MAXMEM],
9398 vcpus=self.be_full[constants.BE_VCPUS],
9399 nics=_NICListToTuple(self, self.nics),
9400 disk_template=self.op.disk_template,
9401 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9402 for d in self.disks],
9405 hypervisor_name=self.op.hypervisor,
9411 def BuildHooksNodes(self):
9412 """Build hooks nodes.
9415 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9418 def _ReadExportInfo(self):
9419 """Reads the export information from disk.
9421 It will override the opcode source node and path with the actual
9422 information, if these two were not specified before.
9424 @return: the export information
9427 assert self.op.mode == constants.INSTANCE_IMPORT
9429 src_node = self.op.src_node
9430 src_path = self.op.src_path
9432 if src_node is None:
9433 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9434 exp_list = self.rpc.call_export_list(locked_nodes)
9436 for node in exp_list:
9437 if exp_list[node].fail_msg:
9439 if src_path in exp_list[node].payload:
9441 self.op.src_node = src_node = node
9442 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9446 raise errors.OpPrereqError("No export found for relative path %s" %
9447 src_path, errors.ECODE_INVAL)
9449 _CheckNodeOnline(self, src_node)
9450 result = self.rpc.call_export_info(src_node, src_path)
9451 result.Raise("No export or invalid export found in dir %s" % src_path)
9453 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9454 if not export_info.has_section(constants.INISECT_EXP):
9455 raise errors.ProgrammerError("Corrupted export config",
9456 errors.ECODE_ENVIRON)
9458 ei_version = export_info.get(constants.INISECT_EXP, "version")
9459 if (int(ei_version) != constants.EXPORT_VERSION):
9460 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9461 (ei_version, constants.EXPORT_VERSION),
9462 errors.ECODE_ENVIRON)
9465 def _ReadExportParams(self, einfo):
9466 """Use export parameters as defaults.
9468 In case the opcode doesn't specify (as in override) some instance
9469 parameters, then try to use them from the export information, if available.
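# Rough sketch of the export file consulted below: it is an INI-style file
# (read via objects.SerializableConfigParser) whose section names are the
# constants.INISECT_* values (not spelled out here), roughly:
#   [INISECT_EXP]  version, os
#   [INISECT_INS]  disk_template, disk0_size, nic0_mac, tags, hypervisor,
#                  plus old-style backend parameters
#   [INISECT_HYP]  hypervisor parameters
#   [INISECT_BEP]  backend parameters
#   [INISECT_OSP]  OS parameters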
9473 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9475 if self.op.disk_template is None:
9476 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9477 self.op.disk_template = einfo.get(constants.INISECT_INS,
9479 if self.op.disk_template not in constants.DISK_TEMPLATES:
9480 raise errors.OpPrereqError("Disk template specified in configuration"
9481 " file is not one of the allowed values:"
9482 " %s" % " ".join(constants.DISK_TEMPLATES))
9484 raise errors.OpPrereqError("No disk template specified and the export"
9485 " is missing the disk_template information",
9488 if not self.op.disks:
9490 # TODO: import the disk iv_name too
9491 for idx in range(constants.MAX_DISKS):
9492 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9493 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9494 disks.append({constants.IDISK_SIZE: disk_sz})
9495 self.op.disks = disks
9496 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9497 raise errors.OpPrereqError("No disk info specified and the export"
9498 " is missing the disk information",
9501 if not self.op.nics:
9503 for idx in range(constants.MAX_NICS):
9504 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9506 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9507 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9514 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9515 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9517 if (self.op.hypervisor is None and
9518 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9519 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9521 if einfo.has_section(constants.INISECT_HYP):
9522 # use the export parameters but do not override the ones
9523 # specified by the user
9524 for name, value in einfo.items(constants.INISECT_HYP):
9525 if name not in self.op.hvparams:
9526 self.op.hvparams[name] = value
9528 if einfo.has_section(constants.INISECT_BEP):
9529 # use the parameters, without overriding
9530 for name, value in einfo.items(constants.INISECT_BEP):
9531 if name not in self.op.beparams:
9532 self.op.beparams[name] = value
9533 # Compatibility for the old "memory" be param
9534 if name == constants.BE_MEMORY:
9535 if constants.BE_MAXMEM not in self.op.beparams:
9536 self.op.beparams[constants.BE_MAXMEM] = value
9537 if constants.BE_MINMEM not in self.op.beparams:
9538 self.op.beparams[constants.BE_MINMEM] = value
9540 # try to read the parameters old style, from the main section
9541 for name in constants.BES_PARAMETERS:
9542 if (name not in self.op.beparams and
9543 einfo.has_option(constants.INISECT_INS, name)):
9544 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9546 if einfo.has_section(constants.INISECT_OSP):
9547 # use the parameters, without overriding
9548 for name, value in einfo.items(constants.INISECT_OSP):
9549 if name not in self.op.osparams:
9550 self.op.osparams[name] = value
9552 def _RevertToDefaults(self, cluster):
9553 """Revert the instance parameters to the default values.
9557 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9558 for name in self.op.hvparams.keys():
9559 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9560 del self.op.hvparams[name]
9562 be_defs = cluster.SimpleFillBE({})
9563 for name in self.op.beparams.keys():
9564 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9565 del self.op.beparams[name]
9567 nic_defs = cluster.SimpleFillNIC({})
9568 for nic in self.op.nics:
9569 for name in constants.NICS_PARAMETERS:
9570 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9573 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9574 for name in self.op.osparams.keys():
9575 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9576 del self.op.osparams[name]
9578 def _CalculateFileStorageDir(self):
9579 """Calculate final instance file storage dir.
9582 # file storage dir calculation/check
9583 self.instance_file_storage_dir = None
9584 if self.op.disk_template in constants.DTS_FILEBASED:
9585 # build the full file storage dir path
9588 if self.op.disk_template == constants.DT_SHARED_FILE:
9589 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9591 get_fsd_fn = self.cfg.GetFileStorageDir
9593 cfg_storagedir = get_fsd_fn()
9594 if not cfg_storagedir:
9595 raise errors.OpPrereqError("Cluster file storage dir not defined")
9596 joinargs.append(cfg_storagedir)
9598 if self.op.file_storage_dir is not None:
9599 joinargs.append(self.op.file_storage_dir)
9601 joinargs.append(self.op.instance_name)
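# Sketch of the resulting path, assuming every optional component is set:
#   <cluster (shared) file storage dir>/<op.file_storage_dir>/<instance name>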
9603 # pylint: disable=W0142
9604 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9606 def CheckPrereq(self): # pylint: disable=R0914
9607 """Check prerequisites.
9610 self._CalculateFileStorageDir()
9612 if self.op.mode == constants.INSTANCE_IMPORT:
9613 export_info = self._ReadExportInfo()
9614 self._ReadExportParams(export_info)
9616 if (not self.cfg.GetVGName() and
9617 self.op.disk_template not in constants.DTS_NOT_LVM):
9618 raise errors.OpPrereqError("Cluster does not support lvm-based"
9619 " instances", errors.ECODE_STATE)
9621 if (self.op.hypervisor is None or
9622 self.op.hypervisor == constants.VALUE_AUTO):
9623 self.op.hypervisor = self.cfg.GetHypervisorType()
9625 cluster = self.cfg.GetClusterInfo()
9626 enabled_hvs = cluster.enabled_hypervisors
9627 if self.op.hypervisor not in enabled_hvs:
9628 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9629 " cluster (%s)" % (self.op.hypervisor,
9630 ",".join(enabled_hvs)),
9633 # Check tag validity
9634 for tag in self.op.tags:
9635 objects.TaggableObject.ValidateTag(tag)
9637 # check hypervisor parameter syntax (locally)
9638 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9639 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9641 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9642 hv_type.CheckParameterSyntax(filled_hvp)
9643 self.hv_full = filled_hvp
9644 # check that we don't specify global parameters on an instance
9645 _CheckGlobalHvParams(self.op.hvparams)
9647 # fill and remember the beparams dict
9648 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9649 for param, value in self.op.beparams.iteritems():
9650 if value == constants.VALUE_AUTO:
9651 self.op.beparams[param] = default_beparams[param]
9652 objects.UpgradeBeParams(self.op.beparams)
9653 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9654 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9656 # build os parameters
9657 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9659 # now that hvp/bep are in final format, let's reset to defaults,
9661 if self.op.identify_defaults:
9662 self._RevertToDefaults(cluster)
9666 for idx, nic in enumerate(self.op.nics):
9667 nic_mode_req = nic.get(constants.INIC_MODE, None)
9668 nic_mode = nic_mode_req
9669 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9670 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9672 # in routed mode, for the first nic, the default ip is 'auto'
9673 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9674 default_ip_mode = constants.VALUE_AUTO
9676 default_ip_mode = constants.VALUE_NONE
9678 # ip validity checks
9679 ip = nic.get(constants.INIC_IP, default_ip_mode)
9680 if ip is None or ip.lower() == constants.VALUE_NONE:
9682 elif ip.lower() == constants.VALUE_AUTO:
9683 if not self.op.name_check:
9684 raise errors.OpPrereqError("IP address set to auto but name checks"
9685 " have been skipped",
9687 nic_ip = self.hostname1.ip
9689 if not netutils.IPAddress.IsValid(ip):
9690 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9694 # TODO: check the ip address for uniqueness
9695 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9696 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9699 # MAC address verification
9700 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9701 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9702 mac = utils.NormalizeAndValidateMac(mac)
9705 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9706 except errors.ReservationError:
9707 raise errors.OpPrereqError("MAC address %s already in use"
9708 " in cluster" % mac,
9709 errors.ECODE_NOTUNIQUE)
9711 # Build nic parameters
9712 link = nic.get(constants.INIC_LINK, None)
9713 if link == constants.VALUE_AUTO:
9714 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9717 nicparams[constants.NIC_MODE] = nic_mode
9719 nicparams[constants.NIC_LINK] = link
9721 check_params = cluster.SimpleFillNIC(nicparams)
9722 objects.NIC.CheckParameterSyntax(check_params)
9723 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
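# At this point every entry in self.nics is an objects.NIC whose MAC may
# still be 'auto'/'generate' (resolved during the generation step further
# down), whose IP may be None, and whose nicparams hold only the mode/link
# overrides given in the opcode.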
9725 # disk checks/pre-build
9726 default_vg = self.cfg.GetVGName()
9728 for disk in self.op.disks:
9729 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9730 if mode not in constants.DISK_ACCESS_SET:
9731 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9732 mode, errors.ECODE_INVAL)
9733 size = disk.get(constants.IDISK_SIZE, None)
9735 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9738 except (TypeError, ValueError):
9739 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9742 data_vg = disk.get(constants.IDISK_VG, default_vg)
9744 constants.IDISK_SIZE: size,
9745 constants.IDISK_MODE: mode,
9746 constants.IDISK_VG: data_vg,
9748 if constants.IDISK_METAVG in disk:
9749 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9750 if constants.IDISK_ADOPT in disk:
9751 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9752 self.disks.append(new_disk)
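# Sketch of one self.disks entry built above (keys are constants.IDISK_*
# values; metavg/adopt are present only when given in the opcode):
#   {IDISK_SIZE: <int>, IDISK_MODE: <access mode>, IDISK_VG: <vg name>,
#    IDISK_METAVG: ..., IDISK_ADOPT: ...}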
9754 if self.op.mode == constants.INSTANCE_IMPORT:
9756 for idx in range(len(self.disks)):
9757 option = "disk%d_dump" % idx
9758 if export_info.has_option(constants.INISECT_INS, option):
9759 # FIXME: are the old os-es, disk sizes, etc. useful?
9760 export_name = export_info.get(constants.INISECT_INS, option)
9761 image = utils.PathJoin(self.op.src_path, export_name)
9762 disk_images.append(image)
9764 disk_images.append(False)
9766 self.src_images = disk_images
9768 old_name = export_info.get(constants.INISECT_INS, "name")
9769 if self.op.instance_name == old_name:
9770 for idx, nic in enumerate(self.nics):
9771 if nic.mac == constants.VALUE_AUTO:
9772 nic_mac_ini = "nic%d_mac" % idx
9773 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9775 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9777 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9778 if self.op.ip_check:
9779 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9780 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9781 (self.check_ip, self.op.instance_name),
9782 errors.ECODE_NOTUNIQUE)
9784 #### mac address generation
9785 # By generating the MAC address here, both the allocator and the hooks get
9786 # the real, final MAC address rather than the 'auto' or 'generate' value.
9787 # There is a race condition between the generation and the instance object
9788 # creation, which means that we know the mac is valid now, but we're not
9789 # sure it will be when we actually add the instance. If things go bad
9790 # adding the instance will abort because of a duplicate mac, and the
9791 # creation job will fail.
9792 for nic in self.nics:
9793 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9794 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9798 if self.op.iallocator is not None:
9799 self._RunAllocator()
9801 # Release all unneeded node locks
9802 _ReleaseLocks(self, locking.LEVEL_NODE,
9803 keep=filter(None, [self.op.pnode, self.op.snode,
9805 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9806 keep=filter(None, [self.op.pnode, self.op.snode,
9809 #### node related checks
9811 # check primary node
9812 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9813 assert self.pnode is not None, \
9814 "Cannot retrieve locked node %s" % self.op.pnode
9816 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9817 pnode.name, errors.ECODE_STATE)
9819 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9820 pnode.name, errors.ECODE_STATE)
9821 if not pnode.vm_capable:
9822 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9823 " '%s'" % pnode.name, errors.ECODE_STATE)
9825 self.secondaries = []
9827 # mirror node verification
9828 if self.op.disk_template in constants.DTS_INT_MIRROR:
9829 if self.op.snode == pnode.name:
9830 raise errors.OpPrereqError("The secondary node cannot be the"
9831 " primary node", errors.ECODE_INVAL)
9832 _CheckNodeOnline(self, self.op.snode)
9833 _CheckNodeNotDrained(self, self.op.snode)
9834 _CheckNodeVmCapable(self, self.op.snode)
9835 self.secondaries.append(self.op.snode)
9837 snode = self.cfg.GetNodeInfo(self.op.snode)
9838 if pnode.group != snode.group:
9839 self.LogWarning("The primary and secondary nodes are in two"
9840 " different node groups; the disk parameters"
9841 " from the first disk's node group will be"
9844 nodenames = [pnode.name] + self.secondaries
9846 # Verify instance specs
9848 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9849 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9850 constants.ISPEC_DISK_COUNT: len(self.disks),
9851 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9852 constants.ISPEC_NIC_COUNT: len(self.nics),
9855 group_info = self.cfg.GetNodeGroup(pnode.group)
9856 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9857 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9858 if not self.op.ignore_ipolicy and res:
9859 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9860 " policy: %s") % (pnode.group,
9861 utils.CommaJoin(res)),
9864 # disk parameters (not customizable at instance or node level)
9865 # just use the primary node parameters, ignoring the secondary.
9866 self.diskparams = group_info.diskparams
9868 if not self.adopt_disks:
9869 if self.op.disk_template == constants.DT_RBD:
9870 # _CheckRADOSFreeSpace() is just a placeholder.
9871 # Any function that checks prerequisites can be placed here.
9872 # Check if there is enough space on the RADOS cluster.
9873 _CheckRADOSFreeSpace()
9875 # Check lv size requirements, if not adopting
9876 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9877 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9879 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9880 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9881 disk[constants.IDISK_ADOPT])
9882 for disk in self.disks])
9883 if len(all_lvs) != len(self.disks):
9884 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9886 for lv_name in all_lvs:
9888 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9889 # to ReserveLV uses the same syntax
9890 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9891 except errors.ReservationError:
9892 raise errors.OpPrereqError("LV named %s used by another instance" %
9893 lv_name, errors.ECODE_NOTUNIQUE)
9895 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9896 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9898 node_lvs = self.rpc.call_lv_list([pnode.name],
9899 vg_names.payload.keys())[pnode.name]
9900 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9901 node_lvs = node_lvs.payload
9903 delta = all_lvs.difference(node_lvs.keys())
9905 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9906 utils.CommaJoin(delta),
9908 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9910 raise errors.OpPrereqError("Online logical volumes found, cannot"
9911 " adopt: %s" % utils.CommaJoin(online_lvs),
9913 # update the size of each disk based on what was found
9914 for dsk in self.disks:
9915 dsk[constants.IDISK_SIZE] = \
9916 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9917 dsk[constants.IDISK_ADOPT])][0]))
9919 elif self.op.disk_template == constants.DT_BLOCK:
9920 # Normalize and de-duplicate device paths
9921 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9922 for disk in self.disks])
9923 if len(all_disks) != len(self.disks):
9924 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9926 baddisks = [d for d in all_disks
9927 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9929 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9930 " cannot be adopted" %
9931 (", ".join(baddisks),
9932 constants.ADOPTABLE_BLOCKDEV_ROOT),
9935 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9936 list(all_disks))[pnode.name]
9937 node_disks.Raise("Cannot get block device information from node %s" %
9939 node_disks = node_disks.payload
9940 delta = all_disks.difference(node_disks.keys())
9942 raise errors.OpPrereqError("Missing block device(s): %s" %
9943 utils.CommaJoin(delta),
9945 for dsk in self.disks:
9946 dsk[constants.IDISK_SIZE] = \
9947 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9949 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9951 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9952 # check OS parameters (remotely)
9953 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9955 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9957 # memory check on primary node
9958 #TODO(dynmem): use MINMEM for checking
9960 _CheckNodeFreeMemory(self, self.pnode.name,
9961 "creating instance %s" % self.op.instance_name,
9962 self.be_full[constants.BE_MAXMEM],
9965 self.dry_run_result = list(nodenames)
9967 def Exec(self, feedback_fn):
9968 """Create and add the instance to the cluster.
9971 instance = self.op.instance_name
9972 pnode_name = self.pnode.name
9974 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9975 self.owned_locks(locking.LEVEL_NODE)), \
9976 "Node locks differ from node resource locks"
9978 ht_kind = self.op.hypervisor
9979 if ht_kind in constants.HTS_REQ_PORT:
9980 network_port = self.cfg.AllocatePort()
9984 disks = _GenerateDiskTemplate(self,
9985 self.op.disk_template,
9986 instance, pnode_name,
9989 self.instance_file_storage_dir,
9990 self.op.file_driver,
9995 iobj = objects.Instance(name=instance, os=self.op.os_type,
9996 primary_node=pnode_name,
9997 nics=self.nics, disks=disks,
9998 disk_template=self.op.disk_template,
9999 admin_state=constants.ADMINST_DOWN,
10000 network_port=network_port,
10001 beparams=self.op.beparams,
10002 hvparams=self.op.hvparams,
10003 hypervisor=self.op.hypervisor,
10004 osparams=self.op.osparams,
10008 for tag in self.op.tags:
10011 if self.adopt_disks:
10012 if self.op.disk_template == constants.DT_PLAIN:
10013 # rename LVs to the newly-generated names; we need to construct
10014 # 'fake' LV disks with the old data, plus the new unique_id
10015 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10017 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10018 rename_to.append(t_dsk.logical_id)
10019 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10020 self.cfg.SetDiskID(t_dsk, pnode_name)
10021 result = self.rpc.call_blockdev_rename(pnode_name,
10022 zip(tmp_disks, rename_to))
10023 result.Raise("Failed to rename adopted LVs")
10025 feedback_fn("* creating instance disks...")
10027 _CreateDisks(self, iobj)
10028 except errors.OpExecError:
10029 self.LogWarning("Device creation failed, reverting...")
10031 _RemoveDisks(self, iobj)
10033 self.cfg.ReleaseDRBDMinors(instance)
10036 feedback_fn("adding instance %s to cluster config" % instance)
10038 self.cfg.AddInstance(iobj, self.proc.GetECId())
10040 # Declare that we don't want to remove the instance lock anymore, as we've
10041 # added the instance to the config
10042 del self.remove_locks[locking.LEVEL_INSTANCE]
10044 if self.op.mode == constants.INSTANCE_IMPORT:
10045 # Release unused nodes
10046 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10048 # Release all nodes
10049 _ReleaseLocks(self, locking.LEVEL_NODE)
10052 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10053 feedback_fn("* wiping instance disks...")
10055 _WipeDisks(self, iobj)
10056 except errors.OpExecError, err:
10057 logging.exception("Wiping disks failed")
10058 self.LogWarning("Wiping instance disks failed (%s)", err)
10062 # Something is already wrong with the disks, don't do anything else
10064 elif self.op.wait_for_sync:
10065 disk_abort = not _WaitForSync(self, iobj)
10066 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10067 # make sure the disks are not degraded (still sync-ing is ok)
10068 feedback_fn("* checking mirrors status")
10069 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10074 _RemoveDisks(self, iobj)
10075 self.cfg.RemoveInstance(iobj.name)
10076 # Make sure the instance lock gets removed
10077 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10078 raise errors.OpExecError("There are some degraded disks for"
10081 # Release all node resource locks
10082 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10084 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10085 if self.op.mode == constants.INSTANCE_CREATE:
10086 if not self.op.no_install:
10087 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10088 not self.op.wait_for_sync)
10090 feedback_fn("* pausing disk sync to install instance OS")
10091 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10093 for idx, success in enumerate(result.payload):
10095 logging.warn("pause-sync of instance %s for disk %d failed",
10098 feedback_fn("* running the instance OS create scripts...")
10099 # FIXME: pass debug option from opcode to backend
10101 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10102 self.op.debug_level)
10104 feedback_fn("* resuming disk sync")
10105 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10107 for idx, success in enumerate(result.payload):
10109 logging.warn("resume-sync of instance %s for disk %d failed",
10112 os_add_result.Raise("Could not add os for instance %s"
10113 " on node %s" % (instance, pnode_name))
10115 elif self.op.mode == constants.INSTANCE_IMPORT:
10116 feedback_fn("* running the instance OS import scripts...")
10120 for idx, image in enumerate(self.src_images):
10124 # FIXME: pass debug option from opcode to backend
10125 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10126 constants.IEIO_FILE, (image, ),
10127 constants.IEIO_SCRIPT,
10128 (iobj.disks[idx], idx),
10130 transfers.append(dt)
10133 masterd.instance.TransferInstanceData(self, feedback_fn,
10134 self.op.src_node, pnode_name,
10135 self.pnode.secondary_ip,
10137 if not compat.all(import_result):
10138 self.LogWarning("Some disks for instance %s on node %s were not"
10139 " imported successfully" % (instance, pnode_name))
10141 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10142 feedback_fn("* preparing remote import...")
10143 # The source cluster will stop the instance before attempting to make a
10144 # connection. In some cases stopping an instance can take a long time,
10145 # hence the shutdown timeout is added to the connection timeout.
10146 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10147 self.op.source_shutdown_timeout)
10148 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10150 assert iobj.primary_node == self.pnode.name
10152 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10153 self.source_x509_ca,
10154 self._cds, timeouts)
10155 if not compat.all(disk_results):
10156 # TODO: Should the instance still be started, even if some disks
10157 # failed to import (valid for local imports, too)?
10158 self.LogWarning("Some disks for instance %s on node %s were not"
10159 " imported successfully" % (instance, pnode_name))
10161 # Run rename script on newly imported instance
10162 assert iobj.name == instance
10163 feedback_fn("Running rename script for %s" % instance)
10164 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10165 self.source_instance_name,
10166 self.op.debug_level)
10167 if result.fail_msg:
10168 self.LogWarning("Failed to run rename script for %s on node"
10169 " %s: %s" % (instance, pnode_name, result.fail_msg))
10172 # also checked in the prereq part
10173 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10176 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10179 iobj.admin_state = constants.ADMINST_UP
10180 self.cfg.Update(iobj, feedback_fn)
10181 logging.info("Starting instance %s on node %s", instance, pnode_name)
10182 feedback_fn("* starting instance...")
10183 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10185 result.Raise("Could not start instance")
10187 return list(iobj.all_nodes)
10190 def _CheckRADOSFreeSpace():
10191 """Compute disk size requirements inside the RADOS cluster.
10194 # For the RADOS cluster we assume there is always enough space.
10198 class LUInstanceConsole(NoHooksLU):
10199 """Connect to an instance's console.
10201 This is somewhat special in that it returns the command line that
10202 you need to run on the master node in order to connect to the console.
10208 def ExpandNames(self):
10209 self.share_locks = _ShareAll()
10210 self._ExpandAndLockInstance()
10212 def CheckPrereq(self):
10213 """Check prerequisites.
10215 This checks that the instance is in the cluster.
10218 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10219 assert self.instance is not None, \
10220 "Cannot retrieve locked instance %s" % self.op.instance_name
10221 _CheckNodeOnline(self, self.instance.primary_node)
10223 def Exec(self, feedback_fn):
10224 """Connect to the console of an instance
10227 instance = self.instance
10228 node = instance.primary_node
10230 node_insts = self.rpc.call_instance_list([node],
10231 [instance.hypervisor])[node]
10232 node_insts.Raise("Can't get node information from %s" % node)
10234 if instance.name not in node_insts.payload:
10235 if instance.admin_state == constants.ADMINST_UP:
10236 state = constants.INSTST_ERRORDOWN
10237 elif instance.admin_state == constants.ADMINST_DOWN:
10238 state = constants.INSTST_ADMINDOWN
10240 state = constants.INSTST_ADMINOFFLINE
10241 raise errors.OpExecError("Instance %s is not running (state %s)" %
10242 (instance.name, state))
10244 logging.debug("Connecting to console of %s on %s", instance.name, node)
10246 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10249 def _GetInstanceConsole(cluster, instance):
10250 """Returns console information for an instance.
10252 @type cluster: L{objects.Cluster}
10253 @type instance: L{objects.Instance}
10257 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10258 # beparams and hvparams are passed separately, to avoid editing the
10259 # instance and then saving the defaults in the instance itself.
10260 hvparams = cluster.FillHV(instance)
10261 beparams = cluster.FillBE(instance)
10262 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10264 assert console.instance == instance.name
10265 assert console.Validate()
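# The dict returned below is just the serialized console object; its exact
# keys depend on the hypervisor, typically naming the console kind and how
# to reach it.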
10267 return console.ToDict()
10270 class LUInstanceReplaceDisks(LogicalUnit):
10271 """Replace the disks of an instance.
10274 HPATH = "mirrors-replace"
10275 HTYPE = constants.HTYPE_INSTANCE
10278 def CheckArguments(self):
10279 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10280 self.op.iallocator)
10282 def ExpandNames(self):
10283 self._ExpandAndLockInstance()
10285 assert locking.LEVEL_NODE not in self.needed_locks
10286 assert locking.LEVEL_NODE_RES not in self.needed_locks
10287 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10289 assert self.op.iallocator is None or self.op.remote_node is None, \
10290 "Conflicting options"
10292 if self.op.remote_node is not None:
10293 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10295 # Warning: do not remove the locking of the new secondary here
10296 # unless DRBD8.AddChildren is changed to work in parallel;
10297 # currently it doesn't since parallel invocations of
10298 # FindUnusedMinor will conflict
10299 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10300 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10302 self.needed_locks[locking.LEVEL_NODE] = []
10303 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10305 if self.op.iallocator is not None:
10306 # iallocator will select a new node in the same group
10307 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10309 self.needed_locks[locking.LEVEL_NODE_RES] = []
10311 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10312 self.op.iallocator, self.op.remote_node,
10313 self.op.disks, False, self.op.early_release,
10314 self.op.ignore_ipolicy)
10316 self.tasklets = [self.replacer]
10318 def DeclareLocks(self, level):
10319 if level == locking.LEVEL_NODEGROUP:
10320 assert self.op.remote_node is None
10321 assert self.op.iallocator is not None
10322 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10324 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10325 # Lock all groups used by instance optimistically; this requires going
10326 # via the node before it's locked, requiring verification later on
10327 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10328 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10330 elif level == locking.LEVEL_NODE:
10331 if self.op.iallocator is not None:
10332 assert self.op.remote_node is None
10333 assert not self.needed_locks[locking.LEVEL_NODE]
10335 # Lock member nodes of all locked groups
10336 self.needed_locks[locking.LEVEL_NODE] = [node_name
10337 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10338 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10340 self._LockInstancesNodes()
10341 elif level == locking.LEVEL_NODE_RES:
10343 self.needed_locks[locking.LEVEL_NODE_RES] = \
10344 self.needed_locks[locking.LEVEL_NODE]
10346 def BuildHooksEnv(self):
10347 """Build hooks env.
10349 This runs on the master, the primary and all the secondaries.
10352 instance = self.replacer.instance
10354 "MODE": self.op.mode,
10355 "NEW_SECONDARY": self.op.remote_node,
10356 "OLD_SECONDARY": instance.secondary_nodes[0],
10358 env.update(_BuildInstanceHookEnvByObject(self, instance))
10361 def BuildHooksNodes(self):
10362 """Build hooks nodes.
10365 instance = self.replacer.instance
10367 self.cfg.GetMasterNode(),
10368 instance.primary_node,
10370 if self.op.remote_node is not None:
10371 nl.append(self.op.remote_node)
10374 def CheckPrereq(self):
10375 """Check prerequisites.
10378 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10379 self.op.iallocator is None)
10381 # Verify if node group locks are still correct
10382 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10384 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10386 return LogicalUnit.CheckPrereq(self)
10389 class TLReplaceDisks(Tasklet):
10390 """Replaces disks for an instance.
10392 Note: Locking is not within the scope of this class.
10395 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10396 disks, delay_iallocator, early_release, ignore_ipolicy):
10397 """Initializes this class.
10400 Tasklet.__init__(self, lu)
10403 self.instance_name = instance_name
10405 self.iallocator_name = iallocator_name
10406 self.remote_node = remote_node
10408 self.delay_iallocator = delay_iallocator
10409 self.early_release = early_release
10410 self.ignore_ipolicy = ignore_ipolicy
10413 self.instance = None
10414 self.new_node = None
10415 self.target_node = None
10416 self.other_node = None
10417 self.remote_node_info = None
10418 self.node_secondary_ip = None
10421 def CheckArguments(mode, remote_node, iallocator):
10422 """Helper function for users of this class.
10425 # check for valid parameter combination
10426 if mode == constants.REPLACE_DISK_CHG:
10427 if remote_node is None and iallocator is None:
10428 raise errors.OpPrereqError("When changing the secondary either an"
10429 " iallocator script must be used or the"
10430 " new node given", errors.ECODE_INVAL)
10432 if remote_node is not None and iallocator is not None:
10433 raise errors.OpPrereqError("Give either the iallocator or the new"
10434 " secondary, not both", errors.ECODE_INVAL)
10436 elif remote_node is not None or iallocator is not None:
10437 # Not replacing the secondary
10438 raise errors.OpPrereqError("The iallocator and new node options can"
10439 " only be used when changing the"
10440 " secondary node", errors.ECODE_INVAL)
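# Summary of the combinations accepted above:
#   REPLACE_DISK_CHG: exactly one of remote_node / iallocator must be given
#   any other mode:   neither remote_node nor iallocator may be given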
10443 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10444 """Compute a new secondary node using an IAllocator.
10447 ial = IAllocator(lu.cfg, lu.rpc,
10448 mode=constants.IALLOCATOR_MODE_RELOC,
10449 name=instance_name,
10450 relocate_from=list(relocate_from))
10452 ial.Run(iallocator_name)
10454 if not ial.success:
10455 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10456 " %s" % (iallocator_name, ial.info),
10457 errors.ECODE_NORES)
10459 if len(ial.result) != ial.required_nodes:
10460 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10461 " of nodes (%s), required %s" %
10463 len(ial.result), ial.required_nodes),
10464 errors.ECODE_FAULT)
10466 remote_node_name = ial.result[0]
10468 lu.LogInfo("Selected new secondary for instance '%s': %s",
10469 instance_name, remote_node_name)
10471 return remote_node_name
10473 def _FindFaultyDisks(self, node_name):
10474 """Wrapper for L{_FindFaultyInstanceDisks}.
10477 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10480 def _CheckDisksActivated(self, instance):
10481 """Checks if the instance disks are activated.
10483 @param instance: The instance to check disks
10484 @return: True if they are activated, False otherwise
10487 nodes = instance.all_nodes
10489 for idx, dev in enumerate(instance.disks):
10491 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10492 self.cfg.SetDiskID(dev, node)
10494 result = self.rpc.call_blockdev_find(node, dev)
10498 elif result.fail_msg or not result.payload:
10503 def CheckPrereq(self):
10504 """Check prerequisites.
10506 This checks that the instance is in the cluster.
10509 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10510 assert instance is not None, \
10511 "Cannot retrieve locked instance %s" % self.instance_name
10513 if instance.disk_template != constants.DT_DRBD8:
10514 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10515 " instances", errors.ECODE_INVAL)
10517 if len(instance.secondary_nodes) != 1:
10518 raise errors.OpPrereqError("The instance has a strange layout,"
10519 " expected one secondary but found %d" %
10520 len(instance.secondary_nodes),
10521 errors.ECODE_FAULT)
10523 if not self.delay_iallocator:
10524 self._CheckPrereq2()
10526 def _CheckPrereq2(self):
10527 """Check prerequisites, second part.
10529 This function should always be part of CheckPrereq. It was separated and is
10530 now called from Exec because during node evacuation iallocator was only
10531 called with an unmodified cluster model, not taking planned changes into account.
10535 instance = self.instance
10536 secondary_node = instance.secondary_nodes[0]
10538 if self.iallocator_name is None:
10539 remote_node = self.remote_node
10541 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10542 instance.name, instance.secondary_nodes)
10544 if remote_node is None:
10545 self.remote_node_info = None
10547 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10548 "Remote node '%s' is not locked" % remote_node
10550 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10551 assert self.remote_node_info is not None, \
10552 "Cannot retrieve locked node %s" % remote_node
10554 if remote_node == self.instance.primary_node:
10555 raise errors.OpPrereqError("The specified node is the primary node of"
10556 " the instance", errors.ECODE_INVAL)
10558 if remote_node == secondary_node:
10559 raise errors.OpPrereqError("The specified node is already the"
10560 " secondary node of the instance",
10561 errors.ECODE_INVAL)
10563 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10564 constants.REPLACE_DISK_CHG):
10565 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10566 errors.ECODE_INVAL)
10568 if self.mode == constants.REPLACE_DISK_AUTO:
10569 if not self._CheckDisksActivated(instance):
10570 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10571 " first" % self.instance_name,
10572 errors.ECODE_STATE)
10573 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10574 faulty_secondary = self._FindFaultyDisks(secondary_node)
10576 if faulty_primary and faulty_secondary:
10577 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10578 " one node and can not be repaired"
10579 " automatically" % self.instance_name,
10580 errors.ECODE_STATE)
10583 self.disks = faulty_primary
10584 self.target_node = instance.primary_node
10585 self.other_node = secondary_node
10586 check_nodes = [self.target_node, self.other_node]
10587 elif faulty_secondary:
10588 self.disks = faulty_secondary
10589 self.target_node = secondary_node
10590 self.other_node = instance.primary_node
10591 check_nodes = [self.target_node, self.other_node]
10597 # Non-automatic modes
10598 if self.mode == constants.REPLACE_DISK_PRI:
10599 self.target_node = instance.primary_node
10600 self.other_node = secondary_node
10601 check_nodes = [self.target_node, self.other_node]
10603 elif self.mode == constants.REPLACE_DISK_SEC:
10604 self.target_node = secondary_node
10605 self.other_node = instance.primary_node
10606 check_nodes = [self.target_node, self.other_node]
10608 elif self.mode == constants.REPLACE_DISK_CHG:
10609 self.new_node = remote_node
10610 self.other_node = instance.primary_node
10611 self.target_node = secondary_node
10612 check_nodes = [self.new_node, self.other_node]
10614 _CheckNodeNotDrained(self.lu, remote_node)
10615 _CheckNodeVmCapable(self.lu, remote_node)
10617 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10618 assert old_node_info is not None
10619 if old_node_info.offline and not self.early_release:
10620 # doesn't make sense to delay the release
10621 self.early_release = True
10622 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10623 " early-release mode", secondary_node)
10626 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10629 # If not specified all disks should be replaced
10631 self.disks = range(len(self.instance.disks))
10633 # TODO: This is ugly, but right now we can't distinguish between an
10634 # internally submitted opcode and an external one. We should fix that.
10635 if self.remote_node_info:
10636 # We change the node, lets verify it still meets instance policy
10637 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10638 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10640 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10641 ignore=self.ignore_ipolicy)
10643 # TODO: compute disk parameters
10644 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10645 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10646 if primary_node_info.group != secondary_node_info.group:
10647 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10648 " different node groups; the disk parameters of the"
10649 " primary node's group will be applied.")
10651 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10653 for node in check_nodes:
10654 _CheckNodeOnline(self.lu, node)
10656 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10659 if node_name is not None)
10661 # Release unneeded node and node resource locks
10662 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10663 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10665 # Release any owned node group
10666 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10667 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10669 # Check whether disks are valid
10670 for disk_idx in self.disks:
10671 instance.FindDisk(disk_idx)
10673 # Get secondary node IP addresses
10674 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10675 in self.cfg.GetMultiNodeInfo(touched_nodes))
10677 def Exec(self, feedback_fn):
10678 """Execute disk replacement.
10680 This dispatches the disk replacement to the appropriate handler.
10683 if self.delay_iallocator:
10684 self._CheckPrereq2()
10687 # Verify owned locks before starting operation
10688 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10689 assert set(owned_nodes) == set(self.node_secondary_ip), \
10690 ("Incorrect node locks, owning %s, expected %s" %
10691 (owned_nodes, self.node_secondary_ip.keys()))
10692 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10693 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10695 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10696 assert list(owned_instances) == [self.instance_name], \
10697 "Instance '%s' not locked" % self.instance_name
10699 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10700 "Should not own any node group lock at this point"
10703 feedback_fn("No disks need replacement")
10706 feedback_fn("Replacing disk(s) %s for %s" %
10707 (utils.CommaJoin(self.disks), self.instance.name))
10709 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10711 # Activate the instance disks if we're replacing them on a down instance
10713 _StartInstanceDisks(self.lu, self.instance, True)
10716 # Should we replace the secondary node?
10717 if self.new_node is not None:
10718 fn = self._ExecDrbd8Secondary
10720 fn = self._ExecDrbd8DiskOnly
10722 result = fn(feedback_fn)
10724 # Deactivate the instance disks if we're replacing them on a
10727 _SafeShutdownInstanceDisks(self.lu, self.instance)
10729 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10732 # Verify owned locks
10733 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10734 nodes = frozenset(self.node_secondary_ip)
10735 assert ((self.early_release and not owned_nodes) or
10736 (not self.early_release and not (set(owned_nodes) - nodes))), \
10737 ("Not owning the correct locks, early_release=%s, owned=%r,"
10738 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10742 def _CheckVolumeGroup(self, nodes):
10743 self.lu.LogInfo("Checking volume groups")
10745 vgname = self.cfg.GetVGName()
10747 # Make sure volume group exists on all involved nodes
10748 results = self.rpc.call_vg_list(nodes)
10750 raise errors.OpExecError("Can't list volume groups on the nodes")
10753 res = results[node]
10754 res.Raise("Error checking node %s" % node)
10755 if vgname not in res.payload:
10756 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10759 def _CheckDisksExistence(self, nodes):
10760 # Check disk existence
10761 for idx, dev in enumerate(self.instance.disks):
10762 if idx not in self.disks:
10766 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10767 self.cfg.SetDiskID(dev, node)
10769 result = self.rpc.call_blockdev_find(node, dev)
10771 msg = result.fail_msg
10772 if msg or not result.payload:
10774 msg = "disk not found"
10775 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10778 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10779 for idx, dev in enumerate(self.instance.disks):
10780 if idx not in self.disks:
10783 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10786 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10788 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10789 " replace disks for instance %s" %
10790 (node_name, self.instance.name))
10792 def _CreateNewStorage(self, node_name):
10793 """Create new storage on the primary or secondary node.
10795 This is only used for same-node replaces, not for changing the
10796 secondary node, hence we don't want to modify the existing disk.
10801 for idx, dev in enumerate(self.instance.disks):
10802 if idx not in self.disks:
10805 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10807 self.cfg.SetDiskID(dev, node_name)
10809 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10810 names = _GenerateUniqueNames(self.lu, lv_names)
10812 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10814 vg_data = dev.children[0].logical_id[0]
10815 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10816 logical_id=(vg_data, names[0]), params=data_p)
10817 vg_meta = dev.children[1].logical_id[0]
10818 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10819 logical_id=(vg_meta, names[1]), params=meta_p)
10821 new_lvs = [lv_data, lv_meta]
10822 old_lvs = [child.Copy() for child in dev.children]
10823 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10825 # we pass force_create=True to force the LVM creation
10826 for new_lv in new_lvs:
10827 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10828 _GetInstanceInfoText(self.instance), False)
10832 def _CheckDevices(self, node_name, iv_names):
10833 for name, (dev, _, _) in iv_names.iteritems():
10834 self.cfg.SetDiskID(dev, node_name)
10836 result = self.rpc.call_blockdev_find(node_name, dev)
10838 msg = result.fail_msg
10839 if msg or not result.payload:
10841 msg = "disk not found"
10842 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10845 if result.payload.is_degraded:
10846 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10848 def _RemoveOldStorage(self, node_name, iv_names):
10849 for name, (_, old_lvs, _) in iv_names.iteritems():
10850 self.lu.LogInfo("Remove logical volumes for %s" % name)
10853 self.cfg.SetDiskID(lv, node_name)
10855 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10857 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10858 hint="remove unused LVs manually")
10860 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10861 """Replace a disk on the primary or secondary for DRBD 8.
10863 The algorithm for replace is quite complicated:
10865 1. for each disk to be replaced:
10867 1. create new LVs on the target node with unique names
10868 1. detach old LVs from the drbd device
10869 1. rename old LVs to name_replaced.<time_t>
10870 1. rename new LVs to old LVs
10871 1. attach the new LVs (with the old names now) to the drbd device
10873 1. wait for sync across all devices
10875 1. for each modified disk:
10877 1. remove old LVs (which have the name name_replaced.<time_t>)
10879 Failures are not very well handled.
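# The LogStep calls below follow the outline above: 1) device existence,
# 2) peer consistency, 3) new storage, 4) drbd reconfiguration, and then
# old-storage removal and the sync wait, whose relative order depends on
# early_release.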
10884 # Step: check device activation
10885 self.lu.LogStep(1, steps_total, "Check device existence")
10886 self._CheckDisksExistence([self.other_node, self.target_node])
10887 self._CheckVolumeGroup([self.target_node, self.other_node])
10889 # Step: check other node consistency
10890 self.lu.LogStep(2, steps_total, "Check peer consistency")
10891 self._CheckDisksConsistency(self.other_node,
10892 self.other_node == self.instance.primary_node,
10895 # Step: create new storage
10896 self.lu.LogStep(3, steps_total, "Allocate new storage")
10897 iv_names = self._CreateNewStorage(self.target_node)
10899 # Step: for each lv, detach+rename*2+attach
10900 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10901 for dev, old_lvs, new_lvs in iv_names.itervalues():
10902 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10904 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10906 result.Raise("Can't detach drbd from local storage on node"
10907 " %s for device %s" % (self.target_node, dev.iv_name))
10909 #cfg.Update(instance)
10911 # ok, we created the new LVs, so now we know we have the needed
10912 # storage; as such, we proceed on the target node to rename
10913 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10914 # using the assumption that logical_id == physical_id (which in
10915 # turn is the unique_id on that node)
10917 # FIXME(iustin): use a better name for the replaced LVs
10918 temp_suffix = int(time.time())
10919 ren_fn = lambda d, suff: (d.physical_id[0],
10920 d.physical_id[1] + "_replaced-%s" % suff)
10922 # Build the rename list based on what LVs exist on the node
10923 rename_old_to_new = []
10924 for to_ren in old_lvs:
10925 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10926 if not result.fail_msg and result.payload:
10928 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10930 self.lu.LogInfo("Renaming the old LVs on the target node")
10931 result = self.rpc.call_blockdev_rename(self.target_node,
10933 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10935 # Now we rename the new LVs to the old LVs
10936 self.lu.LogInfo("Renaming the new LVs on the target node")
10937 rename_new_to_old = [(new, old.physical_id)
10938 for old, new in zip(old_lvs, new_lvs)]
10939 result = self.rpc.call_blockdev_rename(self.target_node,
10941 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10943 # Intermediate steps of in memory modifications
10944 for old, new in zip(old_lvs, new_lvs):
10945 new.logical_id = old.logical_id
10946 self.cfg.SetDiskID(new, self.target_node)
10948 # We need to modify old_lvs so that removal later removes the
10949 # right LVs, not the newly added ones; note that old_lvs is a
10951 for disk in old_lvs:
10952 disk.logical_id = ren_fn(disk, temp_suffix)
10953 self.cfg.SetDiskID(disk, self.target_node)
10955 # Now that the new lvs have the old name, we can add them to the device
10956 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10957 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10959 msg = result.fail_msg
10961 for new_lv in new_lvs:
10962 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10965 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10966 hint=("cleanup manually the unused logical"
10968 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10970 cstep = itertools.count(5)
10972 if self.early_release:
10973 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10974 self._RemoveOldStorage(self.target_node, iv_names)
10975 # TODO: Check if releasing locks early still makes sense
10976 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10978 # Release all resource locks except those used by the instance
10979 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10980 keep=self.node_secondary_ip.keys())
10982 # Release all node locks while waiting for sync
10983 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10985 # TODO: Can the instance lock be downgraded here? Take the optional disk
10986 # shutdown in the caller into consideration.
10989 # This can fail as the old devices are degraded and _WaitForSync
10990 # does a combined result over all disks, so we don't check its return value
10991 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10992 _WaitForSync(self.lu, self.instance)
10994 # Check all devices manually
10995 self._CheckDevices(self.instance.primary_node, iv_names)
10997 # Step: remove old storage
10998 if not self.early_release:
10999 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11000 self._RemoveOldStorage(self.target_node, iv_names)
11002 def _ExecDrbd8Secondary(self, feedback_fn):
11003 """Replace the secondary node for DRBD 8.
11005 The algorithm for replace is quite complicated:
11006 - for all disks of the instance:
11007 - create new LVs on the new node with same names
11008 - shutdown the drbd device on the old secondary
11009 - disconnect the drbd network on the primary
11010 - create the drbd device on the new secondary
11011 - network attach the drbd on the primary, using an artifice:
11012 the drbd code for Attach() will connect to the network if it
11013 finds a device which is connected to the good local disks but
11014 not network enabled
11015 - wait for sync across all devices
11016 - remove all disks from the old secondary
11018 Failures are not very well handled.
11023 pnode = self.instance.primary_node
11025 # Step: check device activation
11026 self.lu.LogStep(1, steps_total, "Check device existence")
11027 self._CheckDisksExistence([self.instance.primary_node])
11028 self._CheckVolumeGroup([self.instance.primary_node])
11030 # Step: check other node consistency
11031 self.lu.LogStep(2, steps_total, "Check peer consistency")
11032 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11034 # Step: create new storage
11035 self.lu.LogStep(3, steps_total, "Allocate new storage")
11036 for idx, dev in enumerate(self.instance.disks):
11037 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11038 (self.new_node, idx))
11039 # we pass force_create=True to force LVM creation
11040 for new_lv in dev.children:
11041 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11042 _GetInstanceInfoText(self.instance), False)
11044 # Step 4: drbd minors and drbd setup changes
11045 # after this, we must manually remove the drbd minors on both the
11046 # error and the success paths
11047 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11048 minors = self.cfg.AllocateDRBDMinor([self.new_node
11049 for dev in self.instance.disks],
11050 self.instance.name)
11051 logging.debug("Allocated minors %r", minors)
11054 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11055 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11056 (self.new_node, idx))
11057 # create new devices on new_node; note that we create two IDs:
11058 # one without port, so the drbd will be activated without
11059 # networking information on the new node at this stage, and one
11060 # with network, for the latter activation in step 4
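# For reference (as unpacked below): a DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret); the "alone" variant
# simply carries None instead of the TCP port.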
11061 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11062 if self.instance.primary_node == o_node1:
11063 p_minor = o_minor1
11064 else:
11065 assert self.instance.primary_node == o_node2, "Three-node instance?"
11066 p_minor = o_minor2
11068 new_alone_id = (self.instance.primary_node, self.new_node, None,
11069 p_minor, new_minor, o_secret)
11070 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11071 p_minor, new_minor, o_secret)
11073 iv_names[idx] = (dev, dev.children, new_net_id)
11074 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11075 new_net_id)
11076 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11077 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11078 logical_id=new_alone_id,
11079 children=dev.children,
11081 params=drbd_params)
11082 try:
11083 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11084 _GetInstanceInfoText(self.instance), False)
11085 except errors.GenericError:
11086 self.cfg.ReleaseDRBDMinors(self.instance.name)
11087 raise
11089 # We have new devices, shutdown the drbd on the old secondary
11090 for idx, dev in enumerate(self.instance.disks):
11091 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11092 self.cfg.SetDiskID(dev, self.target_node)
11093 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11094 if msg:
11095 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11096 " node: %s" % (idx, msg),
11097 hint=("Please cleanup this device manually as"
11098 " soon as possible"))
11100 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11101 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11102 self.instance.disks)[pnode]
11104 msg = result.fail_msg
11105 if msg:
11106 # detaches didn't succeed (unlikely)
11107 self.cfg.ReleaseDRBDMinors(self.instance.name)
11108 raise errors.OpExecError("Can't detach the disks from the network on"
11109 " old node: %s" % (msg,))
11111 # if we managed to detach at least one, we update all the disks of
11112 # the instance to point to the new secondary
11113 self.lu.LogInfo("Updating instance configuration")
11114 for dev, _, new_logical_id in iv_names.itervalues():
11115 dev.logical_id = new_logical_id
11116 self.cfg.SetDiskID(dev, self.instance.primary_node)
11118 self.cfg.Update(self.instance, feedback_fn)
11120 # Release all node locks (the configuration has been updated)
11121 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11123 # and now perform the drbd attach
11124 self.lu.LogInfo("Attaching primary drbds to new secondary"
11125 " (standalone => connected)")
11126 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11127 self.new_node],
11128 self.node_secondary_ip,
11129 self.instance.disks,
11130 self.instance.name,
11131 False)
11132 for to_node, to_result in result.items():
11133 msg = to_result.fail_msg
11134 if msg:
11135 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11136 to_node, msg,
11137 hint=("please do a gnt-instance info to see the"
11138 " status of disks"))
11140 cstep = itertools.count(5)
11142 if self.early_release:
11143 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11144 self._RemoveOldStorage(self.target_node, iv_names)
11145 # TODO: Check if releasing locks early still makes sense
11146 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11147 else:
11148 # Release all resource locks except those used by the instance
11149 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11150 keep=self.node_secondary_ip.keys())
11152 # TODO: Can the instance lock be downgraded here? Take the optional disk
11153 # shutdown in the caller into consideration.
11156 # This can fail as the old devices are degraded and _WaitForSync
11157 # does a combined result over all disks, so we don't check its return value
11158 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11159 _WaitForSync(self.lu, self.instance)
11161 # Check all devices manually
11162 self._CheckDevices(self.instance.primary_node, iv_names)
11164 # Step: remove old storage
11165 if not self.early_release:
11166 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11167 self._RemoveOldStorage(self.target_node, iv_names)
11170 class LURepairNodeStorage(NoHooksLU):
11171 """Repairs the volume group on a node.
11176 def CheckArguments(self):
11177 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11179 storage_type = self.op.storage_type
11181 if (constants.SO_FIX_CONSISTENCY not in
11182 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11183 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11184 " repaired" % storage_type,
11185 errors.ECODE_INVAL)
11187 def ExpandNames(self):
11188 self.needed_locks = {
11189 locking.LEVEL_NODE: [self.op.node_name],
11192 def _CheckFaultyDisks(self, instance, node_name):
11193 """Ensure faulty disks abort the opcode or at least warn."""
11194 try:
11195 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11196 node_name, True):
11197 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11198 " node '%s'" % (instance.name, node_name),
11199 errors.ECODE_STATE)
11200 except errors.OpPrereqError, err:
11201 if self.op.ignore_consistency:
11202 self.proc.LogWarning(str(err.args[0]))
11203 else:
11204 raise
11206 def CheckPrereq(self):
11207 """Check prerequisites.
11210 # Check whether any instance on this node has faulty disks
11211 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11212 if inst.admin_state != constants.ADMINST_UP:
11213 continue
11214 check_nodes = set(inst.all_nodes)
11215 check_nodes.discard(self.op.node_name)
11216 for inst_node_name in check_nodes:
11217 self._CheckFaultyDisks(inst, inst_node_name)
11219 def Exec(self, feedback_fn):
11220 feedback_fn("Repairing storage unit '%s' on %s ..." %
11221 (self.op.name, self.op.node_name))
11223 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11224 result = self.rpc.call_storage_execute(self.op.node_name,
11225 self.op.storage_type, st_args,
11227 constants.SO_FIX_CONSISTENCY)
11228 result.Raise("Failed to repair storage unit '%s' on %s" %
11229 (self.op.name, self.op.node_name))
11232 class LUNodeEvacuate(NoHooksLU):
11233 """Evacuates instances off a list of nodes.
11238 _MODE2IALLOCATOR = {
11239 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11240 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11241 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11243 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11244 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11245 constants.IALLOCATOR_NEVAC_MODES)
11247 def CheckArguments(self):
11248 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11250 def ExpandNames(self):
11251 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11253 if self.op.remote_node is not None:
11254 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11255 assert self.op.remote_node
11257 if self.op.remote_node == self.op.node_name:
11258 raise errors.OpPrereqError("Can not use evacuated node as a new"
11259 " secondary node", errors.ECODE_INVAL)
11261 if self.op.mode != constants.NODE_EVAC_SEC:
11262 raise errors.OpPrereqError("Without the use of an iallocator only"
11263 " secondary instances can be evacuated",
11264 errors.ECODE_INVAL)
11267 self.share_locks = _ShareAll()
11268 self.needed_locks = {
11269 locking.LEVEL_INSTANCE: [],
11270 locking.LEVEL_NODEGROUP: [],
11271 locking.LEVEL_NODE: [],
11274 # Determine nodes (via group) optimistically, needs verification once locks
11275 # have been acquired
11276 self.lock_nodes = self._DetermineNodes()
11278 def _DetermineNodes(self):
11279 """Gets the list of nodes to operate on.
11282 if self.op.remote_node is None:
11283 # Iallocator will choose any node(s) in the same group
11284 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11285 else:
11286 group_nodes = frozenset([self.op.remote_node])
11288 # Determine nodes to be locked
11289 return set([self.op.node_name]) | group_nodes
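# Illustrative example: evacuating "node1" without an explicit remote node
# locks "node1" together with every member of the node group(s) it belongs to.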
11291 def _DetermineInstances(self):
11292 """Builds list of instances to operate on.
11295 assert self.op.mode in constants.NODE_EVAC_MODES
11297 if self.op.mode == constants.NODE_EVAC_PRI:
11298 # Primary instances only
11299 inst_fn = _GetNodePrimaryInstances
11300 assert self.op.remote_node is None, \
11301 "Evacuating primary instances requires iallocator"
11302 elif self.op.mode == constants.NODE_EVAC_SEC:
11303 # Secondary instances only
11304 inst_fn = _GetNodeSecondaryInstances
11306 else:
11307 assert self.op.mode == constants.NODE_EVAC_ALL
11308 inst_fn = _GetNodeInstances
11309 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11311 raise errors.OpPrereqError("Due to an issue with the iallocator"
11312 " interface it is not possible to evacuate"
11313 " all instances at once; specify explicitly"
11314 " whether to evacuate primary or secondary"
11316 errors.ECODE_INVAL)
11318 return inst_fn(self.cfg, self.op.node_name)
11320 def DeclareLocks(self, level):
11321 if level == locking.LEVEL_INSTANCE:
11322 # Lock instances optimistically, needs verification once node and group
11323 # locks have been acquired
11324 self.needed_locks[locking.LEVEL_INSTANCE] = \
11325 set(i.name for i in self._DetermineInstances())
11327 elif level == locking.LEVEL_NODEGROUP:
11328 # Lock node groups for all potential target nodes optimistically, needs
11329 # verification once nodes have been acquired
11330 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11331 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11333 elif level == locking.LEVEL_NODE:
11334 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11336 def CheckPrereq(self):
11338 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11339 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11340 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11342 need_nodes = self._DetermineNodes()
11344 if not owned_nodes.issuperset(need_nodes):
11345 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11346 " locks were acquired, current nodes are"
11347 " are '%s', used to be '%s'; retry the"
11349 (self.op.node_name,
11350 utils.CommaJoin(need_nodes),
11351 utils.CommaJoin(owned_nodes)),
11352 errors.ECODE_STATE)
11354 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11355 if owned_groups != wanted_groups:
11356 raise errors.OpExecError("Node groups changed since locks were acquired,"
11357 " current groups are '%s', used to be '%s';"
11358 " retry the operation" %
11359 (utils.CommaJoin(wanted_groups),
11360 utils.CommaJoin(owned_groups)))
11362 # Determine affected instances
11363 self.instances = self._DetermineInstances()
11364 self.instance_names = [i.name for i in self.instances]
11366 if set(self.instance_names) != owned_instances:
11367 raise errors.OpExecError("Instances on node '%s' changed since locks"
11368 " were acquired, current instances are '%s',"
11369 " used to be '%s'; retry the operation" %
11370 (self.op.node_name,
11371 utils.CommaJoin(self.instance_names),
11372 utils.CommaJoin(owned_instances)))
11374 if self.instance_names:
11375 self.LogInfo("Evacuating instances from node '%s': %s",
11376 self.op.node_name,
11377 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11378 else:
11379 self.LogInfo("No instances to evacuate from node '%s'",
11380 self.op.node_name)
11382 if self.op.remote_node is not None:
11383 for i in self.instances:
11384 if i.primary_node == self.op.remote_node:
11385 raise errors.OpPrereqError("Node %s is the primary node of"
11386 " instance %s, cannot use it as"
11388 (self.op.remote_node, i.name),
11389 errors.ECODE_INVAL)
11391 def Exec(self, feedback_fn):
11392 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11394 if not self.instance_names:
11395 # No instances to evacuate
11396 jobs = []
11398 elif self.op.iallocator is not None:
11399 # TODO: Implement relocation to other group
11400 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11401 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11402 instances=list(self.instance_names))
11404 ial.Run(self.op.iallocator)
11406 if not ial.success:
11407 raise errors.OpPrereqError("Can't compute node evacuation using"
11408 " iallocator '%s': %s" %
11409 (self.op.iallocator, ial.info),
11410 errors.ECODE_NORES)
11412 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11414 elif self.op.remote_node is not None:
11415 assert self.op.mode == constants.NODE_EVAC_SEC
11416 jobs = [
11417 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11418 remote_node=self.op.remote_node,
11420 mode=constants.REPLACE_DISK_CHG,
11421 early_release=self.op.early_release)]
11422 for instance_name in self.instance_names
11423 ]
11425 else:
11426 raise errors.ProgrammerError("No iallocator or remote node")
11428 return ResultWithJobs(jobs)
11431 def _SetOpEarlyRelease(early_release, op):
11432 """Sets C{early_release} flag on opcodes if available.
11435 try:
11436 op.early_release = early_release
11437 except AttributeError:
11438 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11440 return op
11443 def _NodeEvacDest(use_nodes, group, nodes):
11444 """Returns group or nodes depending on caller's choice.
11447 if use_nodes:
11448 return utils.CommaJoin(nodes)
11449 else:
11450 return group
11453 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11454 """Unpacks the result of change-group and node-evacuate iallocator requests.
11456 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11457 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11459 @type lu: L{LogicalUnit}
11460 @param lu: Logical unit instance
11461 @type alloc_result: tuple/list
11462 @param alloc_result: Result from iallocator
11463 @type early_release: bool
11464 @param early_release: Whether to release locks early if possible
11465 @type use_nodes: bool
11466 @param use_nodes: Whether to display node names instead of groups
11469 (moved, failed, jobs) = alloc_result
11471 if failed:
11472 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11473 for (name, reason) in failed)
11474 lu.LogWarning("Unable to evacuate instances %s", failreason)
11475 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11477 if moved:
11478 lu.LogInfo("Instances to be moved: %s",
11479 utils.CommaJoin("%s (to %s)" %
11480 (name, _NodeEvacDest(use_nodes, group, nodes))
11481 for (name, group, nodes) in moved))
11483 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11484 map(opcodes.OpCode.LoadOpCode, ops))
11485 for ops in jobs]
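# Illustrative shape of alloc_result as consumed above (assumed from the code):
#   moved  -- list of (instance_name, group_name, target_node_names) tuples
#   failed -- list of (instance_name, failure_reason) tuples
#   jobs   -- list of job definitions, each a list of serialized opcodes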
11488 class LUInstanceGrowDisk(LogicalUnit):
11489 """Grow a disk of an instance.
11492 HPATH = "disk-grow"
11493 HTYPE = constants.HTYPE_INSTANCE
11496 def ExpandNames(self):
11497 self._ExpandAndLockInstance()
11498 self.needed_locks[locking.LEVEL_NODE] = []
11499 self.needed_locks[locking.LEVEL_NODE_RES] = []
11500 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11501 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11503 def DeclareLocks(self, level):
11504 if level == locking.LEVEL_NODE:
11505 self._LockInstancesNodes()
11506 elif level == locking.LEVEL_NODE_RES:
11508 self.needed_locks[locking.LEVEL_NODE_RES] = \
11509 self.needed_locks[locking.LEVEL_NODE][:]
11511 def BuildHooksEnv(self):
11512 """Build hooks env.
11514 This runs on the master, the primary and all the secondaries.
11518 "DISK": self.op.disk,
11519 "AMOUNT": self.op.amount,
11521 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11524 def BuildHooksNodes(self):
11525 """Build hooks nodes.
11528 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11529 return (nl, nl)
11531 def CheckPrereq(self):
11532 """Check prerequisites.
11534 This checks that the instance is in the cluster.
11537 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11538 assert instance is not None, \
11539 "Cannot retrieve locked instance %s" % self.op.instance_name
11540 nodenames = list(instance.all_nodes)
11541 for node in nodenames:
11542 _CheckNodeOnline(self, node)
11544 self.instance = instance
11546 if instance.disk_template not in constants.DTS_GROWABLE:
11547 raise errors.OpPrereqError("Instance's disk layout does not support"
11548 " growing", errors.ECODE_INVAL)
11550 self.disk = instance.FindDisk(self.op.disk)
11552 if instance.disk_template not in (constants.DT_FILE,
11553 constants.DT_SHARED_FILE,
11555 # TODO: check the free disk space for file, when that feature will be
11557 _CheckNodesFreeDiskPerVG(self, nodenames,
11558 self.disk.ComputeGrowth(self.op.amount))
11560 def Exec(self, feedback_fn):
11561 """Execute disk grow.
11564 instance = self.instance
11565 disk = self.disk
11567 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11568 assert (self.owned_locks(locking.LEVEL_NODE) ==
11569 self.owned_locks(locking.LEVEL_NODE_RES))
11571 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11572 if not disks_ok:
11573 raise errors.OpExecError("Cannot activate block device to grow")
11575 feedback_fn("Growing disk %s of instance '%s' by %s" %
11576 (self.op.disk, instance.name,
11577 utils.FormatUnit(self.op.amount, "h")))
11579 # First run all grow ops in dry-run mode
11580 for node in instance.all_nodes:
11581 self.cfg.SetDiskID(disk, node)
11582 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11583 result.Raise("Grow request failed to node %s" % node)
11585 # We know that (as far as we can test) operations across different
11586 # nodes will succeed, time to run it for real
11587 for node in instance.all_nodes:
11588 self.cfg.SetDiskID(disk, node)
11589 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11590 result.Raise("Grow request failed to node %s" % node)
11592 # TODO: Rewrite code to work properly
11593 # DRBD goes into sync mode for a short amount of time after executing the
11594 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11595 # calling "resize" in sync mode fails. Sleeping for a short amount of
11596 # time is a work-around.
11597 time.sleep(5)
11599 disk.RecordGrow(self.op.amount)
11600 self.cfg.Update(instance, feedback_fn)
11602 # Changes have been recorded, release node lock
11603 _ReleaseLocks(self, locking.LEVEL_NODE)
11605 # Downgrade lock while waiting for sync
11606 self.glm.downgrade(locking.LEVEL_INSTANCE)
11608 if self.op.wait_for_sync:
11609 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11610 if disk_abort:
11611 self.proc.LogWarning("Disk sync-ing has not returned a good"
11612 " status; please check the instance")
11613 if instance.admin_state != constants.ADMINST_UP:
11614 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11615 elif instance.admin_state != constants.ADMINST_UP:
11616 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11617 " not supposed to be running because no wait for"
11618 " sync mode was requested")
11620 assert self.owned_locks(locking.LEVEL_NODE_RES)
11621 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11624 class LUInstanceQueryData(NoHooksLU):
11625 """Query runtime instance data.
11630 def ExpandNames(self):
11631 self.needed_locks = {}
11633 # Use locking if requested or when non-static information is wanted
11634 if not (self.op.static or self.op.use_locking):
11635 self.LogWarning("Non-static data requested, locks need to be acquired")
11636 self.op.use_locking = True
11638 if self.op.instances or not self.op.use_locking:
11639 # Expand instance names right here
11640 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11641 else:
11642 # Will use acquired locks
11643 self.wanted_names = None
11645 if self.op.use_locking:
11646 self.share_locks = _ShareAll()
11648 if self.wanted_names is None:
11649 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11650 else:
11651 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11653 self.needed_locks[locking.LEVEL_NODE] = []
11654 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11656 def DeclareLocks(self, level):
11657 if self.op.use_locking and level == locking.LEVEL_NODE:
11658 self._LockInstancesNodes()
11660 def CheckPrereq(self):
11661 """Check prerequisites.
11663 This only checks the optional instance list against the existing names.
11666 if self.wanted_names is None:
11667 assert self.op.use_locking, "Locking was not used"
11668 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11670 self.wanted_instances = \
11671 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11673 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11674 """Returns the status of a block device
11677 if self.op.static or not node:
11678 return None
11680 self.cfg.SetDiskID(dev, node)
11682 result = self.rpc.call_blockdev_find(node, dev)
11686 result.Raise("Can't compute disk status for %s" % instance_name)
11688 status = result.payload
11692 return (status.dev_path, status.major, status.minor,
11693 status.sync_percent, status.estimated_time,
11694 status.is_degraded, status.ldisk_status)
11696 def _ComputeDiskStatus(self, instance, snode, dev):
11697 """Compute block device status.
11700 if dev.dev_type in constants.LDS_DRBD:
11701 # we change the snode then (otherwise we use the one passed in)
11702 if dev.logical_id[0] == instance.primary_node:
11703 snode = dev.logical_id[1]
11705 snode = dev.logical_id[0]
11707 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11708 instance.name, dev)
11709 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11712 dev_children = map(compat.partial(self._ComputeDiskStatus,
11713 instance, snode),
11714 dev.children)
11719 "iv_name": dev.iv_name,
11720 "dev_type": dev.dev_type,
11721 "logical_id": dev.logical_id,
11722 "physical_id": dev.physical_id,
11723 "pstatus": dev_pstatus,
11724 "sstatus": dev_sstatus,
11725 "children": dev_children,
11730 def Exec(self, feedback_fn):
11731 """Gather and return data"""
11734 cluster = self.cfg.GetClusterInfo()
11736 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11737 for i in self.wanted_instances)
11738 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11739 if self.op.static or pnode.offline:
11740 remote_state = None
11742 self.LogWarning("Primary node %s is marked offline, returning static"
11743 " information only for instance %s" %
11744 (pnode.name, instance.name))
11745 else:
11746 remote_info = self.rpc.call_instance_info(instance.primary_node,
11747 instance.name,
11748 instance.hypervisor)
11749 remote_info.Raise("Error checking node %s" % instance.primary_node)
11750 remote_info = remote_info.payload
11751 if remote_info and "state" in remote_info:
11752 remote_state = "up"
11753 else:
11754 if instance.admin_state == constants.ADMINST_UP:
11755 remote_state = "down"
11756 else:
11757 remote_state = instance.admin_state
11759 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11760 instance.disks)
11762 result[instance.name] = {
11763 "name": instance.name,
11764 "config_state": instance.admin_state,
11765 "run_state": remote_state,
11766 "pnode": instance.primary_node,
11767 "snodes": instance.secondary_nodes,
11769 # this happens to be the same format used for hooks
11770 "nics": _NICListToTuple(self, instance.nics),
11771 "disk_template": instance.disk_template,
11773 "hypervisor": instance.hypervisor,
11774 "network_port": instance.network_port,
11775 "hv_instance": instance.hvparams,
11776 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11777 "be_instance": instance.beparams,
11778 "be_actual": cluster.FillBE(instance),
11779 "os_instance": instance.osparams,
11780 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11781 "serial_no": instance.serial_no,
11782 "mtime": instance.mtime,
11783 "ctime": instance.ctime,
11784 "uuid": instance.uuid,
11790 def PrepareContainerMods(mods, private_fn):
11791 """Prepares a list of container modifications by adding a private data field.
11793 @type mods: list of tuples; (operation, index, parameters)
11794 @param mods: List of modifications
11795 @type private_fn: callable or None
11796 @param private_fn: Callable for constructing a private data field for a
11801 if private_fn is None:
11802 fn = lambda: None
11803 else:
11804 fn = private_fn
11806 return [(op, idx, params, fn()) for (op, idx, params) in mods]
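# Illustrative usage (hypothetical values):
#   PrepareContainerMods([(constants.DDM_ADD, -1, {})], None)
#   -> [(constants.DDM_ADD, -1, {}, None)]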
11809 #: Type description for changes as returned by L{ApplyContainerMods}'s
11811 _TApplyContModsCbChanges = \
11812 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11813 ht.TNonEmptyString,
11818 def ApplyContainerMods(kind, container, chgdesc, mods,
11819 create_fn, modify_fn, remove_fn):
11820 """Applies descriptions in C{mods} to C{container}.
11823 @param kind: One-word item description
11824 @type container: list
11825 @param container: Container to modify
11826 @type chgdesc: None or list
11827 @param chgdesc: List of applied changes
11829 @param mods: Modifications as returned by L{PrepareContainerMods}
11830 @type create_fn: callable
11831 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11832 receives absolute item index, parameters and private data object as added
11833 by L{PrepareContainerMods}, returns tuple containing new item and changes
11835 @type modify_fn: callable
11836 @param modify_fn: Callback for modifying an existing item
11837 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11838 and private data object as added by L{PrepareContainerMods}, returns
11840 @type remove_fn: callable
11841 @param remove_fn: Callback on removing item; receives absolute item index,
11842 item and private data object as added by L{PrepareContainerMods}
11845 for (op, idx, params, private) in mods:
11846 changes = None
11847 if idx == -1:
11848 absidx = len(container) - 1
11849 elif idx < 0:
11850 raise IndexError("Not accepting negative indices other than -1")
11851 elif idx > len(container):
11852 raise IndexError("Got %s index %s, but there are only %s" %
11853 (kind, idx, len(container)))
11859 if op == constants.DDM_ADD:
11860 # Calculate where item will be added
11862 addidx = len(container)
11866 if create_fn is None:
11867 item = params
11868 else:
11869 (item, changes) = create_fn(addidx, params, private)
11872 container.append(item)
11875 assert idx <= len(container)
11876 # list.insert does so before the specified index
11877 container.insert(idx, item)
11878 else:
11879 # Retrieve existing item
11880 try:
11881 item = container[absidx]
11882 except IndexError:
11883 raise IndexError("Invalid %s index %s" % (kind, idx))
11885 if op == constants.DDM_REMOVE:
11888 if remove_fn is not None:
11889 remove_fn(absidx, item, private)
11891 changes = [("%s/%s" % (kind, absidx), "remove")]
11893 assert container[absidx] == item
11894 del container[absidx]
11895 elif op == constants.DDM_MODIFY:
11896 if modify_fn is not None:
11897 changes = modify_fn(absidx, item, params, private)
11898 else:
11899 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11901 assert _TApplyContModsCbChanges(changes)
11903 if not (chgdesc is None or changes is None):
11904 chgdesc.extend(changes)
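# Illustrative example (hypothetical values): applying
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, -1, {})], None)
# to a one-element container deletes that element and records the change
# ("<kind>/0", "remove") in chgdesc.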
11907 def _UpdateIvNames(base_index, disks):
11908 """Updates the C{iv_name} attribute of disks.
11910 @type disks: list of L{objects.Disk}
11913 for (idx, disk) in enumerate(disks):
11914 disk.iv_name = "disk/%s" % (base_index + idx, )
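# For example, _UpdateIvNames(2, disks) relabels a two-element list as
# "disk/2" and "disk/3".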
11917 class _InstNicModPrivate:
11918 """Data structure for network interface modifications.
11920 Used by L{LUInstanceSetParams}.
11923 def __init__(self):
11924 self.params = None
11925 self.filled = None
11928 class LUInstanceSetParams(LogicalUnit):
11929 """Modifies an instances's parameters.
11932 HPATH = "instance-modify"
11933 HTYPE = constants.HTYPE_INSTANCE
11937 def _UpgradeDiskNicMods(kind, mods, verify_fn):
11938 assert ht.TList(mods)
11939 assert not mods or len(mods[0]) in (2, 3)
11941 if mods and len(mods[0]) == 2:
11945 for op, params in mods:
11946 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11947 result.append((op, -1, params))
11951 raise errors.OpPrereqError("Only one %s add or remove operation is"
11952 " supported at a time" % kind,
11953 errors.ECODE_INVAL)
11955 result.append((constants.DDM_MODIFY, op, params))
11957 assert verify_fn(result)
11964 def _CheckMods(kind, mods, key_types, item_fn):
11965 """Ensures requested disk/NIC modifications are valid.
11968 for (op, _, params) in mods:
11969 assert ht.TDict(params)
11971 utils.ForceDictType(params, key_types)
11973 if op == constants.DDM_REMOVE:
11974 if params:
11975 raise errors.OpPrereqError("No settings should be passed when"
11976 " removing a %s" % kind,
11977 errors.ECODE_INVAL)
11978 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11979 item_fn(op, params)
11981 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11984 def _VerifyDiskModification(op, params):
11985 """Verifies a disk modification.
11988 if op == constants.DDM_ADD:
11989 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11990 if mode not in constants.DISK_ACCESS_SET:
11991 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11992 errors.ECODE_INVAL)
11994 size = params.get(constants.IDISK_SIZE, None)
11995 if size is None:
11996 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
11997 constants.IDISK_SIZE, errors.ECODE_INVAL)
11999 try:
12000 size = int(size)
12001 except (TypeError, ValueError), err:
12002 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12003 errors.ECODE_INVAL)
12005 params[constants.IDISK_SIZE] = size
12007 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12008 raise errors.OpPrereqError("Disk size change not possible, use"
12009 " grow-disk", errors.ECODE_INVAL)
12012 def _VerifyNicModification(op, params):
12013 """Verifies a network interface modification.
12016 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12017 ip = params.get(constants.INIC_IP, None)
12018 if ip is None:
12019 pass
12020 elif ip.lower() == constants.VALUE_NONE:
12021 params[constants.INIC_IP] = None
12022 elif not netutils.IPAddress.IsValid(ip):
12023 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12024 errors.ECODE_INVAL)
12026 bridge = params.get("bridge", None)
12027 link = params.get(constants.INIC_LINK, None)
12028 if bridge and link:
12029 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12030 " at the same time", errors.ECODE_INVAL)
12031 elif bridge and bridge.lower() == constants.VALUE_NONE:
12032 params["bridge"] = None
12033 elif link and link.lower() == constants.VALUE_NONE:
12034 params[constants.INIC_LINK] = None
12036 if op == constants.DDM_ADD:
12037 macaddr = params.get(constants.INIC_MAC, None)
12038 if macaddr is None:
12039 params[constants.INIC_MAC] = constants.VALUE_AUTO
12041 if constants.INIC_MAC in params:
12042 macaddr = params[constants.INIC_MAC]
12043 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12044 macaddr = utils.NormalizeAndValidateMac(macaddr)
12046 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12047 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12048 " modifying an existing NIC",
12049 errors.ECODE_INVAL)
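# Illustrative example: for an add operation a missing MAC is replaced with
# constants.VALUE_AUTO, while passing "auto" to a modify operation is
# rejected.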
12051 def CheckArguments(self):
12052 if not (self.op.nics or self.op.disks or self.op.disk_template or
12053 self.op.hvparams or self.op.beparams or self.op.os_name or
12054 self.op.offline is not None or self.op.runtime_mem):
12055 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12057 if self.op.hvparams:
12058 _CheckGlobalHvParams(self.op.hvparams)
12061 self._UpgradeDiskNicMods("disk", self.op.disks,
12062 opcodes.OpInstanceSetParams.TestDiskModifications)
12064 self._UpgradeDiskNicMods("NIC", self.op.nics,
12065 opcodes.OpInstanceSetParams.TestNicModifications)
12067 # Check disk modifications
12068 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12069 self._VerifyDiskModification)
12071 if self.op.disks and self.op.disk_template is not None:
12072 raise errors.OpPrereqError("Disk template conversion and other disk"
12073 " changes not supported at the same time",
12074 errors.ECODE_INVAL)
12076 if (self.op.disk_template and
12077 self.op.disk_template in constants.DTS_INT_MIRROR and
12078 self.op.remote_node is None):
12079 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12080 " one requires specifying a secondary node",
12081 errors.ECODE_INVAL)
12083 # Check NIC modifications
12084 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12085 self._VerifyNicModification)
12087 def ExpandNames(self):
12088 self._ExpandAndLockInstance()
12089 # Can't even acquire node locks in shared mode as upcoming changes in
12090 # Ganeti 2.6 will start to modify the node object on disk conversion
12091 self.needed_locks[locking.LEVEL_NODE] = []
12092 self.needed_locks[locking.LEVEL_NODE_RES] = []
12093 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12095 def DeclareLocks(self, level):
12096 # TODO: Acquire group lock in shared mode (disk parameters)
12097 if level == locking.LEVEL_NODE:
12098 self._LockInstancesNodes()
12099 if self.op.disk_template and self.op.remote_node:
12100 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12101 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12102 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12104 self.needed_locks[locking.LEVEL_NODE_RES] = \
12105 self.needed_locks[locking.LEVEL_NODE][:]
12107 def BuildHooksEnv(self):
12108 """Build hooks env.
12110 This runs on the master, primary and secondaries.
12113 args = {}
12114 if constants.BE_MINMEM in self.be_new:
12115 args["minmem"] = self.be_new[constants.BE_MINMEM]
12116 if constants.BE_MAXMEM in self.be_new:
12117 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12118 if constants.BE_VCPUS in self.be_new:
12119 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12120 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12121 # information at all.
12123 if self._new_nics is not None:
12126 for nic in self._new_nics:
12127 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12128 mode = nicparams[constants.NIC_MODE]
12129 link = nicparams[constants.NIC_LINK]
12130 nics.append((nic.ip, nic.mac, mode, link))
12132 args["nics"] = nics
12134 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12135 if self.op.disk_template:
12136 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12137 if self.op.runtime_mem:
12138 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12142 def BuildHooksNodes(self):
12143 """Build hooks nodes.
12146 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12147 return (nl, nl)
12149 def _PrepareNicModification(self, params, private, old_ip, old_params,
12150 cluster, pnode):
12151 update_params_dict = dict([(key, params[key])
12152 for key in constants.NICS_PARAMETERS
12153 if key in params])
12155 if "bridge" in params:
12156 update_params_dict[constants.NIC_LINK] = params["bridge"]
12158 new_params = _GetUpdatedParams(old_params, update_params_dict)
12159 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12161 new_filled_params = cluster.SimpleFillNIC(new_params)
12162 objects.NIC.CheckParameterSyntax(new_filled_params)
12164 new_mode = new_filled_params[constants.NIC_MODE]
12165 if new_mode == constants.NIC_MODE_BRIDGED:
12166 bridge = new_filled_params[constants.NIC_LINK]
12167 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12169 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12171 self.warn.append(msg)
12173 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12175 elif new_mode == constants.NIC_MODE_ROUTED:
12176 ip = params.get(constants.INIC_IP, old_ip)
12177 if ip is None:
12178 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12179 " on a routed NIC", errors.ECODE_INVAL)
12181 if constants.INIC_MAC in params:
12182 mac = params[constants.INIC_MAC]
12183 if mac is None:
12184 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12185 errors.ECODE_INVAL)
12186 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12187 # otherwise generate the MAC address
12188 params[constants.INIC_MAC] = \
12189 self.cfg.GenerateMAC(self.proc.GetECId())
12190 else:
12191 # or validate/reserve the current one
12192 try:
12193 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12194 except errors.ReservationError:
12195 raise errors.OpPrereqError("MAC address '%s' already in use"
12196 " in cluster" % mac,
12197 errors.ECODE_NOTUNIQUE)
12199 private.params = new_params
12200 private.filled = new_filled_params
12202 return (None, None)
12204 def CheckPrereq(self):
12205 """Check prerequisites.
12207 This only checks the instance list against the existing names.
12210 # checking the new params on the primary/secondary nodes
12212 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12213 cluster = self.cluster = self.cfg.GetClusterInfo()
12214 assert self.instance is not None, \
12215 "Cannot retrieve locked instance %s" % self.op.instance_name
12216 pnode = instance.primary_node
12217 nodelist = list(instance.all_nodes)
12218 pnode_info = self.cfg.GetNodeInfo(pnode)
12219 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12221 # Prepare disk/NIC modifications
12222 self.diskmod = PrepareContainerMods(self.op.disks, None)
12223 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12226 if self.op.os_name and not self.op.force:
12227 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12228 self.op.force_variant)
12229 instance_os = self.op.os_name
12230 else:
12231 instance_os = instance.os
12233 assert not (self.op.disk_template and self.op.disks), \
12234 "Can't modify disk template and apply disk changes at the same time"
12236 if self.op.disk_template:
12237 if instance.disk_template == self.op.disk_template:
12238 raise errors.OpPrereqError("Instance already has disk template %s" %
12239 instance.disk_template, errors.ECODE_INVAL)
12241 if (instance.disk_template,
12242 self.op.disk_template) not in self._DISK_CONVERSIONS:
12243 raise errors.OpPrereqError("Unsupported disk template conversion from"
12244 " %s to %s" % (instance.disk_template,
12245 self.op.disk_template),
12246 errors.ECODE_INVAL)
12247 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12248 msg="cannot change disk template")
12249 if self.op.disk_template in constants.DTS_INT_MIRROR:
12250 if self.op.remote_node == pnode:
12251 raise errors.OpPrereqError("Given new secondary node %s is the same"
12252 " as the primary node of the instance" %
12253 self.op.remote_node, errors.ECODE_STATE)
12254 _CheckNodeOnline(self, self.op.remote_node)
12255 _CheckNodeNotDrained(self, self.op.remote_node)
12256 # FIXME: here we assume that the old instance type is DT_PLAIN
12257 assert instance.disk_template == constants.DT_PLAIN
12258 disks = [{constants.IDISK_SIZE: d.size,
12259 constants.IDISK_VG: d.logical_id[0]}
12260 for d in instance.disks]
12261 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12262 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12264 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12265 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12266 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12267 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12268 ignore=self.op.ignore_ipolicy)
12269 if pnode_info.group != snode_info.group:
12270 self.LogWarning("The primary and secondary nodes are in two"
12271 " different node groups; the disk parameters"
12272 " from the first disk's node group will be"
12275 # hvparams processing
12276 if self.op.hvparams:
12277 hv_type = instance.hypervisor
12278 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12279 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12280 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12283 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12284 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12285 self.hv_proposed = self.hv_new = hv_new # the new actual values
12286 self.hv_inst = i_hvdict # the new dict (without defaults)
12288 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12290 self.hv_new = self.hv_inst = {}
12292 # beparams processing
12293 if self.op.beparams:
12294 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12296 objects.UpgradeBeParams(i_bedict)
12297 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12298 be_new = cluster.SimpleFillBE(i_bedict)
12299 self.be_proposed = self.be_new = be_new # the new actual values
12300 self.be_inst = i_bedict # the new dict (without defaults)
12302 self.be_new = self.be_inst = {}
12303 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12304 be_old = cluster.FillBE(instance)
12306 # CPU param validation -- checking every time a parameter is
12307 # changed to cover all cases where either CPU mask or vcpus have
12309 if (constants.BE_VCPUS in self.be_proposed and
12310 constants.HV_CPU_MASK in self.hv_proposed):
12312 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12313 # Verify mask is consistent with number of vCPUs. Can skip this
12314 # test if only 1 entry in the CPU mask, which means same mask
12315 # is applied to all vCPUs.
12316 if (len(cpu_list) > 1 and
12317 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12318 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12320 (self.be_proposed[constants.BE_VCPUS],
12321 self.hv_proposed[constants.HV_CPU_MASK]),
12322 errors.ECODE_INVAL)
12324 # Only perform this test if a new CPU mask is given
12325 if constants.HV_CPU_MASK in self.hv_new:
12326 # Calculate the largest CPU number requested
12327 max_requested_cpu = max(map(max, cpu_list))
12328 # Check that all of the instance's nodes have enough physical CPUs to
12329 # satisfy the requested CPU mask
12330 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12331 max_requested_cpu + 1, instance.hypervisor)
12333 # osparams processing
12334 if self.op.osparams:
12335 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12336 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12337 self.os_inst = i_osdict # the new dict (without defaults)
12339 else:
12340 self.os_inst = {}
12342 self.warn = []
12343 #TODO(dynmem): do the appropriate check involving MINMEM
12344 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12345 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12346 mem_check_list = [pnode]
12347 if be_new[constants.BE_AUTO_BALANCE]:
12348 # either we changed auto_balance to yes or it was from before
12349 mem_check_list.extend(instance.secondary_nodes)
12350 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12351 instance.hypervisor)
12352 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12353 [instance.hypervisor])
12354 pninfo = nodeinfo[pnode]
12355 msg = pninfo.fail_msg
12357 # Assume the primary node is unreachable and go ahead
12358 self.warn.append("Can't get info from primary node %s: %s" %
12361 (_, _, (pnhvinfo, )) = pninfo.payload
12362 if not isinstance(pnhvinfo.get("memory_free", None), int):
12363 self.warn.append("Node data from primary node %s doesn't contain"
12364 " free memory information" % pnode)
12365 elif instance_info.fail_msg:
12366 self.warn.append("Can't get instance runtime information: %s" %
12367 instance_info.fail_msg)
12369 if instance_info.payload:
12370 current_mem = int(instance_info.payload["memory"])
12371 else:
12372 # Assume instance not running
12373 # (there is a slight race condition here, but it's not very
12374 # probable, and we have no other way to check)
12375 # TODO: Describe race condition
12376 current_mem = 0
12377 #TODO(dynmem): do the appropriate check involving MINMEM
12378 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12379 pnhvinfo["memory_free"])
12381 raise errors.OpPrereqError("This change will prevent the instance"
12382 " from starting, due to %d MB of memory"
12383 " missing on its primary node" %
12385 errors.ECODE_NORES)
12387 if be_new[constants.BE_AUTO_BALANCE]:
12388 for node, nres in nodeinfo.items():
12389 if node not in instance.secondary_nodes:
12390 continue
12391 nres.Raise("Can't get info from secondary node %s" % node,
12392 prereq=True, ecode=errors.ECODE_STATE)
12393 (_, _, (nhvinfo, )) = nres.payload
12394 if not isinstance(nhvinfo.get("memory_free", None), int):
12395 raise errors.OpPrereqError("Secondary node %s didn't return free"
12396 " memory information" % node,
12397 errors.ECODE_STATE)
12398 #TODO(dynmem): do the appropriate check involving MINMEM
12399 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12400 raise errors.OpPrereqError("This change will prevent the instance"
12401 " from failover to its secondary node"
12402 " %s, due to not enough memory" % node,
12403 errors.ECODE_STATE)
12405 if self.op.runtime_mem:
12406 remote_info = self.rpc.call_instance_info(instance.primary_node,
12408 instance.hypervisor)
12409 remote_info.Raise("Error checking node %s" % instance.primary_node)
12410 if not remote_info.payload: # not running already
12411 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12412 errors.ECODE_STATE)
12414 current_memory = remote_info.payload["memory"]
12415 if (not self.op.force and
12416 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12417 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12418 raise errors.OpPrereqError("Instance %s must have between %d"
12419 " and %d MB of memory unless --force is"
12420 " given" % (instance.name,
12421 self.be_proposed[constants.BE_MINMEM],
12422 self.be_proposed[constants.BE_MAXMEM]),
12423 errors.ECODE_INVAL)
12425 if self.op.runtime_mem > current_memory:
12426 _CheckNodeFreeMemory(self, instance.primary_node,
12427 "ballooning memory for instance %s" %
12429 self.op.memory - current_memory,
12430 instance.hypervisor)
12432 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12433 raise errors.OpPrereqError("Disk operations not supported for"
12434 " diskless instances",
12435 errors.ECODE_INVAL)
12437 def _PrepareNicCreate(_, params, private):
12438 return self._PrepareNicModification(params, private, None, {},
12439 cluster, pnode)
12441 def _PrepareNicMod(_, nic, params, private):
12442 return self._PrepareNicModification(params, private, nic.ip,
12443 nic.nicparams, cluster, pnode)
12445 # Verify NIC changes (operating on copy)
12446 nics = instance.nics[:]
12447 ApplyContainerMods("NIC", nics, None, self.nicmod,
12448 _PrepareNicCreate, _PrepareNicMod, None)
12449 if len(nics) > constants.MAX_NICS:
12450 raise errors.OpPrereqError("Instance has too many network interfaces"
12451 " (%d), cannot add more" % constants.MAX_NICS,
12452 errors.ECODE_STATE)
12454 # Verify disk changes (operating on a copy)
12455 disks = instance.disks[:]
12456 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12457 if len(disks) > constants.MAX_DISKS:
12458 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12459 " more" % constants.MAX_DISKS,
12460 errors.ECODE_STATE)
12462 if self.op.offline is not None:
12463 if self.op.offline:
12464 msg = "can't change to offline"
12466 msg = "can't change to online"
12467 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12469 # Pre-compute NIC changes (necessary to use result in hooks)
12470 self._nic_chgdesc = []
12472 # Operate on copies as this is still in prereq
12473 nics = [nic.Copy() for nic in instance.nics]
12474 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12475 self._CreateNewNic, self._ApplyNicMods, None)
12476 self._new_nics = nics
12478 self._new_nics = None
12480 def _ConvertPlainToDrbd(self, feedback_fn):
12481 """Converts an instance from plain to drbd.
12484 feedback_fn("Converting template to drbd")
12485 instance = self.instance
12486 pnode = instance.primary_node
12487 snode = self.op.remote_node
12489 assert instance.disk_template == constants.DT_PLAIN
12491 # create a fake disk info for _GenerateDiskTemplate
12492 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12493 constants.IDISK_VG: d.logical_id[0]}
12494 for d in instance.disks]
12495 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12496 instance.name, pnode, [snode],
12497 disk_info, None, None, 0, feedback_fn,
12499 info = _GetInstanceInfoText(instance)
12500 feedback_fn("Creating additional volumes...")
12501 # first, create the missing data and meta devices
12502 for disk in new_disks:
12503 # unfortunately this is... not too nice
12504 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12506 for child in disk.children:
12507 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12508 # at this stage, all new LVs have been created, we can rename the
12510 feedback_fn("Renaming original volumes...")
12511 rename_list = [(o, n.children[0].logical_id)
12512 for (o, n) in zip(instance.disks, new_disks)]
12513 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12514 result.Raise("Failed to rename original LVs")
12516 feedback_fn("Initializing DRBD devices...")
12517 # all child devices are in place, we can now create the DRBD devices
12518 for disk in new_disks:
12519 for node in [pnode, snode]:
12520 f_create = node == pnode
12521 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12523 # at this point, the instance has been modified
12524 instance.disk_template = constants.DT_DRBD8
12525 instance.disks = new_disks
12526 self.cfg.Update(instance, feedback_fn)
12528 # Release node locks while waiting for sync
12529 _ReleaseLocks(self, locking.LEVEL_NODE)
12531 # disks are created, waiting for sync
12532 disk_abort = not _WaitForSync(self, instance,
12533 oneshot=not self.op.wait_for_sync)
12534 if disk_abort:
12535 raise errors.OpExecError("There are some degraded disks for"
12536 " this instance, please cleanup manually")
12538 # Node resource locks will be released by caller
12540 def _ConvertDrbdToPlain(self, feedback_fn):
12541 """Converts an instance from drbd to plain.
12544 instance = self.instance
12546 assert len(instance.secondary_nodes) == 1
12547 assert instance.disk_template == constants.DT_DRBD8
12549 pnode = instance.primary_node
12550 snode = instance.secondary_nodes[0]
12551 feedback_fn("Converting template to plain")
12553 old_disks = instance.disks
12554 new_disks = [d.children[0] for d in old_disks]
12556 # copy over size and mode
12557 for parent, child in zip(old_disks, new_disks):
12558 child.size = parent.size
12559 child.mode = parent.mode
12561 # update instance structure
12562 instance.disks = new_disks
12563 instance.disk_template = constants.DT_PLAIN
12564 self.cfg.Update(instance, feedback_fn)
12566 # Release locks in case removing disks takes a while
12567 _ReleaseLocks(self, locking.LEVEL_NODE)
12569 feedback_fn("Removing volumes on the secondary node...")
12570 for disk in old_disks:
12571 self.cfg.SetDiskID(disk, snode)
12572 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12573 if msg:
12574 self.LogWarning("Could not remove block device %s on node %s,"
12575 " continuing anyway: %s", disk.iv_name, snode, msg)
12577 feedback_fn("Removing unneeded volumes on the primary node...")
12578 for idx, disk in enumerate(old_disks):
12579 meta = disk.children[1]
12580 self.cfg.SetDiskID(meta, pnode)
12581 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12582 if msg:
12583 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12584 " continuing anyway: %s", idx, pnode, msg)
12586 # this is a DRBD disk, return its port to the pool
12587 for disk in old_disks:
12588 tcp_port = disk.logical_id[2]
12589 self.cfg.AddTcpUdpPort(tcp_port)
12591 # Node resource locks will be released by caller
12593 def _CreateNewDisk(self, idx, params, _):
12594 """Creates a new disk.
12597 instance = self.instance
12600 if instance.disk_template in constants.DTS_FILEBASED:
12601 (file_driver, file_path) = instance.disks[0].logical_id
12602 file_path = os.path.dirname(file_path)
12603 else:
12604 file_driver = file_path = None
12607 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12608 instance.primary_node, instance.secondary_nodes,
12609 [params], file_path, file_driver, idx,
12610 self.Log, self.diskparams)[0]
12612 info = _GetInstanceInfoText(instance)
12614 logging.info("Creating volume %s for instance %s",
12615 disk.iv_name, instance.name)
12616 # Note: this needs to be kept in sync with _CreateDisks
12618 for node in instance.all_nodes:
12619 f_create = (node == instance.primary_node)
12620 try:
12621 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12622 except errors.OpExecError, err:
12623 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12624 disk.iv_name, disk, node, err)
12627 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12631 def _ModifyDisk(idx, disk, params, _):
12632 """Modifies a disk.
12635 disk.mode = params[constants.IDISK_MODE]
12638 ("disk.mode/%d" % idx, disk.mode),
12641 def _RemoveDisk(self, idx, root, _):
12645 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12646 self.cfg.SetDiskID(disk, node)
12647 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12649 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12650 " continuing anyway", idx, node, msg)
12652 # if this is a DRBD disk, return its port to the pool
12653 if root.dev_type in constants.LDS_DRBD:
12654 self.cfg.AddTcpUdpPort(root.logical_id[2])
12657 def _CreateNewNic(idx, params, private):
12658 """Creates data structure for a new network interface.
12661 mac = params[constants.INIC_MAC]
12662 ip = params.get(constants.INIC_IP, None)
12663 nicparams = private.params
12665 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12667 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12668 (mac, ip, private.filled[constants.NIC_MODE],
12669 private.filled[constants.NIC_LINK])),
12673 def _ApplyNicMods(idx, nic, params, private):
12674 """Modifies a network interface.
12679 for key in [constants.INIC_MAC, constants.INIC_IP]:
12681 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12682 setattr(nic, key, params[key])
12685 nic.nicparams = private.params
12687 for (key, val) in params.items():
12688 changes.append(("nic.%s/%d" % (key, idx), val))
12692 def Exec(self, feedback_fn):
12693 """Modifies an instance.
12695 All parameters take effect only at the next restart of the instance.
12698 # Process here the warnings from CheckPrereq, as we don't have a
12699 # feedback_fn there.
12700 # TODO: Replace with self.LogWarning
12701 for warn in self.warn:
12702 feedback_fn("WARNING: %s" % warn)
12704 assert ((self.op.disk_template is None) ^
12705 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12706 "Not owning any node resource locks"
12709 instance = self.instance
12712 if self.op.runtime_mem:
12713 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12715 self.op.runtime_mem)
12716 rpcres.Raise("Cannot modify instance runtime memory")
12717 result.append(("runtime_memory", self.op.runtime_mem))
12719 # Apply disk changes
12720 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12721 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12722 _UpdateIvNames(0, instance.disks)
12724 if self.op.disk_template:
12726 check_nodes = set(instance.all_nodes)
12727 if self.op.remote_node:
12728 check_nodes.add(self.op.remote_node)
12729 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12730 owned = self.owned_locks(level)
12731 assert not (check_nodes - owned), \
12732 ("Not owning the correct locks, owning %r, expected at least %r" %
12733 (owned, check_nodes))
12735 r_shut = _ShutdownInstanceDisks(self, instance)
12737 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12738 " proceed with disk template conversion")
12739 mode = (instance.disk_template, self.op.disk_template)
12741 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12743 self.cfg.ReleaseDRBDMinors(instance.name)
12745 result.append(("disk_template", self.op.disk_template))
12747 assert instance.disk_template == self.op.disk_template, \
12748 ("Expected disk template '%s', found '%s'" %
12749 (self.op.disk_template, instance.disk_template))
12751 # Release node and resource locks if there are any (they might already have
12752 # been released during disk conversion)
12753 _ReleaseLocks(self, locking.LEVEL_NODE)
12754 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12756 # Apply NIC changes
12757 if self._new_nics is not None:
12758 instance.nics = self._new_nics
12759 result.extend(self._nic_chgdesc)
12762 if self.op.hvparams:
12763 instance.hvparams = self.hv_inst
12764 for key, val in self.op.hvparams.iteritems():
12765 result.append(("hv/%s" % key, val))
12768 if self.op.beparams:
12769 instance.beparams = self.be_inst
12770 for key, val in self.op.beparams.iteritems():
12771 result.append(("be/%s" % key, val))
12774 if self.op.os_name:
12775 instance.os = self.op.os_name
12778 if self.op.osparams:
12779 instance.osparams = self.os_inst
12780 for key, val in self.op.osparams.iteritems():
12781 result.append(("os/%s" % key, val))
12783 if self.op.offline is None:
12786 elif self.op.offline:
12787 # Mark instance as offline
12788 self.cfg.MarkInstanceOffline(instance.name)
12789 result.append(("admin_state", constants.ADMINST_OFFLINE))
12791 # Mark instance as online, but stopped
12792 self.cfg.MarkInstanceDown(instance.name)
12793 result.append(("admin_state", constants.ADMINST_DOWN))
12795 self.cfg.Update(instance, feedback_fn)
12797 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12798 self.owned_locks(locking.LEVEL_NODE)), \
12799 "All node locks should have been released by now"
12803 _DISK_CONVERSIONS = {
12804 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12805 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
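# _DISK_CONVERSIONS above is a plain dispatch table keyed on the
# (current template, requested template) pair.  Below is a minimal,
# self-contained sketch of that lookup-and-dispatch pattern; the template
# strings and helper names are hypothetical, not the Ganeti constants.
_EXAMPLE_CONVERSIONS = {
  ("plain", "drbd8"): "attach DRBD mirrors",
  ("drbd8", "plain"): "drop DRBD mirrors",
}

def _ExampleConvert(current, requested, feedback_fn):
  """Looks up and reports the conversion step for a template pair."""
  try:
    action = _EXAMPLE_CONVERSIONS[(current, requested)]
  except KeyError:
    raise ValueError("Unsupported conversion %s -> %s" % (current, requested))
  feedback_fn("%s -> %s: %s" % (current, requested, action))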
12809 class LUInstanceChangeGroup(LogicalUnit):
12810 HPATH = "instance-change-group"
12811 HTYPE = constants.HTYPE_INSTANCE
12814 def ExpandNames(self):
12815 self.share_locks = _ShareAll()
12816 self.needed_locks = {
12817 locking.LEVEL_NODEGROUP: [],
12818 locking.LEVEL_NODE: [],
12821 self._ExpandAndLockInstance()
12823 if self.op.target_groups:
12824 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12825 self.op.target_groups)
12827 self.req_target_uuids = None
12829 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12831 def DeclareLocks(self, level):
12832 if level == locking.LEVEL_NODEGROUP:
12833 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12835 if self.req_target_uuids:
12836 lock_groups = set(self.req_target_uuids)
12838 # Lock all groups used by instance optimistically; this requires going
12839 # via the node before it's locked, requiring verification later on
12840 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12841 lock_groups.update(instance_groups)
12843 # No target groups, need to lock all of them
12844 lock_groups = locking.ALL_SET
12846 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12848 elif level == locking.LEVEL_NODE:
12849 if self.req_target_uuids:
12850 # Lock all nodes used by instances
12851 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12852 self._LockInstancesNodes()
12854 # Lock all nodes in all potential target groups
12855 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12856 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12857 member_nodes = [node_name
12858 for group in lock_groups
12859 for node_name in self.cfg.GetNodeGroup(group).members]
12860 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12862 # Lock all nodes as all groups are potential targets
12863 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12865 def CheckPrereq(self):
12866 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12867 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12868 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12870 assert (self.req_target_uuids is None or
12871 owned_groups.issuperset(self.req_target_uuids))
12872 assert owned_instances == set([self.op.instance_name])
12874 # Get instance information
12875 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12877 # Check if node groups for locked instance are still correct
12878 assert owned_nodes.issuperset(self.instance.all_nodes), \
12879 ("Instance %s's nodes changed while we kept the lock" %
12880 self.op.instance_name)
12882 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12885 if self.req_target_uuids:
12886 # User requested specific target groups
12887 self.target_uuids = self.req_target_uuids
12889 # All groups except those used by the instance are potential targets
12890 self.target_uuids = owned_groups - inst_groups
12892 conflicting_groups = self.target_uuids & inst_groups
12893 if conflicting_groups:
12894 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12895 " used by the instance '%s'" %
12896 (utils.CommaJoin(conflicting_groups),
12897 self.op.instance_name),
12898 errors.ECODE_INVAL)
12900 if not self.target_uuids:
12901 raise errors.OpPrereqError("There are no possible target groups",
12902 errors.ECODE_INVAL)
12904 def BuildHooksEnv(self):
12905 """Build hooks env.
12908 assert self.target_uuids
12911 "TARGET_GROUPS": " ".join(self.target_uuids),
12914 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12918 def BuildHooksNodes(self):
12919 """Build hooks nodes.
12922 mn = self.cfg.GetMasterNode()
12923 return ([mn], [mn])
12925 def Exec(self, feedback_fn):
12926 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12928 assert instances == [self.op.instance_name], "Instance not locked"
12930 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12931 instances=instances, target_groups=list(self.target_uuids))
12933 ial.Run(self.op.iallocator)
12935 if not ial.success:
12936 raise errors.OpPrereqError("Can't compute solution for changing group of"
12937 " instance '%s' using iallocator '%s': %s" %
12938 (self.op.instance_name, self.op.iallocator,
12940 errors.ECODE_NORES)
12942 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12944 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12945 " instance '%s'", len(jobs), self.op.instance_name)
12947 return ResultWithJobs(jobs)
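# The target-group selection done in CheckPrereq above reduces to set
# arithmetic: either the groups the user asked for, or every owned group not
# already used by the instance, with conflicts rejected.  A stand-alone sketch
# of that rule (hypothetical names, plain ValueError instead of OpPrereqError):
def _ExampleSelectTargetGroups(requested, owned_groups, inst_groups):
  """Returns the usable target groups or raises on conflict/empty result."""
  if requested:
    targets = set(requested)
  else:
    targets = set(owned_groups) - set(inst_groups)
  conflicting = targets & set(inst_groups)
  if conflicting:
    raise ValueError("Can't use group(s) %s as targets, they are used by"
                     " the instance" % ", ".join(sorted(conflicting)))
  if not targets:
    raise ValueError("There are no possible target groups")
  return targets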
12950 class LUBackupQuery(NoHooksLU):
12951 """Query the exports list
12956 def ExpandNames(self):
12957 self.needed_locks = {}
12958 self.share_locks[locking.LEVEL_NODE] = 1
12959 if not self.op.nodes:
12960 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12962 self.needed_locks[locking.LEVEL_NODE] = \
12963 _GetWantedNodes(self, self.op.nodes)
12965 def Exec(self, feedback_fn):
12966 """Compute the list of all the exported system images.
12969 @return: a dictionary with the structure node->(export-list)
12970 where export-list is a list of the instances exported on
12974 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12975 rpcresult = self.rpc.call_export_list(self.nodes)
12977 for node in rpcresult:
12978 if rpcresult[node].fail_msg:
12979 result[node] = False
12981 result[node] = rpcresult[node].payload
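# LUBackupQuery.Exec returns a node -> export-list mapping where a value of
# False marks a node that failed to answer.  A hypothetical consumer could
# split the two cases like this (sketch only):
def _ExampleSplitExportResults(result):
  """Separates reachable nodes from failed ones in a backup-query result."""
  exports = dict((node, names) for (node, names) in result.items()
                 if names is not False)
  failed = [node for (node, names) in result.items() if names is False]
  return exports, failed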
12986 class LUBackupPrepare(NoHooksLU):
12987 """Prepares an instance for an export and returns useful information.
12992 def ExpandNames(self):
12993 self._ExpandAndLockInstance()
12995 def CheckPrereq(self):
12996 """Check prerequisites.
12999 instance_name = self.op.instance_name
13001 self.instance = self.cfg.GetInstanceInfo(instance_name)
13002 assert self.instance is not None, \
13003 "Cannot retrieve locked instance %s" % self.op.instance_name
13004 _CheckNodeOnline(self, self.instance.primary_node)
13006 self._cds = _GetClusterDomainSecret()
13008 def Exec(self, feedback_fn):
13009 """Prepares an instance for an export.
13012 instance = self.instance
13014 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13015 salt = utils.GenerateSecret(8)
13017 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13018 result = self.rpc.call_x509_cert_create(instance.primary_node,
13019 constants.RIE_CERT_VALIDITY)
13020 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13022 (name, cert_pem) = result.payload
13024 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13028 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13029 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13031 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
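# LUBackupPrepare signs the generated X509 key name with the cluster domain
# secret (utils.Sha1Hmac) and LUBackupExport.CheckPrereq later verifies that
# signature (utils.VerifySha1Hmac).  The stdlib-only round trip below only
# illustrates the idea; the exact salting scheme and the Ganeti helper
# signatures are not reproduced.
import hashlib
import hmac

def _ExampleSignKeyName(secret, key_name, salt):
  """Returns a salted SHA1 HMAC over the key name (byte strings expected)."""
  return hmac.new(secret, salt + key_name, hashlib.sha1).hexdigest()

def _ExampleVerifyKeyName(secret, key_name, digest, salt):
  """Checks a digest produced by _ExampleSignKeyName."""
  return _ExampleSignKeyName(secret, key_name, salt) == digest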
13037 class LUBackupExport(LogicalUnit):
13038 """Export an instance to an image in the cluster.
13041 HPATH = "instance-export"
13042 HTYPE = constants.HTYPE_INSTANCE
13045 def CheckArguments(self):
13046 """Check the arguments.
13049 self.x509_key_name = self.op.x509_key_name
13050 self.dest_x509_ca_pem = self.op.destination_x509_ca
13052 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13053 if not self.x509_key_name:
13054 raise errors.OpPrereqError("Missing X509 key name for encryption",
13055 errors.ECODE_INVAL)
13057 if not self.dest_x509_ca_pem:
13058 raise errors.OpPrereqError("Missing destination X509 CA",
13059 errors.ECODE_INVAL)
13061 def ExpandNames(self):
13062 self._ExpandAndLockInstance()
13064 # Lock all nodes for local exports
13065 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13066 # FIXME: lock only instance primary and destination node
13068 # Sad but true, for now we have to lock all nodes, as we don't know where
13069 # the previous export might be, and in this LU we search for it and
13070 # remove it from its current node. In the future we could fix this by:
13071 # - making a tasklet to search (share-lock all), then create the
13072 # new one, then one to remove, after
13073 # - removing the removal operation altogether
13074 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13076 def DeclareLocks(self, level):
13077 """Last minute lock declaration."""
13078 # All nodes are locked anyway, so nothing to do here.
13080 def BuildHooksEnv(self):
13081 """Build hooks env.
13083 This will run on the master, primary node and target node.
13087 "EXPORT_MODE": self.op.mode,
13088 "EXPORT_NODE": self.op.target_node,
13089 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13090 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13091 # TODO: Generic function for boolean env variables
13092 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13095 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13099 def BuildHooksNodes(self):
13100 """Build hooks nodes.
13103 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13105 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13106 nl.append(self.op.target_node)
13110 def CheckPrereq(self):
13111 """Check prerequisites.
13113 This checks that the instance and node names are valid.
13116 instance_name = self.op.instance_name
13118 self.instance = self.cfg.GetInstanceInfo(instance_name)
13119 assert self.instance is not None, \
13120 "Cannot retrieve locked instance %s" % self.op.instance_name
13121 _CheckNodeOnline(self, self.instance.primary_node)
13123 if (self.op.remove_instance and
13124 self.instance.admin_state == constants.ADMINST_UP and
13125 not self.op.shutdown):
13126 raise errors.OpPrereqError("Can not remove instance without shutting it"
13129 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13130 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13131 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13132 assert self.dst_node is not None
13134 _CheckNodeOnline(self, self.dst_node.name)
13135 _CheckNodeNotDrained(self, self.dst_node.name)
13138 self.dest_disk_info = None
13139 self.dest_x509_ca = None
13141 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13142 self.dst_node = None
13144 if len(self.op.target_node) != len(self.instance.disks):
13145 raise errors.OpPrereqError(("Received destination information for %s"
13146 " disks, but instance %s has %s disks") %
13147 (len(self.op.target_node), instance_name,
13148 len(self.instance.disks)),
13149 errors.ECODE_INVAL)
13151 cds = _GetClusterDomainSecret()
13153 # Check X509 key name
13155 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13156 except (TypeError, ValueError), err:
13157 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13159 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13160 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13161 errors.ECODE_INVAL)
13163 # Load and verify CA
13165 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13166 except OpenSSL.crypto.Error, err:
13167 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13168 (err, ), errors.ECODE_INVAL)
13170 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13171 if errcode is not None:
13172 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13173 (msg, ), errors.ECODE_INVAL)
13175 self.dest_x509_ca = cert
13177 # Verify target information
13179 for idx, disk_data in enumerate(self.op.target_node):
13181 (host, port, magic) = \
13182 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13183 except errors.GenericError, err:
13184 raise errors.OpPrereqError("Target info for disk %s: %s" %
13185 (idx, err), errors.ECODE_INVAL)
13187 disk_info.append((host, port, magic))
13189 assert len(disk_info) == len(self.op.target_node)
13190 self.dest_disk_info = disk_info
13193 raise errors.ProgrammerError("Unhandled export mode %r" %
13196 # instance disk type verification
13197 # TODO: Implement export support for file-based disks
13198 for disk in self.instance.disks:
13199 if disk.dev_type == constants.LD_FILE:
13200 raise errors.OpPrereqError("Export not supported for instances with"
13201 " file-based disks", errors.ECODE_INVAL)
13203 def _CleanupExports(self, feedback_fn):
13204 """Removes exports of current instance from all other nodes.
13206 If an instance in a cluster with nodes A..D was exported to node C, its
13207 exports will be removed from the nodes A, B and D.
13210 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13212 nodelist = self.cfg.GetNodeList()
13213 nodelist.remove(self.dst_node.name)
13215 # on one-node clusters nodelist will be empty after the removal
13216 # if we proceed, the backup would be removed because OpBackupQuery
13217 # substitutes an empty list with the full cluster node list.
13218 iname = self.instance.name
13220 feedback_fn("Removing old exports for instance %s" % iname)
13221 exportlist = self.rpc.call_export_list(nodelist)
13222 for node in exportlist:
13223 if exportlist[node].fail_msg:
13225 if iname in exportlist[node].payload:
13226 msg = self.rpc.call_export_remove(node, iname).fail_msg
13228 self.LogWarning("Could not remove older export for instance %s"
13229 " on node %s: %s", iname, node, msg)
13231 def Exec(self, feedback_fn):
13232 """Export an instance to an image in the cluster.
13235 assert self.op.mode in constants.EXPORT_MODES
13237 instance = self.instance
13238 src_node = instance.primary_node
13240 if self.op.shutdown:
13241 # shutdown the instance, but not the disks
13242 feedback_fn("Shutting down instance %s" % instance.name)
13243 result = self.rpc.call_instance_shutdown(src_node, instance,
13244 self.op.shutdown_timeout)
13245 # TODO: Maybe ignore failures if ignore_remove_failures is set
13246 result.Raise("Could not shutdown instance %s on"
13247 " node %s" % (instance.name, src_node))
13249 # set the disk IDs correctly since call_instance_start needs the
13250 # correct drbd minor to create the symlinks
13251 for disk in instance.disks:
13252 self.cfg.SetDiskID(disk, src_node)
13254 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13257 # Activate the instance disks if we're exporting a stopped instance
13258 feedback_fn("Activating disks for %s" % instance.name)
13259 _StartInstanceDisks(self, instance, None)
13262 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13265 helper.CreateSnapshots()
13267 if (self.op.shutdown and
13268 instance.admin_state == constants.ADMINST_UP and
13269 not self.op.remove_instance):
13270 assert not activate_disks
13271 feedback_fn("Starting instance %s" % instance.name)
13272 result = self.rpc.call_instance_start(src_node,
13273 (instance, None, None), False)
13274 msg = result.fail_msg
13276 feedback_fn("Failed to start instance: %s" % msg)
13277 _ShutdownInstanceDisks(self, instance)
13278 raise errors.OpExecError("Could not start instance: %s" % msg)
13280 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13281 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13282 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13283 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13284 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13286 (key_name, _, _) = self.x509_key_name
13289 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13292 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13293 key_name, dest_ca_pem,
13298 # Check for backwards compatibility
13299 assert len(dresults) == len(instance.disks)
13300 assert compat.all(isinstance(i, bool) for i in dresults), \
13301 "Not all results are boolean: %r" % dresults
13305 feedback_fn("Deactivating disks for %s" % instance.name)
13306 _ShutdownInstanceDisks(self, instance)
13308 if not (compat.all(dresults) and fin_resu):
13311 failures.append("export finalization")
13312 if not compat.all(dresults):
13313 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13315 failures.append("disk export: disk(s) %s" % fdsk)
13317 raise errors.OpExecError("Export failed, errors in %s" %
13318 utils.CommaJoin(failures))
13320 # At this point, the export was successful, we can cleanup/finish
13322 # Remove instance if requested
13323 if self.op.remove_instance:
13324 feedback_fn("Removing instance %s" % instance.name)
13325 _RemoveInstance(self, feedback_fn, instance,
13326 self.op.ignore_remove_failures)
13328 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13329 self._CleanupExports(feedback_fn)
13331 return fin_resu, dresults
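# For remote exports the destination CA is handled with pyOpenSSL: CheckPrereq
# loads and verifies the signed CA (utils.LoadSignedX509Certificate) and Exec
# dumps it back to PEM for the transfer.  A minimal sketch of the underlying
# load/inspect calls (standard pyOpenSSL API; error handling omitted):
import OpenSSL

def _ExampleLoadCa(pem_data):
  """Loads a PEM certificate and returns (certificate, has_expired)."""
  cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem_data)
  return cert, cert.has_expired()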
13334 class LUBackupRemove(NoHooksLU):
13335 """Remove exports related to the named instance.
13340 def ExpandNames(self):
13341 self.needed_locks = {}
13342 # We need all nodes to be locked in order for RemoveExport to work, but we
13343 # don't need to lock the instance itself, as nothing will happen to it (and
13344 # we can remove exports also for a removed instance)
13345 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13347 def Exec(self, feedback_fn):
13348 """Remove any export.
13351 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13352 # If the instance was not found we'll try with the name that was passed in.
13353 # This will only work if it was an FQDN, though.
13355 if not instance_name:
13357 instance_name = self.op.instance_name
13359 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13360 exportlist = self.rpc.call_export_list(locked_nodes)
13362 for node in exportlist:
13363 msg = exportlist[node].fail_msg
13365 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13367 if instance_name in exportlist[node].payload:
13369 result = self.rpc.call_export_remove(node, instance_name)
13370 msg = result.fail_msg
13372 logging.error("Could not remove export for instance %s"
13373 " on node %s: %s", instance_name, node, msg)
13375 if fqdn_warn and not found:
13376 feedback_fn("Export not found. If trying to remove an export belonging"
13377 " to a deleted instance please use its Fully Qualified"
13381 class LUGroupAdd(LogicalUnit):
13382 """Logical unit for creating node groups.
13385 HPATH = "group-add"
13386 HTYPE = constants.HTYPE_GROUP
13389 def ExpandNames(self):
13390 # We need the new group's UUID here so that we can create and acquire the
13391 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13392 # that it should not check whether the UUID exists in the configuration.
13393 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13394 self.needed_locks = {}
13395 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13397 def CheckPrereq(self):
13398 """Check prerequisites.
13400 This checks that the given group name is not an existing node group
13405 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13406 except errors.OpPrereqError:
13409 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13410 " node group (UUID: %s)" %
13411 (self.op.group_name, existing_uuid),
13412 errors.ECODE_EXISTS)
13414 if self.op.ndparams:
13415 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13417 if self.op.hv_state:
13418 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13420 self.new_hv_state = None
13422 if self.op.disk_state:
13423 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13425 self.new_disk_state = None
13427 if self.op.diskparams:
13428 for templ in constants.DISK_TEMPLATES:
13429 if templ not in self.op.diskparams:
13430 self.op.diskparams[templ] = {}
13431 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13433 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13435 if self.op.ipolicy:
13436 cluster = self.cfg.GetClusterInfo()
13437 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13439 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13440 except errors.ConfigurationError, err:
13441 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13442 errors.ECODE_INVAL)
13444 def BuildHooksEnv(self):
13445 """Build hooks env.
13449 "GROUP_NAME": self.op.group_name,
13452 def BuildHooksNodes(self):
13453 """Build hooks nodes.
13456 mn = self.cfg.GetMasterNode()
13457 return ([mn], [mn])
13459 def Exec(self, feedback_fn):
13460 """Add the node group to the cluster.
13463 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13464 uuid=self.group_uuid,
13465 alloc_policy=self.op.alloc_policy,
13466 ndparams=self.op.ndparams,
13467 diskparams=self.op.diskparams,
13468 ipolicy=self.op.ipolicy,
13469 hv_state_static=self.new_hv_state,
13470 disk_state_static=self.new_disk_state)
13472 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13473 del self.remove_locks[locking.LEVEL_NODEGROUP]
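# LUGroupAdd.CheckPrereq above makes sure every known disk template has an
# entry in self.op.diskparams so later code can index it unconditionally.  A
# stand-alone version of that defaulting step (template names are
# hypothetical):
def _ExampleFillDiskParams(diskparams, all_templates=("plain", "drbd8", "file")):
  """Returns a copy of diskparams with an (empty) entry for every template."""
  filled = dict(diskparams)
  for templ in all_templates:
    filled.setdefault(templ, {})
  return filled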
13476 class LUGroupAssignNodes(NoHooksLU):
13477 """Logical unit for assigning nodes to groups.
13482 def ExpandNames(self):
13483 # These raise errors.OpPrereqError on their own:
13484 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13485 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13487 # We want to lock all the affected nodes and groups. We have readily
13488 # available the list of nodes, and the *destination* group. To gather the
13489 # list of "source" groups, we need to fetch node information later on.
13490 self.needed_locks = {
13491 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13492 locking.LEVEL_NODE: self.op.nodes,
13495 def DeclareLocks(self, level):
13496 if level == locking.LEVEL_NODEGROUP:
13497 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13499 # Try to get all affected nodes' groups without having the group or node
13500 # lock yet. Needs verification later in the code flow.
13501 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13503 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13505 def CheckPrereq(self):
13506 """Check prerequisites.
13509 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13510 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13511 frozenset(self.op.nodes))
13513 expected_locks = (set([self.group_uuid]) |
13514 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13515 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13516 if actual_locks != expected_locks:
13517 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13518 " current groups are '%s', used to be '%s'" %
13519 (utils.CommaJoin(expected_locks),
13520 utils.CommaJoin(actual_locks)))
13522 self.node_data = self.cfg.GetAllNodesInfo()
13523 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13524 instance_data = self.cfg.GetAllInstancesInfo()
13526 if self.group is None:
13527 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13528 (self.op.group_name, self.group_uuid))
13530 (new_splits, previous_splits) = \
13531 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13532 for node in self.op.nodes],
13533 self.node_data, instance_data)
13536 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13538 if not self.op.force:
13539 raise errors.OpExecError("The following instances get split by this"
13540 " change and --force was not given: %s" %
13543 self.LogWarning("This operation will split the following instances: %s",
13546 if previous_splits:
13547 self.LogWarning("In addition, these already-split instances continue"
13548 " to be split across groups: %s",
13549 utils.CommaJoin(utils.NiceSort(previous_splits)))
13551 def Exec(self, feedback_fn):
13552 """Assign nodes to a new group.
13555 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13557 self.cfg.AssignGroupNodes(mods)
13560 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13561 """Check for split instances after a node assignment.
13563 This method considers a series of node assignments as an atomic operation,
13564 and returns information about split instances after applying the set of changes.
13567 In particular, it returns information about newly split instances, and
13568 instances that were already split, and remain so after the change.
13570 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
13573 @type changes: list of (node_name, new_group_uuid) pairs.
13574 @param changes: list of node assignments to consider.
13575 @param node_data: a dict with data for all nodes
13576 @param instance_data: a dict with all instances to consider
13577 @rtype: a two-tuple
13578 @return: a list of instances that were previously okay and become split as a
13579 consequence of this change, and a list of instances that were previously
13580 split and that this change does not fix.
13583 changed_nodes = dict((node, group) for node, group in changes
13584 if node_data[node].group != group)
13586 all_split_instances = set()
13587 previously_split_instances = set()
13589 def InstanceNodes(instance):
13590 return [instance.primary_node] + list(instance.secondary_nodes)
13592 for inst in instance_data.values():
13593 if inst.disk_template not in constants.DTS_INT_MIRROR:
13596 instance_nodes = InstanceNodes(inst)
13598 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13599 previously_split_instances.add(inst.name)
13601 if len(set(changed_nodes.get(node, node_data[node].group)
13602 for node in instance_nodes)) > 1:
13603 all_split_instances.add(inst.name)
13605 return (list(all_split_instances - previously_split_instances),
13606 list(previously_split_instances & all_split_instances))
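# The split check above boils down to: an instance is split when its nodes no
# longer share a single node group once the proposed assignments are applied.
# A compact, self-contained version of that rule with hypothetical data shapes
# (node_groups: node name -> group, changes: node name -> new group):
def _ExampleIsSplit(instance_nodes, node_groups, changes):
  """True if applying 'changes' leaves the instance spanning several groups.

  For example, _ExampleIsSplit(["node1", "node2"],
                               {"node1": "g1", "node2": "g1"},
                               {"node2": "g2"}) returns True.
  """
  effective = dict(node_groups, **changes)
  return len(set(effective[node] for node in instance_nodes)) > 1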
13609 class _GroupQuery(_QueryBase):
13610 FIELDS = query.GROUP_FIELDS
13612 def ExpandNames(self, lu):
13613 lu.needed_locks = {}
13615 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13616 self._cluster = lu.cfg.GetClusterInfo()
13617 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13620 self.wanted = [name_to_uuid[name]
13621 for name in utils.NiceSort(name_to_uuid.keys())]
13623 # Accept names to be either names or UUIDs.
13626 all_uuid = frozenset(self._all_groups.keys())
13628 for name in self.names:
13629 if name in all_uuid:
13630 self.wanted.append(name)
13631 elif name in name_to_uuid:
13632 self.wanted.append(name_to_uuid[name])
13634 missing.append(name)
13637 raise errors.OpPrereqError("Some groups do not exist: %s" %
13638 utils.CommaJoin(missing),
13639 errors.ECODE_NOENT)
13641 def DeclareLocks(self, lu, level):
13644 def _GetQueryData(self, lu):
13645 """Computes the list of node groups and their attributes.
13648 do_nodes = query.GQ_NODE in self.requested_data
13649 do_instances = query.GQ_INST in self.requested_data
13651 group_to_nodes = None
13652 group_to_instances = None
13654 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13655 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13656 # latter GetAllInstancesInfo() is not enough, for we have to go through
13657 # instance->node. Hence, we will need to process nodes even if we only need
13658 # instance information.
13659 if do_nodes or do_instances:
13660 all_nodes = lu.cfg.GetAllNodesInfo()
13661 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13664 for node in all_nodes.values():
13665 if node.group in group_to_nodes:
13666 group_to_nodes[node.group].append(node.name)
13667 node_to_group[node.name] = node.group
13670 all_instances = lu.cfg.GetAllInstancesInfo()
13671 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13673 for instance in all_instances.values():
13674 node = instance.primary_node
13675 if node in node_to_group:
13676 group_to_instances[node_to_group[node]].append(instance.name)
13679 # Do not pass on node information if it was not requested.
13680 group_to_nodes = None
13682 return query.GroupQueryData(self._cluster,
13683 [self._all_groups[uuid]
13684 for uuid in self.wanted],
13685 group_to_nodes, group_to_instances)
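# _GetQueryData builds its maps by inverting node -> group and then walking
# instance primary nodes.  A condensed, stand-alone equivalent (hypothetical
# input shapes: node_groups maps node name -> group UUID, primaries maps
# instance name -> primary node name):
def _ExampleGroupMaps(wanted_groups, node_groups, primaries):
  """Returns (group -> [nodes], group -> [instances]) for the wanted groups."""
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  for node, group in node_groups.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)
  group_to_instances = dict((uuid, []) for uuid in wanted_groups)
  for inst, node in primaries.items():
    group = node_groups.get(node)
    if group in group_to_instances:
      group_to_instances[group].append(inst)
  return group_to_nodes, group_to_instances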
13688 class LUGroupQuery(NoHooksLU):
13689 """Logical unit for querying node groups.
13694 def CheckArguments(self):
13695 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13696 self.op.output_fields, False)
13698 def ExpandNames(self):
13699 self.gq.ExpandNames(self)
13701 def DeclareLocks(self, level):
13702 self.gq.DeclareLocks(self, level)
13704 def Exec(self, feedback_fn):
13705 return self.gq.OldStyleQuery(self)
13708 class LUGroupSetParams(LogicalUnit):
13709 """Modifies the parameters of a node group.
13712 HPATH = "group-modify"
13713 HTYPE = constants.HTYPE_GROUP
13716 def CheckArguments(self):
13719 self.op.diskparams,
13720 self.op.alloc_policy,
13722 self.op.disk_state,
13726 if all_changes.count(None) == len(all_changes):
13727 raise errors.OpPrereqError("Please pass at least one modification",
13728 errors.ECODE_INVAL)
13730 def ExpandNames(self):
13731 # This raises errors.OpPrereqError on its own:
13732 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13734 self.needed_locks = {
13735 locking.LEVEL_INSTANCE: [],
13736 locking.LEVEL_NODEGROUP: [self.group_uuid],
13739 self.share_locks[locking.LEVEL_INSTANCE] = 1
13741 def DeclareLocks(self, level):
13742 if level == locking.LEVEL_INSTANCE:
13743 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13745 # Lock instances optimistically, needs verification once group lock has
13747 self.needed_locks[locking.LEVEL_INSTANCE] = \
13748 self.cfg.GetNodeGroupInstances(self.group_uuid)
13750 def CheckPrereq(self):
13751 """Check prerequisites.
13754 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13756 # Check if locked instances are still correct
13757 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13759 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13760 cluster = self.cfg.GetClusterInfo()
13762 if self.group is None:
13763 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13764 (self.op.group_name, self.group_uuid))
13766 if self.op.ndparams:
13767 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13768 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13769 self.new_ndparams = new_ndparams
13771 if self.op.diskparams:
13772 self.new_diskparams = dict()
13773 for templ in constants.DISK_TEMPLATES:
13774 if templ not in self.op.diskparams:
13775 self.op.diskparams[templ] = {}
13776 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13777 self.op.diskparams[templ])
13778 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13779 self.new_diskparams[templ] = new_templ_params
13781 if self.op.hv_state:
13782 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13783 self.group.hv_state_static)
13785 if self.op.disk_state:
13786 self.new_disk_state = \
13787 _MergeAndVerifyDiskState(self.op.disk_state,
13788 self.group.disk_state_static)
13790 if self.op.ipolicy:
13791 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13795 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13796 inst_filter = lambda inst: inst.name in owned_instances
13797 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13799 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13801 new_ipolicy, instances)
13804 self.LogWarning("After the ipolicy change the following instances"
13805 " violate them: %s",
13806 utils.CommaJoin(violations))
13808 def BuildHooksEnv(self):
13809 """Build hooks env.
13813 "GROUP_NAME": self.op.group_name,
13814 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13817 def BuildHooksNodes(self):
13818 """Build hooks nodes.
13821 mn = self.cfg.GetMasterNode()
13822 return ([mn], [mn])
13824 def Exec(self, feedback_fn):
13825 """Modifies the node group.
13830 if self.op.ndparams:
13831 self.group.ndparams = self.new_ndparams
13832 result.append(("ndparams", str(self.group.ndparams)))
13834 if self.op.diskparams:
13835 self.group.diskparams = self.new_diskparams
13836 result.append(("diskparams", str(self.group.diskparams)))
13838 if self.op.alloc_policy:
13839 self.group.alloc_policy = self.op.alloc_policy
13841 if self.op.hv_state:
13842 self.group.hv_state_static = self.new_hv_state
13844 if self.op.disk_state:
13845 self.group.disk_state_static = self.new_disk_state
13847 if self.op.ipolicy:
13848 self.group.ipolicy = self.new_ipolicy
13850 self.cfg.Update(self.group, feedback_fn)
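# The parameter updates above go through _GetUpdatedParams, i.e. the existing
# group dict merged with the user-supplied values, new values winning.  A
# simplified sketch of that merge (the real helper also understands special
# markers for resetting keys, which this version omits):
def _ExampleUpdateParams(old_params, update):
  """Returns old_params overridden by update, leaving both inputs untouched."""
  merged = dict(old_params)
  merged.update(update)
  return merged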
13854 class LUGroupRemove(LogicalUnit):
13855 HPATH = "group-remove"
13856 HTYPE = constants.HTYPE_GROUP
13859 def ExpandNames(self):
13860 # This raises errors.OpPrereqError on its own:
13861 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13862 self.needed_locks = {
13863 locking.LEVEL_NODEGROUP: [self.group_uuid],
13866 def CheckPrereq(self):
13867 """Check prerequisites.
13869 This checks that the given group name exists as a node group, that it is
13870 empty (i.e., contains no nodes), and that it is not the last group of the
13874 # Verify that the group is empty.
13875 group_nodes = [node.name
13876 for node in self.cfg.GetAllNodesInfo().values()
13877 if node.group == self.group_uuid]
13880 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13882 (self.op.group_name,
13883 utils.CommaJoin(utils.NiceSort(group_nodes))),
13884 errors.ECODE_STATE)
13886 # Verify the cluster would not be left group-less.
13887 if len(self.cfg.GetNodeGroupList()) == 1:
13888 raise errors.OpPrereqError("Group '%s' is the only group,"
13889 " cannot be removed" %
13890 self.op.group_name,
13891 errors.ECODE_STATE)
13893 def BuildHooksEnv(self):
13894 """Build hooks env.
13898 "GROUP_NAME": self.op.group_name,
13901 def BuildHooksNodes(self):
13902 """Build hooks nodes.
13905 mn = self.cfg.GetMasterNode()
13906 return ([mn], [mn])
13908 def Exec(self, feedback_fn):
13909 """Remove the node group.
13913 self.cfg.RemoveNodeGroup(self.group_uuid)
13914 except errors.ConfigurationError:
13915 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13916 (self.op.group_name, self.group_uuid))
13918 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13921 class LUGroupRename(LogicalUnit):
13922 HPATH = "group-rename"
13923 HTYPE = constants.HTYPE_GROUP
13926 def ExpandNames(self):
13927 # This raises errors.OpPrereqError on its own:
13928 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13930 self.needed_locks = {
13931 locking.LEVEL_NODEGROUP: [self.group_uuid],
13934 def CheckPrereq(self):
13935 """Check prerequisites.
13937 Ensures requested new name is not yet used.
13941 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13942 except errors.OpPrereqError:
13945 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13946 " node group (UUID: %s)" %
13947 (self.op.new_name, new_name_uuid),
13948 errors.ECODE_EXISTS)
13950 def BuildHooksEnv(self):
13951 """Build hooks env.
13955 "OLD_NAME": self.op.group_name,
13956 "NEW_NAME": self.op.new_name,
13959 def BuildHooksNodes(self):
13960 """Build hooks nodes.
13963 mn = self.cfg.GetMasterNode()
13965 all_nodes = self.cfg.GetAllNodesInfo()
13966 all_nodes.pop(mn, None)
13969 run_nodes.extend(node.name for node in all_nodes.values()
13970 if node.group == self.group_uuid)
13972 return (run_nodes, run_nodes)
13974 def Exec(self, feedback_fn):
13975 """Rename the node group.
13978 group = self.cfg.GetNodeGroup(self.group_uuid)
13981 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13982 (self.op.group_name, self.group_uuid))
13984 group.name = self.op.new_name
13985 self.cfg.Update(group, feedback_fn)
13987 return self.op.new_name
13990 class LUGroupEvacuate(LogicalUnit):
13991 HPATH = "group-evacuate"
13992 HTYPE = constants.HTYPE_GROUP
13995 def ExpandNames(self):
13996 # This raises errors.OpPrereqError on its own:
13997 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13999 if self.op.target_groups:
14000 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14001 self.op.target_groups)
14003 self.req_target_uuids = []
14005 if self.group_uuid in self.req_target_uuids:
14006 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14007 " as a target group (targets are %s)" %
14009 utils.CommaJoin(self.req_target_uuids)),
14010 errors.ECODE_INVAL)
14012 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14014 self.share_locks = _ShareAll()
14015 self.needed_locks = {
14016 locking.LEVEL_INSTANCE: [],
14017 locking.LEVEL_NODEGROUP: [],
14018 locking.LEVEL_NODE: [],
14021 def DeclareLocks(self, level):
14022 if level == locking.LEVEL_INSTANCE:
14023 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14025 # Lock instances optimistically, needs verification once node and group
14026 # locks have been acquired
14027 self.needed_locks[locking.LEVEL_INSTANCE] = \
14028 self.cfg.GetNodeGroupInstances(self.group_uuid)
14030 elif level == locking.LEVEL_NODEGROUP:
14031 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14033 if self.req_target_uuids:
14034 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14036 # Lock all groups used by instances optimistically; this requires going
14037 # via the node before it's locked, requiring verification later on
14038 lock_groups.update(group_uuid
14039 for instance_name in
14040 self.owned_locks(locking.LEVEL_INSTANCE)
14042 self.cfg.GetInstanceNodeGroups(instance_name))
14044 # No target groups, need to lock all of them
14045 lock_groups = locking.ALL_SET
14047 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14049 elif level == locking.LEVEL_NODE:
14050 # This will only lock the nodes in the group to be evacuated which
14051 # contain actual instances
14052 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14053 self._LockInstancesNodes()
14055 # Lock all nodes in group to be evacuated and target groups
14056 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14057 assert self.group_uuid in owned_groups
14058 member_nodes = [node_name
14059 for group in owned_groups
14060 for node_name in self.cfg.GetNodeGroup(group).members]
14061 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14063 def CheckPrereq(self):
14064 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14065 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14066 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14068 assert owned_groups.issuperset(self.req_target_uuids)
14069 assert self.group_uuid in owned_groups
14071 # Check if locked instances are still correct
14072 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14074 # Get instance information
14075 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14077 # Check if node groups for locked instances are still correct
14078 for instance_name in owned_instances:
14079 inst = self.instances[instance_name]
14080 assert owned_nodes.issuperset(inst.all_nodes), \
14081 "Instance %s's nodes changed while we kept the lock" % instance_name
14083 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
14086 assert self.group_uuid in inst_groups, \
14087 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
14089 if self.req_target_uuids:
14090 # User requested specific target groups
14091 self.target_uuids = self.req_target_uuids
14093 # All groups except the one to be evacuated are potential targets
14094 self.target_uuids = [group_uuid for group_uuid in owned_groups
14095 if group_uuid != self.group_uuid]
14097 if not self.target_uuids:
14098 raise errors.OpPrereqError("There are no possible target groups",
14099 errors.ECODE_INVAL)
14101 def BuildHooksEnv(self):
14102 """Build hooks env.
14106 "GROUP_NAME": self.op.group_name,
14107 "TARGET_GROUPS": " ".join(self.target_uuids),
14110 def BuildHooksNodes(self):
14111 """Build hooks nodes.
14114 mn = self.cfg.GetMasterNode()
14116 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14118 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14120 return (run_nodes, run_nodes)
14122 def Exec(self, feedback_fn):
14123 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14125 assert self.group_uuid not in self.target_uuids
14127 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14128 instances=instances, target_groups=self.target_uuids)
14130 ial.Run(self.op.iallocator)
14132 if not ial.success:
14133 raise errors.OpPrereqError("Can't compute group evacuation using"
14134 " iallocator '%s': %s" %
14135 (self.op.iallocator, ial.info),
14136 errors.ECODE_NORES)
14138 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14140 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14141 len(jobs), self.op.group_name)
14143 return ResultWithJobs(jobs)
14146 class TagsLU(NoHooksLU): # pylint: disable=W0223
14147 """Generic tags LU.
14149 This is an abstract class which is the parent of all the other tags LUs.
14152 def ExpandNames(self):
14153 self.group_uuid = None
14154 self.needed_locks = {}
14155 if self.op.kind == constants.TAG_NODE:
14156 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14157 self.needed_locks[locking.LEVEL_NODE] = self.op.name
14158 elif self.op.kind == constants.TAG_INSTANCE:
14159 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14160 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14161 elif self.op.kind == constants.TAG_NODEGROUP:
14162 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14164 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14165 # not possible to acquire the BGL based on opcode parameters)
14167 def CheckPrereq(self):
14168 """Check prerequisites.
14171 if self.op.kind == constants.TAG_CLUSTER:
14172 self.target = self.cfg.GetClusterInfo()
14173 elif self.op.kind == constants.TAG_NODE:
14174 self.target = self.cfg.GetNodeInfo(self.op.name)
14175 elif self.op.kind == constants.TAG_INSTANCE:
14176 self.target = self.cfg.GetInstanceInfo(self.op.name)
14177 elif self.op.kind == constants.TAG_NODEGROUP:
14178 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14180 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14181 str(self.op.kind), errors.ECODE_INVAL)
14184 class LUTagsGet(TagsLU):
14185 """Returns the tags of a given object.
14190 def ExpandNames(self):
14191 TagsLU.ExpandNames(self)
14193 # Share locks as this is only a read operation
14194 self.share_locks = _ShareAll()
14196 def Exec(self, feedback_fn):
14197 """Returns the tag list.
14200 return list(self.target.GetTags())
14203 class LUTagsSearch(NoHooksLU):
14204 """Searches the tags for a given pattern.
14209 def ExpandNames(self):
14210 self.needed_locks = {}
14212 def CheckPrereq(self):
14213 """Check prerequisites.
14215 This checks the pattern passed for validity by compiling it.
14219 self.re = re.compile(self.op.pattern)
14220 except re.error, err:
14221 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14222 (self.op.pattern, err), errors.ECODE_INVAL)
14224 def Exec(self, feedback_fn):
14225 """Returns the tag list.
14229 tgts = [("/cluster", cfg.GetClusterInfo())]
14230 ilist = cfg.GetAllInstancesInfo().values()
14231 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14232 nlist = cfg.GetAllNodesInfo().values()
14233 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14234 tgts.extend(("/nodegroup/%s" % n.name, n)
14235 for n in cfg.GetAllNodeGroupsInfo().values())
14237 for path, target in tgts:
14238 for tag in target.GetTags():
14239 if self.re.search(tag):
14240 results.append((path, tag))
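# Tag search is a plain regular-expression scan over every (path, tags) pair.
# A stand-alone version working on a simple path -> tag-list dict
# (hypothetical data shape):
import re

def _ExampleSearchTags(pattern, tags_by_path):
  """Returns sorted (path, tag) pairs whose tag matches the given regex."""
  rx = re.compile(pattern)
  return [(path, tag)
          for path, tags in sorted(tags_by_path.items())
          for tag in tags
          if rx.search(tag)]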
14244 class LUTagsSet(TagsLU):
14245 """Sets a tag on a given object.
14250 def CheckPrereq(self):
14251 """Check prerequisites.
14253 This checks the type and length of the tag name and value.
14256 TagsLU.CheckPrereq(self)
14257 for tag in self.op.tags:
14258 objects.TaggableObject.ValidateTag(tag)
14260 def Exec(self, feedback_fn):
14265 for tag in self.op.tags:
14266 self.target.AddTag(tag)
14267 except errors.TagError, err:
14268 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14269 self.cfg.Update(self.target, feedback_fn)
14272 class LUTagsDel(TagsLU):
14273 """Delete a list of tags from a given object.
14278 def CheckPrereq(self):
14279 """Check prerequisites.
14281 This checks that we have the given tag.
14284 TagsLU.CheckPrereq(self)
14285 for tag in self.op.tags:
14286 objects.TaggableObject.ValidateTag(tag)
14287 del_tags = frozenset(self.op.tags)
14288 cur_tags = self.target.GetTags()
14290 diff_tags = del_tags - cur_tags
14292 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14293 raise errors.OpPrereqError("Tag(s) %s not found" %
14294 (utils.CommaJoin(diff_names), ),
14295 errors.ECODE_NOENT)
14297 def Exec(self, feedback_fn):
14298 """Remove the tag from the object.
14301 for tag in self.op.tags:
14302 self.target.RemoveTag(tag)
14303 self.cfg.Update(self.target, feedback_fn)
14306 class LUTestDelay(NoHooksLU):
14307 """Sleep for a specified amount of time.
14309 This LU sleeps on the master and/or nodes for a specified amount of time.
14315 def ExpandNames(self):
14316 """Expand names and set required locks.
14318 This expands the node list, if any.
14321 self.needed_locks = {}
14322 if self.op.on_nodes:
14323 # _GetWantedNodes can be used here, but is not always appropriate to use
14324 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14325 # more information.
14326 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14327 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14329 def _TestDelay(self):
14330 """Do the actual sleep.
14333 if self.op.on_master:
14334 if not utils.TestDelay(self.op.duration):
14335 raise errors.OpExecError("Error during master delay test")
14336 if self.op.on_nodes:
14337 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14338 for node, node_result in result.items():
14339 node_result.Raise("Failure during rpc call to node %s" % node)
14341 def Exec(self, feedback_fn):
14342 """Execute the test delay opcode, with the wanted repetitions.
14345 if self.op.repeat == 0:
14348 top_value = self.op.repeat - 1
14349 for i in range(self.op.repeat):
14350 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14354 class LUTestJqueue(NoHooksLU):
14355 """Utility LU to test some aspects of the job queue.
14360 # Must be lower than default timeout for WaitForJobChange to see whether it
14361 # notices changed jobs
14362 _CLIENT_CONNECT_TIMEOUT = 20.0
14363 _CLIENT_CONFIRM_TIMEOUT = 60.0
14366 def _NotifyUsingSocket(cls, cb, errcls):
14367 """Opens a Unix socket and waits for another program to connect.
14370 @param cb: Callback to send socket name to client
14371 @type errcls: class
14372 @param errcls: Exception class to use for errors
14375 # Using a temporary directory as there's no easy way to create temporary
14376 # sockets without writing a custom loop around tempfile.mktemp and
14378 tmpdir = tempfile.mkdtemp()
14380 tmpsock = utils.PathJoin(tmpdir, "sock")
14382 logging.debug("Creating temporary socket at %s", tmpsock)
14383 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14388 # Send details to client
14391 # Wait for client to connect before continuing
14392 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14394 (conn, _) = sock.accept()
14395 except socket.error, err:
14396 raise errcls("Client didn't connect in time (%s)" % err)
14400 # Remove as soon as client is connected
14401 shutil.rmtree(tmpdir)
14403 # Wait for client to close
14406 # pylint: disable=E1101
14407 # Instance of '_socketobject' has no ... member
14408 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14410 except socket.error, err:
14411 raise errcls("Client failed to confirm notification (%s)" % err)
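# The counterpart of _NotifyUsingSocket above is a client that connects to the
# advertised socket path and closes the connection again to acknowledge the
# notification.  A minimal, illustrative client (timeout value and error
# handling are assumptions, not the actual test client):
import socket

def _ExampleConfirmNotification(sockname, timeout=10.0):
  """Connects to the given Unix socket and closes it to acknowledge."""
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  sock.settimeout(timeout)
  try:
    sock.connect(sockname)
  finally:
    sock.close()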
14415 def _SendNotification(self, test, arg, sockname):
14416 """Sends a notification to the client.
14419 @param test: Test name
14420 @param arg: Test argument (depends on test)
14421 @type sockname: string
14422 @param sockname: Socket path
14425 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14427 def _Notify(self, prereq, test, arg):
14428 """Notifies the client of a test.
14431 @param prereq: Whether this is a prereq-phase test
14433 @param test: Test name
14434 @param arg: Test argument (depends on test)
14438 errcls = errors.OpPrereqError
14440 errcls = errors.OpExecError
14442 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14446 def CheckArguments(self):
14447 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14448 self.expandnames_calls = 0
14450 def ExpandNames(self):
14451 checkargs_calls = getattr(self, "checkargs_calls", 0)
14452 if checkargs_calls < 1:
14453 raise errors.ProgrammerError("CheckArguments was not called")
14455 self.expandnames_calls += 1
14457 if self.op.notify_waitlock:
14458 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14460 self.LogInfo("Expanding names")
14462 # Get lock on master node (just to get a lock, not for a particular reason)
14463 self.needed_locks = {
14464 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14467 def Exec(self, feedback_fn):
14468 if self.expandnames_calls < 1:
14469 raise errors.ProgrammerError("ExpandNames was not called")
14471 if self.op.notify_exec:
14472 self._Notify(False, constants.JQT_EXEC, None)
14474 self.LogInfo("Executing")
14476 if self.op.log_messages:
14477 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14478 for idx, msg in enumerate(self.op.log_messages):
14479 self.LogInfo("Sending log message %s", idx + 1)
14480 feedback_fn(constants.JQT_MSGPREFIX + msg)
14481 # Report how many test messages have been sent
14482 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14485 raise errors.OpExecError("Opcode failure was requested")
14490 class IAllocator(object):
14491 """IAllocator framework.
14493 An IAllocator instance has four sets of attributes:
14494 - cfg that is needed to query the cluster
14495 - input data (all members of the _KEYS class attribute are required)
14496 - four buffer attributes (in|out_data|text), that represent the
14497 input (to the external script) in text and data structure format,
14498 and the output from it, again in two formats
14499 - the result variables from the script (success, info, nodes) for
14503 # pylint: disable=R0902
14504 # lots of instance attributes
14506 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14508 self.rpc = rpc_runner
14509 # init buffer variables
14510 self.in_text = self.out_text = self.in_data = self.out_data = None
14511 # init all input fields so that pylint is happy
14513 self.memory = self.disks = self.disk_template = None
14514 self.os = self.tags = self.nics = self.vcpus = None
14515 self.hypervisor = None
14516 self.relocate_from = None
14518 self.instances = None
14519 self.evac_mode = None
14520 self.target_groups = []
14522 self.required_nodes = None
14523 # init result fields
14524 self.success = self.info = self.result = None
14527 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14529 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14530 " IAllocator" % self.mode)
14532 keyset = [n for (n, _) in keydata]
14535 if key not in keyset:
14536 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14537 " IAllocator" % key)
14538 setattr(self, key, kwargs[key])
14541 if key not in kwargs:
14542 raise errors.ProgrammerError("Missing input parameter '%s' to"
14543 " IAllocator" % key)
14544 self._BuildInputData(compat.partial(fn, self), keydata)
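# The constructor above validates **kwargs against the per-mode key
# specification in both directions: no unknown and no missing parameters.  A
# stand-alone version of that check (spec format assumed to be a plain
# iterable of key names):
def _ExampleCheckKwargs(keyset, kwargs):
  """Raises ValueError on unknown or missing keyword arguments."""
  unknown = [key for key in kwargs if key not in keyset]
  missing = [key for key in keyset if key not in kwargs]
  if unknown:
    raise ValueError("Invalid input parameter(s): %s" % ", ".join(unknown))
  if missing:
    raise ValueError("Missing input parameter(s): %s" % ", ".join(missing))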
14546 def _ComputeClusterData(self):
14547 """Compute the generic allocator input data.
14549 This is the data that is independent of the actual operation.
14553 cluster_info = cfg.GetClusterInfo()
14556 "version": constants.IALLOCATOR_VERSION,
14557 "cluster_name": cfg.GetClusterName(),
14558 "cluster_tags": list(cluster_info.GetTags()),
14559 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14560 "ipolicy": cluster_info.ipolicy,
14562 ninfo = cfg.GetAllNodesInfo()
14563 iinfo = cfg.GetAllInstancesInfo().values()
14564 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14567 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14569 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14570 hypervisor_name = self.hypervisor
14571 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14572 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14574 hypervisor_name = cluster_info.primary_hypervisor
14576 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14579 self.rpc.call_all_instances_info(node_list,
14580 cluster_info.enabled_hypervisors)
14582 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14584 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14585 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14586 i_list, config_ndata)
14587 assert len(data["nodes"]) == len(ninfo), \
14588 "Incomplete node data computed"
14590 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14592 self.in_data = data
14595 def _ComputeNodeGroupData(cfg):
14596 """Compute node groups data.
14599 cluster = cfg.GetClusterInfo()
14600 ng = dict((guuid, {
14601 "name": gdata.name,
14602 "alloc_policy": gdata.alloc_policy,
14603 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14605 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14610 def _ComputeBasicNodeData(cfg, node_cfg):
14611 """Compute global node data.
14614 @returns: a dict of name: (node dict, node config)
14617 # fill in static (config-based) values
14618 node_results = dict((ninfo.name, {
14619 "tags": list(ninfo.GetTags()),
14620 "primary_ip": ninfo.primary_ip,
14621 "secondary_ip": ninfo.secondary_ip,
14622 "offline": ninfo.offline,
14623 "drained": ninfo.drained,
14624 "master_candidate": ninfo.master_candidate,
14625 "group": ninfo.group,
14626 "master_capable": ninfo.master_capable,
14627 "vm_capable": ninfo.vm_capable,
14628 "ndparams": cfg.GetNdParams(ninfo),
14630 for ninfo in node_cfg.values())
14632 return node_results
14635 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14637 """Compute global node data.
14639 @param node_results: the basic node structures as filled from the config
14642 # TODO(dynmem): compute the right data on MAX and MIN memory
14643 # make a copy of the current dict
14644 node_results = dict(node_results)
14645 for nname, nresult in node_data.items():
14646 assert nname in node_results, "Missing basic data for node %s" % nname
14647 ninfo = node_cfg[nname]
14649 if not (ninfo.offline or ninfo.drained):
14650 nresult.Raise("Can't get data for node %s" % nname)
14651 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14653 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14655 for attr in ["memory_total", "memory_free", "memory_dom0",
14656 "vg_size", "vg_free", "cpu_total"]:
14657 if attr not in remote_info:
14658 raise errors.OpExecError("Node '%s' didn't return attribute"
14659 " '%s'" % (nname, attr))
14660 if not isinstance(remote_info[attr], int):
14661 raise errors.OpExecError("Node '%s' returned invalid value"
14663 (nname, attr, remote_info[attr]))
14664 # compute memory used by primary instances
14665 i_p_mem = i_p_up_mem = 0
14666 for iinfo, beinfo in i_list:
14667 if iinfo.primary_node == nname:
14668 i_p_mem += beinfo[constants.BE_MAXMEM]
14669 if iinfo.name not in node_iinfo[nname].payload:
14672 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14673 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14674 remote_info["memory_free"] -= max(0, i_mem_diff)
14676 if iinfo.admin_state == constants.ADMINST_UP:
14677 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14679 # compute memory used by instances
14681 "total_memory": remote_info["memory_total"],
14682 "reserved_memory": remote_info["memory_dom0"],
14683 "free_memory": remote_info["memory_free"],
14684 "total_disk": remote_info["vg_size"],
14685 "free_disk": remote_info["vg_free"],
14686 "total_cpus": remote_info["cpu_total"],
14687 "i_pri_memory": i_p_mem,
14688 "i_pri_up_memory": i_p_up_mem,
14690 pnr_dyn.update(node_results[nname])
14691 node_results[nname] = pnr_dyn
14693 return node_results
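# Sketch (illustrative): for an online, vm_capable node the static entry from
# _ComputeBasicNodeData is extended with the runtime fields computed above:
# "total_memory", "reserved_memory", "free_memory", "total_disk", "free_disk",
# "total_cpus", "i_pri_memory" and "i_pri_up_memory".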
14696 def _ComputeInstanceData(cluster_info, i_list):
14697 """Compute global instance data.
14701 for iinfo, beinfo in i_list:
14703 for nic in iinfo.nics:
14704 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14708 "mode": filled_params[constants.NIC_MODE],
14709 "link": filled_params[constants.NIC_LINK],
14711 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14712 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14713 nic_data.append(nic_dict)
14715 "tags": list(iinfo.GetTags()),
14716 "admin_state": iinfo.admin_state,
14717 "vcpus": beinfo[constants.BE_VCPUS],
14718 "memory": beinfo[constants.BE_MAXMEM],
14720 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14722 "disks": [{constants.IDISK_SIZE: dsk.size,
14723 constants.IDISK_MODE: dsk.mode}
14724 for dsk in iinfo.disks],
14725 "disk_template": iinfo.disk_template,
14726 "hypervisor": iinfo.hypervisor,
14728 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14730 instance_data[iinfo.name] = pir
14732 return instance_data
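# Example entry (invented values, fields as built above) of the per-instance
# data keyed by instance name:
#   "inst1.example.com": {"tags": [], "admin_state": "up", "vcpus": 2,
#                         "memory": 1024, "nics": [...],
#                         "nodes": ["node1", "node2"],
#                         "disks": [{"size": 10240, "mode": "rw"}],
#                         "disk_template": "drbd", "hypervisor": "xen-pvm",
#                         "disk_space_total": ...}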
14734 def _AddNewInstance(self):
14735 """Add new instance data to allocator structure.
14737 This in combination with _ComputeClusterData will create the
14738 correct structure needed as input for the allocator.
14740 The checks for the completeness of the opcode must have already been
14744 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14746 if self.disk_template in constants.DTS_INT_MIRROR:
14747 self.required_nodes = 2
14749 self.required_nodes = 1
14753 "disk_template": self.disk_template,
14756 "vcpus": self.vcpus,
14757 "memory": self.memory,
14758 "disks": self.disks,
14759 "disk_space_total": disk_space,
14761 "required_nodes": self.required_nodes,
14762 "hypervisor": self.hypervisor,
14767 def _AddRelocateInstance(self):
14768 """Add relocate instance data to allocator structure.
14770 This in combination with _ComputeClusterData will create the
14771 correct structure needed as input for the allocator.
14773 The checks for the completeness of the opcode must have already been
14777 instance = self.cfg.GetInstanceInfo(self.name)
14778 if instance is None:
14779 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14780 " IAllocator" % self.name)
14782 if instance.disk_template not in constants.DTS_MIRRORED:
14783 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14784 errors.ECODE_INVAL)
14786 if instance.disk_template in constants.DTS_INT_MIRROR and \
14787 len(instance.secondary_nodes) != 1:
14788 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14789 errors.ECODE_STATE)
14791 self.required_nodes = 1
14792 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14793 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14797 "disk_space_total": disk_space,
14798 "required_nodes": self.required_nodes,
14799 "relocate_from": self.relocate_from,
14803 def _AddNodeEvacuate(self):
14804 """Get data for node-evacuate requests.
14808 "instances": self.instances,
14809 "evac_mode": self.evac_mode,
14812 def _AddChangeGroup(self):
14813 """Get data for node-evacuate requests.
14817 "instances": self.instances,
14818 "target_groups": self.target_groups,
14821 def _BuildInputData(self, fn, keydata):
14822 """Build input data structures.
14825 self._ComputeClusterData()
14828 request["type"] = self.mode
14829 for keyname, keytype in keydata:
14830 if keyname not in request:
14831 raise errors.ProgrammerError("Request parameter %s is missing" %
14833 val = request[keyname]
14834 if not keytype(val):
14835 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14836 " validation, value %s, expected"
14837 " type %s" % (keyname, val, keytype))
14838 self.in_data["request"] = request
14840 self.in_text = serializer.Dump(self.in_data)
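# Illustrative example (assumption, values invented): for a relocation the
# serialized "request" section roughly looks like
#   {"type": "relocate", "name": "inst1.example.com",
#    "disk_space_total": 10240, "required_nodes": 1,
#    "relocate_from": ["node2.example.com"]}
# i.e. the keys declared for the mode in _MODE_DATA plus the "type" field
# added above.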
14842 _STRING_LIST = ht.TListOf(ht.TString)
14843 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14844 # pylint: disable=E1101
14845 # Class '...' has no 'OP_ID' member
14846 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14847 opcodes.OpInstanceMigrate.OP_ID,
14848 opcodes.OpInstanceReplaceDisks.OP_ID])
14852 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14853 ht.TItems([ht.TNonEmptyString,
14854 ht.TNonEmptyString,
14855 ht.TListOf(ht.TNonEmptyString),
14858 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14859 ht.TItems([ht.TNonEmptyString,
14862 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14863 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
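# Sketch of a node-evacuate reply accepted by _NEVAC_RESULT (data invented):
# a triple of moved instances, failed instances and job sets, e.g.
#   [[["inst1", "group1", ["node3"]]],
#    [["inst2", "disk not accessible"]],
#    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]]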
14866 constants.IALLOCATOR_MODE_ALLOC:
14869 ("name", ht.TString),
14870 ("memory", ht.TInt),
14871 ("disks", ht.TListOf(ht.TDict)),
14872 ("disk_template", ht.TString),
14873 ("os", ht.TString),
14874 ("tags", _STRING_LIST),
14875 ("nics", ht.TListOf(ht.TDict)),
14876 ("vcpus", ht.TInt),
14877 ("hypervisor", ht.TString),
14879 constants.IALLOCATOR_MODE_RELOC:
14880 (_AddRelocateInstance,
14881 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14883 constants.IALLOCATOR_MODE_NODE_EVAC:
14884 (_AddNodeEvacuate, [
14885 ("instances", _STRING_LIST),
14886 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14888 constants.IALLOCATOR_MODE_CHG_GROUP:
14889 (_AddChangeGroup, [
14890 ("instances", _STRING_LIST),
14891 ("target_groups", _STRING_LIST),
14895 def Run(self, name, validate=True, call_fn=None):
14896 """Run an instance allocator and return the results.
14899 if call_fn is None:
14900 call_fn = self.rpc.call_iallocator_runner
14902 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14903 result.Raise("Failure while running the iallocator script")
14905 self.out_text = result.payload
14907 self._ValidateResult()
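# Usage sketch (illustrative, not from the original source): a typical caller
# builds the input, runs the allocator and checks the outcome, e.g.
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info,
#                                errors.ECODE_NORES)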
14909 def _ValidateResult(self):
14910 """Process the allocator results.
14912 This will process the allocator output and, if successful, save the
14913 result in self.out_data and the other result attributes.
14917 rdict = serializer.Load(self.out_text)
14918 except Exception, err:
14919 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14921 if not isinstance(rdict, dict):
14922 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14924 # TODO: remove backwards compatibility in later versions
14925 if "nodes" in rdict and "result" not in rdict:
14926 rdict["result"] = rdict["nodes"]
14929 for key in "success", "info", "result":
14930 if key not in rdict:
14931 raise errors.OpExecError("Can't parse iallocator results:"
14932 " missing key '%s'" % key)
14933 setattr(self, key, rdict[key])
14935 if not self._result_check(self.result):
14936 raise errors.OpExecError("Iallocator returned invalid result,"
14937 " expected %s, got %s" %
14938 (self._result_check, self.result),
14939 errors.ECODE_INVAL)
14941 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14942 assert self.relocate_from is not None
14943 assert self.required_nodes == 1
14945 node2group = dict((name, ndata["group"])
14946 for (name, ndata) in self.in_data["nodes"].items())
14948 fn = compat.partial(self._NodesToGroups, node2group,
14949 self.in_data["nodegroups"])
14951 instance = self.cfg.GetInstanceInfo(self.name)
14952 request_groups = fn(self.relocate_from + [instance.primary_node])
14953 result_groups = fn(rdict["result"] + [instance.primary_node])
14955 if self.success and not set(result_groups).issubset(request_groups):
14956 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14957 " differ from original groups (%s)" %
14958 (utils.CommaJoin(result_groups),
14959 utils.CommaJoin(request_groups)))
14961 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14962 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14964 self.out_data = rdict
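# Example (invented) of a well-formed reply as validated above, after JSON
# parsing:
#   {"success": True, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
# The three mandatory keys must be present and "result" must pass
# self._result_check for the current mode.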
14967 def _NodesToGroups(node2group, groups, nodes):
14968 """Returns a list of unique group names for a list of nodes.
14970 @type node2group: dict
14971 @param node2group: Map from node name to group UUID
14973 @param groups: Group information
14975 @param nodes: Node names
14982 group_uuid = node2group[node]
14984 # Ignore unknown node
14988 group = groups[group_uuid]
14990 # Can't find group, let's use UUID
14991 group_name = group_uuid
14993 group_name = group["name"]
14995 result.add(group_name)
14997 return sorted(result)
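# Worked example (invented data): with
#   node2group = {"n1": "uuid-a", "n2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}
# _NodesToGroups(node2group, groups, ["n1", "n2", "n3"]) returns
# ["default", "uuid-b"]: "n3" is unknown and skipped, and "uuid-b" has no
# group entry, so its UUID is used in place of a name.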
15000 class LUTestAllocator(NoHooksLU):
15001 """Run allocator tests.
15003 This LU runs the allocator tests.
15006 def CheckPrereq(self):
15007 """Check prerequisites.
15009 This checks the opcode parameters depending on the direction and mode of the test.
15012 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15013 for attr in ["memory", "disks", "disk_template",
15014 "os", "tags", "nics", "vcpus"]:
15015 if not hasattr(self.op, attr):
15016 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15017 attr, errors.ECODE_INVAL)
15018 iname = self.cfg.ExpandInstanceName(self.op.name)
15019 if iname is not None:
15020 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15021 iname, errors.ECODE_EXISTS)
15022 if not isinstance(self.op.nics, list):
15023 raise errors.OpPrereqError("Invalid parameter 'nics'",
15024 errors.ECODE_INVAL)
15025 if not isinstance(self.op.disks, list):
15026 raise errors.OpPrereqError("Invalid parameter 'disks'",
15027 errors.ECODE_INVAL)
15028 for row in self.op.disks:
15029 if (not isinstance(row, dict) or
15030 constants.IDISK_SIZE not in row or
15031 not isinstance(row[constants.IDISK_SIZE], int) or
15032 constants.IDISK_MODE not in row or
15033 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15034 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15035 " parameter", errors.ECODE_INVAL)
15036 if self.op.hypervisor is None:
15037 self.op.hypervisor = self.cfg.GetHypervisorType()
15038 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15039 fname = _ExpandInstanceName(self.cfg, self.op.name)
15040 self.op.name = fname
15041 self.relocate_from = \
15042 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15043 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15044 constants.IALLOCATOR_MODE_NODE_EVAC):
15045 if not self.op.instances:
15046 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15047 self.op.instances = _GetWantedInstances(self, self.op.instances)
15049 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15050 self.op.mode, errors.ECODE_INVAL)
15052 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15053 if self.op.allocator is None:
15054 raise errors.OpPrereqError("Missing allocator name",
15055 errors.ECODE_INVAL)
15056 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15057 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15058 self.op.direction, errors.ECODE_INVAL)
15060 def Exec(self, feedback_fn):
15061 """Run the allocator test.
15064 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15065 ial = IAllocator(self.cfg, self.rpc,
15068 memory=self.op.memory,
15069 disks=self.op.disks,
15070 disk_template=self.op.disk_template,
15074 vcpus=self.op.vcpus,
15075 hypervisor=self.op.hypervisor,
15077 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15078 ial = IAllocator(self.cfg, self.rpc,
15081 relocate_from=list(self.relocate_from),
15083 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15084 ial = IAllocator(self.cfg, self.rpc,
15086 instances=self.op.instances,
15087 target_groups=self.op.target_groups)
15088 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15089 ial = IAllocator(self.cfg, self.rpc,
15091 instances=self.op.instances,
15092 evac_mode=self.op.evac_mode)
15094 raise errors.ProgrammerError("Uncatched mode %s in"
15095 " LUTestAllocator.Exec", self.op.mode)
15097 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15098 result = ial.in_text
15100 ial.Run(self.op.allocator, validate=False)
15101 result = ial.out_text
15105 #: Query type implementations
15107 constants.QR_INSTANCE: _InstanceQuery,
15108 constants.QR_NODE: _NodeQuery,
15109 constants.QR_GROUP: _GroupQuery,
15110 constants.QR_OS: _OsQuery,
15113 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15116 def _GetQueryImplementation(name):
15117 """Returns the implemtnation for a query type.
15119 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15123 return _QUERY_IMPL[name]
15125 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15126 errors.ECODE_INVAL)
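# Usage sketch (illustrative): query-handling LUs resolve the implementation
# class via something like
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
# while an unknown resource name raises OpPrereqError as above.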