# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar

# C0302: since we have waaaay too many lines in this module
import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611
#: Size of DRBD meta block device

INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method does a simple syntactic check and ensures the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """
  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check whether we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
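
# Illustrative sketch (not part of the original module): how a hypothetical
# LU typically combines _ExpandAndLockInstance, DeclareLocks and
# _LockInstancesNodes. The class name below is made up; the pattern follows
# the docstrings above.
#
#   class LUExampleInstanceOp(LogicalUnit):          # hypothetical LU
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()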
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
    """Check prerequisites for this tasklet.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
496 def __init__(self, qfilter, fields, use_locking):
497 """Initializes this class.
500 self.use_locking = use_locking
502 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
504 self.requested_data = self.query.RequestedData()
505 self.names = self.query.RequestedNames()
507 # Sort only if no names were requested
508 self.sort_by_name = not self.names
510 self.do_locking = None
513 def _GetNames(self, lu, all_names, lock_level):
514 """Helper function to determine names asked for in the query.
518 names = lu.owned_locks(lock_level)
522 if self.wanted == locking.ALL_SET:
523 assert not self.names
524 # caller didn't specify names, so ordering is not important
525 return utils.NiceSort(names)
527 # caller specified names and we must keep the same order
529 assert not self.do_locking or lu.glm.is_owned(lock_level)
531 missing = set(self.wanted).difference(names)
533 raise errors.OpExecError("Some items were removed before retrieving"
534 " their data: %s" % missing)
536 # Return expanded names
539 def ExpandNames(self, lu):
540 """Expand names for this query.
542 See L{LogicalUnit.ExpandNames}.
545 raise NotImplementedError()
547 def DeclareLocks(self, lu, level):
548 """Declare locks for this query.
550 See L{LogicalUnit.DeclareLocks}.
553 raise NotImplementedError()
555 def _GetQueryData(self, lu):
556 """Collects all data for this query.
558 @return: Query data object
561 raise NotImplementedError()
563 def NewStyleQuery(self, lu):
564 """Collect data and execute query.
567 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
568 sort_by_name=self.sort_by_name)
570 def OldStyleQuery(self, lu):
571 """Collect data and execute query.
574 return self.query.OldStyleQuery(self._GetQueryData(lu),
575 sort_by_name=self.sort_by_name)
579 """Returns a dict declaring all lock levels shared.
582 return dict.fromkeys(locking.LEVELS, 1)
585 def _MakeLegacyNodeInfo(data):
586 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
588 Converts the data into a single dictionary. This is fine for most use cases,
589 but some require information from more than one volume group or hypervisor.
592 (bootid, (vg_info, ), (hv_info, )) = data
594 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
599 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
600 """Checks if the owned node groups are still correct for an instance.
602 @type cfg: L{config.ConfigWriter}
603 @param cfg: The cluster configuration
604 @type instance_name: string
605 @param instance_name: Instance name
606 @type owned_groups: set or frozenset
607 @param owned_groups: List of currently owned node groups
610 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
612 if not owned_groups.issuperset(inst_groups):
613 raise errors.OpPrereqError("Instance %s's node groups changed since"
614 " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
618 utils.CommaJoin(inst_groups),
619 utils.CommaJoin(owned_groups)),
625 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
626 """Checks if the instances in a node group are still correct.
628 @type cfg: L{config.ConfigWriter}
629 @param cfg: The cluster configuration
630 @type group_uuid: string
631 @param group_uuid: Node group UUID
632 @type owned_instances: set or frozenset
633 @param owned_instances: List of currently owned instances
636 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
637 if owned_instances != wanted_instances:
638 raise errors.OpPrereqError("Instances in node group '%s' changed since"
639 " locks were acquired, wanted '%s', have '%s';"
640 " retry the operation" %
642 utils.CommaJoin(wanted_instances),
643 utils.CommaJoin(owned_instances)),
646 return wanted_instances
649 def _SupportsOob(cfg, node):
650 """Tells if node supports OOB.
652 @type cfg: L{config.ConfigWriter}
653 @param cfg: The cluster configuration
654 @type node: L{objects.Node}
655 @param node: The node
656 @return: The OOB script if supported or an empty string otherwise
659 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
662 def _GetWantedNodes(lu, nodes):
663 """Returns list of checked and expanded node names.
665 @type lu: L{LogicalUnit}
666 @param lu: the logical unit on whose behalf we execute
668 @param nodes: list of node names or None for all nodes
670 @return: the list of nodes, sorted
671 @raise errors.ProgrammerError: if the nodes parameter is wrong type
675 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
677 return utils.NiceSort(lu.cfg.GetNodeList())
680 def _GetWantedInstances(lu, instances):
681 """Returns list of checked and expanded instance names.
683 @type lu: L{LogicalUnit}
684 @param lu: the logical unit on whose behalf we execute
685 @type instances: list
686 @param instances: list of instance names or None for all instances
688 @return: the list of instances, sorted
689 @raise errors.OpPrereqError: if the instances parameter is wrong type
690 @raise errors.OpPrereqError: if any of the passed instances is not found
694 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
696 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @return: the new parameter dictionary

  """
720 params_copy = copy.deepcopy(old_params)
721 for key, val in update_dict.iteritems():
722 if ((use_default and val == constants.VALUE_DEFAULT) or
723 (use_none and val is None)):
729 params_copy[key] = val
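
# Example of the merge semantics described above (illustrative only; the
# parameter names are made up):
#
#   >>> _GetUpdatedParams({"mem": 128, "cpus": 2},
#   ...                   {"mem": constants.VALUE_DEFAULT, "disk": 10})
#   {'cpus': 2, 'disk': 10}
#
# With use_none=True, a value of None is likewise treated as "remove the key".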
733 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.
736 @param group_policy: whether this policy applies to a group and thus
737 we should support removal of policy entries
740 use_none = use_default = group_policy
741 ipolicy = copy.deepcopy(old_ipolicy)
742 for key, value in new_ipolicy.items():
743 if key not in constants.IPOLICY_ALL_KEYS:
744 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
746 if key in constants.IPOLICY_ISPECS:
747 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
748 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
750 use_default=use_default)
752 if not value or value == [constants.VALUE_DEFAULT]:
756 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
757 " on the cluster'" % key,
760 if key in constants.IPOLICY_PARAMETERS:
761 # FIXME: we assume all such values are float
763 ipolicy[key] = float(value)
764 except (TypeError, ValueError), err:
765 raise errors.OpPrereqError("Invalid value for attribute"
766 " '%s': '%s', error: %s" %
767 (key, value, err), errors.ECODE_INVAL)
769 # FIXME: we assume all others are lists; this should be redone
771 ipolicy[key] = list(value)
773 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
774 except errors.ConfigurationError, err:
775 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
780 def _UpdateAndVerifySubDict(base, updates, type_check):
781 """Updates and verifies a dict with sub dicts of the same type.
783 @param base: The dict with the old data
784 @param updates: The dict with the new data
785 @param type_check: Dict suitable to ForceDictType to verify correct types
786 @returns: A new dict with updated and verified values
790 new = _GetUpdatedParams(old, value)
791 utils.ForceDictType(new, type_check)
794 ret = copy.deepcopy(base)
795 ret.update(dict((key, fn(base.get(key, {}), value))
796 for key, value in updates.items()))
800 def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with that of the object.
803 @param op_input: The input dict from the opcode
804 @param obj_input: The input dict from the objects
805 @return: The verified and updated dict
809 invalid_hvs = set(op_input) - constants.HYPER_TYPES
811 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
812 " %s" % utils.CommaJoin(invalid_hvs),
814 if obj_input is None:
816 type_check = constants.HVSTS_PARAMETER_TYPES
817 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
822 def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with that of the object.
825 @param op_input: The input dict from the opcode
826 @param obj_input: The input dict from the objects
827 @return: The verified and updated dict
830 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
832 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
833 utils.CommaJoin(invalid_dst),
835 type_check = constants.DSS_PARAMETER_TYPES
836 if obj_input is None:
838 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
840 for key, value in op_input.items())
845 def _ReleaseLocks(lu, level, names=None, keep=None):
846 """Releases locks owned by an LU.
848 @type lu: L{LogicalUnit}
849 @param level: Lock level
850 @type names: list or None
851 @param names: Names of locks to release
852 @type keep: list or None
853 @param keep: Names of locks to retain
856 assert not (keep is not None and names is not None), \
857 "Only one of the 'names' and the 'keep' parameters can be given"
859 if names is not None:
860 should_release = names.__contains__
862 should_release = lambda name: name not in keep
864 should_release = None
866 owned = lu.owned_locks(level)
868 # Not owning any lock at this level, do nothing
875 # Determine which locks to release
877 if should_release(name):
882 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
884 # Release just some locks
885 lu.glm.release(level, names=release)
887 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
890 lu.glm.release(level)
892 assert not lu.glm.is_owned(level), "No locks should be owned"
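
# Typical use from an LU (illustrative sketch, not from the original code):
# after narrowing down which nodes an operation really needs, drop all other
# node locks that were acquired optimistically. The opcode attribute name
# used here is assumed.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.op.node_name])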
895 def _MapInstanceDisksToNodes(instances):
896 """Creates a map from (node, volume) to instance name.
898 @type instances: list of L{objects.Instance}
899 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
902 return dict(((node, vol), inst.name)
903 for inst in instances
904 for (node, vols) in inst.MapLVsByNode().items()
908 def _RunPostHook(lu, node_name):
909 """Runs the post-hook for an opcode on a single node.
912 hm = lu.proc.BuildHooksManager(lu)
914 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
916 # pylint: disable=W0702
917 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
920 def _CheckOutputFields(static, dynamic, selected):
921 """Checks whether all selected fields are valid.
923 @type static: L{utils.FieldSet}
924 @param static: static fields set
925 @type dynamic: L{utils.FieldSet}
926 @param dynamic: dynamic fields set
933 delta = f.NonMatching(selected)
935 raise errors.OpPrereqError("Unknown output fields selected: %s"
936 % ",".join(delta), errors.ECODE_INVAL)
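
# Illustrative usage (the field names are assumed): a query-style LU would
# validate self.op.output_fields against its static and dynamic field sets.
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("free_memory"),
#                      selected=self.op.output_fields)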
939 def _CheckGlobalHvParams(params):
940 """Validates that given hypervisor params are not global ones.
942 This will ensure that instances don't get customised versions of
946 used_globals = constants.HVC_GLOBALS.intersection(params)
948 msg = ("The following hypervisor parameters are global and cannot"
949 " be customized at instance level, please modify them at"
950 " cluster level: %s" % utils.CommaJoin(used_globals))
951 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
954 def _CheckNodeOnline(lu, node, msg=None):
955 """Ensure that a given node is online.
957 @param lu: the LU on behalf of which we make the check
958 @param node: the node to check
959 @param msg: if passed, should be a message to replace the default one
960 @raise errors.OpPrereqError: if the node is offline
964 msg = "Can't use offline node"
965 if lu.cfg.GetNodeInfo(node).offline:
966 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
969 def _CheckNodeNotDrained(lu, node):
970 """Ensure that a given node is not drained.
972 @param lu: the LU on behalf of which we make the check
973 @param node: the node to check
974 @raise errors.OpPrereqError: if the node is drained
977 if lu.cfg.GetNodeInfo(node).drained:
978 raise errors.OpPrereqError("Can't use drained node %s" % node,
982 def _CheckNodeVmCapable(lu, node):
983 """Ensure that a given node is vm capable.
985 @param lu: the LU on behalf of which we make the check
986 @param node: the node to check
987 @raise errors.OpPrereqError: if the node is not vm capable
990 if not lu.cfg.GetNodeInfo(node).vm_capable:
991 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
995 def _CheckNodeHasOS(lu, node, os_name, force_variant):
996 """Ensure that a node supports a given OS.
998 @param lu: the LU on behalf of which we make the check
999 @param node: the node to check
1000 @param os_name: the OS to query about
1001 @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS
1005 result = lu.rpc.call_os_get(node, os_name)
1006 result.Raise("OS '%s' not in supported OS list for node %s" %
1008 prereq=True, ecode=errors.ECODE_INVAL)
1009 if not force_variant:
1010 _CheckOSVariant(result.payload, os_name)
1013 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1014 """Ensure that a node has the given secondary ip.
1016 @type lu: L{LogicalUnit}
1017 @param lu: the LU on behalf of which we make the check
1019 @param node: the node to check
1020 @type secondary_ip: string
1021 @param secondary_ip: the ip to check
1022 @type prereq: boolean
1023 @param prereq: whether to throw a prerequisite or an execute error
1024 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1025 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1028 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1029 result.Raise("Failure checking secondary ip on node %s" % node,
1030 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1031 if not result.payload:
1032 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1033 " please fix and re-run this command" % secondary_ip)
1035 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1037 raise errors.OpExecError(msg)
1040 def _GetClusterDomainSecret():
1041 """Reads the cluster domain secret.
1044 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1048 def _CheckInstanceState(lu, instance, req_states, msg=None):
1049 """Ensure that an instance is in one of the required states.
1051 @param lu: the LU on behalf of which we make the check
1052 @param instance: the instance to check
1053 @param msg: if passed, should be a message to replace the default one
1054 @raise errors.OpPrereqError: if the instance is not in the required state
1058 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1059 if instance.admin_state not in req_states:
1060 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1061 (instance.name, instance.admin_state, msg),
1064 if constants.ADMINST_UP not in req_states:
1065 pnode = instance.primary_node
1066 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1067 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1068 prereq=True, ecode=errors.ECODE_ENVIRON)
1070 if instance.name in ins_l.payload:
1071 raise errors.OpPrereqError("Instance %s is running, %s" %
1072 (instance.name, msg), errors.ECODE_STATE)
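
# Illustrative usage (sketch): an LU that requires the instance to be stopped
# before touching its disks would check, in CheckPrereq, something like:
#
#   _CheckInstanceState(self, self.instance, INSTANCE_DOWN,
#                       msg="cannot modify disks of a running instance")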
1075 def _ComputeMinMaxSpec(name, ipolicy, value):
1076 """Computes if value is in the desired range.
1078 @param name: name of the parameter for which we perform the check
1079 @param ipolicy: dictionary containing min, max and std values
1080 @param value: actual value that we want to use
1081 @return: None or element not meeting the criteria
1085 if value in [None, constants.VALUE_AUTO]:
1087 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1088 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1089 if value > max_v or min_v > value:
1090 return ("%s value %s is not in range [%s, %s]" %
1091 (name, value, min_v, max_v))
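
# Example of the range check above (illustrative; a stripped-down ipolicy):
#
#   ipolicy = {
#     constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#     constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
#     }
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 8192)
#   # -> error string mentioning the allowed range
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 1024)
#   # -> None (value within range)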
1095 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1096 nic_count, disk_sizes, spindle_use,
1097 _compute_fn=_ComputeMinMaxSpec):
1098 """Verifies ipolicy against provided specs.
1101 @param ipolicy: The ipolicy
1103 @param mem_size: The memory size
1104 @type cpu_count: int
1105 @param cpu_count: Used cpu cores
1106 @type disk_count: int
1107 @param disk_count: Number of disks used
1108 @type nic_count: int
1109 @param nic_count: Number of nics used
1110 @type disk_sizes: list of ints
1111 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1112 @type spindle_use: int
1113 @param spindle_use: The number of spindles this instance uses
1114 @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found
1118 assert disk_count == len(disk_sizes)
1121 (constants.ISPEC_MEM_SIZE, mem_size),
1122 (constants.ISPEC_CPU_COUNT, cpu_count),
1123 (constants.ISPEC_DISK_COUNT, disk_count),
1124 (constants.ISPEC_NIC_COUNT, nic_count),
1125 (constants.ISPEC_SPINDLE_USE, spindle_use),
1126 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1129 (_compute_fn(name, ipolicy, value)
1130 for (name, value) in test_settings))
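
# Illustrative call (sketch): checking a prospective instance with 1 GB of
# RAM, 2 VCPUs, one 20 GB disk, one NIC and one spindle against an ipolicy;
# an empty result means the spec is acceptable.
#
#   violations = _ComputeIPolicySpecViolation(ipolicy, 1024, 2, 1, 1,
#                                             [20480], 1)
#   if violations:
#     raise errors.OpPrereqError(utils.CommaJoin(violations),
#                                errors.ECODE_INVAL)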
1133 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1134 _compute_fn=_ComputeIPolicySpecViolation):
1135 """Compute if instance meets the specs of ipolicy.
1138 @param ipolicy: The ipolicy to verify against
1139 @type instance: L{objects.Instance}
1140 @param instance: The instance to verify
1141 @param _compute_fn: The function to verify ipolicy (unittest only)
1142 @see: L{_ComputeIPolicySpecViolation}
1145 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1146 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1147 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USAGE, None)
1148 disk_count = len(instance.disks)
1149 disk_sizes = [disk.size for disk in instance.disks]
1150 nic_count = len(instance.nics)
1152 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1153 disk_sizes, spindle_use)
1156 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1157 _compute_fn=_ComputeIPolicySpecViolation):
1158 """Compute if instance specs meets the specs of ipolicy.
1161 @param ipolicy: The ipolicy to verify against
1162 @param instance_spec: dict
1163 @param instance_spec: The instance spec to verify
1164 @param _compute_fn: The function to verify ipolicy (unittest only)
1165 @see: L{_ComputeIPolicySpecViolation}
1168 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1169 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1170 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1171 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1172 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1173 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1175 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1176 disk_sizes, spindle_use)
1179 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1181 _compute_fn=_ComputeIPolicyInstanceViolation):
1182 """Compute if instance meets the specs of the new target group.
1184 @param ipolicy: The ipolicy to verify
1185 @param instance: The instance object to verify
1186 @param current_group: The current group of the instance
1187 @param target_group: The new group of the instance
1188 @param _compute_fn: The function to verify ipolicy (unittest only)
1189 @see: L{_ComputeIPolicySpecViolation}
1192 if current_group == target_group:
1195 return _compute_fn(ipolicy, instance)
1198 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1199 _compute_fn=_ComputeIPolicyNodeViolation):
1200 """Checks that the target node is correct in terms of instance policy.
1202 @param ipolicy: The ipolicy to verify
1203 @param instance: The instance object to verify
1204 @param node: The new node to relocate
1205 @param ignore: Ignore violations of the ipolicy
1206 @param _compute_fn: The function to verify ipolicy (unittest only)
1207 @see: L{_ComputeIPolicySpecViolation}
1210 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1211 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1214 msg = ("Instance does not meet target node group's (%s) instance"
1215 " policy: %s") % (node.group, utils.CommaJoin(res))
1219 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1222 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1223 """Computes a set of any instances that would violate the new ipolicy.
1225 @param old_ipolicy: The current (still in-place) ipolicy
1226 @param new_ipolicy: The new (to become) ipolicy
1227 @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but did not before
1231 return (_ComputeViolatingInstances(old_ipolicy, instances) -
1232 _ComputeViolatingInstances(new_ipolicy, instances))
1235 def _ExpandItemName(fn, name, kind):
1236 """Expand an item name.
1238 @param fn: the function to use for expansion
1239 @param name: requested item name
1240 @param kind: text description ('Node' or 'Instance')
1241 @return: the resolved (full) name
1242 @raise errors.OpPrereqError: if the item is not found
1245 full_name = fn(name)
1246 if full_name is None:
1247 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1252 def _ExpandNodeName(cfg, name):
1253 """Wrapper over L{_ExpandItemName} for nodes."""
1254 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1257 def _ExpandInstanceName(cfg, name):
1258 """Wrapper over L{_ExpandItemName} for instance."""
1259 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1262 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1263 minmem, maxmem, vcpus, nics, disk_template, disks,
1264 bep, hvp, hypervisor_name, tags):
  """Builds instance-related env variables for hooks.
1267 This builds the hook environment from individual variables.
1270 @param name: the name of the instance
1271 @type primary_node: string
1272 @param primary_node: the name of the instance's primary node
1273 @type secondary_nodes: list
1274 @param secondary_nodes: list of secondary nodes as strings
1275 @type os_type: string
1276 @param os_type: the name of the instance's OS
1277 @type status: string
1278 @param status: the desired status of the instance
1279 @type minmem: string
1280 @param minmem: the minimum memory size of the instance
1281 @type maxmem: string
1282 @param maxmem: the maximum memory size of the instance
1284 @param vcpus: the count of VCPUs the instance has
1286 @param nics: list of tuples (ip, mac, mode, link) representing
1287 the NICs the instance has
1288 @type disk_template: string
1289 @param disk_template: the disk template of the instance
1291 @param disks: the list of (size, mode) pairs
1293 @param bep: the backend parameters for the instance
1295 @param hvp: the hypervisor parameters for the instance
1296 @type hypervisor_name: string
1297 @param hypervisor_name: the hypervisor for the instance
1299 @param tags: list of instance tags as strings
1301 @return: the hook environment for this instance
1306 "INSTANCE_NAME": name,
1307 "INSTANCE_PRIMARY": primary_node,
1308 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1309 "INSTANCE_OS_TYPE": os_type,
1310 "INSTANCE_STATUS": status,
1311 "INSTANCE_MINMEM": minmem,
1312 "INSTANCE_MAXMEM": maxmem,
1313 # TODO(2.7) remove deprecated "memory" value
1314 "INSTANCE_MEMORY": maxmem,
1315 "INSTANCE_VCPUS": vcpus,
1316 "INSTANCE_DISK_TEMPLATE": disk_template,
1317 "INSTANCE_HYPERVISOR": hypervisor_name,
1320 nic_count = len(nics)
1321 for idx, (ip, mac, mode, link) in enumerate(nics):
1324 env["INSTANCE_NIC%d_IP" % idx] = ip
1325 env["INSTANCE_NIC%d_MAC" % idx] = mac
1326 env["INSTANCE_NIC%d_MODE" % idx] = mode
1327 env["INSTANCE_NIC%d_LINK" % idx] = link
1328 if mode == constants.NIC_MODE_BRIDGED:
1329 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1333 env["INSTANCE_NIC_COUNT"] = nic_count
1336 disk_count = len(disks)
1337 for idx, (size, mode) in enumerate(disks):
1338 env["INSTANCE_DISK%d_SIZE" % idx] = size
1339 env["INSTANCE_DISK%d_MODE" % idx] = mode
1343 env["INSTANCE_DISK_COUNT"] = disk_count
1348 env["INSTANCE_TAGS"] = " ".join(tags)
1350 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1351 for key, value in source.items():
1352 env["INSTANCE_%s_%s" % (kind, key)] = value
1357 def _NICListToTuple(lu, nics):
1358 """Build a list of nic information tuples.
1360 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1361 value in LUInstanceQueryData.
1363 @type lu: L{LogicalUnit}
1364 @param lu: the logical unit on whose behalf we execute
1365 @type nics: list of L{objects.NIC}
1366 @param nics: list of nics to convert to hooks tuples
1370 cluster = lu.cfg.GetClusterInfo()
1374 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1375 mode = filled_params[constants.NIC_MODE]
1376 link = filled_params[constants.NIC_LINK]
1377 hooks_nics.append((ip, mac, mode, link))
1381 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.
1384 @type lu: L{LogicalUnit}
1385 @param lu: the logical unit on whose behalf we execute
1386 @type instance: L{objects.Instance}
1387 @param instance: the instance for which we should build the
1389 @type override: dict
1390 @param override: dictionary with key/values that will override
1393 @return: the hook environment dictionary
1396 cluster = lu.cfg.GetClusterInfo()
1397 bep = cluster.FillBE(instance)
1398 hvp = cluster.FillHV(instance)
1400 "name": instance.name,
1401 "primary_node": instance.primary_node,
1402 "secondary_nodes": instance.secondary_nodes,
1403 "os_type": instance.os,
1404 "status": instance.admin_state,
1405 "maxmem": bep[constants.BE_MAXMEM],
1406 "minmem": bep[constants.BE_MINMEM],
1407 "vcpus": bep[constants.BE_VCPUS],
1408 "nics": _NICListToTuple(lu, instance.nics),
1409 "disk_template": instance.disk_template,
1410 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1413 "hypervisor_name": instance.hypervisor,
1414 "tags": instance.tags,
1417 args.update(override)
1418 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
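
# Typical use from an instance LU's BuildHooksEnv (illustrative sketch; the
# extra environment key and opcode attribute are made up):
#
#   def BuildHooksEnv(self):
#     env = _BuildInstanceHookEnvByObject(self, self.instance)
#     env["FORCE"] = self.op.force          # hypothetical extra variable
#     return env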
1421 def _AdjustCandidatePool(lu, exceptions):
1422 """Adjust the candidate pool after node operations.
1425 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1427 lu.LogInfo("Promoted nodes to master candidate role: %s",
1428 utils.CommaJoin(node.name for node in mod_list))
1429 for name in mod_list:
1430 lu.context.ReaddNode(name)
1431 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1433 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1437 def _DecideSelfPromotion(lu, exceptions=None):
1438 """Decide whether I should promote myself as a master candidate.
1441 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1442 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1443 # the new node will increase mc_max with one, so:
1444 mc_should = min(mc_should + 1, cp_size)
1445 return mc_now < mc_should
1448 def _CalculateGroupIPolicy(cluster, group):
1449 """Calculate instance policy for group.
1452 return cluster.SimpleFillIPolicy(group.ipolicy)
1455 def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
1461 @return: A frozenset of instance names violating the ipolicy
1464 return frozenset([inst.name for inst in instances
1465 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1468 def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.
1472 cluster = lu.cfg.GetClusterInfo()
1473 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1474 brlist = [params[constants.NIC_LINK] for params in paramslist
1475 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1477 result = lu.rpc.call_bridges_exist(target_node, brlist)
1478 result.Raise("Error checking bridges on destination node '%s'" %
1479 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1482 def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.
1487 node = instance.primary_node
1488 _CheckNicsBridgesExist(lu, instance.nics, node)
1491 def _CheckOSVariant(os_obj, name):
1492 """Check whether an OS name conforms to the os variants specification.
1494 @type os_obj: L{objects.OS}
1495 @param os_obj: OS object to check
1497 @param name: OS name passed by the user, to check for validity
1500 variant = objects.OS.GetVariant(name)
1501 if not os_obj.supported_variants:
1503 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1504 " passed)" % (os_obj.name, variant),
1508 raise errors.OpPrereqError("OS name must include a variant",
1511 if variant not in os_obj.supported_variants:
1512 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1515 def _GetNodeInstancesInner(cfg, fn):
1516 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1519 def _GetNodeInstances(cfg, node_name):
1520 """Returns a list of all primary and secondary instances on a node.
1524 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1527 def _GetNodePrimaryInstances(cfg, node_name):
1528 """Returns primary instances on a node.
1531 return _GetNodeInstancesInner(cfg,
1532 lambda inst: node_name == inst.primary_node)
1535 def _GetNodeSecondaryInstances(cfg, node_name):
1536 """Returns secondary instances on a node.
1539 return _GetNodeInstancesInner(cfg,
1540 lambda inst: node_name in inst.secondary_nodes)
1543 def _GetStorageTypeArgs(cfg, storage_type):
1544 """Returns the arguments for a storage type.
1547 # Special case for file storage
1548 if storage_type == constants.ST_FILE:
1549 # storage.FileStorage wants a list of storage directories
1550 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1555 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1558 for dev in instance.disks:
1559 cfg.SetDiskID(dev, node_name)
1561 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1562 result.Raise("Failed to get disk status from node %s" % node_name,
1563 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1565 for idx, bdev_status in enumerate(result.payload):
1566 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1572 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1573 """Check the sanity of iallocator and node arguments and use the
1574 cluster-wide iallocator if appropriate.
1576 Check that at most one of (iallocator, node) is specified. If none is
1577 specified, then the LU's opcode's iallocator slot is filled with the
1578 cluster-wide default iallocator.
1580 @type iallocator_slot: string
1581 @param iallocator_slot: the name of the opcode iallocator slot
1582 @type node_slot: string
1583 @param node_slot: the name of the opcode target node slot
1586 node = getattr(lu.op, node_slot, None)
1587 iallocator = getattr(lu.op, iallocator_slot, None)
1589 if node is not None and iallocator is not None:
1590 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1592 elif node is None and iallocator is None:
1593 default_iallocator = lu.cfg.GetDefaultIAllocator()
1594 if default_iallocator:
1595 setattr(lu.op, iallocator_slot, default_iallocator)
1597 raise errors.OpPrereqError("No iallocator or node given and no"
1598 " cluster-wide default iallocator found;"
1599 " please specify either an iallocator or a"
1600 " node, or set a cluster-wide default"
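
# Illustrative usage (sketch): an LU whose opcode has both an "iallocator"
# and a "remote_node" slot would resolve the cluster default in
# CheckArguments:
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")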
1604 def _GetDefaultIAllocator(cfg, iallocator):
1605 """Decides on which iallocator to use.
1607 @type cfg: L{config.ConfigWriter}
1608 @param cfg: Cluster configuration object
1609 @type iallocator: string or None
1610 @param iallocator: Iallocator specified in opcode
1612 @return: Iallocator name
1616 # Use default iallocator
1617 iallocator = cfg.GetDefaultIAllocator()
1620 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1621 " opcode nor as a cluster-wide default",
1627 class LUClusterPostInit(LogicalUnit):
1628 """Logical unit for running hooks after cluster initialization.
1631 HPATH = "cluster-init"
1632 HTYPE = constants.HTYPE_CLUSTER
1634 def BuildHooksEnv(self):
1639 "OP_TARGET": self.cfg.GetClusterName(),
1642 def BuildHooksNodes(self):
1643 """Build hooks nodes.
1646 return ([], [self.cfg.GetMasterNode()])
1648 def Exec(self, feedback_fn):
1655 class LUClusterDestroy(LogicalUnit):
1656 """Logical unit for destroying the cluster.
1659 HPATH = "cluster-destroy"
1660 HTYPE = constants.HTYPE_CLUSTER
1662 def BuildHooksEnv(self):
1667 "OP_TARGET": self.cfg.GetClusterName(),
1670 def BuildHooksNodes(self):
1671 """Build hooks nodes.
1676 def CheckPrereq(self):
1677 """Check prerequisites.
1679 This checks whether the cluster is empty.
1681 Any errors are signaled by raising errors.OpPrereqError.
1684 master = self.cfg.GetMasterNode()
1686 nodelist = self.cfg.GetNodeList()
1687 if len(nodelist) != 1 or nodelist[0] != master:
1688 raise errors.OpPrereqError("There are still %d node(s) in"
1689 " this cluster." % (len(nodelist) - 1),
1691 instancelist = self.cfg.GetInstanceList()
1693 raise errors.OpPrereqError("There are still %d instance(s) in"
1694 " this cluster." % len(instancelist),
1697 def Exec(self, feedback_fn):
1698 """Destroys the cluster.
1701 master_params = self.cfg.GetMasterNetworkParameters()
1703 # Run post hooks on master node before it's removed
1704 _RunPostHook(self, master_params.name)
1706 ems = self.cfg.GetUseExternalMipScript()
1707 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1710 self.LogWarning("Error disabling the master IP address: %s",
1713 return master_params.name
1716 def _VerifyCertificate(filename):
1717 """Verifies a certificate for L{LUClusterVerifyConfig}.
1719 @type filename: string
1720 @param filename: Path to PEM file
1724 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1725 utils.ReadFile(filename))
1726 except Exception, err: # pylint: disable=W0703
1727 return (LUClusterVerifyConfig.ETYPE_ERROR,
1728 "Failed to load X509 certificate %s: %s" % (filename, err))
1731 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1732 constants.SSL_CERT_EXPIRATION_ERROR)
1735 fnamemsg = "While verifying %s: %s" % (filename, msg)
1740 return (None, fnamemsg)
1741 elif errcode == utils.CERT_WARNING:
1742 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1743 elif errcode == utils.CERT_ERROR:
1744 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1746 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1749 def _GetAllHypervisorParameters(cluster, instances):
1750 """Compute the set of all hypervisor parameters.
1752 @type cluster: L{objects.Cluster}
1753 @param cluster: the cluster object
1754 @param instances: list of L{objects.Instance}
1755 @param instances: additional instances from which to obtain parameters
1756 @rtype: list of (origin, hypervisor, parameters)
1757 @return: a list with all parameters found, indicating the hypervisor they
1758 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1763 for hv_name in cluster.enabled_hypervisors:
1764 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1766 for os_name, os_hvp in cluster.os_hvp.items():
1767 for hv_name, hv_params in os_hvp.items():
1769 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1770 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1772 # TODO: collapse identical parameter values in a single one
1773 for instance in instances:
1774 if instance.hvparams:
1775 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1776 cluster.FillHV(instance)))
1781 class _VerifyErrors(object):
1782 """Mix-in for cluster/group verify LUs.
1784 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1785 self.op and self._feedback_fn to be available.)
1789 ETYPE_FIELD = "code"
1790 ETYPE_ERROR = "ERROR"
1791 ETYPE_WARNING = "WARNING"
1793 def _Error(self, ecode, item, msg, *args, **kwargs):
1794 """Format an error message.
1796 Based on the opcode's error_codes parameter, either format a
1797 parseable error code, or a simpler error string.
1799 This must be called only from Exec and functions called from Exec.
1802 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1803 itype, etxt, _ = ecode
1804 # first complete the msg
1807 # then format the whole message
1808 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1809 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1815 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1816 # and finally report it via the feedback_fn
1817 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1819 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1820 """Log an error message if the passed condition is True.
1824 or self.op.debug_simulate_errors) # pylint: disable=E1101
1826 # If the error code is in the list of ignored errors, demote the error to a
1828 (_, etxt, _) = ecode
1829 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1830 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1833 self._Error(ecode, *args, **kwargs)
1835 # do not mark the operation as failed for WARN cases only
1836 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1837 self.bad = self.bad or cond
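
# Illustrative usage from a verification LU (sketch; the condition, node name
# and error payload below are placeholders):
#
#   self._ErrorIf(test_failed, constants.CV_ENODEHV, node,
#                 "hypervisor verify failure: %s", hv_result)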
1840 class LUClusterVerify(NoHooksLU):
1841 """Submits all jobs necessary to verify the cluster.
1846 def ExpandNames(self):
1847 self.needed_locks = {}
1849 def Exec(self, feedback_fn):
1852 if self.op.group_name:
1853 groups = [self.op.group_name]
1854 depends_fn = lambda: None
1856 groups = self.cfg.GetNodeGroupList()
1858 # Verify global configuration
1860 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1863 # Always depend on global verification
1864 depends_fn = lambda: [(-len(jobs), [])]
1866 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1867 ignore_errors=self.op.ignore_errors,
1868 depends=depends_fn())]
1869 for group in groups)
1871 # Fix up all parameters
1872 for op in itertools.chain(*jobs): # pylint: disable=W0142
1873 op.debug_simulate_errors = self.op.debug_simulate_errors
1874 op.verbose = self.op.verbose
1875 op.error_codes = self.op.error_codes
1877 op.skip_checks = self.op.skip_checks
1878 except AttributeError:
1879 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1881 return ResultWithJobs(jobs)
1884 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1885 """Verifies the cluster config.
1890 def _VerifyHVP(self, hvp_data):
1891 """Verifies locally the syntax of the hypervisor parameters.
1894 for item, hv_name, hv_params in hvp_data:
1895 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1898 hv_class = hypervisor.GetHypervisor(hv_name)
1899 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1900 hv_class.CheckParameterSyntax(hv_params)
1901 except errors.GenericError, err:
1902 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1904 def ExpandNames(self):
1905 # Information can be safely retrieved as the BGL is acquired in exclusive
1907 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1908 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1909 self.all_node_info = self.cfg.GetAllNodesInfo()
1910 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1911 self.needed_locks = {}
1913 def Exec(self, feedback_fn):
1914 """Verify integrity of cluster, performing various test on nodes.
1918 self._feedback_fn = feedback_fn
1920 feedback_fn("* Verifying cluster config")
1922 for msg in self.cfg.VerifyConfig():
1923 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1925 feedback_fn("* Verifying cluster certificate files")
1927 for cert_filename in constants.ALL_CERT_FILES:
1928 (errcode, msg) = _VerifyCertificate(cert_filename)
1929 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1931 feedback_fn("* Verifying hypervisor parameters")
1933 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1934 self.all_inst_info.values()))
1936 feedback_fn("* Verifying all nodes belong to an existing group")
1938 # We do this verification here because, should this bogus circumstance
1939 # occur, it would never be caught by VerifyGroup, which only acts on
1940 # nodes/instances reachable from existing node groups.
1942 dangling_nodes = set(node.name for node in self.all_node_info.values()
1943 if node.group not in self.all_group_info)
1945 dangling_instances = {}
1946 no_node_instances = []
1948 for inst in self.all_inst_info.values():
1949 if inst.primary_node in dangling_nodes:
1950 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1951 elif inst.primary_node not in self.all_node_info:
1952 no_node_instances.append(inst.name)
1957 utils.CommaJoin(dangling_instances.get(node.name,
1959 for node in dangling_nodes]
1961 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1963 "the following nodes (and their instances) belong to a non"
1964 " existing group: %s", utils.CommaJoin(pretty_dangling))
1966 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1968 "the following instances have a non-existing primary-node:"
1969 " %s", utils.CommaJoin(no_node_instances))
1974 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1975 """Verifies the status of a node group.
1978 HPATH = "cluster-verify"
1979 HTYPE = constants.HTYPE_CLUSTER
1982 _HOOKS_INDENT_RE = re.compile("^", re.M)
1984 class NodeImage(object):
1985 """A class representing the logical and physical status of a node.
1988 @ivar name: the node name to which this object refers
1989 @ivar volumes: a structure as returned from
1990 L{ganeti.backend.GetVolumeList} (runtime)
1991 @ivar instances: a list of running instances (runtime)
1992 @ivar pinst: list of configured primary instances (config)
1993 @ivar sinst: list of configured secondary instances (config)
1994 @ivar sbp: dictionary of {primary-node: list of instances} for all
1995 instances for which this node is secondary (config)
1996 @ivar mfree: free memory, as reported by hypervisor (runtime)
1997 @ivar dfree: free disk, as reported by the node (runtime)
1998 @ivar offline: the offline status (config)
1999 @type rpc_fail: boolean
2000 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2001 not whether the individual keys were correct) (runtime)
2002 @type lvm_fail: boolean
2003 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2004 @type hyp_fail: boolean
2005 @ivar hyp_fail: whether the RPC call didn't return the instance list
2006 @type ghost: boolean
2007 @ivar ghost: whether this is a known node or not (config)
2008 @type os_fail: boolean
2009 @ivar os_fail: whether the RPC call didn't return valid OS data
2011 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2012 @type vm_capable: boolean
2013 @ivar vm_capable: whether the node can host instances
2016 def __init__(self, offline=False, name=None, vm_capable=True):
2025 self.offline = offline
2026 self.vm_capable = vm_capable
2027 self.rpc_fail = False
2028 self.lvm_fail = False
2029 self.hyp_fail = False
2031 self.os_fail = False
2034 def ExpandNames(self):
2035 # This raises errors.OpPrereqError on its own:
2036 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2038 # Get instances in node group; this is unsafe and needs verification later
2040 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2042 self.needed_locks = {
2043 locking.LEVEL_INSTANCE: inst_names,
2044 locking.LEVEL_NODEGROUP: [self.group_uuid],
2045 locking.LEVEL_NODE: [],
2048 self.share_locks = _ShareAll()
2050 def DeclareLocks(self, level):
2051 if level == locking.LEVEL_NODE:
2052 # Get members of node group; this is unsafe and needs verification later
2053 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2055 all_inst_info = self.cfg.GetAllInstancesInfo()
2057 # In Exec(), we warn about mirrored instances that have primary and
2058 # secondary living in separate node groups. To fully verify that
2059 # volumes for these instances are healthy, we will need to do an
2060 # extra call to their secondaries. We ensure here those nodes will be locked.
2062 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2063 # Important: access only the instances whose lock is owned
2064 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2065 nodes.update(all_inst_info[inst].secondary_nodes)
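# The node lock set now holds all members of this group plus any
# out-of-group secondaries of mirrored instances found above.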
2067 self.needed_locks[locking.LEVEL_NODE] = nodes
2069 def CheckPrereq(self):
2070 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2071 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2073 group_nodes = set(self.group_info.members)
2075 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2078 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2080 unlocked_instances = \
2081 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2084 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2085 utils.CommaJoin(unlocked_nodes),
2088 if unlocked_instances:
2089 raise errors.OpPrereqError("Missing lock for instances: %s" %
2090 utils.CommaJoin(unlocked_instances),
2093 self.all_node_info = self.cfg.GetAllNodesInfo()
2094 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2096 self.my_node_names = utils.NiceSort(group_nodes)
2097 self.my_inst_names = utils.NiceSort(group_instances)
2099 self.my_node_info = dict((name, self.all_node_info[name])
2100 for name in self.my_node_names)
2102 self.my_inst_info = dict((name, self.all_inst_info[name])
2103 for name in self.my_inst_names)
2105 # We detect here the nodes that will need the extra RPC calls for verifying
2106 # split LV volumes; they should be locked.
2107 extra_lv_nodes = set()
2109 for inst in self.my_inst_info.values():
2110 if inst.disk_template in constants.DTS_INT_MIRROR:
2111 for nname in inst.all_nodes:
2112 if self.all_node_info[nname].group != self.group_uuid:
2113 extra_lv_nodes.add(nname)
2115 unlocked_lv_nodes = \
2116 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2118 if unlocked_lv_nodes:
2119 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2120 utils.CommaJoin(unlocked_lv_nodes),
2122 self.extra_lv_nodes = list(extra_lv_nodes)
2124 def _VerifyNode(self, ninfo, nresult):
2125 """Perform some basic validation on data returned from a node.
2127 - check the result data structure is well formed and has all the expected fields
2129 - check ganeti version
2131 @type ninfo: L{objects.Node}
2132 @param ninfo: the node to check
2133 @param nresult: the results from the node
2135 @return: whether overall this call was successful (and we can expect
2136 reasonable values in the response)
2140 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2142 # main result, nresult should be a non-empty dict
2143 test = not nresult or not isinstance(nresult, dict)
2144 _ErrorIf(test, constants.CV_ENODERPC, node,
2145 "unable to verify node: no data returned")
2149 # compares ganeti version
2150 local_version = constants.PROTOCOL_VERSION
2151 remote_version = nresult.get("version", None)
2152 test = not (remote_version and
2153 isinstance(remote_version, (list, tuple)) and
2154 len(remote_version) == 2)
2155 _ErrorIf(test, constants.CV_ENODERPC, node,
2156 "connection to node returned invalid data")
2160 test = local_version != remote_version[0]
2161 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2162 "incompatible protocol versions: master %s,"
2163 " node %s", local_version, remote_version[0])
2167 # node seems compatible, we can actually try to look into its results
2169 # full package version
2170 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2171 constants.CV_ENODEVERSION, node,
2172 "software version mismatch: master %s, node %s",
2173 constants.RELEASE_VERSION, remote_version[1],
2174 code=self.ETYPE_WARNING)
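# Per-hypervisor verification results: a non-None value means that
# hypervisor's verify call reported a problem on this node.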
2176 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2177 if ninfo.vm_capable and isinstance(hyp_result, dict):
2178 for hv_name, hv_result in hyp_result.iteritems():
2179 test = hv_result is not None
2180 _ErrorIf(test, constants.CV_ENODEHV, node,
2181 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2183 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2184 if ninfo.vm_capable and isinstance(hvp_result, list):
2185 for item, hv_name, hv_result in hvp_result:
2186 _ErrorIf(True, constants.CV_ENODEHV, node,
2187 "hypervisor %s parameter verify failure (source %s): %s",
2188 hv_name, item, hv_result)
2190 test = nresult.get(constants.NV_NODESETUP,
2191 ["Missing NODESETUP results"])
2192 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2197 def _VerifyNodeTime(self, ninfo, nresult,
2198 nvinfo_starttime, nvinfo_endtime):
2199 """Check the node time.
2201 @type ninfo: L{objects.Node}
2202 @param ninfo: the node to check
2203 @param nresult: the remote results for the node
2204 @param nvinfo_starttime: the start time of the RPC call
2205 @param nvinfo_endtime: the end time of the RPC call
2209 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2211 ntime = nresult.get(constants.NV_TIME, None)
2213 ntime_merged = utils.MergeTime(ntime)
2214 except (ValueError, TypeError):
2215 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
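# Only report a clock problem when the node's time falls outside the
# [start, end] window of the RPC call, widened by NODE_MAX_CLOCK_SKEW
# on each side.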
2218 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2219 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2220 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2221 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2225 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2226 "Node time diverges by at least %s from master node time",
2229 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2230 """Check the node LVM results.
2232 @type ninfo: L{objects.Node}
2233 @param ninfo: the node to check
2234 @param nresult: the remote results for the node
2235 @param vg_name: the configured VG name
2242 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2244 # checks vg existence and size > 20G
2245 vglist = nresult.get(constants.NV_VGLIST, None)
2247 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2249 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2250 constants.MIN_VG_SIZE)
2251 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2254 pvlist = nresult.get(constants.NV_PVLIST, None)
2255 test = pvlist is None
2256 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2258 # check that ':' is not present in PV names, since it's a
2259 # special character for lvcreate (denotes the range of PEs to be used on this PV)
2261 for _, pvname, owner_vg in pvlist:
2262 test = ":" in pvname
2263 _ErrorIf(test, constants.CV_ENODELVM, node,
2264 "Invalid character ':' in PV '%s' of VG '%s'",
2267 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2268 """Check the node bridges.
2270 @type ninfo: L{objects.Node}
2271 @param ninfo: the node to check
2272 @param nresult: the remote results for the node
2273 @param bridges: the expected list of bridges
2280 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2282 missing = nresult.get(constants.NV_BRIDGES, None)
2283 test = not isinstance(missing, list)
2284 _ErrorIf(test, constants.CV_ENODENET, node,
2285 "did not return valid bridge information")
2287 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2288 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2290 def _VerifyNodeUserScripts(self, ninfo, nresult):
2291 """Check the results of user script presence and executability on the node.
2293 @type ninfo: L{objects.Node}
2294 @param ninfo: the node to check
2295 @param nresult: the remote results for the node
2300 test = not constants.NV_USERSCRIPTS in nresult
2301 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2302 "did not return user scripts information")
2304 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2306 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2307 "user scripts not present or not executable: %s" %
2308 utils.CommaJoin(sorted(broken_scripts)))
2310 def _VerifyNodeNetwork(self, ninfo, nresult):
2311 """Check the node network connectivity results.
2313 @type ninfo: L{objects.Node}
2314 @param ninfo: the node to check
2315 @param nresult: the remote results for the node
2319 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2321 test = constants.NV_NODELIST not in nresult
2322 _ErrorIf(test, constants.CV_ENODESSH, node,
2323 "node hasn't returned node ssh connectivity data")
2325 if nresult[constants.NV_NODELIST]:
2326 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2327 _ErrorIf(True, constants.CV_ENODESSH, node,
2328 "ssh communication with node '%s': %s", a_node, a_msg)
2330 test = constants.NV_NODENETTEST not in nresult
2331 _ErrorIf(test, constants.CV_ENODENET, node,
2332 "node hasn't returned node tcp connectivity data")
2334 if nresult[constants.NV_NODENETTEST]:
2335 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2337 _ErrorIf(True, constants.CV_ENODENET, node,
2338 "tcp communication with node '%s': %s",
2339 anode, nresult[constants.NV_NODENETTEST][anode])
2341 test = constants.NV_MASTERIP not in nresult
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "node hasn't returned node master IP reachability data")
2345 if not nresult[constants.NV_MASTERIP]:
2346 if node == self.master_node:
2347 msg = "the master node cannot reach the master IP (not configured?)"
2349 msg = "cannot reach the master IP"
2350 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2352 def _VerifyInstance(self, instance, instanceconfig, node_image,
2354 """Verify an instance.
2356 This function checks to see if the required block devices are
2357 available on the instance's node.
2360 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2361 node_current = instanceconfig.primary_node
2363 node_vol_should = {}
2364 instanceconfig.MapLVsByNode(node_vol_should)
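# Check the instance against the effective instance policy of its node
# group (derived from the cluster and group settings).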
2366 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2367 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2368 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2370 for node in node_vol_should:
2371 n_img = node_image[node]
2372 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2373 # ignore missing volumes on offline or broken nodes
2375 for volume in node_vol_should[node]:
2376 test = volume not in n_img.volumes
2377 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2378 "volume %s missing on node %s", volume, node)
2380 if instanceconfig.admin_state == constants.ADMINST_UP:
2381 pri_img = node_image[node_current]
2382 test = instance not in pri_img.instances and not pri_img.offline
2383 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2384 "instance not running on its primary node %s",
2387 diskdata = [(nname, success, status, idx)
2388 for (nname, disks) in diskstatus.items()
2389 for idx, (success, status) in enumerate(disks)]
2391 for nname, success, bdev_status, idx in diskdata:
2392 # the 'ghost node' construction in Exec() ensures that we have a node_image entry here even for unknown nodes
2394 snode = node_image[nname]
2395 bad_snode = snode.ghost or snode.offline
2396 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2397 not success and not bad_snode,
2398 constants.CV_EINSTANCEFAULTYDISK, instance,
2399 "couldn't retrieve status for disk/%s on %s: %s",
2400 idx, nname, bdev_status)
2401 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2402 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2403 constants.CV_EINSTANCEFAULTYDISK, instance,
2404 "disk/%s on %s is faulty", idx, nname)
2406 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2407 """Verify if there are any unknown volumes in the cluster.
2409 The .os, .swap and backup volumes are ignored. All other volumes are
2410 reported as unknown.
2412 @type reserved: L{ganeti.utils.FieldSet}
2413 @param reserved: a FieldSet of reserved volume names
2416 for node, n_img in node_image.items():
2417 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2418 self.all_node_info[node].group != self.group_uuid):
2419 # skip non-healthy nodes
2421 for volume in n_img.volumes:
2422 test = ((node not in node_vol_should or
2423 volume not in node_vol_should[node]) and
2424 not reserved.Matches(volume))
2425 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2426 "volume %s is unknown", volume)
2428 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2429 """Verify N+1 Memory Resilience.
2431 Check that if one single node dies we can still start all the
2432 instances it was primary for.
2435 cluster_info = self.cfg.GetClusterInfo()
2436 for node, n_img in node_image.items():
2437 # This code checks that every node which is now listed as
2438 # secondary has enough memory to host all instances it is
2439 # supposed to, should a single other node in the cluster fail.
2440 # FIXME: not ready for failover to an arbitrary node
2441 # FIXME: does not support file-backed instances
2442 # WARNING: we currently take into account down instances as well
2443 # as up ones, considering that even if they're down someone
2444 # might want to start them even in the event of a node failure.
2445 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2446 # we're skipping nodes marked offline and nodes in other groups from
2447 # the N+1 warning, since most likely we don't have good memory
2448 # information from them; we already list instances living on such
2449 # nodes, and that's enough warning
2451 #TODO(dynmem): also consider ballooning out other instances
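# Illustration (numbers are made up): if this node is secondary for two
# auto-balanced instances with BE_MINMEM of 2048 and 1024 MiB whose primary
# is the same peer node, this node needs mfree >= 3072 MiB, otherwise a
# CV_ENODEN1 error is reported for that peer.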
2452 for prinode, instances in n_img.sbp.items():
2454 for instance in instances:
2455 bep = cluster_info.FillBE(instance_cfg[instance])
2456 if bep[constants.BE_AUTO_BALANCE]:
2457 needed_mem += bep[constants.BE_MINMEM]
2458 test = n_img.mfree < needed_mem
2459 self._ErrorIf(test, constants.CV_ENODEN1, node,
2460 "not enough memory to accommodate instance failovers"
2461 " should node %s fail (%dMiB needed, %dMiB available)",
2462 prinode, needed_mem, n_img.mfree)
2465 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2466 (files_all, files_opt, files_mc, files_vm)):
2467 """Verifies file checksums collected from all nodes.
2469 @param errorif: Callback for reporting errors
2470 @param nodeinfo: List of L{objects.Node} objects
2471 @param master_node: Name of master node
2472 @param all_nvinfo: RPC results
2475 # Define functions determining which nodes to consider for a file
2478 (files_mc, lambda node: (node.master_candidate or
2479 node.name == master_node)),
2480 (files_vm, lambda node: node.vm_capable),
2483 # Build mapping from filename to list of nodes which should have the file
2485 for (files, fn) in files2nodefn:
2487 filenodes = nodeinfo
2489 filenodes = filter(fn, nodeinfo)
2490 nodefiles.update((filename,
2491 frozenset(map(operator.attrgetter("name"), filenodes)))
2492 for filename in files)
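# nodefiles now maps each distributed file to the frozenset of node names
# expected to carry it.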
2494 assert set(nodefiles) == (files_all | files_mc | files_vm)
2496 fileinfo = dict((filename, {}) for filename in nodefiles)
2497 ignore_nodes = set()
2499 for node in nodeinfo:
2501 ignore_nodes.add(node.name)
2504 nresult = all_nvinfo[node.name]
2506 if nresult.fail_msg or not nresult.payload:
2509 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2511 test = not (node_files and isinstance(node_files, dict))
2512 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2513 "Node did not return file checksum data")
2515 ignore_nodes.add(node.name)
2518 # Build per-checksum mapping from filename to nodes having it
2519 for (filename, checksum) in node_files.items():
2520 assert filename in nodefiles
2521 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2523 for (filename, checksums) in fileinfo.items():
2524 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2526 # Nodes having the file
2527 with_file = frozenset(node_name
2528 for nodes in fileinfo[filename].values()
2529 for node_name in nodes) - ignore_nodes
2531 expected_nodes = nodefiles[filename] - ignore_nodes
2533 # Nodes missing file
2534 missing_file = expected_nodes - with_file
2536 if filename in files_opt:
2538 errorif(missing_file and missing_file != expected_nodes,
2539 constants.CV_ECLUSTERFILECHECK, None,
2540 "File %s is optional, but it must exist on all or no"
2541 " nodes (not found on %s)",
2542 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2544 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2545 "File %s is missing from node(s) %s", filename,
2546 utils.CommaJoin(utils.NiceSort(missing_file)))
2548 # Warn if a node has a file it shouldn't
2549 unexpected = with_file - expected_nodes
2551 constants.CV_ECLUSTERFILECHECK, None,
2552 "File %s should not exist on node(s) %s",
2553 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2555 # See if there are multiple versions of the file
2556 test = len(checksums) > 1
2558 variants = ["variant %s on %s" %
2559 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2560 for (idx, (checksum, nodes)) in
2561 enumerate(sorted(checksums.items()))]
2565 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2566 "File %s found with %s different checksums (%s)",
2567 filename, len(checksums), "; ".join(variants))
2569 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2571 """Verifies the node DRBD status.
2573 @type ninfo: L{objects.Node}
2574 @param ninfo: the node to check
2575 @param nresult: the remote results for the node
2576 @param instanceinfo: the dict of instances
2577 @param drbd_helper: the configured DRBD usermode helper
2578 @param drbd_map: the DRBD map as returned by
2579 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2583 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2586 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2587 test = (helper_result is None)
2588 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2589 "no drbd usermode helper returned")
2591 status, payload = helper_result
2593 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2594 "drbd usermode helper check unsuccessful: %s", payload)
2595 test = status and (payload != drbd_helper)
2596 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2597 "wrong drbd usermode helper: %s", payload)
2599 # compute the DRBD minors
2601 for minor, instance in drbd_map[node].items():
2602 test = instance not in instanceinfo
2603 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2604 "ghost instance '%s' in temporary DRBD map", instance)
2605 # ghost instance should not be running, but otherwise we
2606 # don't give double warnings (both ghost instance and
2607 # unallocated minor in use)
2609 node_drbd[minor] = (instance, False)
2611 instance = instanceinfo[instance]
2612 node_drbd[minor] = (instance.name,
2613 instance.admin_state == constants.ADMINST_UP)
2615 # and now check them
2616 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2617 test = not isinstance(used_minors, (tuple, list))
2618 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2619 "cannot parse drbd status file: %s", str(used_minors))
2621 # we cannot check drbd status
2624 for minor, (iname, must_exist) in node_drbd.items():
2625 test = minor not in used_minors and must_exist
2626 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2627 "drbd minor %d of instance %s is not active", minor, iname)
2628 for minor in used_minors:
2629 test = minor not in node_drbd
2630 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2631 "unallocated drbd minor %d is in use", minor)
2633 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2634 """Builds the node OS structures.
2636 @type ninfo: L{objects.Node}
2637 @param ninfo: the node to check
2638 @param nresult: the remote results for the node
2639 @param nimg: the node image object
2643 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2645 remote_os = nresult.get(constants.NV_OSLIST, None)
2646 test = (not isinstance(remote_os, list) or
2647 not compat.all(isinstance(v, list) and len(v) == 7
2648 for v in remote_os))
2650 _ErrorIf(test, constants.CV_ENODEOS, node,
2651 "node hasn't returned valid OS data")
2660 for (name, os_path, status, diagnose,
2661 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2663 if name not in os_dict:
2666 # parameters is a list of lists instead of list of tuples due to
2667 # JSON lacking a real tuple type, fix it:
2668 parameters = [tuple(v) for v in parameters]
2669 os_dict[name].append((os_path, status, diagnose,
2670 set(variants), set(parameters), set(api_ver)))
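# nimg.oslist maps each OS name to a list of (path, status, diagnose,
# variants, parameters, api_versions) tuples; duplicate entries for one OS
# are flagged later in _VerifyNodeOS.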
2672 nimg.oslist = os_dict
2674 def _VerifyNodeOS(self, ninfo, nimg, base):
2675 """Verifies the node OS list.
2677 @type ninfo: L{objects.Node}
2678 @param ninfo: the node to check
2679 @param nimg: the node image object
2680 @param base: the 'template' node we match against (e.g. from the master)
2684 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2686 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2688 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2689 for os_name, os_data in nimg.oslist.items():
2690 assert os_data, "Empty OS status for OS %s?!" % os_name
2691 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2692 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2693 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2694 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2695 "OS '%s' has multiple entries (first one shadows the rest): %s",
2696 os_name, utils.CommaJoin([v[0] for v in os_data]))
2697 # comparisons with the 'base' image
2698 test = os_name not in base.oslist
2699 _ErrorIf(test, constants.CV_ENODEOS, node,
2700 "Extra OS %s not present on reference node (%s)",
2704 assert base.oslist[os_name], "Base node has empty OS status?"
2705 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2707 # base OS is invalid, skipping
2709 for kind, a, b in [("API version", f_api, b_api),
2710 ("variants list", f_var, b_var),
2711 ("parameters", beautify_params(f_param),
2712 beautify_params(b_param))]:
2713 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2714 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2715 kind, os_name, base.name,
2716 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2718 # check any missing OSes
2719 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2720 _ErrorIf(missing, constants.CV_ENODEOS, node,
2721 "OSes present on reference node %s but missing on this node: %s",
2722 base.name, utils.CommaJoin(missing))
2724 def _VerifyOob(self, ninfo, nresult):
2725 """Verifies out of band functionality of a node.
2727 @type ninfo: L{objects.Node}
2728 @param ninfo: the node to check
2729 @param nresult: the remote results for the node
2733 # We just have to verify the paths on master and/or master candidates
2734 # as the oob helper is invoked on the master
2735 if ((ninfo.master_candidate or ninfo.master_capable) and
2736 constants.NV_OOB_PATHS in nresult):
2737 for path_result in nresult[constants.NV_OOB_PATHS]:
2738 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2740 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2741 """Verifies and updates the node volume data.
2743 This function will update a L{NodeImage}'s internal structures
2744 with data from the remote call.
2746 @type ninfo: L{objects.Node}
2747 @param ninfo: the node to check
2748 @param nresult: the remote results for the node
2749 @param nimg: the node image object
2750 @param vg_name: the configured VG name
2754 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2756 nimg.lvm_fail = True
2757 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2760 elif isinstance(lvdata, basestring):
2761 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2762 utils.SafeEncode(lvdata))
2763 elif not isinstance(lvdata, dict):
2764 _ErrorIf(True, constants.CV_ENODELVM, node,
2765 "rpc call to node failed (lvlist)")
2767 nimg.volumes = lvdata
2768 nimg.lvm_fail = False
2770 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2771 """Verifies and updates the node instance list.
2773 If the listing was successful, then updates this node's instance
2774 list. Otherwise, it marks the RPC call as failed for the instance list.
2777 @type ninfo: L{objects.Node}
2778 @param ninfo: the node to check
2779 @param nresult: the remote results for the node
2780 @param nimg: the node image object
2783 idata = nresult.get(constants.NV_INSTANCELIST, None)
2784 test = not isinstance(idata, list)
2785 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2786 "rpc call to node failed (instancelist): %s",
2787 utils.SafeEncode(str(idata)))
2789 nimg.hyp_fail = True
2791 nimg.instances = idata
2793 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2794 """Verifies and computes a node information map
2796 @type ninfo: L{objects.Node}
2797 @param ninfo: the node to check
2798 @param nresult: the remote results for the node
2799 @param nimg: the node image object
2800 @param vg_name: the configured VG name
2804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2806 # try to read free memory (from the hypervisor)
2807 hv_info = nresult.get(constants.NV_HVINFO, None)
2808 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2809 _ErrorIf(test, constants.CV_ENODEHV, node,
2810 "rpc call to node failed (hvinfo)")
2813 nimg.mfree = int(hv_info["memory_free"])
2814 except (ValueError, TypeError):
2815 _ErrorIf(True, constants.CV_ENODERPC, node,
2816 "node returned invalid nodeinfo, check hypervisor")
2818 # FIXME: devise a free space model for file based instances as well
2819 if vg_name is not None:
2820 test = (constants.NV_VGLIST not in nresult or
2821 vg_name not in nresult[constants.NV_VGLIST])
2822 _ErrorIf(test, constants.CV_ENODELVM, node,
2823 "node didn't return data for the volume group '%s'"
2824 " - it is either missing or broken", vg_name)
2827 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2828 except (ValueError, TypeError):
2829 _ErrorIf(True, constants.CV_ENODERPC, node,
2830 "node returned invalid LVM info, check LVM status")
2832 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2833 """Gets per-disk status information for all instances.
2835 @type nodelist: list of strings
2836 @param nodelist: Node names
2837 @type node_image: dict of (name, L{objects.Node})
2838 @param node_image: Node objects
2839 @type instanceinfo: dict of (name, L{objects.Instance})
2840 @param instanceinfo: Instance objects
2841 @rtype: {instance: {node: [(success, payload)]}}
2842 @return: a dictionary of per-instance dictionaries with nodes as
2843 keys and disk information as values; the disk information is a
2844 list of tuples (success, payload)
2847 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2850 node_disks_devonly = {}
2851 diskless_instances = set()
2852 diskless = constants.DT_DISKLESS
2854 for nname in nodelist:
2855 node_instances = list(itertools.chain(node_image[nname].pinst,
2856 node_image[nname].sinst))
2857 diskless_instances.update(inst for inst in node_instances
2858 if instanceinfo[inst].disk_template == diskless)
2859 disks = [(inst, disk)
2860 for inst in node_instances
2861 for disk in instanceinfo[inst].disks]
2864 # No need to collect data
2867 node_disks[nname] = disks
2869 # Creating copies as SetDiskID below will modify the objects and that can
2870 # lead to incorrect data returned from nodes
2871 devonly = [dev.Copy() for (_, dev) in disks]
2874 self.cfg.SetDiskID(dev, nname)
2876 node_disks_devonly[nname] = devonly
2878 assert len(node_disks) == len(node_disks_devonly)
2880 # Collect data from all nodes with disks
2881 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2884 assert len(result) == len(node_disks)
2888 for (nname, nres) in result.items():
2889 disks = node_disks[nname]
2892 # No data from this node
2893 data = len(disks) * [(False, "node offline")]
2896 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2897 "while getting disk information: %s", msg)
2899 # No data from this node
2900 data = len(disks) * [(False, msg)]
2903 for idx, i in enumerate(nres.payload):
2904 if isinstance(i, (tuple, list)) and len(i) == 2:
2907 logging.warning("Invalid result from node %s, entry %d: %s",
2909 data.append((False, "Invalid result from the remote node"))
2911 for ((inst, _), status) in zip(disks, data):
2912 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2914 # Add empty entries for diskless instances.
2915 for inst in diskless_instances:
2916 assert inst not in instdisk
2919 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2920 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2921 compat.all(isinstance(s, (tuple, list)) and
2922 len(s) == 2 for s in statuses)
2923 for inst, nnames in instdisk.items()
2924 for nname, statuses in nnames.items())
2925 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2930 def _SshNodeSelector(group_uuid, all_nodes):
2931 """Create endless iterators for all potential SSH check hosts.
2934 nodes = [node for node in all_nodes
2935 if (node.group != group_uuid and
2937 keyfunc = operator.attrgetter("group")
2939 return map(itertools.cycle,
2940 [sorted(map(operator.attrgetter("name"), names))
2941 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2945 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2946 """Choose which nodes should talk to which other nodes.
2948 We will make nodes contact all nodes in their group, and one node from every other group.
2951 @warning: This algorithm has a known issue if one node group is much
2952 smaller than others (e.g. just one node). In such a case all other
2953 nodes will talk to the single node.
2956 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2957 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2959 return (online_nodes,
2960 dict((name, sorted([i.next() for i in sel]))
2961 for name in online_nodes))
2963 def BuildHooksEnv(self):
2966 Cluster-Verify hooks run only in the post phase; their failure makes
2967 the output be logged in the verify output and the verification fail.
2971 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2974 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2975 for node in self.my_node_info.values())
2979 def BuildHooksNodes(self):
2980 """Build hooks nodes.
2983 return ([], self.my_node_names)
2985 def Exec(self, feedback_fn):
2986 """Verify integrity of the node group, performing various tests on nodes.
2989 # This method has too many local variables. pylint: disable=R0914
2990 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2992 if not self.my_node_names:
2994 feedback_fn("* Empty node group, skipping verification")
2998 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2999 verbose = self.op.verbose
3000 self._feedback_fn = feedback_fn
3002 vg_name = self.cfg.GetVGName()
3003 drbd_helper = self.cfg.GetDRBDHelper()
3004 cluster = self.cfg.GetClusterInfo()
3005 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3006 hypervisors = cluster.enabled_hypervisors
3007 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3009 i_non_redundant = [] # Non redundant instances
3010 i_non_a_balanced = [] # Non auto-balanced instances
3011 i_offline = 0 # Count of offline instances
3012 n_offline = 0 # Count of offline nodes
3013 n_drained = 0 # Count of nodes being drained
3014 node_vol_should = {}
3016 # FIXME: verify OS list
3019 filemap = _ComputeAncillaryFiles(cluster, False)
3021 # do local checksums
3022 master_node = self.master_node = self.cfg.GetMasterNode()
3023 master_ip = self.cfg.GetMasterIP()
3025 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3028 if self.cfg.GetUseExternalMipScript():
3029 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3031 node_verify_param = {
3032 constants.NV_FILELIST:
3033 utils.UniqueSequence(filename
3034 for files in filemap
3035 for filename in files),
3036 constants.NV_NODELIST:
3037 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3038 self.all_node_info.values()),
3039 constants.NV_HYPERVISOR: hypervisors,
3040 constants.NV_HVPARAMS:
3041 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3042 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3043 for node in node_data_list
3044 if not node.offline],
3045 constants.NV_INSTANCELIST: hypervisors,
3046 constants.NV_VERSION: None,
3047 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3048 constants.NV_NODESETUP: None,
3049 constants.NV_TIME: None,
3050 constants.NV_MASTERIP: (master_node, master_ip),
3051 constants.NV_OSLIST: None,
3052 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3053 constants.NV_USERSCRIPTS: user_scripts,
3056 if vg_name is not None:
3057 node_verify_param[constants.NV_VGLIST] = None
3058 node_verify_param[constants.NV_LVLIST] = vg_name
3059 node_verify_param[constants.NV_PVLIST] = [vg_name]
3060 node_verify_param[constants.NV_DRBDLIST] = None
3063 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3066 # FIXME: this needs to be changed per node-group, not cluster-wide
3068 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3069 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3070 bridges.add(default_nicpp[constants.NIC_LINK])
3071 for instance in self.my_inst_info.values():
3072 for nic in instance.nics:
3073 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3074 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3075 bridges.add(full_nic[constants.NIC_LINK])
3078 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3080 # Build our expected cluster state
3081 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3083 vm_capable=node.vm_capable))
3084 for node in node_data_list)
3088 for node in self.all_node_info.values():
3089 path = _SupportsOob(self.cfg, node)
3090 if path and path not in oob_paths:
3091 oob_paths.append(path)
3094 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
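# Every node referenced by our instances gets a NodeImage below; nodes not
# present in the configuration are marked as 'ghost' entries so later checks
# can treat them accordingly.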
3096 for instance in self.my_inst_names:
3097 inst_config = self.my_inst_info[instance]
3099 for nname in inst_config.all_nodes:
3100 if nname not in node_image:
3101 gnode = self.NodeImage(name=nname)
3102 gnode.ghost = (nname not in self.all_node_info)
3103 node_image[nname] = gnode
3105 inst_config.MapLVsByNode(node_vol_should)
3107 pnode = inst_config.primary_node
3108 node_image[pnode].pinst.append(instance)
3110 for snode in inst_config.secondary_nodes:
3111 nimg = node_image[snode]
3112 nimg.sinst.append(instance)
3113 if pnode not in nimg.sbp:
3114 nimg.sbp[pnode] = []
3115 nimg.sbp[pnode].append(instance)
3117 # At this point, we have the in-memory data structures complete,
3118 # except for the runtime information, which we'll gather next
3120 # Due to the way our RPC system works, exact response times cannot be
3121 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3122 # time before and after executing the request, we can at least have a time window.
3124 nvinfo_starttime = time.time()
3125 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3127 self.cfg.GetClusterName())
3128 nvinfo_endtime = time.time()
3130 if self.extra_lv_nodes and vg_name is not None:
3132 self.rpc.call_node_verify(self.extra_lv_nodes,
3133 {constants.NV_LVLIST: vg_name},
3134 self.cfg.GetClusterName())
3136 extra_lv_nvinfo = {}
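# all_drbd_map maps each node to its {minor: instance} allocations from the
# configuration; _VerifyNodeDrbd compares this against the minors actually
# in use on each node.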
3138 all_drbd_map = self.cfg.ComputeDRBDMap()
3140 feedback_fn("* Gathering disk information (%s nodes)" %
3141 len(self.my_node_names))
3142 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3145 feedback_fn("* Verifying configuration file consistency")
3147 # If not all nodes are being checked, we need to make sure the master node
3148 # and a non-checked vm_capable node are in the list.
3149 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3151 vf_nvinfo = all_nvinfo.copy()
3152 vf_node_info = list(self.my_node_info.values())
3153 additional_nodes = []
3154 if master_node not in self.my_node_info:
3155 additional_nodes.append(master_node)
3156 vf_node_info.append(self.all_node_info[master_node])
3157 # Add the first vm_capable node we find which is not included
3158 for node in absent_nodes:
3159 nodeinfo = self.all_node_info[node]
3160 if nodeinfo.vm_capable and not nodeinfo.offline:
3161 additional_nodes.append(node)
3162 vf_node_info.append(self.all_node_info[node])
3164 key = constants.NV_FILELIST
3165 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3166 {key: node_verify_param[key]},
3167 self.cfg.GetClusterName()))
3169 vf_nvinfo = all_nvinfo
3170 vf_node_info = self.my_node_info.values()
3172 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3174 feedback_fn("* Verifying node status")
3178 for node_i in node_data_list:
3180 nimg = node_image[node]
3184 feedback_fn("* Skipping offline node %s" % (node,))
3188 if node == master_node:
3190 elif node_i.master_candidate:
3191 ntype = "master candidate"
3192 elif node_i.drained:
3198 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3200 msg = all_nvinfo[node].fail_msg
3201 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3204 nimg.rpc_fail = True
3207 nresult = all_nvinfo[node].payload
3209 nimg.call_ok = self._VerifyNode(node_i, nresult)
3210 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3211 self._VerifyNodeNetwork(node_i, nresult)
3212 self._VerifyNodeUserScripts(node_i, nresult)
3213 self._VerifyOob(node_i, nresult)
3216 self._VerifyNodeLVM(node_i, nresult, vg_name)
3217 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3220 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3221 self._UpdateNodeInstances(node_i, nresult, nimg)
3222 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3223 self._UpdateNodeOS(node_i, nresult, nimg)
3225 if not nimg.os_fail:
3226 if refos_img is None:
3228 self._VerifyNodeOS(node_i, nimg, refos_img)
3229 self._VerifyNodeBridges(node_i, nresult, bridges)
3231 # Check whether all running instances are primary for the node. (This
3232 # can no longer be done from _VerifyInstance below, since some of the
3233 # wrong instances could be from other node groups.)
3234 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3236 for inst in non_primary_inst:
3237 # FIXME: investigate best way to handle offline insts
3238 if inst.admin_state == constants.ADMINST_OFFLINE:
3240 feedback_fn("* Skipping offline instance %s" % inst.name)
3243 test = inst in self.all_inst_info
3244 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3245 "instance should not run on node %s", node_i.name)
3246 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3247 "node is running unknown instance %s", inst)
3249 for node, result in extra_lv_nvinfo.items():
3250 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3251 node_image[node], vg_name)
3253 feedback_fn("* Verifying instance status")
3254 for instance in self.my_inst_names:
3256 feedback_fn("* Verifying instance %s" % instance)
3257 inst_config = self.my_inst_info[instance]
3258 self._VerifyInstance(instance, inst_config, node_image,
3260 inst_nodes_offline = []
3262 pnode = inst_config.primary_node
3263 pnode_img = node_image[pnode]
3264 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3265 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3266 " primary node failed", instance)
3268 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3270 constants.CV_EINSTANCEBADNODE, instance,
3271 "instance is marked as running and lives on offline node %s",
3272 inst_config.primary_node)
3274 # If the instance is non-redundant we cannot survive losing its primary
3275 # node, so we are not N+1 compliant. On the other hand we have no disk
3276 # templates with more than one secondary so that situation is not well supported either.
3278 # FIXME: does not support file-backed instances
3279 if not inst_config.secondary_nodes:
3280 i_non_redundant.append(instance)
3282 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3283 constants.CV_EINSTANCELAYOUT,
3284 instance, "instance has multiple secondary nodes: %s",
3285 utils.CommaJoin(inst_config.secondary_nodes),
3286 code=self.ETYPE_WARNING)
3288 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3289 pnode = inst_config.primary_node
3290 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3291 instance_groups = {}
3293 for node in instance_nodes:
3294 instance_groups.setdefault(self.all_node_info[node].group,
3298 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3299 # Sort so that we always list the primary node first.
3300 for group, nodes in sorted(instance_groups.items(),
3301 key=lambda (_, nodes): pnode in nodes,
3304 self._ErrorIf(len(instance_groups) > 1,
3305 constants.CV_EINSTANCESPLITGROUPS,
3306 instance, "instance has primary and secondary nodes in"
3307 " different groups: %s", utils.CommaJoin(pretty_list),
3308 code=self.ETYPE_WARNING)
3310 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3311 i_non_a_balanced.append(instance)
3313 for snode in inst_config.secondary_nodes:
3314 s_img = node_image[snode]
3315 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3316 snode, "instance %s, connection to secondary node failed",
3320 inst_nodes_offline.append(snode)
3322 # warn that the instance lives on offline nodes
3323 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3324 "instance has offline secondary node(s) %s",
3325 utils.CommaJoin(inst_nodes_offline))
3326 # ... or ghost/non-vm_capable nodes
3327 for node in inst_config.all_nodes:
3328 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3329 instance, "instance lives on ghost node %s", node)
3330 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3331 instance, "instance lives on non-vm_capable node %s", node)
3333 feedback_fn("* Verifying orphan volumes")
3334 reserved = utils.FieldSet(*cluster.reserved_lvs)
3336 # We will get spurious "unknown volume" warnings if any node of this group
3337 # is secondary for an instance whose primary is in another group. To avoid
3338 # them, we find these instances and add their volumes to node_vol_should.
3339 for inst in self.all_inst_info.values():
3340 for secondary in inst.secondary_nodes:
3341 if (secondary in self.my_node_info
3342 and inst.name not in self.my_inst_info):
3343 inst.MapLVsByNode(node_vol_should)
3346 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3348 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3349 feedback_fn("* Verifying N+1 Memory redundancy")
3350 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3352 feedback_fn("* Other Notes")
3354 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3355 % len(i_non_redundant))
3357 if i_non_a_balanced:
3358 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3359 % len(i_non_a_balanced))
3362 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3365 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3368 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3372 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3373 """Analyze the post-hooks' result
3375 This method analyses the hook result, handles it, and sends some
3376 nicely-formatted feedback back to the user.
3378 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3379 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3380 @param hooks_results: the results of the multi-node hooks rpc call
3381 @param feedback_fn: function used to send feedback back to the caller
3382 @param lu_result: previous Exec result
3383 @return: the new Exec result, based on the previous result
3387 # We only really run POST phase hooks, only for non-empty groups,
3388 # and are only interested in their results
3389 if not self.my_node_names:
3392 elif phase == constants.HOOKS_PHASE_POST:
3393 # Used to change hooks' output to proper indentation
3394 feedback_fn("* Hooks Results")
3395 assert hooks_results, "invalid result from hooks"
3397 for node_name in hooks_results:
3398 res = hooks_results[node_name]
3400 test = msg and not res.offline
3401 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3402 "Communication failure in hooks execution: %s", msg)
3403 if res.offline or msg:
3404 # No need to investigate payload if node is offline or gave an error.
3407 for script, hkr, output in res.payload:
3408 test = hkr == constants.HKR_FAIL
3409 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3410 "Script %s failed, output:", script)
3412 output = self._HOOKS_INDENT_RE.sub(" ", output)
3413 feedback_fn("%s" % output)
3419 class LUClusterVerifyDisks(NoHooksLU):
3420 """Verifies the cluster disks status.
3425 def ExpandNames(self):
3426 self.share_locks = _ShareAll()
3427 self.needed_locks = {
3428 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3431 def Exec(self, feedback_fn):
3432 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3434 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3435 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3436 for group in group_names])
3439 class LUGroupVerifyDisks(NoHooksLU):
3440 """Verifies the status of all disks in a node group.
3445 def ExpandNames(self):
3446 # Raises errors.OpPrereqError on its own if group can't be found
3447 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3449 self.share_locks = _ShareAll()
3450 self.needed_locks = {
3451 locking.LEVEL_INSTANCE: [],
3452 locking.LEVEL_NODEGROUP: [],
3453 locking.LEVEL_NODE: [],
3456 def DeclareLocks(self, level):
3457 if level == locking.LEVEL_INSTANCE:
3458 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3460 # Lock instances optimistically, needs verification once node and group
3461 # locks have been acquired
3462 self.needed_locks[locking.LEVEL_INSTANCE] = \
3463 self.cfg.GetNodeGroupInstances(self.group_uuid)
3465 elif level == locking.LEVEL_NODEGROUP:
3466 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3468 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3469 set([self.group_uuid] +
3470 # Lock all groups used by instances optimistically; this requires
3471 # going via the node before it's locked, requiring verification later on
3474 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3475 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3477 elif level == locking.LEVEL_NODE:
3478 # This will only lock the nodes in the group to be verified which contain actual instances
3480 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3481 self._LockInstancesNodes()
3483 # Lock all nodes in group to be verified
3484 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3485 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3486 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3488 def CheckPrereq(self):
3489 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3490 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3491 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3493 assert self.group_uuid in owned_groups
3495 # Check if locked instances are still correct
3496 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3498 # Get instance information
3499 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3501 # Check if node groups for locked instances are still correct
3502 for (instance_name, inst) in self.instances.items():
3503 assert owned_nodes.issuperset(inst.all_nodes), \
3504 "Instance %s's nodes changed while we kept the lock" % instance_name
3506 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3509 assert self.group_uuid in inst_groups, \
3510 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3512 def Exec(self, feedback_fn):
3513 """Verify integrity of cluster disks.
3515 @rtype: tuple of three items
3516 @return: a tuple of (dict of node-to-node_error, list of instances
3517 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3522 res_instances = set()
3525 nv_dict = _MapInstanceDisksToNodes([inst
3526 for inst in self.instances.values()
3527 if inst.admin_state == constants.ADMINST_UP])
3530 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3531 set(self.cfg.GetVmCapableNodeList()))
3533 node_lvs = self.rpc.call_lv_list(nodes, [])
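# For each responding node the payload maps an LV name to a tuple whose
# third field says whether the LV is active; inactive LVs that belong to a
# running instance are collected as needing activate-disks.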
3535 for (node, node_res) in node_lvs.items():
3536 if node_res.offline:
3539 msg = node_res.fail_msg
3541 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3542 res_nodes[node] = msg
3545 for lv_name, (_, _, lv_online) in node_res.payload.items():
3546 inst = nv_dict.pop((node, lv_name), None)
3547 if not (lv_online or inst is None):
3548 res_instances.add(inst)
3550 # any leftover items in nv_dict are missing LVs, let's arrange the data
3552 for key, inst in nv_dict.iteritems():
3553 res_missing.setdefault(inst, []).append(list(key))
3555 return (res_nodes, list(res_instances), res_missing)
3558 class LUClusterRepairDiskSizes(NoHooksLU):
3559 """Verifies the cluster disk sizes.
3564 def ExpandNames(self):
3565 if self.op.instances:
3566 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3567 self.needed_locks = {
3568 locking.LEVEL_NODE_RES: [],
3569 locking.LEVEL_INSTANCE: self.wanted_names,
3571 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3573 self.wanted_names = None
3574 self.needed_locks = {
3575 locking.LEVEL_NODE_RES: locking.ALL_SET,
3576 locking.LEVEL_INSTANCE: locking.ALL_SET,
3578 self.share_locks = {
3579 locking.LEVEL_NODE_RES: 1,
3580 locking.LEVEL_INSTANCE: 0,
3583 def DeclareLocks(self, level):
3584 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3585 self._LockInstancesNodes(primary_only=True, level=level)
3587 def CheckPrereq(self):
3588 """Check prerequisites.
3590 This only checks the optional instance list against the existing names.
3593 if self.wanted_names is None:
3594 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3596 self.wanted_instances = \
3597 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3599 def _EnsureChildSizes(self, disk):
3600 """Ensure children of the disk have the needed disk size.
3602 This is valid mainly for DRBD8 and fixes an issue where the
3603 children have smaller disk size.
3605 @param disk: an L{ganeti.objects.Disk} object
3608 if disk.dev_type == constants.LD_DRBD8:
3609 assert disk.children, "Empty children for DRBD8?"
3610 fchild = disk.children[0]
3611 mismatch = fchild.size < disk.size
3613 self.LogInfo("Child disk has size %d, parent %d, fixing",
3614 fchild.size, disk.size)
3615 fchild.size = disk.size
3617 # and we recurse on this child only, not on the metadev
3618 return self._EnsureChildSizes(fchild) or mismatch
3622 def Exec(self, feedback_fn):
3623 """Verify the size of cluster disks.
3626 # TODO: check child disks too
3627 # TODO: check differences in size between primary/secondary nodes
3629 for instance in self.wanted_instances:
3630 pnode = instance.primary_node
3631 if pnode not in per_node_disks:
3632 per_node_disks[pnode] = []
3633 for idx, disk in enumerate(instance.disks):
3634 per_node_disks[pnode].append((instance, idx, disk))
3636 assert not (frozenset(per_node_disks.keys()) -
3637 self.owned_locks(locking.LEVEL_NODE_RES)), \
3638 "Not owning correct locks"
3639 assert not self.owned_locks(locking.LEVEL_NODE)
3642 for node, dskl in per_node_disks.items():
3643 newl = [v[2].Copy() for v in dskl]
3645 self.cfg.SetDiskID(dsk, node)
3646 result = self.rpc.call_blockdev_getsize(node, newl)
3648 self.LogWarning("Failure in blockdev_getsize call to node"
3649 " %s, ignoring", node)
3651 if len(result.payload) != len(dskl):
3652 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3653 " result.payload=%s", node, len(dskl), result.payload)
3654 self.LogWarning("Invalid result from node %s, ignoring node results",
3657 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3659 self.LogWarning("Disk %d of instance %s did not return size"
3660 " information, ignoring", idx, instance.name)
3662 if not isinstance(size, (int, long)):
3663 self.LogWarning("Disk %d of instance %s did not return valid"
3664 " size information, ignoring", idx, instance.name)
3667 if size != disk.size:
3668 self.LogInfo("Disk %d of instance %s has mismatched size,"
3669 " correcting: recorded %d, actual %d", idx,
3670 instance.name, disk.size, size)
3672 self.cfg.Update(instance, feedback_fn)
3673 changed.append((instance.name, idx, size))
3674 if self._EnsureChildSizes(disk):
3675 self.cfg.Update(instance, feedback_fn)
3676 changed.append((instance.name, idx, disk.size))
3680 class LUClusterRename(LogicalUnit):
3681 """Rename the cluster.
3684 HPATH = "cluster-rename"
3685 HTYPE = constants.HTYPE_CLUSTER
3687 def BuildHooksEnv(self):
3692 "OP_TARGET": self.cfg.GetClusterName(),
3693 "NEW_NAME": self.op.name,
3696 def BuildHooksNodes(self):
3697 """Build hooks nodes.
3700 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3702 def CheckPrereq(self):
3703 """Verify that the passed name is a valid one.
3706 hostname = netutils.GetHostname(name=self.op.name,
3707 family=self.cfg.GetPrimaryIPFamily())
3709 new_name = hostname.name
3710 self.ip = new_ip = hostname.ip
3711 old_name = self.cfg.GetClusterName()
3712 old_ip = self.cfg.GetMasterIP()
3713 if new_name == old_name and new_ip == old_ip:
3714 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3715 " cluster has changed",
3717 if new_ip != old_ip:
3718 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3719 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3720 " reachable on the network" %
3721 new_ip, errors.ECODE_NOTUNIQUE)
3723 self.op.name = new_name
3725 def Exec(self, feedback_fn):
3726 """Rename the cluster.
3729 clustername = self.op.name
3732 # shutdown the master IP
3733 master_params = self.cfg.GetMasterNetworkParameters()
3734 ems = self.cfg.GetUseExternalMipScript()
3735 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3737 result.Raise("Could not disable the master role")
3740 cluster = self.cfg.GetClusterInfo()
3741 cluster.cluster_name = clustername
3742 cluster.master_ip = new_ip
3743 self.cfg.Update(cluster, feedback_fn)
3745 # update the known hosts file
3746 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3747 node_list = self.cfg.GetOnlineNodeList()
3749 node_list.remove(master_params.name)
3752 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3754 master_params.ip = new_ip
3755 result = self.rpc.call_node_activate_master_ip(master_params.name,
3757 msg = result.fail_msg
3759 self.LogWarning("Could not re-enable the master role on"
3760 " the master, please restart manually: %s", msg)
3765 def _ValidateNetmask(cfg, netmask):
3766 """Checks if a netmask is valid.
3768 @type cfg: L{config.ConfigWriter}
3769 @param cfg: The cluster configuration
3771 @param netmask: the netmask to be verified
3772 @raise errors.OpPrereqError: if the validation fails
3775 ip_family = cfg.GetPrimaryIPFamily()
3777 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3778 except errors.ProgrammerError:
3779 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3781 if not ipcls.ValidateNetmask(netmask):
3782 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3783 (netmask,), errors.ECODE_INVAL)
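# A minimal sketch of the rule _ValidateNetmask enforces, expressed without
# the netutils helpers: a CIDR prefix length is only acceptable if it fits
# the address family in use.  The bit widths and the helper name below are
# assumptions for illustration, not Ganeti API.
def _ExampleNetmaskValid(ip_version, netmask):
  """Return True if 'netmask' is a plausible prefix length for the family."""
  max_bits = {4: 32, 6: 128}.get(ip_version)
  if max_bits is None:
    return False
  return isinstance(netmask, int) and 0 < netmask <= max_bits

assert _ExampleNetmaskValid(4, 24)
assert not _ExampleNetmaskValid(4, 64)   # too wide for IPv4
assert _ExampleNetmaskValid(6, 64)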
3786 class LUClusterSetParams(LogicalUnit):
3787 """Change the parameters of the cluster.
3790 HPATH = "cluster-modify"
3791 HTYPE = constants.HTYPE_CLUSTER
3794 def CheckArguments(self):
3798 if self.op.uid_pool:
3799 uidpool.CheckUidPool(self.op.uid_pool)
3801 if self.op.add_uids:
3802 uidpool.CheckUidPool(self.op.add_uids)
3804 if self.op.remove_uids:
3805 uidpool.CheckUidPool(self.op.remove_uids)
3807 if self.op.master_netmask is not None:
3808 _ValidateNetmask(self.cfg, self.op.master_netmask)
3810 if self.op.diskparams:
3811 for dt_params in self.op.diskparams.values():
3812 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3814 def ExpandNames(self):
3815 # FIXME: in the future maybe other cluster params won't require checking on
3816 # all nodes to be modified.
3817 self.needed_locks = {
3818 locking.LEVEL_NODE: locking.ALL_SET,
3819 locking.LEVEL_INSTANCE: locking.ALL_SET,
3820 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3822 self.share_locks = {
3823 locking.LEVEL_NODE: 1,
3824 locking.LEVEL_INSTANCE: 1,
3825 locking.LEVEL_NODEGROUP: 1,
3828 def BuildHooksEnv(self):
3833 "OP_TARGET": self.cfg.GetClusterName(),
3834 "NEW_VG_NAME": self.op.vg_name,
3837 def BuildHooksNodes(self):
3838 """Build hooks nodes.
3841 mn = self.cfg.GetMasterNode()
3844 def CheckPrereq(self):
3845 """Check prerequisites.
3847 This checks whether the given params don't conflict and
3848 if the given volume group is valid.
3851 if self.op.vg_name is not None and not self.op.vg_name:
3852 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3853 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3854 " instances exist", errors.ECODE_INVAL)
3856 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3857 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3858 raise errors.OpPrereqError("Cannot disable drbd helper while"
3859 " drbd-based instances exist",
3862 node_list = self.owned_locks(locking.LEVEL_NODE)
3864 # if vg_name is not None, check the given volume group on all nodes
3866 vglist = self.rpc.call_vg_list(node_list)
3867 for node in node_list:
3868 msg = vglist[node].fail_msg
3870 # ignoring down node
3871 self.LogWarning("Error while gathering data on node %s"
3872 " (ignoring node): %s", node, msg)
3874 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3876 constants.MIN_VG_SIZE)
3878 raise errors.OpPrereqError("Error on node '%s': %s" %
3879 (node, vgstatus), errors.ECODE_ENVIRON)
3881 if self.op.drbd_helper:
3882 # checks given drbd helper on all nodes
3883 helpers = self.rpc.call_drbd_helper(node_list)
3884 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3886 self.LogInfo("Not checking drbd helper on offline node %s", node)
3888 msg = helpers[node].fail_msg
3890 raise errors.OpPrereqError("Error checking drbd helper on node"
3891 " '%s': %s" % (node, msg),
3892 errors.ECODE_ENVIRON)
3893 node_helper = helpers[node].payload
3894 if node_helper != self.op.drbd_helper:
3895 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3896 (node, node_helper), errors.ECODE_ENVIRON)
3898 self.cluster = cluster = self.cfg.GetClusterInfo()
3899 # validate params changes
3900 if self.op.beparams:
3901 objects.UpgradeBeParams(self.op.beparams)
3902 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3903 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3905 if self.op.ndparams:
3906 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3907 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3909 # TODO: we need a more general way to handle resetting
3910 # cluster-level parameters to default values
3911 if self.new_ndparams["oob_program"] == "":
3912 self.new_ndparams["oob_program"] = \
3913 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3915 if self.op.hv_state:
3916 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3917 self.cluster.hv_state_static)
3918 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3919 for hv, values in new_hv_state.items())
3921 if self.op.disk_state:
3922 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3923 self.cluster.disk_state_static)
3924 self.new_disk_state = \
3925 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3926 for name, values in svalues.items()))
3927 for storage, svalues in new_disk_state.items())
3930 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3933 all_instances = self.cfg.GetAllInstancesInfo().values()
3935 for group in self.cfg.GetAllNodeGroupsInfo().values():
3936 instances = frozenset([inst for inst in all_instances
3937 if compat.any(node in group.members
3938 for node in inst.all_nodes)])
3939 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3940 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3942 new_ipolicy, instances)
3944 violations.update(new)
3947 self.LogWarning("After the ipolicy change the following instances"
3948 " violate them: %s",
3949 utils.CommaJoin(violations))
3951 if self.op.nicparams:
3952 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3953 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3954 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3957 # check all instances for consistency
3958 for instance in self.cfg.GetAllInstancesInfo().values():
3959 for nic_idx, nic in enumerate(instance.nics):
3960 params_copy = copy.deepcopy(nic.nicparams)
3961 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3963 # check parameter syntax
3965 objects.NIC.CheckParameterSyntax(params_filled)
3966 except errors.ConfigurationError, err:
3967 nic_errors.append("Instance %s, nic/%d: %s" %
3968 (instance.name, nic_idx, err))
3970 # if we're moving instances to routed, check that they have an ip
3971 target_mode = params_filled[constants.NIC_MODE]
3972 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3973 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3974 " address" % (instance.name, nic_idx))
3976 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3977 "\n".join(nic_errors))
3979 # hypervisor list/parameters
3980 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3981 if self.op.hvparams:
3982 for hv_name, hv_dict in self.op.hvparams.items():
3983 if hv_name not in self.new_hvparams:
3984 self.new_hvparams[hv_name] = hv_dict
3986 self.new_hvparams[hv_name].update(hv_dict)
3988 # disk template parameters
3989 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3990 if self.op.diskparams:
3991 for dt_name, dt_params in self.op.diskparams.items():
3992 if dt_name not in self.new_diskparams:
3993 self.new_diskparams[dt_name] = dt_params
3995 self.new_diskparams[dt_name].update(dt_params)
3997 # os hypervisor parameters
3998 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4000 for os_name, hvs in self.op.os_hvp.items():
4001 if os_name not in self.new_os_hvp:
4002 self.new_os_hvp[os_name] = hvs
4004 for hv_name, hv_dict in hvs.items():
4005 if hv_name not in self.new_os_hvp[os_name]:
4006 self.new_os_hvp[os_name][hv_name] = hv_dict
4008 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4011 self.new_osp = objects.FillDict(cluster.osparams, {})
4012 if self.op.osparams:
4013 for os_name, osp in self.op.osparams.items():
4014 if os_name not in self.new_osp:
4015 self.new_osp[os_name] = {}
4017 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4020 if not self.new_osp[os_name]:
4021 # we removed all parameters
4022 del self.new_osp[os_name]
4024 # check the parameter validity (remote check)
4025 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4026 os_name, self.new_osp[os_name])
4028 # changes to the hypervisor list
4029 if self.op.enabled_hypervisors is not None:
4030 self.hv_list = self.op.enabled_hypervisors
4031 for hv in self.hv_list:
4032 # if the hypervisor doesn't already exist in the cluster
4033 # hvparams, we initialize it to empty, and then (in both
4034 # cases) we make sure to fill the defaults, as we might not
4035 # have a complete defaults list if the hypervisor wasn't
4036 # enabled before
4037 if hv not in new_hvp:
4038 new_hvp[hv] = {}
4039 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4040 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4042 self.hv_list = cluster.enabled_hypervisors
4044 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4045 # either the enabled list has changed, or the parameters have, validate
4046 for hv_name, hv_params in self.new_hvparams.items():
4047 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4048 (self.op.enabled_hypervisors and
4049 hv_name in self.op.enabled_hypervisors)):
4050 # either this is a new hypervisor, or its parameters have changed
4051 hv_class = hypervisor.GetHypervisor(hv_name)
4052 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4053 hv_class.CheckParameterSyntax(hv_params)
4054 _CheckHVParams(self, node_list, hv_name, hv_params)
4057 # no need to check any newly-enabled hypervisors, since the
4058 # defaults have already been checked in the above code-block
4059 for os_name, os_hvp in self.new_os_hvp.items():
4060 for hv_name, hv_params in os_hvp.items():
4061 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4062 # we need to fill in the new os_hvp on top of the actual hv_p
4063 cluster_defaults = self.new_hvparams.get(hv_name, {})
4064 new_osp = objects.FillDict(cluster_defaults, hv_params)
4065 hv_class = hypervisor.GetHypervisor(hv_name)
4066 hv_class.CheckParameterSyntax(new_osp)
4067 _CheckHVParams(self, node_list, hv_name, new_osp)
4069 if self.op.default_iallocator:
4070 alloc_script = utils.FindFile(self.op.default_iallocator,
4071 constants.IALLOCATOR_SEARCH_PATH,
4073 if alloc_script is None:
4074 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4075 " specified" % self.op.default_iallocator,
4078 def Exec(self, feedback_fn):
4079 """Change the parameters of the cluster.
4082 if self.op.vg_name is not None:
4083 new_volume = self.op.vg_name
4086 if new_volume != self.cfg.GetVGName():
4087 self.cfg.SetVGName(new_volume)
4089 feedback_fn("Cluster LVM configuration already in desired"
4090 " state, not changing")
4091 if self.op.drbd_helper is not None:
4092 new_helper = self.op.drbd_helper
4095 if new_helper != self.cfg.GetDRBDHelper():
4096 self.cfg.SetDRBDHelper(new_helper)
4098 feedback_fn("Cluster DRBD helper already in desired state,"
4100 if self.op.hvparams:
4101 self.cluster.hvparams = self.new_hvparams
4103 self.cluster.os_hvp = self.new_os_hvp
4104 if self.op.enabled_hypervisors is not None:
4105 self.cluster.hvparams = self.new_hvparams
4106 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4107 if self.op.beparams:
4108 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4109 if self.op.nicparams:
4110 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4112 self.cluster.ipolicy = self.new_ipolicy
4113 if self.op.osparams:
4114 self.cluster.osparams = self.new_osp
4115 if self.op.ndparams:
4116 self.cluster.ndparams = self.new_ndparams
4117 if self.op.diskparams:
4118 self.cluster.diskparams = self.new_diskparams
4119 if self.op.hv_state:
4120 self.cluster.hv_state_static = self.new_hv_state
4121 if self.op.disk_state:
4122 self.cluster.disk_state_static = self.new_disk_state
4124 if self.op.candidate_pool_size is not None:
4125 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4126 # we need to update the pool size here, otherwise the save will fail
4127 _AdjustCandidatePool(self, [])
4129 if self.op.maintain_node_health is not None:
4130 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4131 feedback_fn("Note: CONFD was disabled at build time, node health"
4132 " maintenance is not useful (still enabling it)")
4133 self.cluster.maintain_node_health = self.op.maintain_node_health
4135 if self.op.prealloc_wipe_disks is not None:
4136 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4138 if self.op.add_uids is not None:
4139 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4141 if self.op.remove_uids is not None:
4142 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4144 if self.op.uid_pool is not None:
4145 self.cluster.uid_pool = self.op.uid_pool
4147 if self.op.default_iallocator is not None:
4148 self.cluster.default_iallocator = self.op.default_iallocator
4150 if self.op.reserved_lvs is not None:
4151 self.cluster.reserved_lvs = self.op.reserved_lvs
4153 if self.op.use_external_mip_script is not None:
4154 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4156 def helper_os(aname, mods, desc):
4158 lst = getattr(self.cluster, aname)
4159 for key, val in mods:
4160 if key == constants.DDM_ADD:
4162 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4165 elif key == constants.DDM_REMOVE:
4169 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4171 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4173 if self.op.hidden_os:
4174 helper_os("hidden_os", self.op.hidden_os, "hidden")
4176 if self.op.blacklisted_os:
4177 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4179 if self.op.master_netdev:
4180 master_params = self.cfg.GetMasterNetworkParameters()
4181 ems = self.cfg.GetUseExternalMipScript()
4182 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4183 self.cluster.master_netdev)
4184 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4186 result.Raise("Could not disable the master ip")
4187 feedback_fn("Changing master_netdev from %s to %s" %
4188 (master_params.netdev, self.op.master_netdev))
4189 self.cluster.master_netdev = self.op.master_netdev
4191 if self.op.master_netmask:
4192 master_params = self.cfg.GetMasterNetworkParameters()
4193 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4194 result = self.rpc.call_node_change_master_netmask(master_params.name,
4195 master_params.netmask,
4196 self.op.master_netmask,
4198 master_params.netdev)
4200 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4203 self.cluster.master_netmask = self.op.master_netmask
4205 self.cfg.Update(self.cluster, feedback_fn)
4207 if self.op.master_netdev:
4208 master_params = self.cfg.GetMasterNetworkParameters()
4209 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4210 self.op.master_netdev)
4211 ems = self.cfg.GetUseExternalMipScript()
4212 result = self.rpc.call_node_activate_master_ip(master_params.name,
4215 self.LogWarning("Could not re-enable the master ip on"
4216 " the master, please restart manually: %s",
4220 def _UploadHelper(lu, nodes, fname):
4221 """Helper for uploading a file and showing warnings.
4224 if os.path.exists(fname):
4225 result = lu.rpc.call_upload_file(nodes, fname)
4226 for to_node, to_result in result.items():
4227 msg = to_result.fail_msg
4229 msg = ("Copy of file %s to node %s failed: %s" %
4230 (fname, to_node, msg))
4231 lu.proc.LogWarning(msg)
4234 def _ComputeAncillaryFiles(cluster, redist):
4235 """Compute files external to Ganeti which need to be consistent.
4237 @type redist: boolean
4238 @param redist: Whether to include files which need to be redistributed
4241 # Compute files for all nodes
4243 constants.SSH_KNOWN_HOSTS_FILE,
4244 constants.CONFD_HMAC_KEY,
4245 constants.CLUSTER_DOMAIN_SECRET_FILE,
4246 constants.SPICE_CERT_FILE,
4247 constants.SPICE_CACERT_FILE,
4248 constants.RAPI_USERS_FILE,
4252 files_all.update(constants.ALL_CERT_FILES)
4253 files_all.update(ssconf.SimpleStore().GetFileList())
4255 # we need to ship at least the RAPI certificate
4256 files_all.add(constants.RAPI_CERT_FILE)
4258 if cluster.modify_etc_hosts:
4259 files_all.add(constants.ETC_HOSTS)
4261 # Files which are optional; these must:
4262 # - be present in one other category as well
4263 # - either exist or not exist on all nodes of that category (mc, vm all)
4265 constants.RAPI_USERS_FILE,
4268 # Files which should only be on master candidates
4272 files_mc.add(constants.CLUSTER_CONF_FILE)
4274 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4275 # replication
4276 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4278 # Files which should only be on VM-capable nodes
4279 files_vm = set(filename
4280 for hv_name in cluster.enabled_hypervisors
4281 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4283 files_opt |= set(filename
4284 for hv_name in cluster.enabled_hypervisors
4285 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4287 # Filenames in each category must be unique
4288 all_files_set = files_all | files_mc | files_vm
4289 assert (len(all_files_set) ==
4290 sum(map(len, [files_all, files_mc, files_vm]))), \
4291 "Found file listed in more than one file list"
4293 # Optional files must be present in one other category
4294 assert all_files_set.issuperset(files_opt), \
4295 "Optional file not in a different required list"
4297 return (files_all, files_opt, files_mc, files_vm)
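# A standalone sketch of the two invariants asserted by _ComputeAncillaryFiles
# above: a file name may appear in only one of the per-category sets, and
# every optional file must also be listed in one of those sets.  The helper
# and the sample file names are made up for illustration.
def _ExampleCheckFileCategories(files_all, files_mc, files_vm, files_opt):
  """Return True if the category sets satisfy both invariants."""
  categories = [files_all, files_mc, files_vm]
  union = set()
  for cat in categories:
    union.update(cat)
  # no file may be listed in more than one category
  unique = len(union) == sum(len(cat) for cat in categories)
  # optional files must be a subset of the categorized files
  covered = union.issuperset(files_opt)
  return unique and covered

assert _ExampleCheckFileCategories(set(["/etc/ssh/ssh_known_hosts"]),
                                   set(["/var/lib/config.data"]),
                                   set(["/etc/xen/example.cfg"]),
                                   set(["/etc/ssh/ssh_known_hosts"]))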
4300 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4301 """Distribute additional files which are part of the cluster configuration.
4303 ConfigWriter takes care of distributing the config and ssconf files, but
4304 there are more files which should be distributed to all nodes. This function
4305 makes sure those are copied.
4307 @param lu: calling logical unit
4308 @param additional_nodes: list of nodes not in the config to distribute to
4309 @type additional_vm: boolean
4310 @param additional_vm: whether the additional nodes are vm-capable or not
4313 # Gather target nodes
4314 cluster = lu.cfg.GetClusterInfo()
4315 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4317 online_nodes = lu.cfg.GetOnlineNodeList()
4318 vm_nodes = lu.cfg.GetVmCapableNodeList()
4320 if additional_nodes is not None:
4321 online_nodes.extend(additional_nodes)
4323 vm_nodes.extend(additional_nodes)
4325 # Never distribute to master node
4326 for nodelist in [online_nodes, vm_nodes]:
4327 if master_info.name in nodelist:
4328 nodelist.remove(master_info.name)
4331 (files_all, _, files_mc, files_vm) = \
4332 _ComputeAncillaryFiles(cluster, True)
4334 # Never re-distribute configuration file from here
4335 assert not (constants.CLUSTER_CONF_FILE in files_all or
4336 constants.CLUSTER_CONF_FILE in files_vm)
4337 assert not files_mc, "Master candidates not handled in this function"
4340 (online_nodes, files_all),
4341 (vm_nodes, files_vm),
4345 for (node_list, files) in filemap:
4346 for fname in files:
4347 _UploadHelper(lu, node_list, fname)
4350 class LUClusterRedistConf(NoHooksLU):
4351 """Force the redistribution of cluster configuration.
4353 This is a very simple LU.
4358 def ExpandNames(self):
4359 self.needed_locks = {
4360 locking.LEVEL_NODE: locking.ALL_SET,
4362 self.share_locks[locking.LEVEL_NODE] = 1
4364 def Exec(self, feedback_fn):
4365 """Redistribute the configuration.
4368 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4369 _RedistributeAncillaryFiles(self)
4372 class LUClusterActivateMasterIp(NoHooksLU):
4373 """Activate the master IP on the master node.
4376 def Exec(self, feedback_fn):
4377 """Activate the master IP.
4380 master_params = self.cfg.GetMasterNetworkParameters()
4381 ems = self.cfg.GetUseExternalMipScript()
4382 result = self.rpc.call_node_activate_master_ip(master_params.name,
4384 result.Raise("Could not activate the master IP")
4387 class LUClusterDeactivateMasterIp(NoHooksLU):
4388 """Deactivate the master IP on the master node.
4391 def Exec(self, feedback_fn):
4392 """Deactivate the master IP.
4395 master_params = self.cfg.GetMasterNetworkParameters()
4396 ems = self.cfg.GetUseExternalMipScript()
4397 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4399 result.Raise("Could not deactivate the master IP")
4402 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4403 """Sleep and poll for an instance's disk to sync.
4406 if not instance.disks or disks is not None and not disks:
4409 disks = _ExpandCheckDisks(instance, disks)
4412 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4414 node = instance.primary_node
4417 lu.cfg.SetDiskID(dev, node)
4419 # TODO: Convert to utils.Retry
4422 degr_retries = 10 # in seconds, as we sleep 1 second each time
4426 cumul_degraded = False
4427 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4428 msg = rstats.fail_msg
4430 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4433 raise errors.RemoteError("Can't contact node %s for mirror data,"
4434 " aborting." % node)
4437 rstats = rstats.payload
4439 for i, mstat in enumerate(rstats):
4441 lu.LogWarning("Can't compute data for node %s/%s",
4442 node, disks[i].iv_name)
4445 cumul_degraded = (cumul_degraded or
4446 (mstat.is_degraded and mstat.sync_percent is None))
4447 if mstat.sync_percent is not None:
4449 if mstat.estimated_time is not None:
4450 rem_time = ("%s remaining (estimated)" %
4451 utils.FormatSeconds(mstat.estimated_time))
4452 max_time = mstat.estimated_time
4454 rem_time = "no time estimate"
4455 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4456 (disks[i].iv_name, mstat.sync_percent, rem_time))
4458 # if we're done but degraded, let's do a few small retries, to
4459 # make sure we see a stable and not transient situation; therefore
4460 # we force restart of the loop
4461 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4462 logging.info("Degraded disks found, %d retries left", degr_retries)
4470 time.sleep(min(60, max_time))
4473 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4474 return not cumul_degraded
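# A simplified sketch of the polling pattern used by _WaitForSync above:
# once the sync looks finished but a device is still flagged as degraded, a
# few short extra polls are made to confirm the state is not transient.
# poll_fn, the retry count and the fixed interval are assumptions for the
# example; the real code also derives the sleep time from the estimated
# remaining sync time.  time.sleep relies on the module-level time import
# already used above.
def _ExampleWaitStable(poll_fn, degraded_retries=10, interval=1):
  """Poll until poll_fn() reports done, returning True if not degraded.

  @param poll_fn: callable returning a (done, degraded) tuple

  """
  while True:
    (done, degraded) = poll_fn()
    if done and degraded and degraded_retries > 0:
      # finished but still degraded: retry before trusting the result
      degraded_retries -= 1
      time.sleep(interval)
      continue
    if done:
      return not degraded
    time.sleep(interval)

# e.g. a device that is already in sync and healthy:
assert _ExampleWaitStable(lambda: (True, False))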
4477 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4478 """Check that mirrors are not degraded.
4480 The ldisk parameter, if True, will change the test from the
4481 is_degraded attribute (which represents overall non-ok status for
4482 the device(s)) to the ldisk (representing the local storage status).
4485 lu.cfg.SetDiskID(dev, node)
4489 if on_primary or dev.AssembleOnSecondary():
4490 rstats = lu.rpc.call_blockdev_find(node, dev)
4491 msg = rstats.fail_msg
4493 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4495 elif not rstats.payload:
4496 lu.LogWarning("Can't find disk on node %s", node)
4500 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4502 result = result and not rstats.payload.is_degraded
4505 for child in dev.children:
4506 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4511 class LUOobCommand(NoHooksLU):
4512 """Logical unit for OOB handling.
4516 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4518 def ExpandNames(self):
4519 """Gather locks we need.
4522 if self.op.node_names:
4523 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4524 lock_names = self.op.node_names
4526 lock_names = locking.ALL_SET
4528 self.needed_locks = {
4529 locking.LEVEL_NODE: lock_names,
4532 def CheckPrereq(self):
4533 """Check prerequisites.
4536 - the node exists in the configuration
4539 Any errors are signaled by raising errors.OpPrereqError.
4543 self.master_node = self.cfg.GetMasterNode()
4545 assert self.op.power_delay >= 0.0
4547 if self.op.node_names:
4548 if (self.op.command in self._SKIP_MASTER and
4549 self.master_node in self.op.node_names):
4550 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4551 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4553 if master_oob_handler:
4554 additional_text = ("run '%s %s %s' if you want to operate on the"
4555 " master regardless") % (master_oob_handler,
4559 additional_text = "it does not support out-of-band operations"
4561 raise errors.OpPrereqError(("Operating on the master node %s is not"
4562 " allowed for %s; %s") %
4563 (self.master_node, self.op.command,
4564 additional_text), errors.ECODE_INVAL)
4566 self.op.node_names = self.cfg.GetNodeList()
4567 if self.op.command in self._SKIP_MASTER:
4568 self.op.node_names.remove(self.master_node)
4570 if self.op.command in self._SKIP_MASTER:
4571 assert self.master_node not in self.op.node_names
4573 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4575 raise errors.OpPrereqError("Node %s not found" % node_name,
4578 self.nodes.append(node)
4580 if (not self.op.ignore_status and
4581 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4582 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4583 " not marked offline") % node_name,
4586 def Exec(self, feedback_fn):
4587 """Execute OOB and return result if we expect any.
4590 master_node = self.master_node
4593 for idx, node in enumerate(utils.NiceSort(self.nodes,
4594 key=lambda node: node.name)):
4595 node_entry = [(constants.RS_NORMAL, node.name)]
4596 ret.append(node_entry)
4598 oob_program = _SupportsOob(self.cfg, node)
4601 node_entry.append((constants.RS_UNAVAIL, None))
4604 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4605 self.op.command, oob_program, node.name)
4606 result = self.rpc.call_run_oob(master_node, oob_program,
4607 self.op.command, node.name,
4611 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4612 node.name, result.fail_msg)
4613 node_entry.append((constants.RS_NODATA, None))
4616 self._CheckPayload(result)
4617 except errors.OpExecError, err:
4618 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4620 node_entry.append((constants.RS_NODATA, None))
4622 if self.op.command == constants.OOB_HEALTH:
4623 # For health we should log important events
4624 for item, status in result.payload:
4625 if status in [constants.OOB_STATUS_WARNING,
4626 constants.OOB_STATUS_CRITICAL]:
4627 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4628 item, node.name, status)
4630 if self.op.command == constants.OOB_POWER_ON:
4632 elif self.op.command == constants.OOB_POWER_OFF:
4633 node.powered = False
4634 elif self.op.command == constants.OOB_POWER_STATUS:
4635 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4636 if powered != node.powered:
4637 logging.warning(("Recorded power state (%s) of node '%s' does not"
4638 " match actual power state (%s)"), node.powered,
4641 # For configuration changing commands we should update the node
4642 if self.op.command in (constants.OOB_POWER_ON,
4643 constants.OOB_POWER_OFF):
4644 self.cfg.Update(node, feedback_fn)
4646 node_entry.append((constants.RS_NORMAL, result.payload))
4648 if (self.op.command == constants.OOB_POWER_ON and
4649 idx < len(self.nodes) - 1):
4650 time.sleep(self.op.power_delay)
4654 def _CheckPayload(self, result):
4655 """Checks if the payload is valid.
4657 @param result: RPC result
4658 @raises errors.OpExecError: If payload is not valid
4662 if self.op.command == constants.OOB_HEALTH:
4663 if not isinstance(result.payload, list):
4664 errs.append("command 'health' is expected to return a list but got %s" %
4665 type(result.payload))
4667 for item, status in result.payload:
4668 if status not in constants.OOB_STATUSES:
4669 errs.append("health item '%s' has invalid status '%s'" %
4672 if self.op.command == constants.OOB_POWER_STATUS:
4673 if not isinstance(result.payload, dict):
4674 errs.append("power-status is expected to return a dict but got %s" %
4675 type(result.payload))
4677 if self.op.command in [
4678 constants.OOB_POWER_ON,
4679 constants.OOB_POWER_OFF,
4680 constants.OOB_POWER_CYCLE,
4682 if result.payload is not None:
4683 errs.append("%s is expected to not return payload but got '%s'" %
4684 (self.op.command, result.payload))
4687 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4688 utils.CommaJoin(errs))
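# A standalone sketch of the per-command payload shapes checked by
# _CheckPayload above: "health" expects a list of (item, status) pairs,
# "power-status" expects a dict, and the power-on/off/cycle commands expect
# no payload at all.  The literal command strings below stand in for the
# constants.OOB_* values used by the LU and are an assumption made for the
# example.
def _ExamplePayloadShapeOk(command, payload):
  """Return True if 'payload' has the shape expected for 'command'."""
  if command == "health":
    return (isinstance(payload, list) and
            all(isinstance(item, tuple) and len(item) == 2
                for item in payload))
  if command == "power-status":
    return isinstance(payload, dict)
  if command in ("power-on", "power-off", "power-cycle"):
    return payload is None
  return False

assert _ExamplePayloadShapeOk("health", [("disk0", "OK")])
assert not _ExamplePayloadShapeOk("power-on", {"powered": True})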
4691 class _OsQuery(_QueryBase):
4692 FIELDS = query.OS_FIELDS
4694 def ExpandNames(self, lu):
4695 # Lock all nodes in shared mode
4696 # Temporary removal of locks, should be reverted later
4697 # TODO: reintroduce locks when they are lighter-weight
4698 lu.needed_locks = {}
4699 #self.share_locks[locking.LEVEL_NODE] = 1
4700 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4702 # The following variables interact with _QueryBase._GetNames
4704 self.wanted = self.names
4706 self.wanted = locking.ALL_SET
4708 self.do_locking = self.use_locking
4710 def DeclareLocks(self, lu, level):
4714 def _DiagnoseByOS(rlist):
4715 """Remaps a per-node return list into an a per-os per-node dictionary
4717 @param rlist: a map with node names as keys and OS objects as values
4720 @return: a dictionary with osnames as keys and as value another
4721 map, with nodes as keys and tuples of (path, status, diagnose,
4722 variants, parameters, api_versions) as values, eg::
4724 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4725 (/srv/..., False, "invalid api")],
4726 "node2": [(/srv/..., True, "", [], [])]}
4731 # we build here the list of nodes that didn't fail the RPC (at RPC
4732 # level), so that nodes with a non-responding node daemon don't
4733 # make all OSes invalid
4734 good_nodes = [node_name for node_name in rlist
4735 if not rlist[node_name].fail_msg]
4736 for node_name, nr in rlist.items():
4737 if nr.fail_msg or not nr.payload:
4739 for (name, path, status, diagnose, variants,
4740 params, api_versions) in nr.payload:
4741 if name not in all_os:
4742 # build a list of nodes for this os containing empty lists
4743 # for each node in node_list
4744 all_os[name] = {}
4745 for nname in good_nodes:
4746 all_os[name][nname] = []
4747 # convert params from [name, help] to (name, help)
4748 params = [tuple(v) for v in params]
4749 all_os[name][node_name].append((path, status, diagnose,
4750 variants, params, api_versions))
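# A miniature, self-contained version of the reshaping _DiagnoseByOS performs
# above: turn a per-node mapping of OS entries into a per-OS, per-node
# dictionary, pre-seeding every known node with an empty list.  The node and
# OS names are made up for illustration.
def _ExampleRemapByOs(per_node):
  """Remap {node: [(os_name, info)]} into {os_name: {node: [info, ...]}}."""
  per_os = {}
  nodes = list(per_node)
  for (node, entries) in per_node.items():
    for (os_name, info) in entries:
      if os_name not in per_os:
        # initialize an empty list per node, so nodes lacking this OS
        # still show up in the result
        per_os[os_name] = dict((n, []) for n in nodes)
      per_os[os_name][node].append(info)
  return per_os

assert _ExampleRemapByOs({"node1": [("debian-etch", "ok")], "node2": []}) == \
  {"debian-etch": {"node1": ["ok"], "node2": []}}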
4753 def _GetQueryData(self, lu):
4754 """Computes the list of nodes and their attributes.
4757 # Locking is not used
4758 assert not (compat.any(lu.glm.is_owned(level)
4759 for level in locking.LEVELS
4760 if level != locking.LEVEL_CLUSTER) or
4761 self.do_locking or self.use_locking)
4763 valid_nodes = [node.name
4764 for node in lu.cfg.GetAllNodesInfo().values()
4765 if not node.offline and node.vm_capable]
4766 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4767 cluster = lu.cfg.GetClusterInfo()
4771 for (os_name, os_data) in pol.items():
4772 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4773 hidden=(os_name in cluster.hidden_os),
4774 blacklisted=(os_name in cluster.blacklisted_os))
4778 api_versions = set()
4780 for idx, osl in enumerate(os_data.values()):
4781 info.valid = bool(info.valid and osl and osl[0][1])
4785 (node_variants, node_params, node_api) = osl[0][3:6]
4788 variants.update(node_variants)
4789 parameters.update(node_params)
4790 api_versions.update(node_api)
4792 # Filter out inconsistent values
4793 variants.intersection_update(node_variants)
4794 parameters.intersection_update(node_params)
4795 api_versions.intersection_update(node_api)
4797 info.variants = list(variants)
4798 info.parameters = list(parameters)
4799 info.api_versions = list(api_versions)
4801 data[os_name] = info
4803 # Prepare data in requested order
4804 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4808 class LUOsDiagnose(NoHooksLU):
4809 """Logical unit for OS diagnose/query.
4815 def _BuildFilter(fields, names):
4816 """Builds a filter for querying OSes.
4819 name_filter = qlang.MakeSimpleFilter("name", names)
4821 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4822 # respective field is not requested
4823 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4824 for fname in ["hidden", "blacklisted"]
4825 if fname not in fields]
4826 if "valid" not in fields:
4827 status_filter.append([qlang.OP_TRUE, "valid"])
4830 status_filter.insert(0, qlang.OP_AND)
4832 status_filter = None
4834 if name_filter and status_filter:
4835 return [qlang.OP_AND, name_filter, status_filter]
4839 return status_filter
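# For fields=["name"] and names=["debian-etch"], the filter built above has
# roughly the nested-list shape sketched below.  The literal operator strings
# are an assumption standing in for the qlang OP_* constants, and the exact
# output of MakeSimpleFilter may differ in detail.
_EXAMPLE_OS_FILTER = [
  "&",                                      # qlang.OP_AND
  ["|", ["=", "name", "debian-etch"]],      # name filter
  ["&",                                     # legacy visibility filter
   ["!", ["?", "hidden"]],                  # not hidden
   ["!", ["?", "blacklisted"]],             # not blacklisted
   ["?", "valid"]],                         # valid only
]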
4841 def CheckArguments(self):
4842 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4843 self.op.output_fields, False)
4845 def ExpandNames(self):
4846 self.oq.ExpandNames(self)
4848 def Exec(self, feedback_fn):
4849 return self.oq.OldStyleQuery(self)
4852 class LUNodeRemove(LogicalUnit):
4853 """Logical unit for removing a node.
4856 HPATH = "node-remove"
4857 HTYPE = constants.HTYPE_NODE
4859 def BuildHooksEnv(self):
4864 "OP_TARGET": self.op.node_name,
4865 "NODE_NAME": self.op.node_name,
4868 def BuildHooksNodes(self):
4869 """Build hooks nodes.
4871 This doesn't run on the target node in the pre phase as a failed
4872 node would then be impossible to remove.
4875 all_nodes = self.cfg.GetNodeList()
4877 all_nodes.remove(self.op.node_name)
4880 return (all_nodes, all_nodes)
4882 def CheckPrereq(self):
4883 """Check prerequisites.
4886 - the node exists in the configuration
4887 - it does not have primary or secondary instances
4888 - it's not the master
4890 Any errors are signaled by raising errors.OpPrereqError.
4893 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4894 node = self.cfg.GetNodeInfo(self.op.node_name)
4895 assert node is not None
4897 masternode = self.cfg.GetMasterNode()
4898 if node.name == masternode:
4899 raise errors.OpPrereqError("Node is the master node, failover to another"
4900 " node is required", errors.ECODE_INVAL)
4902 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4903 if node.name in instance.all_nodes:
4904 raise errors.OpPrereqError("Instance %s is still running on the node,"
4905 " please remove first" % instance_name,
4907 self.op.node_name = node.name
4910 def Exec(self, feedback_fn):
4911 """Removes the node from the cluster.
4915 logging.info("Stopping the node daemon and removing configs from node %s",
4918 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4920 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4923 # Promote nodes to master candidate as needed
4924 _AdjustCandidatePool(self, exceptions=[node.name])
4925 self.context.RemoveNode(node.name)
4927 # Run post hooks on the node before it's removed
4928 _RunPostHook(self, node.name)
4930 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4931 msg = result.fail_msg
4933 self.LogWarning("Errors encountered on the remote node while leaving"
4934 " the cluster: %s", msg)
4936 # Remove node from our /etc/hosts
4937 if self.cfg.GetClusterInfo().modify_etc_hosts:
4938 master_node = self.cfg.GetMasterNode()
4939 result = self.rpc.call_etc_hosts_modify(master_node,
4940 constants.ETC_HOSTS_REMOVE,
4942 result.Raise("Can't update hosts file with new host data")
4943 _RedistributeAncillaryFiles(self)
4946 class _NodeQuery(_QueryBase):
4947 FIELDS = query.NODE_FIELDS
4949 def ExpandNames(self, lu):
4950 lu.needed_locks = {}
4951 lu.share_locks = _ShareAll()
4954 self.wanted = _GetWantedNodes(lu, self.names)
4956 self.wanted = locking.ALL_SET
4958 self.do_locking = (self.use_locking and
4959 query.NQ_LIVE in self.requested_data)
4962 # If any non-static field is requested we need to lock the nodes
4963 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4965 def DeclareLocks(self, lu, level):
4968 def _GetQueryData(self, lu):
4969 """Computes the list of nodes and their attributes.
4972 all_info = lu.cfg.GetAllNodesInfo()
4974 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4976 # Gather data as requested
4977 if query.NQ_LIVE in self.requested_data:
4978 # filter out non-vm_capable nodes
4979 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4981 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4982 [lu.cfg.GetHypervisorType()])
4983 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4984 for (name, nresult) in node_data.items()
4985 if not nresult.fail_msg and nresult.payload)
4989 if query.NQ_INST in self.requested_data:
4990 node_to_primary = dict([(name, set()) for name in nodenames])
4991 node_to_secondary = dict([(name, set()) for name in nodenames])
4993 inst_data = lu.cfg.GetAllInstancesInfo()
4995 for inst in inst_data.values():
4996 if inst.primary_node in node_to_primary:
4997 node_to_primary[inst.primary_node].add(inst.name)
4998 for secnode in inst.secondary_nodes:
4999 if secnode in node_to_secondary:
5000 node_to_secondary[secnode].add(inst.name)
5002 node_to_primary = None
5003 node_to_secondary = None
5005 if query.NQ_OOB in self.requested_data:
5006 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5007 for name, node in all_info.iteritems())
5011 if query.NQ_GROUP in self.requested_data:
5012 groups = lu.cfg.GetAllNodeGroupsInfo()
5016 return query.NodeQueryData([all_info[name] for name in nodenames],
5017 live_data, lu.cfg.GetMasterNode(),
5018 node_to_primary, node_to_secondary, groups,
5019 oob_support, lu.cfg.GetClusterInfo())
5022 class LUNodeQuery(NoHooksLU):
5023 """Logical unit for querying nodes.
5026 # pylint: disable=W0142
5029 def CheckArguments(self):
5030 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5031 self.op.output_fields, self.op.use_locking)
5033 def ExpandNames(self):
5034 self.nq.ExpandNames(self)
5036 def DeclareLocks(self, level):
5037 self.nq.DeclareLocks(self, level)
5039 def Exec(self, feedback_fn):
5040 return self.nq.OldStyleQuery(self)
5043 class LUNodeQueryvols(NoHooksLU):
5044 """Logical unit for getting volumes on node(s).
5048 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5049 _FIELDS_STATIC = utils.FieldSet("node")
5051 def CheckArguments(self):
5052 _CheckOutputFields(static=self._FIELDS_STATIC,
5053 dynamic=self._FIELDS_DYNAMIC,
5054 selected=self.op.output_fields)
5056 def ExpandNames(self):
5057 self.share_locks = _ShareAll()
5058 self.needed_locks = {}
5060 if not self.op.nodes:
5061 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5063 self.needed_locks[locking.LEVEL_NODE] = \
5064 _GetWantedNodes(self, self.op.nodes)
5066 def Exec(self, feedback_fn):
5067 """Computes the list of nodes and their attributes.
5070 nodenames = self.owned_locks(locking.LEVEL_NODE)
5071 volumes = self.rpc.call_node_volumes(nodenames)
5073 ilist = self.cfg.GetAllInstancesInfo()
5074 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5077 for node in nodenames:
5078 nresult = volumes[node]
5081 msg = nresult.fail_msg
5083 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5086 node_vols = sorted(nresult.payload,
5087 key=operator.itemgetter("dev"))
5089 for vol in node_vols:
5091 for field in self.op.output_fields:
5094 elif field == "phys":
5098 elif field == "name":
5100 elif field == "size":
5101 val = int(float(vol["size"]))
5102 elif field == "instance":
5103 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5105 raise errors.ParameterError(field)
5106 node_output.append(str(val))
5108 output.append(node_output)
5113 class LUNodeQueryStorage(NoHooksLU):
5114 """Logical unit for getting information on storage units on node(s).
5117 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5120 def CheckArguments(self):
5121 _CheckOutputFields(static=self._FIELDS_STATIC,
5122 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5123 selected=self.op.output_fields)
5125 def ExpandNames(self):
5126 self.share_locks = _ShareAll()
5127 self.needed_locks = {}
5130 self.needed_locks[locking.LEVEL_NODE] = \
5131 _GetWantedNodes(self, self.op.nodes)
5133 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5135 def Exec(self, feedback_fn):
5136 """Computes the list of nodes and their attributes.
5139 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5141 # Always get name to sort by
5142 if constants.SF_NAME in self.op.output_fields:
5143 fields = self.op.output_fields[:]
5145 fields = [constants.SF_NAME] + self.op.output_fields
5147 # Never ask for node or type as it's only known to the LU
5148 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5149 while extra in fields:
5150 fields.remove(extra)
5152 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5153 name_idx = field_idx[constants.SF_NAME]
5155 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5156 data = self.rpc.call_storage_list(self.nodes,
5157 self.op.storage_type, st_args,
5158 self.op.name, fields)
5162 for node in utils.NiceSort(self.nodes):
5163 nresult = data[node]
5167 msg = nresult.fail_msg
5169 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5172 rows = dict([(row[name_idx], row) for row in nresult.payload])
5174 for name in utils.NiceSort(rows.keys()):
5179 for field in self.op.output_fields:
5180 if field == constants.SF_NODE:
5182 elif field == constants.SF_TYPE:
5183 val = self.op.storage_type
5184 elif field in field_idx:
5185 val = row[field_idx[field]]
5187 raise errors.ParameterError(field)
5196 class _InstanceQuery(_QueryBase):
5197 FIELDS = query.INSTANCE_FIELDS
5199 def ExpandNames(self, lu):
5200 lu.needed_locks = {}
5201 lu.share_locks = _ShareAll()
5204 self.wanted = _GetWantedInstances(lu, self.names)
5206 self.wanted = locking.ALL_SET
5208 self.do_locking = (self.use_locking and
5209 query.IQ_LIVE in self.requested_data)
5211 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5212 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5213 lu.needed_locks[locking.LEVEL_NODE] = []
5214 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5216 self.do_grouplocks = (self.do_locking and
5217 query.IQ_NODES in self.requested_data)
5219 def DeclareLocks(self, lu, level):
5221 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5222 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5224 # Lock all groups used by instances optimistically; this requires going
5225 # via the node before it's locked, requiring verification later on
5226 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5228 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5229 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5230 elif level == locking.LEVEL_NODE:
5231 lu._LockInstancesNodes() # pylint: disable=W0212
5234 def _CheckGroupLocks(lu):
5235 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5236 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5238 # Check if node groups for locked instances are still correct
5239 for instance_name in owned_instances:
5240 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5242 def _GetQueryData(self, lu):
5243 """Computes the list of instances and their attributes.
5246 if self.do_grouplocks:
5247 self._CheckGroupLocks(lu)
5249 cluster = lu.cfg.GetClusterInfo()
5250 all_info = lu.cfg.GetAllInstancesInfo()
5252 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5254 instance_list = [all_info[name] for name in instance_names]
5255 nodes = frozenset(itertools.chain(*(inst.all_nodes
5256 for inst in instance_list)))
5257 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5260 wrongnode_inst = set()
5262 # Gather data as requested
5263 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5265 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5267 result = node_data[name]
5269 # offline nodes will be in both lists
5270 assert result.fail_msg
5271 offline_nodes.append(name)
5273 bad_nodes.append(name)
5274 elif result.payload:
5275 for inst in result.payload:
5276 if inst in all_info:
5277 if all_info[inst].primary_node == name:
5278 live_data.update(result.payload)
5280 wrongnode_inst.add(inst)
5282 # orphan instance; we don't list it here as we don't
5283 # handle this case yet in the output of instance listing
5284 logging.warning("Orphan instance '%s' found on node %s",
5286 # else no instance is alive
5290 if query.IQ_DISKUSAGE in self.requested_data:
5291 disk_usage = dict((inst.name,
5292 _ComputeDiskSize(inst.disk_template,
5293 [{constants.IDISK_SIZE: disk.size}
5294 for disk in inst.disks]))
5295 for inst in instance_list)
5299 if query.IQ_CONSOLE in self.requested_data:
5301 for inst in instance_list:
5302 if inst.name in live_data:
5303 # Instance is running
5304 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5306 consinfo[inst.name] = None
5307 assert set(consinfo.keys()) == set(instance_names)
5311 if query.IQ_NODES in self.requested_data:
5312 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5314 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5315 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5316 for uuid in set(map(operator.attrgetter("group"),
5322 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5323 disk_usage, offline_nodes, bad_nodes,
5324 live_data, wrongnode_inst, consinfo,
5328 class LUQuery(NoHooksLU):
5329 """Query for resources/items of a certain kind.
5332 # pylint: disable=W0142
5335 def CheckArguments(self):
5336 qcls = _GetQueryImplementation(self.op.what)
5338 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5340 def ExpandNames(self):
5341 self.impl.ExpandNames(self)
5343 def DeclareLocks(self, level):
5344 self.impl.DeclareLocks(self, level)
5346 def Exec(self, feedback_fn):
5347 return self.impl.NewStyleQuery(self)
5350 class LUQueryFields(NoHooksLU):
5351 """Query for resources/items of a certain kind.
5354 # pylint: disable=W0142
5357 def CheckArguments(self):
5358 self.qcls = _GetQueryImplementation(self.op.what)
5360 def ExpandNames(self):
5361 self.needed_locks = {}
5363 def Exec(self, feedback_fn):
5364 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5367 class LUNodeModifyStorage(NoHooksLU):
5368 """Logical unit for modifying a storage volume on a node.
5373 def CheckArguments(self):
5374 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5376 storage_type = self.op.storage_type
5379 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5381 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5382 " modified" % storage_type,
5385 diff = set(self.op.changes.keys()) - modifiable
5387 raise errors.OpPrereqError("The following fields can not be modified for"
5388 " storage units of type '%s': %r" %
5389 (storage_type, list(diff)),
5392 def ExpandNames(self):
5393 self.needed_locks = {
5394 locking.LEVEL_NODE: self.op.node_name,
5397 def Exec(self, feedback_fn):
5398 """Computes the list of nodes and their attributes.
5401 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5402 result = self.rpc.call_storage_modify(self.op.node_name,
5403 self.op.storage_type, st_args,
5404 self.op.name, self.op.changes)
5405 result.Raise("Failed to modify storage unit '%s' on %s" %
5406 (self.op.name, self.op.node_name))
5409 class LUNodeAdd(LogicalUnit):
5410 """Logical unit for adding node to the cluster.
5414 HTYPE = constants.HTYPE_NODE
5415 _NFLAGS = ["master_capable", "vm_capable"]
5417 def CheckArguments(self):
5418 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5419 # validate/normalize the node name
5420 self.hostname = netutils.GetHostname(name=self.op.node_name,
5421 family=self.primary_ip_family)
5422 self.op.node_name = self.hostname.name
5424 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5425 raise errors.OpPrereqError("Cannot readd the master node",
5428 if self.op.readd and self.op.group:
5429 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5430 " being readded", errors.ECODE_INVAL)
5432 def BuildHooksEnv(self):
5435 This will run on all nodes before, and on all nodes + the new node after.
5439 "OP_TARGET": self.op.node_name,
5440 "NODE_NAME": self.op.node_name,
5441 "NODE_PIP": self.op.primary_ip,
5442 "NODE_SIP": self.op.secondary_ip,
5443 "MASTER_CAPABLE": str(self.op.master_capable),
5444 "VM_CAPABLE": str(self.op.vm_capable),
5447 def BuildHooksNodes(self):
5448 """Build hooks nodes.
5451 # Exclude added node
5452 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5453 post_nodes = pre_nodes + [self.op.node_name, ]
5455 return (pre_nodes, post_nodes)
5457 def CheckPrereq(self):
5458 """Check prerequisites.
5461 - the new node is not already in the config
5463 - its parameters (single/dual homed) matches the cluster
5465 Any errors are signaled by raising errors.OpPrereqError.
5469 hostname = self.hostname
5470 node = hostname.name
5471 primary_ip = self.op.primary_ip = hostname.ip
5472 if self.op.secondary_ip is None:
5473 if self.primary_ip_family == netutils.IP6Address.family:
5474 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5475 " IPv4 address must be given as secondary",
5477 self.op.secondary_ip = primary_ip
5479 secondary_ip = self.op.secondary_ip
5480 if not netutils.IP4Address.IsValid(secondary_ip):
5481 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5482 " address" % secondary_ip, errors.ECODE_INVAL)
5484 node_list = cfg.GetNodeList()
5485 if not self.op.readd and node in node_list:
5486 raise errors.OpPrereqError("Node %s is already in the configuration" %
5487 node, errors.ECODE_EXISTS)
5488 elif self.op.readd and node not in node_list:
5489 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5492 self.changed_primary_ip = False
5494 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5495 if self.op.readd and node == existing_node_name:
5496 if existing_node.secondary_ip != secondary_ip:
5497 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5498 " address configuration as before",
5500 if existing_node.primary_ip != primary_ip:
5501 self.changed_primary_ip = True
5505 if (existing_node.primary_ip == primary_ip or
5506 existing_node.secondary_ip == primary_ip or
5507 existing_node.primary_ip == secondary_ip or
5508 existing_node.secondary_ip == secondary_ip):
5509 raise errors.OpPrereqError("New node ip address(es) conflict with"
5510 " existing node %s" % existing_node.name,
5511 errors.ECODE_NOTUNIQUE)
5513 # After this 'if' block, None is no longer a valid value for the
5514 # _capable op attributes
5516 old_node = self.cfg.GetNodeInfo(node)
5517 assert old_node is not None, "Can't retrieve locked node %s" % node
5518 for attr in self._NFLAGS:
5519 if getattr(self.op, attr) is None:
5520 setattr(self.op, attr, getattr(old_node, attr))
5522 for attr in self._NFLAGS:
5523 if getattr(self.op, attr) is None:
5524 setattr(self.op, attr, True)
5526 if self.op.readd and not self.op.vm_capable:
5527 pri, sec = cfg.GetNodeInstances(node)
5529 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5530 " flag set to false, but it already holds"
5531 " instances" % node,
5534 # check that the type of the node (single versus dual homed) is the
5535 # same as for the master
5536 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5537 master_singlehomed = myself.secondary_ip == myself.primary_ip
5538 newbie_singlehomed = secondary_ip == primary_ip
5539 if master_singlehomed != newbie_singlehomed:
5540 if master_singlehomed:
5541 raise errors.OpPrereqError("The master has no secondary ip but the"
5542 " new node has one",
5545 raise errors.OpPrereqError("The master has a secondary ip but the"
5546 " new node doesn't have one",
5549 # checks reachability
5550 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5551 raise errors.OpPrereqError("Node not reachable by ping",
5552 errors.ECODE_ENVIRON)
5554 if not newbie_singlehomed:
5555 # check reachability from my secondary ip to newbie's secondary ip
5556 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5557 source=myself.secondary_ip):
5558 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5559 " based ping to node daemon port",
5560 errors.ECODE_ENVIRON)
5567 if self.op.master_capable:
5568 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5570 self.master_candidate = False
5573 self.new_node = old_node
5575 node_group = cfg.LookupNodeGroup(self.op.group)
5576 self.new_node = objects.Node(name=node,
5577 primary_ip=primary_ip,
5578 secondary_ip=secondary_ip,
5579 master_candidate=self.master_candidate,
5580 offline=False, drained=False,
5583 if self.op.ndparams:
5584 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5586 if self.op.hv_state:
5587 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5589 if self.op.disk_state:
5590 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5592 def Exec(self, feedback_fn):
5593 """Adds the new node to the cluster.
5596 new_node = self.new_node
5597 node = new_node.name
5599 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4602 # We are adding a new node, so we assume it's powered
5603 new_node.powered = True
5605 # for re-adds, reset the offline/drained/master-candidate flags;
5606 # we need to reset here, otherwise offline would prevent RPC calls
5607 # later in the procedure; this also means that if the re-add
5608 # fails, we are left with a non-offlined, broken node
5610 new_node.drained = new_node.offline = False # pylint: disable=W0201
5611 self.LogInfo("Readding a node, the offline/drained flags were reset")
5612 # if we demote the node, we do cleanup later in the procedure
5613 new_node.master_candidate = self.master_candidate
5614 if self.changed_primary_ip:
5615 new_node.primary_ip = self.op.primary_ip
5617 # copy the master/vm_capable flags
5618 for attr in self._NFLAGS:
5619 setattr(new_node, attr, getattr(self.op, attr))
5621 # notify the user about any possible mc promotion
5622 if new_node.master_candidate:
5623 self.LogInfo("Node will be a master candidate")
5625 if self.op.ndparams:
5626 new_node.ndparams = self.op.ndparams
5628 new_node.ndparams = {}
5630 if self.op.hv_state:
5631 new_node.hv_state_static = self.new_hv_state
5633 if self.op.disk_state:
5634 new_node.disk_state_static = self.new_disk_state
5636 # check connectivity
5637 result = self.rpc.call_version([node])[node]
5638 result.Raise("Can't get version information from node %s" % node)
5639 if constants.PROTOCOL_VERSION == result.payload:
5640 logging.info("Communication to node %s fine, sw version %s match",
5641 node, result.payload)
5643 raise errors.OpExecError("Version mismatch master version %s,"
5644 " node version %s" %
5645 (constants.PROTOCOL_VERSION, result.payload))
5647 # Add node to our /etc/hosts, and add key to known_hosts
5648 if self.cfg.GetClusterInfo().modify_etc_hosts:
5649 master_node = self.cfg.GetMasterNode()
5650 result = self.rpc.call_etc_hosts_modify(master_node,
5651 constants.ETC_HOSTS_ADD,
5654 result.Raise("Can't update hosts file with new host data")
5656 if new_node.secondary_ip != new_node.primary_ip:
5657 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5660 node_verify_list = [self.cfg.GetMasterNode()]
5661 node_verify_param = {
5662 constants.NV_NODELIST: ([node], {}),
5663 # TODO: do a node-net-test as well?
5666 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5667 self.cfg.GetClusterName())
5668 for verifier in node_verify_list:
5669 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5670 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5672 for failed in nl_payload:
5673 feedback_fn("ssh/hostname verification failed"
5674 " (checking from %s): %s" %
5675 (verifier, nl_payload[failed]))
5676 raise errors.OpExecError("ssh/hostname verification failed")
5679 _RedistributeAncillaryFiles(self)
5680 self.context.ReaddNode(new_node)
5681 # make sure we redistribute the config
5682 self.cfg.Update(new_node, feedback_fn)
5683 # and make sure the new node will not have old files around
5684 if not new_node.master_candidate:
5685 result = self.rpc.call_node_demote_from_mc(new_node.name)
5686 msg = result.fail_msg
5688 self.LogWarning("Node failed to demote itself from master"
5689 " candidate status: %s" % msg)
5691 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5692 additional_vm=self.op.vm_capable)
5693 self.context.AddNode(new_node, self.proc.GetECId())
5696 class LUNodeSetParams(LogicalUnit):
5697 """Modifies the parameters of a node.
5699 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5700 to the node role (as _ROLE_*)
5701 @cvar _R2F: a dictionary from node role to tuples of flags
5702 @cvar _FLAGS: a list of attribute names corresponding to the flags
5705 HPATH = "node-modify"
5706 HTYPE = constants.HTYPE_NODE
5708 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5710 (True, False, False): _ROLE_CANDIDATE,
5711 (False, True, False): _ROLE_DRAINED,
5712 (False, False, True): _ROLE_OFFLINE,
5713 (False, False, False): _ROLE_REGULAR,
5715 _R2F = dict((v, k) for k, v in _F2R.items())
5716 _FLAGS = ["master_candidate", "drained", "offline"]
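  # Illustrative note (not in the original source): the two tables above are
  # inverses of each other, e.g. a node that is a master candidate and neither
  # drained nor offline satisfies
  #   _F2R[(True, False, False)] == _ROLE_CANDIDATE
  #   _R2F[_ROLE_CANDIDATE] == (True, False, False)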
5718 def CheckArguments(self):
5719 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5720 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5721 self.op.master_capable, self.op.vm_capable,
5722 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5724 if all_mods.count(None) == len(all_mods):
5725 raise errors.OpPrereqError("Please pass at least one modification",
5727 if all_mods.count(True) > 1:
5728 raise errors.OpPrereqError("Can't set the node into more than one"
5729 " state at the same time",
5732 # Boolean value that tells us whether we might be demoting from MC
5733 self.might_demote = (self.op.master_candidate == False or
5734 self.op.offline == True or
5735 self.op.drained == True or
5736 self.op.master_capable == False)
5738 if self.op.secondary_ip:
5739 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5740 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5741 " address" % self.op.secondary_ip,
5744 self.lock_all = self.op.auto_promote and self.might_demote
5745 self.lock_instances = self.op.secondary_ip is not None
5747 def _InstanceFilter(self, instance):
5748 """Filter for getting affected instances.
5751 return (instance.disk_template in constants.DTS_INT_MIRROR and
5752 self.op.node_name in instance.all_nodes)
5754 def ExpandNames(self):
5756 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5758 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5760 # Since modifying a node can have severe effects on currently running
5761 # operations the resource lock is at least acquired in shared mode
5762 self.needed_locks[locking.LEVEL_NODE_RES] = \
5763 self.needed_locks[locking.LEVEL_NODE]
5765 # Get node resource and instance locks in shared mode; they are not used
5766 # for anything but read-only access
5767 self.share_locks[locking.LEVEL_NODE_RES] = 1
5768 self.share_locks[locking.LEVEL_INSTANCE] = 1
5770 if self.lock_instances:
5771 self.needed_locks[locking.LEVEL_INSTANCE] = \
5772 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5774 def BuildHooksEnv(self):
5777 This runs on the master node.
5781 "OP_TARGET": self.op.node_name,
5782 "MASTER_CANDIDATE": str(self.op.master_candidate),
5783 "OFFLINE": str(self.op.offline),
5784 "DRAINED": str(self.op.drained),
5785 "MASTER_CAPABLE": str(self.op.master_capable),
5786 "VM_CAPABLE": str(self.op.vm_capable),
5789 def BuildHooksNodes(self):
5790 """Build hooks nodes.
5793 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5796 def CheckPrereq(self):
5797 """Check prerequisites.
5799 This only checks the instance list against the existing names.
5802 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5804 if self.lock_instances:
5805 affected_instances = \
5806 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5808 # Verify instance locks
5809 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5810 wanted_instances = frozenset(affected_instances.keys())
5811 if wanted_instances - owned_instances:
5812 raise errors.OpPrereqError("Instances affected by changing node %s's"
5813 " secondary IP address have changed since"
5814 " locks were acquired, wanted '%s', have"
5815 " '%s'; retry the operation" %
5817 utils.CommaJoin(wanted_instances),
5818 utils.CommaJoin(owned_instances)),
5821 affected_instances = None
5823 if (self.op.master_candidate is not None or
5824 self.op.drained is not None or
5825 self.op.offline is not None):
5826 # we can't change the master's node flags
5827 if self.op.node_name == self.cfg.GetMasterNode():
5828 raise errors.OpPrereqError("The master role can be changed"
5829 " only via master-failover",
5832 if self.op.master_candidate and not node.master_capable:
5833 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5834 " it a master candidate" % node.name,
5837 if self.op.vm_capable == False:
5838 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5840 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5841 " the vm_capable flag" % node.name,
5844 if node.master_candidate and self.might_demote and not self.lock_all:
5845 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5846 # check if after removing the current node, we're missing master
5848 (mc_remaining, mc_should, _) = \
5849 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5850 if mc_remaining < mc_should:
5851 raise errors.OpPrereqError("Not enough master candidates, please"
5852 " pass auto promote option to allow"
5853 " promotion", errors.ECODE_STATE)
5855 self.old_flags = old_flags = (node.master_candidate,
5856 node.drained, node.offline)
5857 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5858 self.old_role = old_role = self._F2R[old_flags]
5860 # Check for ineffective changes
5861 for attr in self._FLAGS:
5862 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5863 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5864 setattr(self.op, attr, None)
5866 # Past this point, any flag change to False means a transition
5867 # away from the respective state, as only real changes are kept
5869 # TODO: We might query the real power state if it supports OOB
5870 if _SupportsOob(self.cfg, node):
5871 if self.op.offline is False and not (node.powered or
5872 self.op.powered == True):
5873 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5874 " offline status can be reset") %
5876 elif self.op.powered is not None:
5877 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5878 " as it does not support out-of-band"
5879 " handling") % self.op.node_name)
5881 # If we're being deofflined/drained, we'll MC ourself if needed
5882 if (self.op.drained == False or self.op.offline == False or
5883 (self.op.master_capable and not node.master_capable)):
5884 if _DecideSelfPromotion(self):
5885 self.op.master_candidate = True
5886 self.LogInfo("Auto-promoting node to master candidate")
5888 # If we're no longer master capable, we'll demote ourselves from MC
5889 if self.op.master_capable == False and node.master_candidate:
5890 self.LogInfo("Demoting from master candidate")
5891 self.op.master_candidate = False
5894 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5895 if self.op.master_candidate:
5896 new_role = self._ROLE_CANDIDATE
5897 elif self.op.drained:
5898 new_role = self._ROLE_DRAINED
5899 elif self.op.offline:
5900 new_role = self._ROLE_OFFLINE
5901 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5902 # False is still in new flags, which means we're un-setting (the
5904 new_role = self._ROLE_REGULAR
5905 else: # no new flags, nothing, keep old role
5908 self.new_role = new_role
5910 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5911 # Trying to transition out of offline status
5912 # TODO: Use standard RPC runner, but make sure it works when the node is
5913 # still marked offline
5914 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5916 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5917 " to report its version: %s" %
5918 (node.name, result.fail_msg),
5921 self.LogWarning("Transitioning node from offline to online state"
5922 " without using re-add. Please make sure the node"
5925 if self.op.secondary_ip:
5926 # Ok even without locking, because this can't be changed by any LU
5927 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5928 master_singlehomed = master.secondary_ip == master.primary_ip
5929 if master_singlehomed and self.op.secondary_ip:
5930 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5931 " homed cluster", errors.ECODE_INVAL)
5933 assert not (frozenset(affected_instances) -
5934 self.owned_locks(locking.LEVEL_INSTANCE))
5937 if affected_instances:
5938 raise errors.OpPrereqError("Cannot change secondary IP address:"
5939 " offline node has instances (%s)"
5940 " configured to use it" %
5941 utils.CommaJoin(affected_instances.keys()))
5943 # On online nodes, check that no instances are running, and that
5944 # the node has the new ip and we can reach it.
5945 for instance in affected_instances.values():
5946 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5947 msg="cannot change secondary ip")
5949 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5950 if master.name != node.name:
5951 # check reachability from master secondary ip to new secondary ip
5952 if not netutils.TcpPing(self.op.secondary_ip,
5953 constants.DEFAULT_NODED_PORT,
5954 source=master.secondary_ip):
5955 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5956 " based ping to node daemon port",
5957 errors.ECODE_ENVIRON)
5959 if self.op.ndparams:
5960 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5961 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5962 self.new_ndparams = new_ndparams
5964 if self.op.hv_state:
5965 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5966 self.node.hv_state_static)
5968 if self.op.disk_state:
5969 self.new_disk_state = \
5970 _MergeAndVerifyDiskState(self.op.disk_state,
5971 self.node.disk_state_static)
5973 def Exec(self, feedback_fn):
5978 old_role = self.old_role
5979 new_role = self.new_role
5983 if self.op.ndparams:
5984 node.ndparams = self.new_ndparams
5986 if self.op.powered is not None:
5987 node.powered = self.op.powered
5989 if self.op.hv_state:
5990 node.hv_state_static = self.new_hv_state
5992 if self.op.disk_state:
5993 node.disk_state_static = self.new_disk_state
5995 for attr in ["master_capable", "vm_capable"]:
5996 val = getattr(self.op, attr)
5998 setattr(node, attr, val)
5999 result.append((attr, str(val)))
6001 if new_role != old_role:
6002 # Tell the node to demote itself, if no longer MC and not offline
6003 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6004 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6006 self.LogWarning("Node failed to demote itself: %s", msg)
6008 new_flags = self._R2F[new_role]
6009 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6011 result.append((desc, str(nf)))
6012 (node.master_candidate, node.drained, node.offline) = new_flags
6014 # we locked all nodes, we adjust the CP before updating this node
6016 _AdjustCandidatePool(self, [node.name])
6018 if self.op.secondary_ip:
6019 node.secondary_ip = self.op.secondary_ip
6020 result.append(("secondary_ip", self.op.secondary_ip))
6022 # this will trigger configuration file update, if needed
6023 self.cfg.Update(node, feedback_fn)
6025 # this will trigger job queue propagation or cleanup if the mc
6027 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6028 self.context.ReaddNode(node)
6033 class LUNodePowercycle(NoHooksLU):
6034 """Powercycles a node.
6039 def CheckArguments(self):
6040 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6041 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6042 raise errors.OpPrereqError("The node is the master and the force"
6043 " parameter was not set",
6046 def ExpandNames(self):
6047 """Locking for PowercycleNode.
6049 This is a last-resort option and shouldn't block on other
6050 jobs. Therefore, we grab no locks.
6053 self.needed_locks = {}
6055 def Exec(self, feedback_fn):
6059 result = self.rpc.call_node_powercycle(self.op.node_name,
6060 self.cfg.GetHypervisorType())
6061 result.Raise("Failed to schedule the reboot")
6062 return result.payload
6065 class LUClusterQuery(NoHooksLU):
6066 """Query cluster configuration.
6071 def ExpandNames(self):
6072 self.needed_locks = {}
6074 def Exec(self, feedback_fn):
6075 """Return cluster config.
6078 cluster = self.cfg.GetClusterInfo()
6081 # Filter just for enabled hypervisors
6082 for os_name, hv_dict in cluster.os_hvp.items():
6083 os_hvp[os_name] = {}
6084 for hv_name, hv_params in hv_dict.items():
6085 if hv_name in cluster.enabled_hypervisors:
6086 os_hvp[os_name][hv_name] = hv_params
6088 # Convert ip_family to ip_version
6089 primary_ip_version = constants.IP4_VERSION
6090 if cluster.primary_ip_family == netutils.IP6Address.family:
6091 primary_ip_version = constants.IP6_VERSION
6094 "software_version": constants.RELEASE_VERSION,
6095 "protocol_version": constants.PROTOCOL_VERSION,
6096 "config_version": constants.CONFIG_VERSION,
6097 "os_api_version": max(constants.OS_API_VERSIONS),
6098 "export_version": constants.EXPORT_VERSION,
6099 "architecture": (platform.architecture()[0], platform.machine()),
6100 "name": cluster.cluster_name,
6101 "master": cluster.master_node,
6102 "default_hypervisor": cluster.primary_hypervisor,
6103 "enabled_hypervisors": cluster.enabled_hypervisors,
6104 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6105 for hypervisor_name in cluster.enabled_hypervisors]),
6107 "beparams": cluster.beparams,
6108 "osparams": cluster.osparams,
6109 "ipolicy": cluster.ipolicy,
6110 "nicparams": cluster.nicparams,
6111 "ndparams": cluster.ndparams,
6112 "candidate_pool_size": cluster.candidate_pool_size,
6113 "master_netdev": cluster.master_netdev,
6114 "master_netmask": cluster.master_netmask,
6115 "use_external_mip_script": cluster.use_external_mip_script,
6116 "volume_group_name": cluster.volume_group_name,
6117 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6118 "file_storage_dir": cluster.file_storage_dir,
6119 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6120 "maintain_node_health": cluster.maintain_node_health,
6121 "ctime": cluster.ctime,
6122 "mtime": cluster.mtime,
6123 "uuid": cluster.uuid,
6124 "tags": list(cluster.GetTags()),
6125 "uid_pool": cluster.uid_pool,
6126 "default_iallocator": cluster.default_iallocator,
6127 "reserved_lvs": cluster.reserved_lvs,
6128 "primary_ip_version": primary_ip_version,
6129 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6130 "hidden_os": cluster.hidden_os,
6131 "blacklisted_os": cluster.blacklisted_os,
6137 class LUClusterConfigQuery(NoHooksLU):
6138 """Return configuration values.
6142 _FIELDS_DYNAMIC = utils.FieldSet()
6143 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6144 "watcher_pause", "volume_group_name")
6146 def CheckArguments(self):
6147 _CheckOutputFields(static=self._FIELDS_STATIC,
6148 dynamic=self._FIELDS_DYNAMIC,
6149 selected=self.op.output_fields)
6151 def ExpandNames(self):
6152 self.needed_locks = {}
6154 def Exec(self, feedback_fn):
6155 """Dump a representation of the cluster config to the standard output.
6159 for field in self.op.output_fields:
6160 if field == "cluster_name":
6161 entry = self.cfg.GetClusterName()
6162 elif field == "master_node":
6163 entry = self.cfg.GetMasterNode()
6164 elif field == "drain_flag":
6165 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6166 elif field == "watcher_pause":
6167 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6168 elif field == "volume_group_name":
6169 entry = self.cfg.GetVGName()
6171 raise errors.ParameterError(field)
6172 values.append(entry)
6176 class LUInstanceActivateDisks(NoHooksLU):
6177 """Bring up an instance's disks.
6182 def ExpandNames(self):
6183 self._ExpandAndLockInstance()
6184 self.needed_locks[locking.LEVEL_NODE] = []
6185 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6187 def DeclareLocks(self, level):
6188 if level == locking.LEVEL_NODE:
6189 self._LockInstancesNodes()
6191 def CheckPrereq(self):
6192 """Check prerequisites.
6194 This checks that the instance is in the cluster.
6197 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6198 assert self.instance is not None, \
6199 "Cannot retrieve locked instance %s" % self.op.instance_name
6200 _CheckNodeOnline(self, self.instance.primary_node)
6202 def Exec(self, feedback_fn):
6203 """Activate the disks.
6206 disks_ok, disks_info = \
6207 _AssembleInstanceDisks(self, self.instance,
6208 ignore_size=self.op.ignore_size)
6210 raise errors.OpExecError("Cannot activate block devices")
6215 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6217 """Prepare the block devices for an instance.
6219 This sets up the block devices on all nodes.
6221 @type lu: L{LogicalUnit}
6222 @param lu: the logical unit on whose behalf we execute
6223 @type instance: L{objects.Instance}
6224 @param instance: the instance for whose disks we assemble
6225 @type disks: list of L{objects.Disk} or None
6226 @param disks: which disks to assemble (or all, if None)
6227 @type ignore_secondaries: boolean
6228 @param ignore_secondaries: if true, errors on secondary nodes
6229 won't result in an error return from the function
6230 @type ignore_size: boolean
6231 @param ignore_size: if true, the current known size of the disk
6232 will not be used during the disk activation, useful for cases
6233 when the size is wrong
6234 @return: False if the operation failed, otherwise a list of
6235 (host, instance_visible_name, node_visible_name)
6236 with the mapping from node devices to instance devices
6241 iname = instance.name
6242 disks = _ExpandCheckDisks(instance, disks)
6244 # With the two-pass mechanism we try to reduce the window of
6245 # opportunity for the race condition of switching DRBD to primary
6246 # before handshaking occurred, but we do not eliminate it
6248 # The proper fix would be to wait (with some limits) until the
6249 # connection has been made and drbd transitions from WFConnection
6250 # into any other network-connected state (Connected, SyncTarget,
6253 # 1st pass, assemble on all nodes in secondary mode
6254 for idx, inst_disk in enumerate(disks):
6255 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6257 node_disk = node_disk.Copy()
6258 node_disk.UnsetSize()
6259 lu.cfg.SetDiskID(node_disk, node)
6260 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6261 msg = result.fail_msg
6263 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6264 " (is_primary=False, pass=1): %s",
6265 inst_disk.iv_name, node, msg)
6266 if not ignore_secondaries:
6269 # FIXME: race condition on drbd migration to primary
6271 # 2nd pass, do only the primary node
6272 for idx, inst_disk in enumerate(disks):
6275 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6276 if node != instance.primary_node:
6279 node_disk = node_disk.Copy()
6280 node_disk.UnsetSize()
6281 lu.cfg.SetDiskID(node_disk, node)
6282 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6283 msg = result.fail_msg
6285 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6286 " (is_primary=True, pass=2): %s",
6287 inst_disk.iv_name, node, msg)
6290 dev_path = result.payload
6292 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6294 # leave the disks configured for the primary node
6295 # this is a workaround that would be fixed better by
6296 # improving the logical/physical id handling
6298 lu.cfg.SetDiskID(disk, instance.primary_node)
6300 return disks_ok, device_info
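# Illustrative sketch (not part of the original module): a minimal caller of
# _AssembleInstanceDisks, mirroring what LUInstanceActivateDisks.Exec above
# does; "lu" and "instance" are assumed to be a LogicalUnit and an
# objects.Instance already owned by the caller.
def _ExampleAssembleDisks(lu, instance):
  disks_ok, disks_info = _AssembleInstanceDisks(lu, instance,
                                                ignore_size=False)
  if not disks_ok:
    # secondary-node failures may already have aborted the first pass
    raise errors.OpExecError("Cannot activate block devices")
  # disks_info holds (node, iv_name, device_path) tuples for the disks
  # assembled on the primary node
  return disks_info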
6303 def _StartInstanceDisks(lu, instance, force):
6304 """Start the disks of an instance.
6307 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6308 ignore_secondaries=force)
6310 _ShutdownInstanceDisks(lu, instance)
6311 if force is not None and not force:
6312 lu.proc.LogWarning("", hint="If the message above refers to a"
6314 " you can retry the operation using '--force'.")
6315 raise errors.OpExecError("Disk consistency error")
6318 class LUInstanceDeactivateDisks(NoHooksLU):
6319 """Shutdown an instance's disks.
6324 def ExpandNames(self):
6325 self._ExpandAndLockInstance()
6326 self.needed_locks[locking.LEVEL_NODE] = []
6327 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6329 def DeclareLocks(self, level):
6330 if level == locking.LEVEL_NODE:
6331 self._LockInstancesNodes()
6333 def CheckPrereq(self):
6334 """Check prerequisites.
6336 This checks that the instance is in the cluster.
6339 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6340 assert self.instance is not None, \
6341 "Cannot retrieve locked instance %s" % self.op.instance_name
6343 def Exec(self, feedback_fn):
6344 """Deactivate the disks
6347 instance = self.instance
6349 _ShutdownInstanceDisks(self, instance)
6351 _SafeShutdownInstanceDisks(self, instance)
6354 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6355 """Shutdown block devices of an instance.
6357 This function checks if an instance is running, before calling
6358 _ShutdownInstanceDisks.
6361 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6362 _ShutdownInstanceDisks(lu, instance, disks=disks)
6365 def _ExpandCheckDisks(instance, disks):
6366 """Return the instance disks selected by the disks list
6368 @type disks: list of L{objects.Disk} or None
6369 @param disks: selected disks
6370 @rtype: list of L{objects.Disk}
6371 @return: selected instance disks to act on
6375 return instance.disks
6377 if not set(disks).issubset(instance.disks):
6378 raise errors.ProgrammerError("Can only act on disks belonging to the"
6383 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6384 """Shutdown block devices of an instance.
6386 This does the shutdown on all nodes of the instance.
6388 If ignore_primary is false, errors on the primary node are
6393 disks = _ExpandCheckDisks(instance, disks)
6396 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6397 lu.cfg.SetDiskID(top_disk, node)
6398 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6399 msg = result.fail_msg
6401 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6402 disk.iv_name, node, msg)
6403 if ((node == instance.primary_node and not ignore_primary) or
6404 (node != instance.primary_node and not result.offline)):
6409 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6410 """Checks if a node has enough free memory.
6412 This function checks if a given node has the needed amount of free
6413 memory. In case the node has less memory or we cannot get the
6414 information from the node, this function raises an OpPrereqError
6417 @type lu: C{LogicalUnit}
6418 @param lu: a logical unit from which we get configuration data
6420 @param node: the node to check
6421 @type reason: C{str}
6422 @param reason: string to use in the error message
6423 @type requested: C{int}
6424 @param requested: the amount of memory in MiB to check for
6425 @type hypervisor_name: C{str}
6426 @param hypervisor_name: the hypervisor to ask for memory stats
6428 @return: node current free memory
6429 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6430 we cannot check the node
6433 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6434 nodeinfo[node].Raise("Can't get data from node %s" % node,
6435 prereq=True, ecode=errors.ECODE_ENVIRON)
6436 (_, _, (hv_info, )) = nodeinfo[node].payload
6438 free_mem = hv_info.get("memory_free", None)
6439 if not isinstance(free_mem, int):
6440 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6441 " was '%s'" % (node, free_mem),
6442 errors.ECODE_ENVIRON)
6443 if requested > free_mem:
6444 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6445 " needed %s MiB, available %s MiB" %
6446 (node, reason, requested, free_mem),
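# Illustrative call (sketch, names assumed from the callers below): an LU that
# is about to start an instance typically checks its primary node like this:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
# which raises OpPrereqError if the node cannot provide the requested MiB.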
6451 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6452 """Checks if nodes have enough free disk space in the all VGs.
6454 This function checks if all given nodes have the needed amount of
6455 free disk. In case any node has less disk or we cannot get the
6456 information from the node, this function raises an OpPrereqError
6459 @type lu: C{LogicalUnit}
6460 @param lu: a logical unit from which we get configuration data
6461 @type nodenames: C{list}
6462 @param nodenames: the list of node names to check
6463 @type req_sizes: C{dict}
6464 @param req_sizes: the hash of vg and corresponding amount of disk in
6466 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6467 or we cannot check the node
6470 for vg, req_size in req_sizes.items():
6471 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
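# Illustrative argument (sketch, VG names hypothetical): requesting 1024 MiB
# on "xenvg" and 256 MiB on "othervg" for the same nodes would be written as
#   _CheckNodesFreeDiskPerVG(self, nodenames, {"xenvg": 1024, "othervg": 256})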
6474 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6475 """Checks if nodes have enough free disk space in the specified VG.
6477 This function checks if all given nodes have the needed amount of
6478 free disk. In case any node has less disk or we cannot get the
6479 information from the node, this function raises an OpPrereqError
6482 @type lu: C{LogicalUnit}
6483 @param lu: a logical unit from which we get configuration data
6484 @type nodenames: C{list}
6485 @param nodenames: the list of node names to check
6487 @param vg: the volume group to check
6488 @type requested: C{int}
6489 @param requested: the amount of disk in MiB to check for
6490 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6491 or we cannot check the node
6494 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6495 for node in nodenames:
6496 info = nodeinfo[node]
6497 info.Raise("Cannot get current information from node %s" % node,
6498 prereq=True, ecode=errors.ECODE_ENVIRON)
6499 (_, (vg_info, ), _) = info.payload
6500 vg_free = vg_info.get("vg_free", None)
6501 if not isinstance(vg_free, int):
6502 raise errors.OpPrereqError("Can't compute free disk space on node"
6503 " %s for vg %s, result was '%s'" %
6504 (node, vg, vg_free), errors.ECODE_ENVIRON)
6505 if requested > vg_free:
6506 raise errors.OpPrereqError("Not enough disk space on target node %s"
6507 " vg %s: required %d MiB, available %d MiB" %
6508 (node, vg, requested, vg_free),
6512 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6513 """Checks if nodes have enough physical CPUs
6515 This function checks if all given nodes have the needed number of
6516 physical CPUs. In case any node has fewer CPUs or we cannot get the
6517 information from the node, this function raises an OpPrereqError
6520 @type lu: C{LogicalUnit}
6521 @param lu: a logical unit from which we get configuration data
6522 @type nodenames: C{list}
6523 @param nodenames: the list of node names to check
6524 @type requested: C{int}
6525 @param requested: the minimum acceptable number of physical CPUs
6526 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6527 or we cannot check the node
6530 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6531 for node in nodenames:
6532 info = nodeinfo[node]
6533 info.Raise("Cannot get current information from node %s" % node,
6534 prereq=True, ecode=errors.ECODE_ENVIRON)
6535 (_, _, (hv_info, )) = info.payload
6536 num_cpus = hv_info.get("cpu_total", None)
6537 if not isinstance(num_cpus, int):
6538 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6539 " on node %s, result was '%s'" %
6540 (node, num_cpus), errors.ECODE_ENVIRON)
6541 if requested > num_cpus:
6542 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6543 "required" % (node, num_cpus, requested),
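# Illustrative call (sketch, names assumed): requiring at least 4 physical
# CPUs on a set of candidate nodes for an instance's hypervisor would look
# like
#   _CheckNodesPhysicalCPUs(self, nodenames, 4, instance.hypervisor)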
6547 class LUInstanceStartup(LogicalUnit):
6548 """Starts an instance.
6551 HPATH = "instance-start"
6552 HTYPE = constants.HTYPE_INSTANCE
6555 def CheckArguments(self):
6557 if self.op.beparams:
6558 # fill the beparams dict
6559 objects.UpgradeBeParams(self.op.beparams)
6560 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6562 def ExpandNames(self):
6563 self._ExpandAndLockInstance()
6564 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6566 def DeclareLocks(self, level):
6567 if level == locking.LEVEL_NODE_RES:
6568 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6570 def BuildHooksEnv(self):
6573 This runs on master, primary and secondary nodes of the instance.
6577 "FORCE": self.op.force,
6580 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6584 def BuildHooksNodes(self):
6585 """Build hooks nodes.
6588 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6591 def CheckPrereq(self):
6592 """Check prerequisites.
6594 This checks that the instance is in the cluster.
6597 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6598 assert self.instance is not None, \
6599 "Cannot retrieve locked instance %s" % self.op.instance_name
6602 if self.op.hvparams:
6603 # check hypervisor parameter syntax (locally)
6604 cluster = self.cfg.GetClusterInfo()
6605 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6606 filled_hvp = cluster.FillHV(instance)
6607 filled_hvp.update(self.op.hvparams)
6608 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6609 hv_type.CheckParameterSyntax(filled_hvp)
6610 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6612 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6614 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6616 if self.primary_offline and self.op.ignore_offline_nodes:
6617 self.proc.LogWarning("Ignoring offline primary node")
6619 if self.op.hvparams or self.op.beparams:
6620 self.proc.LogWarning("Overridden parameters are ignored")
6622 _CheckNodeOnline(self, instance.primary_node)
6624 bep = self.cfg.GetClusterInfo().FillBE(instance)
6625 bep.update(self.op.beparams)
6627 # check bridges existence
6628 _CheckInstanceBridgesExist(self, instance)
6630 remote_info = self.rpc.call_instance_info(instance.primary_node,
6632 instance.hypervisor)
6633 remote_info.Raise("Error checking node %s" % instance.primary_node,
6634 prereq=True, ecode=errors.ECODE_ENVIRON)
6635 if not remote_info.payload: # not running already
6636 _CheckNodeFreeMemory(self, instance.primary_node,
6637 "starting instance %s" % instance.name,
6638 bep[constants.BE_MINMEM], instance.hypervisor)
6640 def Exec(self, feedback_fn):
6641 """Start the instance.
6644 instance = self.instance
6645 force = self.op.force
6647 if not self.op.no_remember:
6648 self.cfg.MarkInstanceUp(instance.name)
6650 if self.primary_offline:
6651 assert self.op.ignore_offline_nodes
6652 self.proc.LogInfo("Primary node offline, marked instance as started")
6654 node_current = instance.primary_node
6656 _StartInstanceDisks(self, instance, force)
6659 self.rpc.call_instance_start(node_current,
6660 (instance, self.op.hvparams,
6662 self.op.startup_paused)
6663 msg = result.fail_msg
6665 _ShutdownInstanceDisks(self, instance)
6666 raise errors.OpExecError("Could not start instance: %s" % msg)
6669 class LUInstanceReboot(LogicalUnit):
6670 """Reboot an instance.
6673 HPATH = "instance-reboot"
6674 HTYPE = constants.HTYPE_INSTANCE
6677 def ExpandNames(self):
6678 self._ExpandAndLockInstance()
6680 def BuildHooksEnv(self):
6683 This runs on master, primary and secondary nodes of the instance.
6687 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6688 "REBOOT_TYPE": self.op.reboot_type,
6689 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6692 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6696 def BuildHooksNodes(self):
6697 """Build hooks nodes.
6700 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6703 def CheckPrereq(self):
6704 """Check prerequisites.
6706 This checks that the instance is in the cluster.
6709 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6710 assert self.instance is not None, \
6711 "Cannot retrieve locked instance %s" % self.op.instance_name
6712 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6713 _CheckNodeOnline(self, instance.primary_node)
6715 # check bridges existence
6716 _CheckInstanceBridgesExist(self, instance)
6718 def Exec(self, feedback_fn):
6719 """Reboot the instance.
6722 instance = self.instance
6723 ignore_secondaries = self.op.ignore_secondaries
6724 reboot_type = self.op.reboot_type
6726 remote_info = self.rpc.call_instance_info(instance.primary_node,
6728 instance.hypervisor)
6729 remote_info.Raise("Error checking node %s" % instance.primary_node)
6730 instance_running = bool(remote_info.payload)
6732 node_current = instance.primary_node
6734 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6735 constants.INSTANCE_REBOOT_HARD]:
6736 for disk in instance.disks:
6737 self.cfg.SetDiskID(disk, node_current)
6738 result = self.rpc.call_instance_reboot(node_current, instance,
6740 self.op.shutdown_timeout)
6741 result.Raise("Could not reboot instance")
6743 if instance_running:
6744 result = self.rpc.call_instance_shutdown(node_current, instance,
6745 self.op.shutdown_timeout)
6746 result.Raise("Could not shutdown instance for full reboot")
6747 _ShutdownInstanceDisks(self, instance)
6749 self.LogInfo("Instance %s was already stopped, starting now",
6751 _StartInstanceDisks(self, instance, ignore_secondaries)
6752 result = self.rpc.call_instance_start(node_current,
6753 (instance, None, None), False)
6754 msg = result.fail_msg
6756 _ShutdownInstanceDisks(self, instance)
6757 raise errors.OpExecError("Could not start instance for"
6758 " full reboot: %s" % msg)
6760 self.cfg.MarkInstanceUp(instance.name)
6763 class LUInstanceShutdown(LogicalUnit):
6764 """Shutdown an instance.
6767 HPATH = "instance-stop"
6768 HTYPE = constants.HTYPE_INSTANCE
6771 def ExpandNames(self):
6772 self._ExpandAndLockInstance()
6774 def BuildHooksEnv(self):
6777 This runs on master, primary and secondary nodes of the instance.
6780 env = _BuildInstanceHookEnvByObject(self, self.instance)
6781 env["TIMEOUT"] = self.op.timeout
6784 def BuildHooksNodes(self):
6785 """Build hooks nodes.
6788 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6791 def CheckPrereq(self):
6792 """Check prerequisites.
6794 This checks that the instance is in the cluster.
6797 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6798 assert self.instance is not None, \
6799 "Cannot retrieve locked instance %s" % self.op.instance_name
6801 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6803 self.primary_offline = \
6804 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6806 if self.primary_offline and self.op.ignore_offline_nodes:
6807 self.proc.LogWarning("Ignoring offline primary node")
6809 _CheckNodeOnline(self, self.instance.primary_node)
6811 def Exec(self, feedback_fn):
6812 """Shutdown the instance.
6815 instance = self.instance
6816 node_current = instance.primary_node
6817 timeout = self.op.timeout
6819 if not self.op.no_remember:
6820 self.cfg.MarkInstanceDown(instance.name)
6822 if self.primary_offline:
6823 assert self.op.ignore_offline_nodes
6824 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6826 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6827 msg = result.fail_msg
6829 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6831 _ShutdownInstanceDisks(self, instance)
6834 class LUInstanceReinstall(LogicalUnit):
6835 """Reinstall an instance.
6838 HPATH = "instance-reinstall"
6839 HTYPE = constants.HTYPE_INSTANCE
6842 def ExpandNames(self):
6843 self._ExpandAndLockInstance()
6845 def BuildHooksEnv(self):
6848 This runs on master, primary and secondary nodes of the instance.
6851 return _BuildInstanceHookEnvByObject(self, self.instance)
6853 def BuildHooksNodes(self):
6854 """Build hooks nodes.
6857 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6860 def CheckPrereq(self):
6861 """Check prerequisites.
6863 This checks that the instance is in the cluster and is not running.
6866 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6867 assert instance is not None, \
6868 "Cannot retrieve locked instance %s" % self.op.instance_name
6869 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6870 " offline, cannot reinstall")
6871 for node in instance.secondary_nodes:
6872 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6873 " cannot reinstall")
6875 if instance.disk_template == constants.DT_DISKLESS:
6876 raise errors.OpPrereqError("Instance '%s' has no disks" %
6877 self.op.instance_name,
6879 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6881 if self.op.os_type is not None:
6883 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6884 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6885 instance_os = self.op.os_type
6887 instance_os = instance.os
6889 nodelist = list(instance.all_nodes)
6891 if self.op.osparams:
6892 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6893 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6894 self.os_inst = i_osdict # the new dict (without defaults)
6898 self.instance = instance
6900 def Exec(self, feedback_fn):
6901 """Reinstall the instance.
6904 inst = self.instance
6906 if self.op.os_type is not None:
6907 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6908 inst.os = self.op.os_type
6909 # Write to configuration
6910 self.cfg.Update(inst, feedback_fn)
6912 _StartInstanceDisks(self, inst, None)
6914 feedback_fn("Running the instance OS create scripts...")
6915 # FIXME: pass debug option from opcode to backend
6916 result = self.rpc.call_instance_os_add(inst.primary_node,
6917 (inst, self.os_inst), True,
6918 self.op.debug_level)
6919 result.Raise("Could not install OS for instance %s on node %s" %
6920 (inst.name, inst.primary_node))
6922 _ShutdownInstanceDisks(self, inst)
6925 class LUInstanceRecreateDisks(LogicalUnit):
6926 """Recreate an instance's missing disks.
6929 HPATH = "instance-recreate-disks"
6930 HTYPE = constants.HTYPE_INSTANCE
6933 _MODIFYABLE = frozenset([
6934 constants.IDISK_SIZE,
6935 constants.IDISK_MODE,
6938 # New or changed disk parameters may have different semantics
6939 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6940 constants.IDISK_ADOPT,
6942 # TODO: Implement support changing VG while recreating
6944 constants.IDISK_METAVG,
6947 def CheckArguments(self):
6948 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6949 # Normalize and convert deprecated list of disk indices
6950 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
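      # e.g. a deprecated request of [2, 0, 2] is normalized to
      # [(0, {}), (2, {})]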
6952 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6954 raise errors.OpPrereqError("Some disks have been specified more than"
6955 " once: %s" % utils.CommaJoin(duplicates),
6958 for (idx, params) in self.op.disks:
6959 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6960 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6962 raise errors.OpPrereqError("Parameters for disk %s try to change"
6963 " unmodifiable parameter(s): %s" %
6964 (idx, utils.CommaJoin(unsupported)),
6967 def ExpandNames(self):
6968 self._ExpandAndLockInstance()
6969 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6971 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6972 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6974 self.needed_locks[locking.LEVEL_NODE] = []
6975 self.needed_locks[locking.LEVEL_NODE_RES] = []
6977 def DeclareLocks(self, level):
6978 if level == locking.LEVEL_NODE:
6979 # if we replace the nodes, we only need to lock the old primary,
6980 # otherwise we need to lock all nodes for disk re-creation
6981 primary_only = bool(self.op.nodes)
6982 self._LockInstancesNodes(primary_only=primary_only)
6983 elif level == locking.LEVEL_NODE_RES:
6985 self.needed_locks[locking.LEVEL_NODE_RES] = \
6986 self.needed_locks[locking.LEVEL_NODE][:]
6988 def BuildHooksEnv(self):
6991 This runs on master, primary and secondary nodes of the instance.
6994 return _BuildInstanceHookEnvByObject(self, self.instance)
6996 def BuildHooksNodes(self):
6997 """Build hooks nodes.
7000 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7003 def CheckPrereq(self):
7004 """Check prerequisites.
7006 This checks that the instance is in the cluster and is not running.
7009 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7010 assert instance is not None, \
7011 "Cannot retrieve locked instance %s" % self.op.instance_name
7013 if len(self.op.nodes) != len(instance.all_nodes):
7014 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7015 " %d replacement nodes were specified" %
7016 (instance.name, len(instance.all_nodes),
7017 len(self.op.nodes)),
7019 assert instance.disk_template != constants.DT_DRBD8 or \
7020 len(self.op.nodes) == 2
7021 assert instance.disk_template != constants.DT_PLAIN or \
7022 len(self.op.nodes) == 1
7023 primary_node = self.op.nodes[0]
7025 primary_node = instance.primary_node
7026 _CheckNodeOnline(self, primary_node)
7028 if instance.disk_template == constants.DT_DISKLESS:
7029 raise errors.OpPrereqError("Instance '%s' has no disks" %
7030 self.op.instance_name, errors.ECODE_INVAL)
7032 # if we replace nodes *and* the old primary is offline, we don't
7034 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7035 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7036 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7037 if not (self.op.nodes and old_pnode.offline):
7038 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7039 msg="cannot recreate disks")
7042 self.disks = dict(self.op.disks)
7044 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7046 maxidx = max(self.disks.keys())
7047 if maxidx >= len(instance.disks):
7048 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7051 if (self.op.nodes and
7052 sorted(self.disks.keys()) != range(len(instance.disks))):
7053 raise errors.OpPrereqError("Can't recreate disks partially and"
7054 " change the nodes at the same time",
7057 self.instance = instance
7059 def Exec(self, feedback_fn):
7060 """Recreate the disks.
7063 instance = self.instance
7065 assert (self.owned_locks(locking.LEVEL_NODE) ==
7066 self.owned_locks(locking.LEVEL_NODE_RES))
7069 mods = [] # keeps track of needed changes
7071 for idx, disk in enumerate(instance.disks):
7073 changes = self.disks[idx]
7075 # Disk should not be recreated
7079 # update secondaries for disks, if needed
7080 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7081 # need to update the nodes and minors
7082 assert len(self.op.nodes) == 2
7083 assert len(disk.logical_id) == 6 # otherwise disk internals
7085 (_, _, old_port, _, _, old_secret) = disk.logical_id
7086 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7087 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7088 new_minors[0], new_minors[1], old_secret)
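        # DRBD8 logical_id layout: (node_a, node_b, port, minor_a, minor_b,
        # shared_secret); only the nodes and minors change here, the port and
        # secret are carried over from the old disk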
7089 assert len(disk.logical_id) == len(new_id)
7093 mods.append((idx, new_id, changes))
7095 # now that we have passed all asserts above, we can apply the mods
7096 # in a single run (to avoid partial changes)
7097 for idx, new_id, changes in mods:
7098 disk = instance.disks[idx]
7099 if new_id is not None:
7100 assert disk.dev_type == constants.LD_DRBD8
7101 disk.logical_id = new_id
7103 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7104 mode=changes.get(constants.IDISK_MODE, None))
7106 # change primary node, if needed
7108 instance.primary_node = self.op.nodes[0]
7109 self.LogWarning("Changing the instance's nodes, you will have to"
7110 " remove any disks left on the older nodes manually")
7113 self.cfg.Update(instance, feedback_fn)
7115 _CreateDisks(self, instance, to_skip=to_skip)
7118 class LUInstanceRename(LogicalUnit):
7119 """Rename an instance.
7122 HPATH = "instance-rename"
7123 HTYPE = constants.HTYPE_INSTANCE
7125 def CheckArguments(self):
7129 if self.op.ip_check and not self.op.name_check:
7130 # TODO: make the ip check more flexible and not depend on the name check
7131 raise errors.OpPrereqError("IP address check requires a name check",
7134 def BuildHooksEnv(self):
7137 This runs on master, primary and secondary nodes of the instance.
7140 env = _BuildInstanceHookEnvByObject(self, self.instance)
7141 env["INSTANCE_NEW_NAME"] = self.op.new_name
7144 def BuildHooksNodes(self):
7145 """Build hooks nodes.
7148 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7151 def CheckPrereq(self):
7152 """Check prerequisites.
7154 This checks that the instance is in the cluster and is not running.
7157 self.op.instance_name = _ExpandInstanceName(self.cfg,
7158 self.op.instance_name)
7159 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7160 assert instance is not None
7161 _CheckNodeOnline(self, instance.primary_node)
7162 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7163 msg="cannot rename")
7164 self.instance = instance
7166 new_name = self.op.new_name
7167 if self.op.name_check:
7168 hostname = netutils.GetHostname(name=new_name)
7169 if hostname.name != new_name:
7170 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7172 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7173 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7174 " same as given hostname '%s'") %
7175 (hostname.name, self.op.new_name),
7177 new_name = self.op.new_name = hostname.name
7178 if (self.op.ip_check and
7179 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7180 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7181 (hostname.ip, new_name),
7182 errors.ECODE_NOTUNIQUE)
7184 instance_list = self.cfg.GetInstanceList()
7185 if new_name in instance_list and new_name != instance.name:
7186 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7187 new_name, errors.ECODE_EXISTS)
7189 def Exec(self, feedback_fn):
7190 """Rename the instance.
7193 inst = self.instance
7194 old_name = inst.name
7196 rename_file_storage = False
7197 if (inst.disk_template in constants.DTS_FILEBASED and
7198 self.op.new_name != inst.name):
7199 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7200 rename_file_storage = True
7202 self.cfg.RenameInstance(inst.name, self.op.new_name)
7203 # Change the instance lock. This is definitely safe while we hold the BGL.
7204 # Otherwise the new lock would have to be added in acquired mode.
7206 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7207 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7209 # re-read the instance from the configuration after rename
7210 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7212 if rename_file_storage:
7213 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7214 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7215 old_file_storage_dir,
7216 new_file_storage_dir)
7217 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7218 " (but the instance has been renamed in Ganeti)" %
7219 (inst.primary_node, old_file_storage_dir,
7220 new_file_storage_dir))
7222 _StartInstanceDisks(self, inst, None)
7224 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7225 old_name, self.op.debug_level)
7226 msg = result.fail_msg
7228 msg = ("Could not run OS rename script for instance %s on node %s"
7229 " (but the instance has been renamed in Ganeti): %s" %
7230 (inst.name, inst.primary_node, msg))
7231 self.proc.LogWarning(msg)
7233 _ShutdownInstanceDisks(self, inst)
7238 class LUInstanceRemove(LogicalUnit):
7239 """Remove an instance.
7242 HPATH = "instance-remove"
7243 HTYPE = constants.HTYPE_INSTANCE
7246 def ExpandNames(self):
7247 self._ExpandAndLockInstance()
7248 self.needed_locks[locking.LEVEL_NODE] = []
7249 self.needed_locks[locking.LEVEL_NODE_RES] = []
7250 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7252 def DeclareLocks(self, level):
7253 if level == locking.LEVEL_NODE:
7254 self._LockInstancesNodes()
7255 elif level == locking.LEVEL_NODE_RES:
7257 self.needed_locks[locking.LEVEL_NODE_RES] = \
7258 self.needed_locks[locking.LEVEL_NODE][:]
7260 def BuildHooksEnv(self):
7263 This runs on master, primary and secondary nodes of the instance.
7266 env = _BuildInstanceHookEnvByObject(self, self.instance)
7267 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7270 def BuildHooksNodes(self):
7271 """Build hooks nodes.
7274 nl = [self.cfg.GetMasterNode()]
7275 nl_post = list(self.instance.all_nodes) + nl
7276 return (nl, nl_post)
7278 def CheckPrereq(self):
7279 """Check prerequisites.
7281 This checks that the instance is in the cluster.
7284 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7285 assert self.instance is not None, \
7286 "Cannot retrieve locked instance %s" % self.op.instance_name
7288 def Exec(self, feedback_fn):
7289 """Remove the instance.
7292 instance = self.instance
7293 logging.info("Shutting down instance %s on node %s",
7294 instance.name, instance.primary_node)
7296 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7297 self.op.shutdown_timeout)
7298 msg = result.fail_msg
7300 if self.op.ignore_failures:
7301 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7303 raise errors.OpExecError("Could not shutdown instance %s on"
7305 (instance.name, instance.primary_node, msg))
7307 assert (self.owned_locks(locking.LEVEL_NODE) ==
7308 self.owned_locks(locking.LEVEL_NODE_RES))
7309 assert not (set(instance.all_nodes) -
7310 self.owned_locks(locking.LEVEL_NODE)), \
7311 "Not owning correct locks"
7313 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7316 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7317 """Utility function to remove an instance.
7320 logging.info("Removing block devices for instance %s", instance.name)
7322 if not _RemoveDisks(lu, instance):
7323 if not ignore_failures:
7324 raise errors.OpExecError("Can't remove instance's disks")
7325 feedback_fn("Warning: can't remove instance's disks")
7327 logging.info("Removing instance %s out of cluster config", instance.name)
7329 lu.cfg.RemoveInstance(instance.name)
7331 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7332 "Instance lock removal conflict"
7334 # Remove lock for the instance
7335 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7338 class LUInstanceQuery(NoHooksLU):
7339 """Logical unit for querying instances.
7342 # pylint: disable=W0142
7345 def CheckArguments(self):
7346 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7347 self.op.output_fields, self.op.use_locking)
7349 def ExpandNames(self):
7350 self.iq.ExpandNames(self)
7352 def DeclareLocks(self, level):
7353 self.iq.DeclareLocks(self, level)
7355 def Exec(self, feedback_fn):
7356 return self.iq.OldStyleQuery(self)
7359 class LUInstanceFailover(LogicalUnit):
7360 """Failover an instance.
7363 HPATH = "instance-failover"
7364 HTYPE = constants.HTYPE_INSTANCE
7367 def CheckArguments(self):
7368 """Check the arguments.
7371 self.iallocator = getattr(self.op, "iallocator", None)
7372 self.target_node = getattr(self.op, "target_node", None)
7374 def ExpandNames(self):
7375 self._ExpandAndLockInstance()
7377 if self.op.target_node is not None:
7378 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7380 self.needed_locks[locking.LEVEL_NODE] = []
7381 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7383 self.needed_locks[locking.LEVEL_NODE_RES] = []
7384 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7386 ignore_consistency = self.op.ignore_consistency
7387 shutdown_timeout = self.op.shutdown_timeout
7388 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7391 ignore_consistency=ignore_consistency,
7392 shutdown_timeout=shutdown_timeout,
7393 ignore_ipolicy=self.op.ignore_ipolicy)
7394 self.tasklets = [self._migrater]
7396 def DeclareLocks(self, level):
7397 if level == locking.LEVEL_NODE:
7398 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7399 if instance.disk_template in constants.DTS_EXT_MIRROR:
7400 if self.op.target_node is None:
7401 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7403 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7404 self.op.target_node]
7405 del self.recalculate_locks[locking.LEVEL_NODE]
7407 self._LockInstancesNodes()
7408 elif level == locking.LEVEL_NODE_RES:
7410 self.needed_locks[locking.LEVEL_NODE_RES] = \
7411 self.needed_locks[locking.LEVEL_NODE][:]
7413 def BuildHooksEnv(self):
7416 This runs on master, primary and secondary nodes of the instance.
7419 instance = self._migrater.instance
7420 source_node = instance.primary_node
7421 target_node = self.op.target_node
7423 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7424 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7425 "OLD_PRIMARY": source_node,
7426 "NEW_PRIMARY": target_node,
7429 if instance.disk_template in constants.DTS_INT_MIRROR:
7430 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7431 env["NEW_SECONDARY"] = source_node
7433 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7435 env.update(_BuildInstanceHookEnvByObject(self, instance))
7439 def BuildHooksNodes(self):
7440 """Build hooks nodes.
7443 instance = self._migrater.instance
7444 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7445 return (nl, nl + [instance.primary_node])
7448 class LUInstanceMigrate(LogicalUnit):
7449 """Migrate an instance.
7451 This is migration without shutting down, compared to the failover,
7452 which is done with shutdown.
7455 HPATH = "instance-migrate"
7456 HTYPE = constants.HTYPE_INSTANCE
7459 def ExpandNames(self):
7460 self._ExpandAndLockInstance()
7462 if self.op.target_node is not None:
7463 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7465 self.needed_locks[locking.LEVEL_NODE] = []
7466 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7468 self.needed_locks[locking.LEVEL_NODE] = []
7469 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7472 TLMigrateInstance(self, self.op.instance_name,
7473 cleanup=self.op.cleanup,
7475 fallback=self.op.allow_failover,
7476 allow_runtime_changes=self.op.allow_runtime_changes,
7477 ignore_ipolicy=self.op.ignore_ipolicy)
7478 self.tasklets = [self._migrater]
7480 def DeclareLocks(self, level):
7481 if level == locking.LEVEL_NODE:
7482 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7483 if instance.disk_template in constants.DTS_EXT_MIRROR:
7484 if self.op.target_node is None:
7485 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7487 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7488 self.op.target_node]
7489 del self.recalculate_locks[locking.LEVEL_NODE]
7491 self._LockInstancesNodes()
7492 elif level == locking.LEVEL_NODE_RES:
7494 self.needed_locks[locking.LEVEL_NODE_RES] = \
7495 self.needed_locks[locking.LEVEL_NODE][:]
7497 def BuildHooksEnv(self):
7500 This runs on master, primary and secondary nodes of the instance.
7503 instance = self._migrater.instance
7504 source_node = instance.primary_node
7505 target_node = self.op.target_node
7506 env = _BuildInstanceHookEnvByObject(self, instance)
7507 env.update({
7508 "MIGRATE_LIVE": self._migrater.live,
7509 "MIGRATE_CLEANUP": self.op.cleanup,
7510 "OLD_PRIMARY": source_node,
7511 "NEW_PRIMARY": target_node,
7512 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7513 })
7515 if instance.disk_template in constants.DTS_INT_MIRROR:
7516 env["OLD_SECONDARY"] = target_node
7517 env["NEW_SECONDARY"] = source_node
7518 else:
7519 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7521 return env
7523 def BuildHooksNodes(self):
7524 """Build hooks nodes.
7527 instance = self._migrater.instance
7528 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7529 return (nl, nl + [instance.primary_node])
7532 class LUInstanceMove(LogicalUnit):
7533 """Move an instance by data-copying.
7536 HPATH = "instance-move"
7537 HTYPE = constants.HTYPE_INSTANCE
7540 def ExpandNames(self):
7541 self._ExpandAndLockInstance()
7542 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7543 self.op.target_node = target_node
7544 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7545 self.needed_locks[locking.LEVEL_NODE_RES] = []
7546 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7548 def DeclareLocks(self, level):
7549 if level == locking.LEVEL_NODE:
7550 self._LockInstancesNodes(primary_only=True)
7551 elif level == locking.LEVEL_NODE_RES:
7553 self.needed_locks[locking.LEVEL_NODE_RES] = \
7554 self.needed_locks[locking.LEVEL_NODE][:]
7556 def BuildHooksEnv(self):
7559 This runs on master, primary and secondary nodes of the instance.
7563 "TARGET_NODE": self.op.target_node,
7564 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7566 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7569 def BuildHooksNodes(self):
7570 """Build hooks nodes.
7573 nl = [
7574 self.cfg.GetMasterNode(),
7575 self.instance.primary_node,
7576 self.op.target_node,
7577 ]
7578 return (nl, nl)
7580 def CheckPrereq(self):
7581 """Check prerequisites.
7583 This checks that the instance is in the cluster.
7586 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7587 assert self.instance is not None, \
7588 "Cannot retrieve locked instance %s" % self.op.instance_name
7590 node = self.cfg.GetNodeInfo(self.op.target_node)
7591 assert node is not None, \
7592 "Cannot retrieve locked node %s" % self.op.target_node
7594 self.target_node = target_node = node.name
7596 if target_node == instance.primary_node:
7597 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7598 (instance.name, target_node),
7601 bep = self.cfg.GetClusterInfo().FillBE(instance)
7603 for idx, dsk in enumerate(instance.disks):
7604 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7605 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7606 " cannot copy" % idx, errors.ECODE_STATE)
7608 _CheckNodeOnline(self, target_node)
7609 _CheckNodeNotDrained(self, target_node)
7610 _CheckNodeVmCapable(self, target_node)
7611 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7612 self.cfg.GetNodeGroup(node.group))
7613 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7614 ignore=self.op.ignore_ipolicy)
7616 if instance.admin_state == constants.ADMINST_UP:
7617 # check memory requirements on the secondary node
7618 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7619 instance.name, bep[constants.BE_MAXMEM],
7620 instance.hypervisor)
7622 self.LogInfo("Not checking memory on the secondary node as"
7623 " instance will not be started")
7625 # check bridge existence
7626 _CheckInstanceBridgesExist(self, instance, node=target_node)
7628 def Exec(self, feedback_fn):
7629 """Move an instance.
7631 The move is done by shutting it down on its present node, copying
7632 the data over (slow) and starting it on the new node.
7635 instance = self.instance
7637 source_node = instance.primary_node
7638 target_node = self.target_node
7640 self.LogInfo("Shutting down instance %s on source node %s",
7641 instance.name, source_node)
7643 assert (self.owned_locks(locking.LEVEL_NODE) ==
7644 self.owned_locks(locking.LEVEL_NODE_RES))
7646 result = self.rpc.call_instance_shutdown(source_node, instance,
7647 self.op.shutdown_timeout)
7648 msg = result.fail_msg
7650 if self.op.ignore_consistency:
7651 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7652 " Proceeding anyway. Please make sure node"
7653 " %s is down. Error details: %s",
7654 instance.name, source_node, source_node, msg)
7656 raise errors.OpExecError("Could not shutdown instance %s on"
7658 (instance.name, source_node, msg))
7660 # create the target disks
7661 try:
7662 _CreateDisks(self, instance, target_node=target_node)
7663 except errors.OpExecError:
7664 self.LogWarning("Device creation failed, reverting...")
7665 try:
7666 _RemoveDisks(self, instance, target_node=target_node)
7667 finally:
7668 self.cfg.ReleaseDRBDMinors(instance.name)
7669 raise
7671 cluster_name = self.cfg.GetClusterInfo().cluster_name
7674 # activate, get path, copy the data over
7675 for idx, disk in enumerate(instance.disks):
7676 self.LogInfo("Copying data for disk %d", idx)
7677 result = self.rpc.call_blockdev_assemble(target_node, disk,
7678 instance.name, True, idx)
7680 self.LogWarning("Can't assemble newly created disk %d: %s",
7681 idx, result.fail_msg)
7682 errs.append(result.fail_msg)
7684 dev_path = result.payload
7685 result = self.rpc.call_blockdev_export(source_node, disk,
7686 target_node, dev_path,
7689 self.LogWarning("Can't copy data over for disk %d: %s",
7690 idx, result.fail_msg)
7691 errs.append(result.fail_msg)
7694 if errs:
7695 self.LogWarning("Some disks failed to copy, aborting")
7696 try:
7697 _RemoveDisks(self, instance, target_node=target_node)
7698 finally:
7699 self.cfg.ReleaseDRBDMinors(instance.name)
7700 raise errors.OpExecError("Errors during disk copy: %s" %
7701 (",".join(errs),))
7703 instance.primary_node = target_node
7704 self.cfg.Update(instance, feedback_fn)
7706 self.LogInfo("Removing the disks on the original node")
7707 _RemoveDisks(self, instance, target_node=source_node)
7709 # Only start the instance if it's marked as up
7710 if instance.admin_state == constants.ADMINST_UP:
7711 self.LogInfo("Starting instance %s on node %s",
7712 instance.name, target_node)
7714 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7715 ignore_secondaries=True)
7717 _ShutdownInstanceDisks(self, instance)
7718 raise errors.OpExecError("Can't activate the instance's disks")
7720 result = self.rpc.call_instance_start(target_node,
7721 (instance, None, None), False)
7722 msg = result.fail_msg
7724 _ShutdownInstanceDisks(self, instance)
7725 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7726 (instance.name, target_node, msg))
7729 class LUNodeMigrate(LogicalUnit):
7730 """Migrate all instances from a node.
7733 HPATH = "node-migrate"
7734 HTYPE = constants.HTYPE_NODE
7737 def CheckArguments(self):
7740 def ExpandNames(self):
7741 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7743 self.share_locks = _ShareAll()
7744 self.needed_locks = {
7745 locking.LEVEL_NODE: [self.op.node_name],
7746 }
7748 def BuildHooksEnv(self):
7751 This runs on the master, the primary and all the secondaries.
7755 "NODE_NAME": self.op.node_name,
7756 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7759 def BuildHooksNodes(self):
7760 """Build hooks nodes.
7763 nl = [self.cfg.GetMasterNode()]
7764 return (nl, nl)
7766 def CheckPrereq(self):
7769 def Exec(self, feedback_fn):
7770 # Prepare jobs for migration instances
7771 allow_runtime_changes = self.op.allow_runtime_changes
7773 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7776 iallocator=self.op.iallocator,
7777 target_node=self.op.target_node,
7778 allow_runtime_changes=allow_runtime_changes,
7779 ignore_ipolicy=self.op.ignore_ipolicy)]
7780 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7783 # TODO: Run iallocator in this opcode and pass correct placement options to
7784 # OpInstanceMigrate. Since other jobs can modify the cluster between
7785 # running the iallocator and the actual migration, a good consistency model
7786 # will have to be found.
7788 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7789 frozenset([self.op.node_name]))
7791 return ResultWithJobs(jobs)
7794 class TLMigrateInstance(Tasklet):
7795 """Tasklet class for instance migration.
7798 @ivar live: whether the migration will be done live or non-live;
7799 this variable is initialized only after CheckPrereq has run
7800 @type cleanup: boolean
7801 @ivar cleanup: Whether we clean up from a failed migration
7802 @type iallocator: string
7803 @ivar iallocator: The iallocator used to determine target_node
7804 @type target_node: string
7805 @ivar target_node: If given, the target_node to reallocate the instance to
7806 @type failover: boolean
7807 @ivar failover: Whether operation results in failover or migration
7808 @type fallback: boolean
7809 @ivar fallback: Whether fallback to failover is allowed if migration not
7811 @type ignore_consistency: boolean
7812 @ivar ignore_consistency: Whether we should ignore consistency between source
7814 @type shutdown_timeout: int
7815 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7816 @type ignore_ipolicy: bool
7817 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7822 _MIGRATION_POLL_INTERVAL = 1 # seconds
7823 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7825 def __init__(self, lu, instance_name, cleanup=False,
7826 failover=False, fallback=False,
7827 ignore_consistency=False,
7828 allow_runtime_changes=True,
7829 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7830 ignore_ipolicy=False):
7831 """Initializes this class.
7834 Tasklet.__init__(self, lu)
7837 self.instance_name = instance_name
7838 self.cleanup = cleanup
7839 self.live = False # will be overridden later
7840 self.failover = failover
7841 self.fallback = fallback
7842 self.ignore_consistency = ignore_consistency
7843 self.shutdown_timeout = shutdown_timeout
7844 self.ignore_ipolicy = ignore_ipolicy
7845 self.allow_runtime_changes = allow_runtime_changes
7847 def CheckPrereq(self):
7848 """Check prerequisites.
7850 This checks that the instance is in the cluster.
7853 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7854 instance = self.cfg.GetInstanceInfo(instance_name)
7855 assert instance is not None
7856 self.instance = instance
7857 cluster = self.cfg.GetClusterInfo()
7859 if (not self.cleanup and
7860 not instance.admin_state == constants.ADMINST_UP and
7861 not self.failover and self.fallback):
7862 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7863 " switching to failover")
7864 self.failover = True
7866 if instance.disk_template not in constants.DTS_MIRRORED:
7867 if self.failover:
7868 text = "failed over"
7869 else:
7870 text = "migrated"
7871 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7872 " %s" % (instance.disk_template, text),
7873 errors.ECODE_STATE)
7875 if instance.disk_template in constants.DTS_EXT_MIRROR:
7876 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7878 if self.lu.op.iallocator:
7879 self._RunAllocator()
7880 else:
7881 # We set self.target_node as it is required by
7882 # BuildHooksEnv
7883 self.target_node = self.lu.op.target_node
7885 # Check that the target node is correct in terms of instance policy
7886 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7887 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7888 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7889 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7890 ignore=self.ignore_ipolicy)
7892 # self.target_node is already populated, either directly or by the
7894 target_node = self.target_node
7895 if self.target_node == instance.primary_node:
7896 raise errors.OpPrereqError("Cannot migrate instance %s"
7897 " to its primary (%s)" %
7898 (instance.name, instance.primary_node))
7900 if len(self.lu.tasklets) == 1:
7901 # It is safe to release locks only when we're the only tasklet
7903 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7904 keep=[instance.primary_node, self.target_node])
7907 secondary_nodes = instance.secondary_nodes
7908 if not secondary_nodes:
7909 raise errors.ConfigurationError("No secondary node but using"
7910 " %s disk template" %
7911 instance.disk_template)
7912 target_node = secondary_nodes[0]
7913 if self.lu.op.iallocator or (self.lu.op.target_node and
7914 self.lu.op.target_node != target_node):
7915 if self.failover:
7916 text = "failed over"
7917 else:
7918 text = "migrated"
7919 raise errors.OpPrereqError("Instances with disk template %s cannot"
7920 " be %s to arbitrary nodes"
7921 " (neither an iallocator nor a target"
7922 " node can be passed)" %
7923 (instance.disk_template, text),
7924 errors.ECODE_INVAL)
7925 nodeinfo = self.cfg.GetNodeInfo(target_node)
7926 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7927 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7928 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7929 ignore=self.ignore_ipolicy)
7931 i_be = cluster.FillBE(instance)
7933 # check memory requirements on the secondary node
7934 if (not self.cleanup and
7935 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7936 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7937 "migrating instance %s" %
7939 i_be[constants.BE_MINMEM],
7940 instance.hypervisor)
7942 self.lu.LogInfo("Not checking memory on the secondary node as"
7943 " instance will not be started")
7945 # check if failover must be forced instead of migration
7946 if (not self.cleanup and not self.failover and
7947 i_be[constants.BE_ALWAYS_FAILOVER]):
7948 if self.fallback:
7949 self.lu.LogInfo("Instance configured to always failover; fallback"
7950 " to failover")
7951 self.failover = True
7952 else:
7953 raise errors.OpPrereqError("This instance has been configured to"
7954 " always failover, please allow failover",
7955 errors.ECODE_STATE)
7957 # check bridge existence
7958 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7960 if not self.cleanup:
7961 _CheckNodeNotDrained(self.lu, target_node)
7962 if not self.failover:
7963 result = self.rpc.call_instance_migratable(instance.primary_node,
7964 instance)
7965 if result.fail_msg and self.fallback:
7966 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7967 " failover")
7968 self.failover = True
7969 else:
7970 result.Raise("Can't migrate, please use failover",
7971 prereq=True, ecode=errors.ECODE_STATE)
7973 assert not (self.failover and self.cleanup)
7975 if not self.failover:
7976 if self.lu.op.live is not None and self.lu.op.mode is not None:
7977 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7978 " parameters are accepted",
7980 if self.lu.op.live is not None:
7981 if self.lu.op.live:
7982 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7983 else:
7984 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7985 # reset the 'live' parameter to None so that repeated
7986 # invocations of CheckPrereq do not raise an exception
7987 self.lu.op.live = None
7988 elif self.lu.op.mode is None:
7989 # read the default value from the hypervisor
7990 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7991 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7993 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7994 else:
7995 # Failover is never live
7996 self.live = False
7998 if not (self.failover or self.cleanup):
7999 remote_info = self.rpc.call_instance_info(instance.primary_node,
8001 instance.hypervisor)
8002 remote_info.Raise("Error checking instance on node %s" %
8003 instance.primary_node)
8004 instance_running = bool(remote_info.payload)
8005 if instance_running:
8006 self.current_mem = int(remote_info.payload["memory"])
8008 def _RunAllocator(self):
8009 """Run the allocator based on input opcode.
8012 # FIXME: add a self.ignore_ipolicy option
8013 ial = IAllocator(self.cfg, self.rpc,
8014 mode=constants.IALLOCATOR_MODE_RELOC,
8015 name=self.instance_name,
8016 # TODO See why hail breaks with a single node below
8017 relocate_from=[self.instance.primary_node,
8018 self.instance.primary_node],
8021 ial.Run(self.lu.op.iallocator)
8024 raise errors.OpPrereqError("Can't compute nodes using"
8025 " iallocator '%s': %s" %
8026 (self.lu.op.iallocator, ial.info),
8028 if len(ial.result) != ial.required_nodes:
8029 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8030 " of nodes (%s), required %s" %
8031 (self.lu.op.iallocator, len(ial.result),
8032 ial.required_nodes), errors.ECODE_FAULT)
8033 self.target_node = ial.result[0]
8034 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8035 self.instance_name, self.lu.op.iallocator,
8036 utils.CommaJoin(ial.result))
8038 def _WaitUntilSync(self):
8039 """Poll with custom rpc for disk sync.
8041 This uses our own step-based rpc call.
8044 self.feedback_fn("* wait until resync is done")
8045 all_done = False
8046 while not all_done:
8047 all_done = True
8048 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8049 self.nodes_ip,
8050 self.instance.disks)
8051 min_percent = 100
8052 for node, nres in result.items():
8053 nres.Raise("Cannot resync disks on node %s" % node)
8054 node_done, node_percent = nres.payload
8055 all_done = all_done and node_done
8056 if node_percent is not None:
8057 min_percent = min(min_percent, node_percent)
8058 if not all_done:
8059 if min_percent < 100:
8060 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8061 time.sleep(2)
8063 def _EnsureSecondary(self, node):
8064 """Demote a node to secondary.
8067 self.feedback_fn("* switching node %s to secondary mode" % node)
8069 for dev in self.instance.disks:
8070 self.cfg.SetDiskID(dev, node)
8072 result = self.rpc.call_blockdev_close(node, self.instance.name,
8073 self.instance.disks)
8074 result.Raise("Cannot change disk to secondary on node %s" % node)
8076 def _GoStandalone(self):
8077 """Disconnect from the network.
8080 self.feedback_fn("* changing into standalone mode")
8081 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8082 self.instance.disks)
8083 for node, nres in result.items():
8084 nres.Raise("Cannot disconnect disks node %s" % node)
8086 def _GoReconnect(self, multimaster):
8087 """Reconnect to the network.
8093 msg = "single-master"
8094 self.feedback_fn("* changing disks into %s mode" % msg)
8095 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8096 self.instance.disks,
8097 self.instance.name, multimaster)
8098 for node, nres in result.items():
8099 nres.Raise("Cannot change disks config on node %s" % node)
8101 def _ExecCleanup(self):
8102 """Try to cleanup after a failed migration.
8104 The cleanup is done by:
8105 - check that the instance is running only on one node
8106 (and update the config if needed)
8107 - change disks on its secondary node to secondary
8108 - wait until disks are fully synchronized
8109 - disconnect from the network
8110 - change disks into single-master mode
8111 - wait again until disks are fully synchronized
8114 instance = self.instance
8115 target_node = self.target_node
8116 source_node = self.source_node
8118 # check running on only one node
8119 self.feedback_fn("* checking where the instance actually runs"
8120 " (if this hangs, the hypervisor might be in"
8122 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8123 for node, result in ins_l.items():
8124 result.Raise("Can't contact node %s" % node)
8126 runningon_source = instance.name in ins_l[source_node].payload
8127 runningon_target = instance.name in ins_l[target_node].payload
8129 if runningon_source and runningon_target:
8130 raise errors.OpExecError("Instance seems to be running on two nodes,"
8131 " or the hypervisor is confused; you will have"
8132 " to ensure manually that it runs only on one"
8133 " and restart this operation")
8135 if not (runningon_source or runningon_target):
8136 raise errors.OpExecError("Instance does not seem to be running at all;"
8137 " in this case it's safer to repair by"
8138 " running 'gnt-instance stop' to ensure disk"
8139 " shutdown, and then restarting it")
8141 if runningon_target:
8142 # the migration has actually succeeded, we need to update the config
8143 self.feedback_fn("* instance running on secondary node (%s),"
8144 " updating config" % target_node)
8145 instance.primary_node = target_node
8146 self.cfg.Update(instance, self.feedback_fn)
8147 demoted_node = source_node
8149 self.feedback_fn("* instance confirmed to be running on its"
8150 " primary node (%s)" % source_node)
8151 demoted_node = target_node
8153 if instance.disk_template in constants.DTS_INT_MIRROR:
8154 self._EnsureSecondary(demoted_node)
8155 try:
8156 self._WaitUntilSync()
8157 except errors.OpExecError:
8158 # we ignore errors here, since if the device is standalone, it
8159 # won't be able to sync
8160 pass
8161 self._GoStandalone()
8162 self._GoReconnect(False)
8163 self._WaitUntilSync()
8165 self.feedback_fn("* done")
8167 def _RevertDiskStatus(self):
8168 """Try to revert the disk status after a failed migration.
8171 target_node = self.target_node
8172 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8173 return
8175 try:
8176 self._EnsureSecondary(target_node)
8177 self._GoStandalone()
8178 self._GoReconnect(False)
8179 self._WaitUntilSync()
8180 except errors.OpExecError, err:
8181 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8182 " please try to recover the instance manually;"
8183 " error '%s'" % str(err))
8185 def _AbortMigration(self):
8186 """Call the hypervisor code to abort a started migration.
8189 instance = self.instance
8190 target_node = self.target_node
8191 source_node = self.source_node
8192 migration_info = self.migration_info
8194 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8198 abort_msg = abort_result.fail_msg
8200 logging.error("Aborting migration failed on target node %s: %s",
8201 target_node, abort_msg)
8202 # Don't raise an exception here, as we still have to try to revert the
8203 # disk status, even if this step failed.
8205 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8206 instance, False, self.live)
8207 abort_msg = abort_result.fail_msg
8209 logging.error("Aborting migration failed on source node %s: %s",
8210 source_node, abort_msg)
8212 def _ExecMigration(self):
8213 """Migrate an instance.
8215 The migrate is done by:
8216 - change the disks into dual-master mode
8217 - wait until disks are fully synchronized again
8218 - migrate the instance
8219 - change disks on the new secondary node (the old primary) to secondary
8220 - wait until disks are fully synchronized
8221 - change disks into single-master mode
8224 instance = self.instance
8225 target_node = self.target_node
8226 source_node = self.source_node
8228 # Check for hypervisor version mismatch and warn the user.
8229 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8230 None, [self.instance.hypervisor])
8231 for ninfo in nodeinfo.values():
8232 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8234 (_, _, (src_info, )) = nodeinfo[source_node].payload
8235 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8237 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8238 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8239 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8240 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8241 if src_version != dst_version:
8242 self.feedback_fn("* warning: hypervisor version mismatch between"
8243 " source (%s) and target (%s) node" %
8244 (src_version, dst_version))
8246 self.feedback_fn("* checking disk consistency between source and target")
8247 for (idx, dev) in enumerate(instance.disks):
8248 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8249 raise errors.OpExecError("Disk %s is degraded or not fully"
8250 " synchronized on target node,"
8251 " aborting migration" % idx)
8253 if self.current_mem > self.tgt_free_mem:
8254 if not self.allow_runtime_changes:
8255 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8256 " free memory to fit instance %s on target"
8257 " node %s (have %dMB, need %dMB)" %
8258 (instance.name, target_node,
8259 self.tgt_free_mem, self.current_mem))
8260 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8261 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8264 rpcres.Raise("Cannot modify instance runtime memory")
8266 # First get the migration information from the remote node
8267 result = self.rpc.call_migration_info(source_node, instance)
8268 msg = result.fail_msg
8270 log_err = ("Failed fetching source migration information from %s: %s" %
8272 logging.error(log_err)
8273 raise errors.OpExecError(log_err)
8275 self.migration_info = migration_info = result.payload
8277 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8278 # Then switch the disks to master/master mode
8279 self._EnsureSecondary(target_node)
8280 self._GoStandalone()
8281 self._GoReconnect(True)
8282 self._WaitUntilSync()
8284 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8285 result = self.rpc.call_accept_instance(target_node,
8288 self.nodes_ip[target_node])
8290 msg = result.fail_msg
8292 logging.error("Instance pre-migration failed, trying to revert"
8293 " disk status: %s", msg)
8294 self.feedback_fn("Pre-migration failed, aborting")
8295 self._AbortMigration()
8296 self._RevertDiskStatus()
8297 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8298 (instance.name, msg))
8300 self.feedback_fn("* migrating instance to %s" % target_node)
8301 result = self.rpc.call_instance_migrate(source_node, instance,
8302 self.nodes_ip[target_node],
8304 msg = result.fail_msg
8306 logging.error("Instance migration failed, trying to revert"
8307 " disk status: %s", msg)
8308 self.feedback_fn("Migration failed, aborting")
8309 self._AbortMigration()
8310 self._RevertDiskStatus()
8311 raise errors.OpExecError("Could not migrate instance %s: %s" %
8312 (instance.name, msg))
8314 self.feedback_fn("* starting memory transfer")
8315 last_feedback = time.time()
8317 result = self.rpc.call_instance_get_migration_status(source_node,
8319 msg = result.fail_msg
8320 ms = result.payload # MigrationStatus instance
8321 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8322 logging.error("Instance migration failed, trying to revert"
8323 " disk status: %s", msg)
8324 self.feedback_fn("Migration failed, aborting")
8325 self._AbortMigration()
8326 self._RevertDiskStatus()
8327 raise errors.OpExecError("Could not migrate instance %s: %s" %
8328 (instance.name, msg))
8330 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8331 self.feedback_fn("* memory transfer complete")
8334 if (utils.TimeoutExpired(last_feedback,
8335 self._MIGRATION_FEEDBACK_INTERVAL) and
8336 ms.transferred_ram is not None):
8337 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8338 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8339 last_feedback = time.time()
8341 time.sleep(self._MIGRATION_POLL_INTERVAL)
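# Editor's note (illustrative, not part of the original source): with the
# class defaults above (_MIGRATION_POLL_INTERVAL = 1s,
# _MIGRATION_FEEDBACK_INTERVAL = 10s) this loop polls the hypervisor once a
# second and reports progress at most every ten seconds; for a hypothetical
# transfer with total_ram=4096 and transferred_ram=1024, the reported line
# would be "* memory transfer progress: 25.00 %".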
8343 result = self.rpc.call_instance_finalize_migration_src(source_node,
8347 msg = result.fail_msg
8349 logging.error("Instance migration succeeded, but finalization failed"
8350 " on the source node: %s", msg)
8351 raise errors.OpExecError("Could not finalize instance migration: %s" %
8354 instance.primary_node = target_node
8356 # distribute new instance config to the other nodes
8357 self.cfg.Update(instance, self.feedback_fn)
8359 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8363 msg = result.fail_msg
8365 logging.error("Instance migration succeeded, but finalization failed"
8366 " on the target node: %s", msg)
8367 raise errors.OpExecError("Could not finalize instance migration: %s" %
8370 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8371 self._EnsureSecondary(source_node)
8372 self._WaitUntilSync()
8373 self._GoStandalone()
8374 self._GoReconnect(False)
8375 self._WaitUntilSync()
8377 # If the instance's disk template is `rbd' and there was a successful
8378 # migration, unmap the device from the source node.
8379 if self.instance.disk_template == constants.DT_RBD:
8380 disks = _ExpandCheckDisks(instance, instance.disks)
8381 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8383 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8384 msg = result.fail_msg
8386 logging.error("Migration was successful, but couldn't unmap the"
8387 " block device %s on source node %s: %s",
8388 disk.iv_name, source_node, msg)
8389 logging.error("You need to unmap the device %s manually on %s",
8390 disk.iv_name, source_node)
8392 self.feedback_fn("* done")
8394 def _ExecFailover(self):
8395 """Failover an instance.
8397 The failover is done by shutting it down on its present node and
8398 starting it on the secondary.
8401 instance = self.instance
8402 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8404 source_node = instance.primary_node
8405 target_node = self.target_node
8407 if instance.admin_state == constants.ADMINST_UP:
8408 self.feedback_fn("* checking disk consistency between source and target")
8409 for (idx, dev) in enumerate(instance.disks):
8410 # for drbd, these are drbd over lvm
8411 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8412 if primary_node.offline:
8413 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8415 (primary_node.name, idx, target_node))
8416 elif not self.ignore_consistency:
8417 raise errors.OpExecError("Disk %s is degraded on target node,"
8418 " aborting failover" % idx)
8420 self.feedback_fn("* not checking disk consistency as instance is not"
8423 self.feedback_fn("* shutting down instance on source node")
8424 logging.info("Shutting down instance %s on node %s",
8425 instance.name, source_node)
8427 result = self.rpc.call_instance_shutdown(source_node, instance,
8428 self.shutdown_timeout)
8429 msg = result.fail_msg
8431 if self.ignore_consistency or primary_node.offline:
8432 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8433 " proceeding anyway; please make sure node"
8434 " %s is down; error details: %s",
8435 instance.name, source_node, source_node, msg)
8437 raise errors.OpExecError("Could not shutdown instance %s on"
8439 (instance.name, source_node, msg))
8441 self.feedback_fn("* deactivating the instance's disks on source node")
8442 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8443 raise errors.OpExecError("Can't shut down the instance's disks")
8445 instance.primary_node = target_node
8446 # distribute new instance config to the other nodes
8447 self.cfg.Update(instance, self.feedback_fn)
8449 # Only start the instance if it's marked as up
8450 if instance.admin_state == constants.ADMINST_UP:
8451 self.feedback_fn("* activating the instance's disks on target node %s" %
8453 logging.info("Starting instance %s on node %s",
8454 instance.name, target_node)
8456 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8457 ignore_secondaries=True)
8459 _ShutdownInstanceDisks(self.lu, instance)
8460 raise errors.OpExecError("Can't activate the instance's disks")
8462 self.feedback_fn("* starting the instance on the target node %s" %
8464 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8466 msg = result.fail_msg
8468 _ShutdownInstanceDisks(self.lu, instance)
8469 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8470 (instance.name, target_node, msg))
8472 def Exec(self, feedback_fn):
8473 """Perform the migration.
8476 self.feedback_fn = feedback_fn
8477 self.source_node = self.instance.primary_node
8479 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8480 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8481 self.target_node = self.instance.secondary_nodes[0]
8482 # Otherwise self.target_node has been populated either
8483 # directly, or through an iallocator.
8485 self.all_nodes = [self.source_node, self.target_node]
8486 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8487 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8490 feedback_fn("Failover instance %s" % self.instance.name)
8491 self._ExecFailover()
8493 feedback_fn("Migrating instance %s" % self.instance.name)
8496 return self._ExecCleanup()
8498 return self._ExecMigration()
8501 def _CreateBlockDev(lu, node, instance, device, force_create,
8503 """Create a tree of block devices on a given node.
8505 If this device type has to be created on secondaries, create it and
8506 all its children.
8508 If not, just recurse to children keeping the same 'force' value.
8510 @param lu: the lu on whose behalf we execute
8511 @param node: the node on which to create the device
8512 @type instance: L{objects.Instance}
8513 @param instance: the instance which owns the device
8514 @type device: L{objects.Disk}
8515 @param device: the device to create
8516 @type force_create: boolean
8517 @param force_create: whether to force creation of this device; this
8518 will be changed to True whenever we find a device which has
8519 CreateOnSecondary() attribute
8520 @param info: the extra 'metadata' we should attach to the device
8521 (this will be represented as a LVM tag)
8522 @type force_open: boolean
8523 @param force_open: this parameter will be passed to the
8524 L{backend.BlockdevCreate} function where it specifies
8525 whether we run on primary or not, and it affects both
8526 the child assembly and the device's own Open() execution
8529 if device.CreateOnSecondary():
8533 for child in device.children:
8534 _CreateBlockDev(lu, node, instance, child, force_create,
8537 if not force_create:
8540 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8543 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8544 """Create a single block device on a given node.
8546 This will not recurse over children of the device, so they must be
8547 created in advance.
8549 @param lu: the lu on whose behalf we execute
8550 @param node: the node on which to create the device
8551 @type instance: L{objects.Instance}
8552 @param instance: the instance which owns the device
8553 @type device: L{objects.Disk}
8554 @param device: the device to create
8555 @param info: the extra 'metadata' we should attach to the device
8556 (this will be represented as a LVM tag)
8557 @type force_open: boolean
8558 @param force_open: this parameter will be passed to the
8559 L{backend.BlockdevCreate} function where it specifies
8560 whether we run on primary or not, and it affects both
8561 the child assembly and the device's own Open() execution
8564 lu.cfg.SetDiskID(device, node)
8565 result = lu.rpc.call_blockdev_create(node, device, device.size,
8566 instance.name, force_open, info)
8567 result.Raise("Can't create block device %s on"
8568 " node %s for instance %s" % (device, node, instance.name))
8569 if device.physical_id is None:
8570 device.physical_id = result.payload
8573 def _GenerateUniqueNames(lu, exts):
8574 """Generate a suitable LV name.
8576 This will generate a logical volume name for the given instance.
8581 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8582 results.append("%s%s" % (new_id, val))
8586 def _ComputeLDParams(disk_template, disk_params):
8587 """Computes Logical Disk parameters from Disk Template parameters.
8589 @type disk_template: string
8590 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8591 @type disk_params: dict
8592 @param disk_params: disk template parameters; dict(template_name -> parameters)
8594 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8595 contains the LD parameters of the node. The tree is flattened in-order.
8598 if disk_template not in constants.DISK_TEMPLATES:
8599 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8602 dt_params = disk_params[disk_template]
8603 if disk_template == constants.DT_DRBD8:
8605 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8606 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8607 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8608 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8609 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8610 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8611 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8612 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8613 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8614 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8615 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8616 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8620 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8623 result.append(drbd_params)
8627 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8630 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8632 result.append(data_params)
8636 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8639 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8641 result.append(meta_params)
8643 elif (disk_template == constants.DT_FILE or
8644 disk_template == constants.DT_SHARED_FILE):
8645 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8647 elif disk_template == constants.DT_PLAIN:
8649 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8652 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8654 result.append(params)
8656 elif disk_template == constants.DT_BLOCK:
8657 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8659 elif disk_template == constants.DT_RBD:
8661 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8664 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8666 result.append(params)
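# Editor's note (illustrative summary, not in the original source): the list
# built by _ComputeLDParams is flattened in-order over the disk tree, so for
# constants.DT_DRBD8 it holds three dicts (the DRBD8 device parameters, then
# the data LV parameters, then the metadata LV parameters), while for
# constants.DT_PLAIN it holds a single dict for the LV.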
8671 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8672 iv_name, p_minor, s_minor, drbd_params, data_params,
8674 """Generate a drbd8 device complete with its children.
8677 assert len(vgnames) == len(names) == 2
8678 port = lu.cfg.AllocatePort()
8679 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8681 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8682 logical_id=(vgnames[0], names[0]),
8684 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8685 logical_id=(vgnames[1], names[1]),
8687 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8688 logical_id=(primary, secondary, port,
8691 children=[dev_data, dev_meta],
8692 iv_name=iv_name, params=drbd_params)
8696 _DISK_TEMPLATE_NAME_PREFIX = {
8697 constants.DT_PLAIN: "",
8698 constants.DT_RBD: ".rbd",
8699 }
8702 _DISK_TEMPLATE_DEVICE_TYPE = {
8703 constants.DT_PLAIN: constants.LD_LV,
8704 constants.DT_FILE: constants.LD_FILE,
8705 constants.DT_SHARED_FILE: constants.LD_FILE,
8706 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8707 constants.DT_RBD: constants.LD_RBD,
8708 }
8711 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8712 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8713 feedback_fn, disk_params,
8714 _req_file_storage=opcodes.RequireFileStorage,
8715 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8716 """Generate the entire disk layout for a given template type.
8719 #TODO: compute space requirements
8721 vgname = lu.cfg.GetVGName()
8722 disk_count = len(disk_info)
8724 ld_params = _ComputeLDParams(template_name, disk_params)
8726 if template_name == constants.DT_DISKLESS:
8728 elif template_name == constants.DT_DRBD8:
8729 drbd_params, data_params, meta_params = ld_params
8730 if len(secondary_nodes) != 1:
8731 raise errors.ProgrammerError("Wrong template configuration")
8732 remote_node = secondary_nodes[0]
8733 minors = lu.cfg.AllocateDRBDMinor(
8734 [primary_node, remote_node] * len(disk_info), instance_name)
8737 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8738 for i in range(disk_count)]):
8739 names.append(lv_prefix + "_data")
8740 names.append(lv_prefix + "_meta")
8741 for idx, disk in enumerate(disk_info):
8742 disk_index = idx + base_index
8743 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8744 data_vg = disk.get(constants.IDISK_VG, vgname)
8745 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8746 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8747 disk[constants.IDISK_SIZE],
8749 names[idx * 2:idx * 2 + 2],
8750 "disk/%d" % disk_index,
8751 minors[idx * 2], minors[idx * 2 + 1],
8752 drbd_params, data_params, meta_params)
8753 disk_dev.mode = disk[constants.IDISK_MODE]
8754 disks.append(disk_dev)
8757 raise errors.ProgrammerError("Wrong template configuration")
8759 if template_name == constants.DT_FILE:
8761 elif template_name == constants.DT_SHARED_FILE:
8762 _req_shr_file_storage()
8764 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8765 if name_prefix is None:
8768 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8769 (name_prefix, base_index + i)
8770 for i in range(disk_count)])
8772 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8774 if template_name == constants.DT_PLAIN:
8775 def logical_id_fn(idx, _, disk):
8776 vg = disk.get(constants.IDISK_VG, vgname)
8777 return (vg, names[idx])
8778 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8780 lambda _, disk_index, disk: (file_driver,
8781 "%s/disk%d" % (file_storage_dir,
8783 elif template_name == constants.DT_BLOCK:
8785 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8786 disk[constants.IDISK_ADOPT])
8787 elif template_name == constants.DT_RBD:
8788 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8790 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8792 for idx, disk in enumerate(disk_info):
8793 disk_index = idx + base_index
8794 size = disk[constants.IDISK_SIZE]
8795 feedback_fn("* disk %s, size %s" %
8796 (disk_index, utils.FormatUnit(size, "h")))
8797 disks.append(objects.Disk(dev_type=dev_type, size=size,
8798 logical_id=logical_id_fn(idx, disk_index, disk),
8799 iv_name="disk/%d" % disk_index,
8800 mode=disk[constants.IDISK_MODE],
8801 params=ld_params[0]))
8806 def _GetInstanceInfoText(instance):
8807 """Compute that text that should be added to the disk's metadata.
8810 return "originstname+%s" % instance.name
8813 def _CalcEta(time_taken, written, total_size):
8814 """Calculates the ETA based on size written and total size.
8816 @param time_taken: The time taken so far
8817 @param written: amount written so far
8818 @param total_size: The total size of data to be written
8819 @return: The remaining time in seconds
8822 avg_time = time_taken / float(written)
8823 return (total_size - written) * avg_time
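# Editor's note: a small worked example of the ETA formula above, with
# hypothetical numbers (not from the original source):
#   >>> _CalcEta(30.0, written=512, total_size=2048)
#   90.0
# 512 units written in 30 seconds gives ~0.0586 s per unit, so the remaining
# 1536 units are expected to take roughly 90 more seconds.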
8826 def _WipeDisks(lu, instance):
8827 """Wipes instance disks.
8829 @type lu: L{LogicalUnit}
8830 @param lu: the logical unit on whose behalf we execute
8831 @type instance: L{objects.Instance}
8832 @param instance: the instance whose disks we should create
8833 @return: the success of the wipe
8836 node = instance.primary_node
8838 for device in instance.disks:
8839 lu.cfg.SetDiskID(device, node)
8841 logging.info("Pause sync of instance %s disks", instance.name)
8842 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8844 for idx, success in enumerate(result.payload):
8845 if not success:
8846 logging.warn("pause-sync of instance %s for disks %d failed",
8847 instance.name, idx)
8850 for idx, device in enumerate(instance.disks):
8851 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8852 # MAX_WIPE_CHUNK at max
8853 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8854 constants.MIN_WIPE_CHUNK_PERCENT)
8855 # we _must_ make this an int, otherwise rounding errors will
8856 # occur
8857 wipe_chunk_size = int(wipe_chunk_size)
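# Editor's note: illustrative arithmetic for the chunk size above, assuming
# (hypothetically) MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MiB:
# a 20480 MiB disk gives 20480 / 100.0 * 10 = 2048, capped to
# min(1024, 2048) = 1024 MiB chunks, while a 5120 MiB disk gives
# min(1024, 512) = 512 MiB chunks.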
8859 lu.LogInfo("* Wiping disk %d", idx)
8860 logging.info("Wiping disk %d for instance %s, node %s using"
8861 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8863 offset = 0
8864 size = device.size
8865 last_output = 0
8866 start_time = time.time()
8868 while offset < size:
8869 wipe_size = min(wipe_chunk_size, size - offset)
8870 logging.debug("Wiping disk %d, offset %s, chunk %s",
8871 idx, offset, wipe_size)
8872 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8873 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8874 (idx, offset, wipe_size))
8875 offset += wipe_size
8876 now = time.time()
8877 if now - last_output >= 60:
8878 eta = _CalcEta(now - start_time, offset, size)
8879 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8880 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8881 last_output = now
8883 logging.info("Resume sync of instance %s disks", instance.name)
8885 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8887 for idx, success in enumerate(result.payload):
8888 if not success:
8889 lu.LogWarning("Resume sync of disk %d failed, please have a"
8890 " look at the status and troubleshoot the issue", idx)
8891 logging.warn("resume-sync of instance %s for disks %d failed",
8892 instance.name, idx)
8895 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8896 """Create all disks for an instance.
8898 This abstracts away some work from AddInstance.
8900 @type lu: L{LogicalUnit}
8901 @param lu: the logical unit on whose behalf we execute
8902 @type instance: L{objects.Instance}
8903 @param instance: the instance whose disks we should create
8905 @param to_skip: list of indices to skip
8906 @type target_node: string
8907 @param target_node: if passed, overrides the target node for creation
8909 @return: the success of the creation
8912 info = _GetInstanceInfoText(instance)
8913 if target_node is None:
8914 pnode = instance.primary_node
8915 all_nodes = instance.all_nodes
8920 if instance.disk_template in constants.DTS_FILEBASED:
8921 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8922 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8924 result.Raise("Failed to create directory '%s' on"
8925 " node %s" % (file_storage_dir, pnode))
8927 # Note: this needs to be kept in sync with adding of disks in
8928 # LUInstanceSetParams
8929 for idx, device in enumerate(instance.disks):
8930 if to_skip and idx in to_skip:
8932 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8934 for node in all_nodes:
8935 f_create = node == pnode
8936 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8939 def _RemoveDisks(lu, instance, target_node=None):
8940 """Remove all disks for an instance.
8942 This abstracts away some work from `AddInstance()` and
8943 `RemoveInstance()`. Note that in case some of the devices couldn't
8944 be removed, the removal will continue with the other ones (compare
8945 with `_CreateDisks()`).
8947 @type lu: L{LogicalUnit}
8948 @param lu: the logical unit on whose behalf we execute
8949 @type instance: L{objects.Instance}
8950 @param instance: the instance whose disks we should remove
8951 @type target_node: string
8952 @param target_node: used to override the node on which to remove the disks
8954 @return: the success of the removal
8957 logging.info("Removing block devices for instance %s", instance.name)
8960 for (idx, device) in enumerate(instance.disks):
8962 edata = [(target_node, device)]
8964 edata = device.ComputeNodeTree(instance.primary_node)
8965 for node, disk in edata:
8966 lu.cfg.SetDiskID(disk, node)
8967 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8969 lu.LogWarning("Could not remove disk %s on node %s,"
8970 " continuing anyway: %s", idx, node, msg)
8973 # if this is a DRBD disk, return its port to the pool
8974 if device.dev_type in constants.LDS_DRBD:
8975 tcp_port = device.logical_id[2]
8976 lu.cfg.AddTcpUdpPort(tcp_port)
8978 if instance.disk_template == constants.DT_FILE:
8979 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8983 tgt = instance.primary_node
8984 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8986 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8987 file_storage_dir, instance.primary_node, result.fail_msg)
8993 def _ComputeDiskSizePerVG(disk_template, disks):
8994 """Compute disk size requirements in the volume group
8997 def _compute(disks, payload):
8998 """Universal algorithm.
9003 vgs[disk[constants.IDISK_VG]] = \
9004 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9008 # Required free disk space as a function of disk and swap space
9010 constants.DT_DISKLESS: {},
9011 constants.DT_PLAIN: _compute(disks, 0),
9012 # 128 MB are added for drbd metadata for each disk
9013 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9014 constants.DT_FILE: {},
9015 constants.DT_SHARED_FILE: {},
9018 if disk_template not in req_size_dict:
9019 raise errors.ProgrammerError("Disk template '%s' size requirement"
9020 " is unknown" % disk_template)
9022 return req_size_dict[disk_template]
9025 def _ComputeDiskSize(disk_template, disks):
9026 """Compute disk size requirements in the volume group
9029 # Required free disk space as a function of disk and swap space
9031 constants.DT_DISKLESS: None,
9032 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9033 # 128 MB are added for drbd metadata for each disk
9035 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9036 constants.DT_FILE: None,
9037 constants.DT_SHARED_FILE: 0,
9038 constants.DT_BLOCK: 0,
9039 constants.DT_RBD: 0,
9042 if disk_template not in req_size_dict:
9043 raise errors.ProgrammerError("Disk template '%s' size requirement"
9044 " is unknown" % disk_template)
9046 return req_size_dict[disk_template]
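# Editor's note: a worked example of the size table above, with hypothetical
# disk sizes (not from the original source):
#   >>> _ComputeDiskSize(constants.DT_DRBD8,
#   ...                  [{constants.IDISK_SIZE: 10240},
#   ...                   {constants.IDISK_SIZE: 2048}])
#   12544
# i.e. 10240 + 2048 plus DRBD_META_SIZE (128 MiB) for each of the two disks,
# while for constants.DT_PLAIN the same disks would simply sum to 12288.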
9049 def _FilterVmNodes(lu, nodenames):
9050 """Filters out non-vm_capable nodes from a list.
9052 @type lu: L{LogicalUnit}
9053 @param lu: the logical unit for which we check
9054 @type nodenames: list
9055 @param nodenames: the list of nodes on which we should check
9057 @return: the list of vm-capable nodes
9060 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9061 return [name for name in nodenames if name not in vm_nodes]
9064 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9065 """Hypervisor parameter validation.
9067 This function abstracts the hypervisor parameter validation to be
9068 used in both instance create and instance modify.
9070 @type lu: L{LogicalUnit}
9071 @param lu: the logical unit for which we check
9072 @type nodenames: list
9073 @param nodenames: the list of nodes on which we should check
9074 @type hvname: string
9075 @param hvname: the name of the hypervisor we should use
9076 @type hvparams: dict
9077 @param hvparams: the parameters which we need to check
9078 @raise errors.OpPrereqError: if the parameters are not valid
9081 nodenames = _FilterVmNodes(lu, nodenames)
9083 cluster = lu.cfg.GetClusterInfo()
9084 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9086 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9087 for node in nodenames:
9088 info = hvinfo[node]
9089 if info.offline:
9090 continue
9091 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9094 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9095 """OS parameters validation.
9097 @type lu: L{LogicalUnit}
9098 @param lu: the logical unit for which we check
9099 @type required: boolean
9100 @param required: whether the validation should fail if the OS is not
9101 found
9102 @type nodenames: list
9103 @param nodenames: the list of nodes on which we should check
9104 @type osname: string
9105 @param osname: the name of the OS we should use
9106 @type osparams: dict
9107 @param osparams: the parameters which we need to check
9108 @raise errors.OpPrereqError: if the parameters are not valid
9111 nodenames = _FilterVmNodes(lu, nodenames)
9112 result = lu.rpc.call_os_validate(nodenames, required, osname,
9113 [constants.OS_VALIDATE_PARAMETERS],
9115 for node, nres in result.items():
9116 # we don't check for offline cases since this should be run only
9117 # against the master node and/or an instance's nodes
9118 nres.Raise("OS Parameters validation failed on node %s" % node)
9119 if not nres.payload:
9120 lu.LogInfo("OS %s not found on node %s, validation skipped",
9124 class LUInstanceCreate(LogicalUnit):
9125 """Create an instance.
9128 HPATH = "instance-add"
9129 HTYPE = constants.HTYPE_INSTANCE
9132 def CheckArguments(self):
9136 # do not require name_check to ease forward/backward compatibility
9138 if self.op.no_install and self.op.start:
9139 self.LogInfo("No-installation mode selected, disabling startup")
9140 self.op.start = False
9141 # validate/normalize the instance name
9142 self.op.instance_name = \
9143 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9145 if self.op.ip_check and not self.op.name_check:
9146 # TODO: make the ip check more flexible and not depend on the name check
9147 raise errors.OpPrereqError("Cannot do IP address check without a name"
9148 " check", errors.ECODE_INVAL)
9150 # check nics' parameter names
9151 for nic in self.op.nics:
9152 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9154 # check disks. parameter names and consistent adopt/no-adopt strategy
9155 has_adopt = has_no_adopt = False
9156 for disk in self.op.disks:
9157 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9158 if constants.IDISK_ADOPT in disk:
9159 has_adopt = True
9160 else:
9161 has_no_adopt = True
9162 if has_adopt and has_no_adopt:
9163 raise errors.OpPrereqError("Either all disks are adopted or none is",
9164 errors.ECODE_INVAL)
9165 if has_adopt:
9166 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9167 raise errors.OpPrereqError("Disk adoption is not supported for the"
9168 " '%s' disk template" %
9169 self.op.disk_template,
9171 if self.op.iallocator is not None:
9172 raise errors.OpPrereqError("Disk adoption not allowed with an"
9173 " iallocator script", errors.ECODE_INVAL)
9174 if self.op.mode == constants.INSTANCE_IMPORT:
9175 raise errors.OpPrereqError("Disk adoption not allowed for"
9176 " instance import", errors.ECODE_INVAL)
9178 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9179 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9180 " but no 'adopt' parameter given" %
9181 self.op.disk_template,
9184 self.adopt_disks = has_adopt
9186 # instance name verification
9187 if self.op.name_check:
9188 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9189 self.op.instance_name = self.hostname1.name
9190 # used in CheckPrereq for ip ping check
9191 self.check_ip = self.hostname1.ip
9193 self.check_ip = None
9195 # file storage checks
9196 if (self.op.file_driver and
9197 not self.op.file_driver in constants.FILE_DRIVER):
9198 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9199 self.op.file_driver, errors.ECODE_INVAL)
9201 if self.op.disk_template == constants.DT_FILE:
9202 opcodes.RequireFileStorage()
9203 elif self.op.disk_template == constants.DT_SHARED_FILE:
9204 opcodes.RequireSharedFileStorage()
9206 ### Node/iallocator related checks
9207 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9209 if self.op.pnode is not None:
9210 if self.op.disk_template in constants.DTS_INT_MIRROR:
9211 if self.op.snode is None:
9212 raise errors.OpPrereqError("The networked disk templates need"
9213 " a mirror node", errors.ECODE_INVAL)
9215 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9217 self.op.snode = None
9219 self._cds = _GetClusterDomainSecret()
9221 if self.op.mode == constants.INSTANCE_IMPORT:
9222 # On import force_variant must be True, because if we forced it at
9223 # initial install, our only chance when importing it back is that it
9224 # works as expected
9225 self.op.force_variant = True
9227 if self.op.no_install:
9228 self.LogInfo("No-installation mode has no effect during import")
9230 elif self.op.mode == constants.INSTANCE_CREATE:
9231 if self.op.os_type is None:
9232 raise errors.OpPrereqError("No guest OS specified",
9234 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9235 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9236 " installation" % self.op.os_type,
9238 if self.op.disk_template is None:
9239 raise errors.OpPrereqError("No disk template specified",
9242 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9243 # Check handshake to ensure both clusters have the same domain secret
9244 src_handshake = self.op.source_handshake
9245 if not src_handshake:
9246 raise errors.OpPrereqError("Missing source handshake",
9249 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9252 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9255 # Load and check source CA
9256 self.source_x509_ca_pem = self.op.source_x509_ca
9257 if not self.source_x509_ca_pem:
9258 raise errors.OpPrereqError("Missing source X509 CA",
9262 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9264 except OpenSSL.crypto.Error, err:
9265 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9266 (err, ), errors.ECODE_INVAL)
9268 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9269 if errcode is not None:
9270 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9273 self.source_x509_ca = cert
9275 src_instance_name = self.op.source_instance_name
9276 if not src_instance_name:
9277 raise errors.OpPrereqError("Missing source instance name",
9280 self.source_instance_name = \
9281 netutils.GetHostname(name=src_instance_name).name
9284 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9285 self.op.mode, errors.ECODE_INVAL)
9287 def ExpandNames(self):
9288 """ExpandNames for CreateInstance.
9290 Figure out the right locks for instance creation.
9293 self.needed_locks = {}
9295 instance_name = self.op.instance_name
9296 # this is just a preventive check, but someone might still add this
9297 # instance in the meantime, and creation will fail at lock-add time
9298 if instance_name in self.cfg.GetInstanceList():
9299 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9300 instance_name, errors.ECODE_EXISTS)
9302 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9304 if self.op.iallocator:
9305 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9306 # specifying a group on instance creation and then selecting nodes from
9308 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9309 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9311 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9312 nodelist = [self.op.pnode]
9313 if self.op.snode is not None:
9314 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9315 nodelist.append(self.op.snode)
9316 self.needed_locks[locking.LEVEL_NODE] = nodelist
9317 # Lock resources of instance's primary and secondary nodes (copy to
9318 # prevent accidental modification)
9319 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9321 # in case of import lock the source node too
9322 if self.op.mode == constants.INSTANCE_IMPORT:
9323 src_node = self.op.src_node
9324 src_path = self.op.src_path
9326 if src_path is None:
9327 self.op.src_path = src_path = self.op.instance_name
9329 if src_node is None:
9330 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9331 self.op.src_node = None
9332 if os.path.isabs(src_path):
9333 raise errors.OpPrereqError("Importing an instance from a path"
9334 " requires a source node option",
9337 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9338 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9339 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9340 if not os.path.isabs(src_path):
9341 self.op.src_path = src_path = \
9342 utils.PathJoin(constants.EXPORT_DIR, src_path)
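# Rough illustration of the resulting lock declaration for a DRBD instance
# created with an explicit node pair (node names are made up):
#   self.needed_locks = {
#     locking.LEVEL_NODE: ["node1.example.com", "node2.example.com"],
#     locking.LEVEL_NODE_RES: ["node1.example.com", "node2.example.com"],
#   }
# With an iallocator request instead, both levels are set to locking.ALL_SET.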
9344 def _RunAllocator(self):
9345 """Run the allocator based on input opcode.
9348 nics = [n.ToDict() for n in self.nics]
9349 ial = IAllocator(self.cfg, self.rpc,
9350 mode=constants.IALLOCATOR_MODE_ALLOC,
9351 name=self.op.instance_name,
9352 disk_template=self.op.disk_template,
9355 vcpus=self.be_full[constants.BE_VCPUS],
9356 memory=self.be_full[constants.BE_MAXMEM],
9357 spindle_usage=self.be_full[constants.BE_SPINDLE_USAGE],
9360 hypervisor=self.op.hypervisor,
9363 ial.Run(self.op.iallocator)
9366 raise errors.OpPrereqError("Can't compute nodes using"
9367 " iallocator '%s': %s" %
9368 (self.op.iallocator, ial.info),
9370 if len(ial.result) != ial.required_nodes:
9371 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9372 " of nodes (%s), required %s" %
9373 (self.op.iallocator, len(ial.result),
9374 ial.required_nodes), errors.ECODE_FAULT)
9375 self.op.pnode = ial.result[0]
9376 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9377 self.op.instance_name, self.op.iallocator,
9378 utils.CommaJoin(ial.result))
9379 if ial.required_nodes == 2:
9380 self.op.snode = ial.result[1]
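# Only for mirrored templates does the allocator request end up with
# required_nodes == 2, in which case the second returned node becomes the
# secondary; for other templates a single node is selected and self.op.snode
# is left untouched.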
9382 def BuildHooksEnv(self):
9385 This runs on master, primary and secondary nodes of the instance.
9389 "ADD_MODE": self.op.mode,
9391 if self.op.mode == constants.INSTANCE_IMPORT:
9392 env["SRC_NODE"] = self.op.src_node
9393 env["SRC_PATH"] = self.op.src_path
9394 env["SRC_IMAGES"] = self.src_images
9396 env.update(_BuildInstanceHookEnv(
9397 name=self.op.instance_name,
9398 primary_node=self.op.pnode,
9399 secondary_nodes=self.secondaries,
9400 status=self.op.start,
9401 os_type=self.op.os_type,
9402 minmem=self.be_full[constants.BE_MINMEM],
9403 maxmem=self.be_full[constants.BE_MAXMEM],
9404 vcpus=self.be_full[constants.BE_VCPUS],
9405 nics=_NICListToTuple(self, self.nics),
9406 disk_template=self.op.disk_template,
9407 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9408 for d in self.disks],
9411 hypervisor_name=self.op.hypervisor,
9417 def BuildHooksNodes(self):
9418 """Build hooks nodes.
9421 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9422 return nl, nl
9424 def _ReadExportInfo(self):
9425 """Reads the export information from disk.
9427 It will override the opcode source node and path with the actual
9428 information, if these two were not specified before.
9430 @return: the export information
9433 assert self.op.mode == constants.INSTANCE_IMPORT
9435 src_node = self.op.src_node
9436 src_path = self.op.src_path
9438 if src_node is None:
9439 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9440 exp_list = self.rpc.call_export_list(locked_nodes)
9442 for node in exp_list:
9443 if exp_list[node].fail_msg:
9445 if src_path in exp_list[node].payload:
9447 self.op.src_node = src_node = node
9448 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9452 raise errors.OpPrereqError("No export found for relative path %s" %
9453 src_path, errors.ECODE_INVAL)
9455 _CheckNodeOnline(self, src_node)
9456 result = self.rpc.call_export_info(src_node, src_path)
9457 result.Raise("No export or invalid export found in dir %s" % src_path)
9459 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9460 if not export_info.has_section(constants.INISECT_EXP):
9461 raise errors.ProgrammerError("Corrupted export config",
9462 errors.ECODE_ENVIRON)
9464 ei_version = export_info.get(constants.INISECT_EXP, "version")
9465 if (int(ei_version) != constants.EXPORT_VERSION):
9466 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9467 (ei_version, constants.EXPORT_VERSION),
9468 errors.ECODE_ENVIRON)
9470 return export_info
9471 def _ReadExportParams(self, einfo):
9472 """Use export parameters as defaults.
9474 In case the opcode doesn't specify (as in override) some instance
9475 parameters, then try to use them from the export information, if
9476 that declares them.
9479 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9481 if self.op.disk_template is None:
9482 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9483 self.op.disk_template = einfo.get(constants.INISECT_INS,
9485 if self.op.disk_template not in constants.DISK_TEMPLATES:
9486 raise errors.OpPrereqError("Disk template specified in configuration"
9487 " file is not one of the allowed values:"
9488 " %s" % " ".join(constants.DISK_TEMPLATES))
9490 raise errors.OpPrereqError("No disk template specified and the export"
9491 " is missing the disk_template information",
9494 if not self.op.disks:
9496 # TODO: import the disk iv_name too
9497 for idx in range(constants.MAX_DISKS):
9498 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9499 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9500 disks.append({constants.IDISK_SIZE: disk_sz})
9501 self.op.disks = disks
9502 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9503 raise errors.OpPrereqError("No disk info specified and the export"
9504 " is missing the disk information",
9507 if not self.op.nics:
9509 for idx in range(constants.MAX_NICS):
9510 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9512 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9513 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9520 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9521 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9523 if (self.op.hypervisor is None and
9524 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9525 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9527 if einfo.has_section(constants.INISECT_HYP):
9528 # use the export parameters but do not override the ones
9529 # specified by the user
9530 for name, value in einfo.items(constants.INISECT_HYP):
9531 if name not in self.op.hvparams:
9532 self.op.hvparams[name] = value
9534 if einfo.has_section(constants.INISECT_BEP):
9535 # use the parameters, without overriding
9536 for name, value in einfo.items(constants.INISECT_BEP):
9537 if name not in self.op.beparams:
9538 self.op.beparams[name] = value
9539 # Compatibility for the old "memory" be param
9540 if name == constants.BE_MEMORY:
9541 if constants.BE_MAXMEM not in self.op.beparams:
9542 self.op.beparams[constants.BE_MAXMEM] = value
9543 if constants.BE_MINMEM not in self.op.beparams:
9544 self.op.beparams[constants.BE_MINMEM] = value
9546 # try to read the parameters old style, from the main section
9547 for name in constants.BES_PARAMETERS:
9548 if (name not in self.op.beparams and
9549 einfo.has_option(constants.INISECT_INS, name)):
9550 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9552 if einfo.has_section(constants.INISECT_OSP):
9553 # use the parameters, without overriding
9554 for name, value in einfo.items(constants.INISECT_OSP):
9555 if name not in self.op.osparams:
9556 self.op.osparams[name] = value
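# Example of the precedence implemented above (values are illustrative, not
# from a real export): if the opcode carries beparams={"vcpus": 2} and the
# export's backend section contains vcpus=4 and maxmem=1024, the result is
# vcpus=2 (the user-supplied value wins) and maxmem=1024 (filled in from the
# export).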
9558 def _RevertToDefaults(self, cluster):
9559 """Revert the instance parameters to the default values.
9563 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9564 for name in self.op.hvparams.keys():
9565 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9566 del self.op.hvparams[name]
9568 be_defs = cluster.SimpleFillBE({})
9569 for name in self.op.beparams.keys():
9570 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9571 del self.op.beparams[name]
9573 nic_defs = cluster.SimpleFillNIC({})
9574 for nic in self.op.nics:
9575 for name in constants.NICS_PARAMETERS:
9576 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9579 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9580 for name in self.op.osparams.keys():
9581 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9582 del self.op.osparams[name]
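# After this, only parameters that differ from the cluster defaults remain on
# the opcode, so an imported instance does not end up pinning values it
# merely inherited; this is only invoked when identify_defaults is set.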
9584 def _CalculateFileStorageDir(self):
9585 """Calculate final instance file storage dir.
9588 # file storage dir calculation/check
9589 self.instance_file_storage_dir = None
9590 if self.op.disk_template in constants.DTS_FILEBASED:
9591 # build the full file storage dir path
9592 joinargs = []
9594 if self.op.disk_template == constants.DT_SHARED_FILE:
9595 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9596 else:
9597 get_fsd_fn = self.cfg.GetFileStorageDir
9599 cfg_storagedir = get_fsd_fn()
9600 if not cfg_storagedir:
9601 raise errors.OpPrereqError("Cluster file storage dir not defined")
9602 joinargs.append(cfg_storagedir)
9604 if self.op.file_storage_dir is not None:
9605 joinargs.append(self.op.file_storage_dir)
9607 joinargs.append(self.op.instance_name)
9609 # pylint: disable=W0142
9610 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
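# Illustration with made-up values: a cluster storage dir of
# "/srv/ganeti/file-storage", an opcode file_storage_dir of "web" and an
# instance named "inst1.example.com" would yield
# "/srv/ganeti/file-storage/web/inst1.example.com".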
9612 def CheckPrereq(self): # pylint: disable=R0914
9613 """Check prerequisites.
9616 self._CalculateFileStorageDir()
9618 if self.op.mode == constants.INSTANCE_IMPORT:
9619 export_info = self._ReadExportInfo()
9620 self._ReadExportParams(export_info)
9622 if (not self.cfg.GetVGName() and
9623 self.op.disk_template not in constants.DTS_NOT_LVM):
9624 raise errors.OpPrereqError("Cluster does not support lvm-based"
9625 " instances", errors.ECODE_STATE)
9627 if (self.op.hypervisor is None or
9628 self.op.hypervisor == constants.VALUE_AUTO):
9629 self.op.hypervisor = self.cfg.GetHypervisorType()
9631 cluster = self.cfg.GetClusterInfo()
9632 enabled_hvs = cluster.enabled_hypervisors
9633 if self.op.hypervisor not in enabled_hvs:
9634 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9635 " cluster (%s)" % (self.op.hypervisor,
9636 ",".join(enabled_hvs)),
9639 # Check tag validity
9640 for tag in self.op.tags:
9641 objects.TaggableObject.ValidateTag(tag)
9643 # check hypervisor parameter syntax (locally)
9644 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9645 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9647 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9648 hv_type.CheckParameterSyntax(filled_hvp)
9649 self.hv_full = filled_hvp
9650 # check that we don't specify global parameters on an instance
9651 _CheckGlobalHvParams(self.op.hvparams)
9653 # fill and remember the beparams dict
9654 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9655 for param, value in self.op.beparams.iteritems():
9656 if value == constants.VALUE_AUTO:
9657 self.op.beparams[param] = default_beparams[param]
9658 objects.UpgradeBeParams(self.op.beparams)
9659 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9660 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9662 # build os parameters
9663 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9665 # now that hvp/bep are in final format, let's reset to defaults,
9667 if self.op.identify_defaults:
9668 self._RevertToDefaults(cluster)
9672 for idx, nic in enumerate(self.op.nics):
9673 nic_mode_req = nic.get(constants.INIC_MODE, None)
9674 nic_mode = nic_mode_req
9675 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9676 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9678 # in routed mode, for the first nic, the default ip is 'auto'
9679 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9680 default_ip_mode = constants.VALUE_AUTO
9682 default_ip_mode = constants.VALUE_NONE
9684 # ip validity checks
9685 ip = nic.get(constants.INIC_IP, default_ip_mode)
9686 if ip is None or ip.lower() == constants.VALUE_NONE:
9687 nic_ip = None
9688 elif ip.lower() == constants.VALUE_AUTO:
9689 if not self.op.name_check:
9690 raise errors.OpPrereqError("IP address set to auto but name checks"
9691 " have been skipped",
9693 nic_ip = self.hostname1.ip
9695 if not netutils.IPAddress.IsValid(ip):
9696 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9700 # TODO: check the ip address for uniqueness
9701 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9702 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9705 # MAC address verification
9706 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9707 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9708 mac = utils.NormalizeAndValidateMac(mac)
9710 try:
9711 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9712 except errors.ReservationError:
9713 raise errors.OpPrereqError("MAC address %s already in use"
9714 " in cluster" % mac,
9715 errors.ECODE_NOTUNIQUE)
9717 # Build nic parameters
9718 link = nic.get(constants.INIC_LINK, None)
9719 if link == constants.VALUE_AUTO:
9720 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9721 nicparams = {}
9723 nicparams[constants.NIC_MODE] = nic_mode
9725 nicparams[constants.NIC_LINK] = link
9727 check_params = cluster.SimpleFillNIC(nicparams)
9728 objects.NIC.CheckParameterSyntax(check_params)
9729 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
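# Each NIC built here is a fully specified objects.NIC; the only values that
# may still be symbolic are MACs left as "auto"/"generate", which are turned
# into real addresses further down in CheckPrereq, just before the allocator
# and the hooks run.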
9731 # disk checks/pre-build
9732 default_vg = self.cfg.GetVGName()
9734 for disk in self.op.disks:
9735 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9736 if mode not in constants.DISK_ACCESS_SET:
9737 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9738 mode, errors.ECODE_INVAL)
9739 size = disk.get(constants.IDISK_SIZE, None)
9740 if size is None:
9741 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9742 try:
9743 size = int(size)
9744 except (TypeError, ValueError):
9745 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9746 errors.ECODE_INVAL)
9748 data_vg = disk.get(constants.IDISK_VG, default_vg)
9749 new_disk = {
9750 constants.IDISK_SIZE: size,
9751 constants.IDISK_MODE: mode,
9752 constants.IDISK_VG: data_vg,
9753 }
9754 if constants.IDISK_METAVG in disk:
9755 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9756 if constants.IDISK_ADOPT in disk:
9757 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9758 self.disks.append(new_disk)
9760 if self.op.mode == constants.INSTANCE_IMPORT:
9762 for idx in range(len(self.disks)):
9763 option = "disk%d_dump" % idx
9764 if export_info.has_option(constants.INISECT_INS, option):
9765 # FIXME: are the old os-es, disk sizes, etc. useful?
9766 export_name = export_info.get(constants.INISECT_INS, option)
9767 image = utils.PathJoin(self.op.src_path, export_name)
9768 disk_images.append(image)
9770 disk_images.append(False)
9772 self.src_images = disk_images
9774 old_name = export_info.get(constants.INISECT_INS, "name")
9775 if self.op.instance_name == old_name:
9776 for idx, nic in enumerate(self.nics):
9777 if nic.mac == constants.VALUE_AUTO:
9778 nic_mac_ini = "nic%d_mac" % idx
9779 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9781 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9783 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9784 if self.op.ip_check:
9785 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9786 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9787 (self.check_ip, self.op.instance_name),
9788 errors.ECODE_NOTUNIQUE)
9790 #### mac address generation
9791 # By generating here the mac address both the allocator and the hooks get
9792 # the real final mac address rather than the 'auto' or 'generate' value.
9793 # There is a race condition between the generation and the instance object
9794 # creation, which means that we know the mac is valid now, but we're not
9795 # sure it will be when we actually add the instance. If things go bad
9796 # adding the instance will abort because of a duplicate mac, and the
9797 # creation job will fail.
9798 for nic in self.nics:
9799 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9800 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9804 if self.op.iallocator is not None:
9805 self._RunAllocator()
9807 # Release all unneeded node locks
9808 _ReleaseLocks(self, locking.LEVEL_NODE,
9809 keep=filter(None, [self.op.pnode, self.op.snode,
9810 self.op.src_node]))
9811 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9812 keep=filter(None, [self.op.pnode, self.op.snode,
9813 self.op.src_node]))
9815 #### node related checks
9817 # check primary node
9818 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9819 assert self.pnode is not None, \
9820 "Cannot retrieve locked node %s" % self.op.pnode
9821 if pnode.offline:
9822 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9823 pnode.name, errors.ECODE_STATE)
9824 if pnode.drained:
9825 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9826 pnode.name, errors.ECODE_STATE)
9827 if not pnode.vm_capable:
9828 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9829 " '%s'" % pnode.name, errors.ECODE_STATE)
9831 self.secondaries = []
9833 # mirror node verification
9834 if self.op.disk_template in constants.DTS_INT_MIRROR:
9835 if self.op.snode == pnode.name:
9836 raise errors.OpPrereqError("The secondary node cannot be the"
9837 " primary node", errors.ECODE_INVAL)
9838 _CheckNodeOnline(self, self.op.snode)
9839 _CheckNodeNotDrained(self, self.op.snode)
9840 _CheckNodeVmCapable(self, self.op.snode)
9841 self.secondaries.append(self.op.snode)
9843 snode = self.cfg.GetNodeInfo(self.op.snode)
9844 if pnode.group != snode.group:
9845 self.LogWarning("The primary and secondary nodes are in two"
9846 " different node groups; the disk parameters"
9847 " from the first disk's node group will be"
9850 nodenames = [pnode.name] + self.secondaries
9852 # Verify instance specs
9853 spindle_use = self.be_full.get(constants.BE_SPINDLE_USAGE, None)
9854 ispec = {
9855 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9856 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9857 constants.ISPEC_DISK_COUNT: len(self.disks),
9858 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9859 constants.ISPEC_NIC_COUNT: len(self.nics),
9860 constants.ISPEC_SPINDLE_USE: spindle_use,
9861 }
9863 group_info = self.cfg.GetNodeGroup(pnode.group)
9864 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9865 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9866 if not self.op.ignore_ipolicy and res:
9867 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9868 " policy: %s") % (pnode.group,
9869 utils.CommaJoin(res)),
9872 # disk parameters (not customizable at instance or node level)
9873 # just use the primary node parameters, ignoring the secondary.
9874 self.diskparams = group_info.diskparams
9876 if not self.adopt_disks:
9877 if self.op.disk_template == constants.DT_RBD:
9878 # _CheckRADOSFreeSpace() is just a placeholder.
9879 # Any function that checks prerequisites can be placed here.
9880 # Check if there is enough space on the RADOS cluster.
9881 _CheckRADOSFreeSpace()
9883 # Check lv size requirements, if not adopting
9884 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9885 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9887 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9888 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9889 disk[constants.IDISK_ADOPT])
9890 for disk in self.disks])
9891 if len(all_lvs) != len(self.disks):
9892 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9894 for lv_name in all_lvs:
9896 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9897 # to ReserveLV uses the same syntax
9898 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9899 except errors.ReservationError:
9900 raise errors.OpPrereqError("LV named %s used by another instance" %
9901 lv_name, errors.ECODE_NOTUNIQUE)
9903 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9904 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9906 node_lvs = self.rpc.call_lv_list([pnode.name],
9907 vg_names.payload.keys())[pnode.name]
9908 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9909 node_lvs = node_lvs.payload
9911 delta = all_lvs.difference(node_lvs.keys())
9913 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9914 utils.CommaJoin(delta),
9916 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9918 raise errors.OpPrereqError("Online logical volumes found, cannot"
9919 " adopt: %s" % utils.CommaJoin(online_lvs),
9921 # update the size of disk based on what is found
9922 for dsk in self.disks:
9923 dsk[constants.IDISK_SIZE] = \
9924 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9925 dsk[constants.IDISK_ADOPT])][0]))
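# (call_lv_list's payload maps each "vg/name" key to a tuple whose first
# field, used here, is the volume size and whose third field, used in the
# online check above, flags whether the LV is active.)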
9927 elif self.op.disk_template == constants.DT_BLOCK:
9928 # Normalize and de-duplicate device paths
9929 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9930 for disk in self.disks])
9931 if len(all_disks) != len(self.disks):
9932 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9934 baddisks = [d for d in all_disks
9935 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9937 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9938 " cannot be adopted" %
9939 (", ".join(baddisks),
9940 constants.ADOPTABLE_BLOCKDEV_ROOT),
9943 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9944 list(all_disks))[pnode.name]
9945 node_disks.Raise("Cannot get block device information from node %s" %
9947 node_disks = node_disks.payload
9948 delta = all_disks.difference(node_disks.keys())
9950 raise errors.OpPrereqError("Missing block device(s): %s" %
9951 utils.CommaJoin(delta),
9953 for dsk in self.disks:
9954 dsk[constants.IDISK_SIZE] = \
9955 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9957 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9959 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9960 # check OS parameters (remotely)
9961 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9963 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9965 # memory check on primary node
9966 #TODO(dynmem): use MINMEM for checking
9968 _CheckNodeFreeMemory(self, self.pnode.name,
9969 "creating instance %s" % self.op.instance_name,
9970 self.be_full[constants.BE_MAXMEM],
9973 self.dry_run_result = list(nodenames)
9975 def Exec(self, feedback_fn):
9976 """Create and add the instance to the cluster.
9979 instance = self.op.instance_name
9980 pnode_name = self.pnode.name
9982 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9983 self.owned_locks(locking.LEVEL_NODE)), \
9984 "Node locks differ from node resource locks"
9986 ht_kind = self.op.hypervisor
9987 if ht_kind in constants.HTS_REQ_PORT:
9988 network_port = self.cfg.AllocatePort()
9989 else:
9990 network_port = None
9992 disks = _GenerateDiskTemplate(self,
9993 self.op.disk_template,
9994 instance, pnode_name,
9997 self.instance_file_storage_dir,
9998 self.op.file_driver,
10003 iobj = objects.Instance(name=instance, os=self.op.os_type,
10004 primary_node=pnode_name,
10005 nics=self.nics, disks=disks,
10006 disk_template=self.op.disk_template,
10007 admin_state=constants.ADMINST_DOWN,
10008 network_port=network_port,
10009 beparams=self.op.beparams,
10010 hvparams=self.op.hvparams,
10011 hypervisor=self.op.hypervisor,
10012 osparams=self.op.osparams,
10016 for tag in self.op.tags:
10019 if self.adopt_disks:
10020 if self.op.disk_template == constants.DT_PLAIN:
10021 # rename LVs to the newly-generated names; we need to construct
10022 # 'fake' LV disks with the old data, plus the new unique_id
10023 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10025 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10026 rename_to.append(t_dsk.logical_id)
10027 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10028 self.cfg.SetDiskID(t_dsk, pnode_name)
10029 result = self.rpc.call_blockdev_rename(pnode_name,
10030 zip(tmp_disks, rename_to))
10031 result.Raise("Failed to rename adopted LVs")
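# In other words, the pre-existing volumes keep their data but take over the
# freshly generated Ganeti names; e.g. an adopted "xenvg/mydata" would be
# renamed to something like "xenvg/<uuid>.disk0" (names shown for
# illustration only).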
10033 feedback_fn("* creating instance disks...")
10034 try:
10035 _CreateDisks(self, iobj)
10036 except errors.OpExecError:
10037 self.LogWarning("Device creation failed, reverting...")
10038 try:
10039 _RemoveDisks(self, iobj)
10040 finally:
10041 self.cfg.ReleaseDRBDMinors(instance)
10042 raise
10044 feedback_fn("adding instance %s to cluster config" % instance)
10046 self.cfg.AddInstance(iobj, self.proc.GetECId())
10048 # Declare that we don't want to remove the instance lock anymore, as we've
10049 # added the instance to the config
10050 del self.remove_locks[locking.LEVEL_INSTANCE]
10052 if self.op.mode == constants.INSTANCE_IMPORT:
10053 # Release unused nodes
10054 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10056 # Release all nodes
10057 _ReleaseLocks(self, locking.LEVEL_NODE)
10060 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10061 feedback_fn("* wiping instance disks...")
10062 try:
10063 _WipeDisks(self, iobj)
10064 except errors.OpExecError, err:
10065 logging.exception("Wiping disks failed")
10066 self.LogWarning("Wiping instance disks failed (%s)", err)
10067 disk_abort = True
10069 if disk_abort:
10070 # Something is already wrong with the disks, don't do anything else
10071 pass
10072 elif self.op.wait_for_sync:
10073 disk_abort = not _WaitForSync(self, iobj)
10074 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10075 # make sure the disks are not degraded (still sync-ing is ok)
10076 feedback_fn("* checking mirrors status")
10077 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10078 else:
10079 disk_abort = False
10081 if disk_abort:
10082 _RemoveDisks(self, iobj)
10083 self.cfg.RemoveInstance(iobj.name)
10084 # Make sure the instance lock gets removed
10085 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10086 raise errors.OpExecError("There are some degraded disks for"
10087 " this instance")
10089 # Release all node resource locks
10090 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
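# The remaining steps (OS creation or import) only write to the disks that
# were already created above rather than allocating new storage, so holding
# the node resource locks any longer is unnecessary.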
10092 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10093 if self.op.mode == constants.INSTANCE_CREATE:
10094 if not self.op.no_install:
10095 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10096 not self.op.wait_for_sync)
10097 if pause_sync:
10098 feedback_fn("* pausing disk sync to install instance OS")
10099 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10101 for idx, success in enumerate(result.payload):
10103 logging.warn("pause-sync of instance %s for disk %d failed",
10106 feedback_fn("* running the instance OS create scripts...")
10107 # FIXME: pass debug option from opcode to backend
10108 os_add_result = \
10109 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10110 self.op.debug_level)
10111 if pause_sync:
10112 feedback_fn("* resuming disk sync")
10113 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10115 for idx, success in enumerate(result.payload):
10117 logging.warn("resume-sync of instance %s for disk %d failed",
10120 os_add_result.Raise("Could not add os for instance %s"
10121 " on node %s" % (instance, pnode_name))
10123 elif self.op.mode == constants.INSTANCE_IMPORT:
10124 feedback_fn("* running the instance OS import scripts...")
10128 for idx, image in enumerate(self.src_images):
10132 # FIXME: pass debug option from opcode to backend
10133 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10134 constants.IEIO_FILE, (image, ),
10135 constants.IEIO_SCRIPT,
10136 (iobj.disks[idx], idx),
10138 transfers.append(dt)
10140 import_result = \
10141 masterd.instance.TransferInstanceData(self, feedback_fn,
10142 self.op.src_node, pnode_name,
10143 self.pnode.secondary_ip,
10145 if not compat.all(import_result):
10146 self.LogWarning("Some disks for instance %s on node %s were not"
10147 " imported successfully" % (instance, pnode_name))
10149 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10150 feedback_fn("* preparing remote import...")
10151 # The source cluster will stop the instance before attempting to make a
10152 # connection. In some cases stopping an instance can take a long time,
10153 # hence the shutdown timeout is added to the connection timeout.
10154 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10155 self.op.source_shutdown_timeout)
10156 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10158 assert iobj.primary_node == self.pnode.name
10159 disk_results = \
10160 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10161 self.source_x509_ca,
10162 self._cds, timeouts)
10163 if not compat.all(disk_results):
10164 # TODO: Should the instance still be started, even if some disks
10165 # failed to import (valid for local imports, too)?
10166 self.LogWarning("Some disks for instance %s on node %s were not"
10167 " imported successfully" % (instance, pnode_name))
10169 # Run rename script on newly imported instance
10170 assert iobj.name == instance
10171 feedback_fn("Running rename script for %s" % instance)
10172 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10173 self.source_instance_name,
10174 self.op.debug_level)
10175 if result.fail_msg:
10176 self.LogWarning("Failed to run rename script for %s on node"
10177 " %s: %s" % (instance, pnode_name, result.fail_msg))
10179 else:
10180 # also checked in the prereq part
10181 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10182 % self.op.mode)
10184 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10186 if self.op.start:
10187 iobj.admin_state = constants.ADMINST_UP
10188 self.cfg.Update(iobj, feedback_fn)
10189 logging.info("Starting instance %s on node %s", instance, pnode_name)
10190 feedback_fn("* starting instance...")
10191 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10193 result.Raise("Could not start instance")
10195 return list(iobj.all_nodes)
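# For reference, a typical creation request handled by the Exec() above
# corresponds roughly to a command line such as (illustrative only):
#   gnt-instance add -t drbd -n node1.example.com:node2.example.com \
#     -o debootstrap+default --disk 0:size=10G inst1.example.com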
10198 def _CheckRADOSFreeSpace():
10199 """Compute disk size requirements inside the RADOS cluster.
10202 # For the RADOS cluster we assume there is always enough space.
10203 pass
10206 class LUInstanceConsole(NoHooksLU):
10207 """Connect to an instance's console.
10209 This is somewhat special in that it returns the command line that
10210 you need to run on the master node in order to connect to the
10211 console.
10216 def ExpandNames(self):
10217 self.share_locks = _ShareAll()
10218 self._ExpandAndLockInstance()
10220 def CheckPrereq(self):
10221 """Check prerequisites.
10223 This checks that the instance is in the cluster.
10226 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10227 assert self.instance is not None, \
10228 "Cannot retrieve locked instance %s" % self.op.instance_name
10229 _CheckNodeOnline(self, self.instance.primary_node)
10231 def Exec(self, feedback_fn):
10232 """Connect to the console of an instance
10235 instance = self.instance
10236 node = instance.primary_node
10238 node_insts = self.rpc.call_instance_list([node],
10239 [instance.hypervisor])[node]
10240 node_insts.Raise("Can't get node information from %s" % node)
10242 if instance.name not in node_insts.payload:
10243 if instance.admin_state == constants.ADMINST_UP:
10244 state = constants.INSTST_ERRORDOWN
10245 elif instance.admin_state == constants.ADMINST_DOWN:
10246 state = constants.INSTST_ADMINDOWN
10247 else:
10248 state = constants.INSTST_ADMINOFFLINE
10249 raise errors.OpExecError("Instance %s is not running (state %s)" %
10250 (instance.name, state))
10252 logging.debug("Connecting to console of %s on %s", instance.name, node)
10254 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10257 def _GetInstanceConsole(cluster, instance):
10258 """Returns console information for an instance.
10260 @type cluster: L{objects.Cluster}
10261 @type instance: L{objects.Instance}
10265 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10266 # beparams and hvparams are passed separately, to avoid editing the
10267 # instance and then saving the defaults in the instance itself.
10268 hvparams = cluster.FillHV(instance)
10269 beparams = cluster.FillBE(instance)
10270 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10272 assert console.instance == instance.name
10273 assert console.Validate()
10275 return console.ToDict()
10278 class LUInstanceReplaceDisks(LogicalUnit):
10279 """Replace the disks of an instance.
10282 HPATH = "mirrors-replace"
10283 HTYPE = constants.HTYPE_INSTANCE
10286 def CheckArguments(self):
10287 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10288 self.op.iallocator)
10290 def ExpandNames(self):
10291 self._ExpandAndLockInstance()
10293 assert locking.LEVEL_NODE not in self.needed_locks
10294 assert locking.LEVEL_NODE_RES not in self.needed_locks
10295 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10297 assert self.op.iallocator is None or self.op.remote_node is None, \
10298 "Conflicting options"
10300 if self.op.remote_node is not None:
10301 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10303 # Warning: do not remove the locking of the new secondary here
10304 # unless DRBD8.AddChildren is changed to work in parallel;
10305 # currently it doesn't since parallel invocations of
10306 # FindUnusedMinor will conflict
10307 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10308 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10310 self.needed_locks[locking.LEVEL_NODE] = []
10311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10313 if self.op.iallocator is not None:
10314 # iallocator will select a new node in the same group
10315 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10317 self.needed_locks[locking.LEVEL_NODE_RES] = []
10319 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10320 self.op.iallocator, self.op.remote_node,
10321 self.op.disks, False, self.op.early_release,
10322 self.op.ignore_ipolicy)
10324 self.tasklets = [self.replacer]
10326 def DeclareLocks(self, level):
10327 if level == locking.LEVEL_NODEGROUP:
10328 assert self.op.remote_node is None
10329 assert self.op.iallocator is not None
10330 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10332 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10333 # Lock all groups used by instance optimistically; this requires going
10334 # via the node before it's locked, requiring verification later on
10335 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10336 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10338 elif level == locking.LEVEL_NODE:
10339 if self.op.iallocator is not None:
10340 assert self.op.remote_node is None
10341 assert not self.needed_locks[locking.LEVEL_NODE]
10343 # Lock member nodes of all locked groups
10344 self.needed_locks[locking.LEVEL_NODE] = [node_name
10345 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10346 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10348 self._LockInstancesNodes()
10349 elif level == locking.LEVEL_NODE_RES:
10351 self.needed_locks[locking.LEVEL_NODE_RES] = \
10352 self.needed_locks[locking.LEVEL_NODE]
10354 def BuildHooksEnv(self):
10355 """Build hooks env.
10357 This runs on the master, the primary and all the secondaries.
10360 instance = self.replacer.instance
10362 "MODE": self.op.mode,
10363 "NEW_SECONDARY": self.op.remote_node,
10364 "OLD_SECONDARY": instance.secondary_nodes[0],
10366 env.update(_BuildInstanceHookEnvByObject(self, instance))
10368 return env
10369 def BuildHooksNodes(self):
10370 """Build hooks nodes.
10373 instance = self.replacer.instance
10374 nl = [
10375 self.cfg.GetMasterNode(),
10376 instance.primary_node,
10377 ]
10378 if self.op.remote_node is not None:
10379 nl.append(self.op.remote_node)
10380 return nl, nl
10382 def CheckPrereq(self):
10383 """Check prerequisites.
10386 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10387 self.op.iallocator is None)
10389 # Verify if node group locks are still correct
10390 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10392 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10394 return LogicalUnit.CheckPrereq(self)
10397 class TLReplaceDisks(Tasklet):
10398 """Replaces disks for an instance.
10400 Note: Locking is not within the scope of this class.
10403 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10404 disks, delay_iallocator, early_release, ignore_ipolicy):
10405 """Initializes this class.
10408 Tasklet.__init__(self, lu)
10411 self.instance_name = instance_name
10413 self.iallocator_name = iallocator_name
10414 self.remote_node = remote_node
10416 self.delay_iallocator = delay_iallocator
10417 self.early_release = early_release
10418 self.ignore_ipolicy = ignore_ipolicy
10421 self.instance = None
10422 self.new_node = None
10423 self.target_node = None
10424 self.other_node = None
10425 self.remote_node_info = None
10426 self.node_secondary_ip = None
10429 def CheckArguments(mode, remote_node, iallocator):
10430 """Helper function for users of this class.
10433 # check for valid parameter combination
10434 if mode == constants.REPLACE_DISK_CHG:
10435 if remote_node is None and iallocator is None:
10436 raise errors.OpPrereqError("When changing the secondary either an"
10437 " iallocator script must be used or the"
10438 " new node given", errors.ECODE_INVAL)
10440 if remote_node is not None and iallocator is not None:
10441 raise errors.OpPrereqError("Give either the iallocator or the new"
10442 " secondary, not both", errors.ECODE_INVAL)
10444 elif remote_node is not None or iallocator is not None:
10445 # Not replacing the secondary
10446 raise errors.OpPrereqError("The iallocator and new node options can"
10447 " only be used when changing the"
10448 " secondary node", errors.ECODE_INVAL)
10451 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10452 """Compute a new secondary node using an IAllocator.
10455 ial = IAllocator(lu.cfg, lu.rpc,
10456 mode=constants.IALLOCATOR_MODE_RELOC,
10457 name=instance_name,
10458 relocate_from=list(relocate_from))
10460 ial.Run(iallocator_name)
10462 if not ial.success:
10463 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10464 " %s" % (iallocator_name, ial.info),
10465 errors.ECODE_NORES)
10467 if len(ial.result) != ial.required_nodes:
10468 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10469 " of nodes (%s), required %s" %
10471 len(ial.result), ial.required_nodes),
10472 errors.ECODE_FAULT)
10474 remote_node_name = ial.result[0]
10476 lu.LogInfo("Selected new secondary for instance '%s': %s",
10477 instance_name, remote_node_name)
10479 return remote_node_name
10481 def _FindFaultyDisks(self, node_name):
10482 """Wrapper for L{_FindFaultyInstanceDisks}.
10485 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10488 def _CheckDisksActivated(self, instance):
10489 """Checks if the instance disks are activated.
10491 @param instance: The instance to check disks
10492 @return: True if they are activated, False otherwise
10495 nodes = instance.all_nodes
10497 for idx, dev in enumerate(instance.disks):
10499 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10500 self.cfg.SetDiskID(dev, node)
10502 result = self.rpc.call_blockdev_find(node, dev)
10506 elif result.fail_msg or not result.payload:
10511 def CheckPrereq(self):
10512 """Check prerequisites.
10514 This checks that the instance is in the cluster.
10517 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10518 assert instance is not None, \
10519 "Cannot retrieve locked instance %s" % self.instance_name
10521 if instance.disk_template != constants.DT_DRBD8:
10522 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10523 " instances", errors.ECODE_INVAL)
10525 if len(instance.secondary_nodes) != 1:
10526 raise errors.OpPrereqError("The instance has a strange layout,"
10527 " expected one secondary but found %d" %
10528 len(instance.secondary_nodes),
10529 errors.ECODE_FAULT)
10531 if not self.delay_iallocator:
10532 self._CheckPrereq2()
10534 def _CheckPrereq2(self):
10535 """Check prerequisites, second part.
10537 This function should always be part of CheckPrereq. It was separated and is
10538 now called from Exec because during node evacuation iallocator was only
10539 called with an unmodified cluster model, not taking planned changes into
10540 account.
10543 instance = self.instance
10544 secondary_node = instance.secondary_nodes[0]
10546 if self.iallocator_name is None:
10547 remote_node = self.remote_node
10549 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10550 instance.name, instance.secondary_nodes)
10552 if remote_node is None:
10553 self.remote_node_info = None
10555 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10556 "Remote node '%s' is not locked" % remote_node
10558 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10559 assert self.remote_node_info is not None, \
10560 "Cannot retrieve locked node %s" % remote_node
10562 if remote_node == self.instance.primary_node:
10563 raise errors.OpPrereqError("The specified node is the primary node of"
10564 " the instance", errors.ECODE_INVAL)
10566 if remote_node == secondary_node:
10567 raise errors.OpPrereqError("The specified node is already the"
10568 " secondary node of the instance",
10569 errors.ECODE_INVAL)
10571 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10572 constants.REPLACE_DISK_CHG):
10573 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10574 errors.ECODE_INVAL)
10576 if self.mode == constants.REPLACE_DISK_AUTO:
10577 if not self._CheckDisksActivated(instance):
10578 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10579 " first" % self.instance_name,
10580 errors.ECODE_STATE)
10581 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10582 faulty_secondary = self._FindFaultyDisks(secondary_node)
10584 if faulty_primary and faulty_secondary:
10585 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10586 " one node and can not be repaired"
10587 " automatically" % self.instance_name,
10588 errors.ECODE_STATE)
10591 self.disks = faulty_primary
10592 self.target_node = instance.primary_node
10593 self.other_node = secondary_node
10594 check_nodes = [self.target_node, self.other_node]
10595 elif faulty_secondary:
10596 self.disks = faulty_secondary
10597 self.target_node = secondary_node
10598 self.other_node = instance.primary_node
10599 check_nodes = [self.target_node, self.other_node]
10605 # Non-automatic modes
10606 if self.mode == constants.REPLACE_DISK_PRI:
10607 self.target_node = instance.primary_node
10608 self.other_node = secondary_node
10609 check_nodes = [self.target_node, self.other_node]
10611 elif self.mode == constants.REPLACE_DISK_SEC:
10612 self.target_node = secondary_node
10613 self.other_node = instance.primary_node
10614 check_nodes = [self.target_node, self.other_node]
10616 elif self.mode == constants.REPLACE_DISK_CHG:
10617 self.new_node = remote_node
10618 self.other_node = instance.primary_node
10619 self.target_node = secondary_node
10620 check_nodes = [self.new_node, self.other_node]
10622 _CheckNodeNotDrained(self.lu, remote_node)
10623 _CheckNodeVmCapable(self.lu, remote_node)
10625 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10626 assert old_node_info is not None
10627 if old_node_info.offline and not self.early_release:
10628 # doesn't make sense to delay the release
10629 self.early_release = True
10630 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10631 " early-release mode", secondary_node)
10634 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10637 # If not specified all disks should be replaced
10638 if not self.disks:
10639 self.disks = range(len(self.instance.disks))
10641 # TODO: This is ugly, but right now we can't distinguish between an
10642 # internally submitted opcode and an external one. We should fix that.
10643 if self.remote_node_info:
10644 # We change the node, lets verify it still meets instance policy
10645 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10646 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10648 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10649 ignore=self.ignore_ipolicy)
10651 # TODO: compute disk parameters
10652 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10653 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10654 if primary_node_info.group != secondary_node_info.group:
10655 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10656 " different node groups; the disk parameters of the"
10657 " primary node's group will be applied.")
10659 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10661 for node in check_nodes:
10662 _CheckNodeOnline(self.lu, node)
10664 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10667 if node_name is not None)
10669 # Release unneeded node and node resource locks
10670 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10671 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10673 # Release any owned node group
10674 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10675 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10677 # Check whether disks are valid
10678 for disk_idx in self.disks:
10679 instance.FindDisk(disk_idx)
10681 # Get secondary node IP addresses
10682 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10683 in self.cfg.GetMultiNodeInfo(touched_nodes))
10685 def Exec(self, feedback_fn):
10686 """Execute disk replacement.
10688 This dispatches the disk replacement to the appropriate handler.
10691 if self.delay_iallocator:
10692 self._CheckPrereq2()
10695 # Verify owned locks before starting operation
10696 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10697 assert set(owned_nodes) == set(self.node_secondary_ip), \
10698 ("Incorrect node locks, owning %s, expected %s" %
10699 (owned_nodes, self.node_secondary_ip.keys()))
10700 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10701 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10703 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10704 assert list(owned_instances) == [self.instance_name], \
10705 "Instance '%s' not locked" % self.instance_name
10707 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10708 "Should not own any node group lock at this point"
10710 if not self.disks:
10711 feedback_fn("No disks need replacement")
10712 return
10714 feedback_fn("Replacing disk(s) %s for %s" %
10715 (utils.CommaJoin(self.disks), self.instance.name))
10717 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10719 # Activate the instance disks if we're replacing them on a down instance
10720 if activate_disks:
10721 _StartInstanceDisks(self.lu, self.instance, True)
10723 try:
10724 # Should we replace the secondary node?
10725 if self.new_node is not None:
10726 fn = self._ExecDrbd8Secondary
10727 else:
10728 fn = self._ExecDrbd8DiskOnly
10730 result = fn(feedback_fn)
10731 finally:
10732 # Deactivate the instance disks if we're replacing them on a
10733 # down instance
10734 if activate_disks:
10735 _SafeShutdownInstanceDisks(self.lu, self.instance)
10737 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10740 # Verify owned locks
10741 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10742 nodes = frozenset(self.node_secondary_ip)
10743 assert ((self.early_release and not owned_nodes) or
10744 (not self.early_release and not (set(owned_nodes) - nodes))), \
10745 ("Not owning the correct locks, early_release=%s, owned=%r,"
10746 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10750 def _CheckVolumeGroup(self, nodes):
10751 self.lu.LogInfo("Checking volume groups")
10753 vgname = self.cfg.GetVGName()
10755 # Make sure volume group exists on all involved nodes
10756 results = self.rpc.call_vg_list(nodes)
10757 if not results:
10758 raise errors.OpExecError("Can't list volume groups on the nodes")
10760 for node in nodes:
10761 res = results[node]
10762 res.Raise("Error checking node %s" % node)
10763 if vgname not in res.payload:
10764 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10767 def _CheckDisksExistence(self, nodes):
10768 # Check disk existence
10769 for idx, dev in enumerate(self.instance.disks):
10770 if idx not in self.disks:
10771 continue
10773 for node in nodes:
10774 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10775 self.cfg.SetDiskID(dev, node)
10777 result = self.rpc.call_blockdev_find(node, dev)
10779 msg = result.fail_msg
10780 if msg or not result.payload:
10781 if not msg:
10782 msg = "disk not found"
10783 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10784 (idx, node, msg))
10786 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10787 for idx, dev in enumerate(self.instance.disks):
10788 if idx not in self.disks:
10789 continue
10791 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10794 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10796 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10797 " replace disks for instance %s" %
10798 (node_name, self.instance.name))
10800 def _CreateNewStorage(self, node_name):
10801 """Create new storage on the primary or secondary node.
10803 This is only used for same-node replaces, not for changing the
10804 secondary node, hence we don't want to modify the existing disk.
10808 iv_names = {}
10809 for idx, dev in enumerate(self.instance.disks):
10810 if idx not in self.disks:
10811 continue
10813 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10815 self.cfg.SetDiskID(dev, node_name)
10817 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10818 names = _GenerateUniqueNames(self.lu, lv_names)
10820 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10822 vg_data = dev.children[0].logical_id[0]
10823 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10824 logical_id=(vg_data, names[0]), params=data_p)
10825 vg_meta = dev.children[1].logical_id[0]
10826 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10827 logical_id=(vg_meta, names[1]), params=meta_p)
10829 new_lvs = [lv_data, lv_meta]
10830 old_lvs = [child.Copy() for child in dev.children]
10831 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10833 # we pass force_create=True to force the LVM creation
10834 for new_lv in new_lvs:
10835 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10836 _GetInstanceInfoText(self.instance), False)
10838 return iv_names
10840 def _CheckDevices(self, node_name, iv_names):
10841 for name, (dev, _, _) in iv_names.iteritems():
10842 self.cfg.SetDiskID(dev, node_name)
10844 result = self.rpc.call_blockdev_find(node_name, dev)
10846 msg = result.fail_msg
10847 if msg or not result.payload:
10848 if not msg:
10849 msg = "disk not found"
10850 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10851 (name, msg))
10853 if result.payload.is_degraded:
10854 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10856 def _RemoveOldStorage(self, node_name, iv_names):
10857 for name, (_, old_lvs, _) in iv_names.iteritems():
10858 self.lu.LogInfo("Remove logical volumes for %s" % name)
10860 for lv in old_lvs:
10861 self.cfg.SetDiskID(lv, node_name)
10863 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10864 if msg:
10865 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10866 hint="remove unused LVs manually")
10868 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10869 """Replace a disk on the primary or secondary for DRBD 8.
10871 The algorithm for replace is quite complicated:
10873 1. for each disk to be replaced:
10875 1. create new LVs on the target node with unique names
10876 1. detach old LVs from the drbd device
10877 1. rename old LVs to name_replaced.<time_t>
10878 1. rename new LVs to old LVs
10879 1. attach the new LVs (with the old names now) to the drbd device
10881 1. wait for sync across all devices
10883 1. for each modified disk:
10885 1. remove old LVs (which have the name name_replaces.<time_t>)
10887 Failures are not very well handled.
10891 steps_total = 6
10892 # Step: check device activation
10893 self.lu.LogStep(1, steps_total, "Check device existence")
10894 self._CheckDisksExistence([self.other_node, self.target_node])
10895 self._CheckVolumeGroup([self.target_node, self.other_node])
10897 # Step: check other node consistency
10898 self.lu.LogStep(2, steps_total, "Check peer consistency")
10899 self._CheckDisksConsistency(self.other_node,
10900 self.other_node == self.instance.primary_node,
10903 # Step: create new storage
10904 self.lu.LogStep(3, steps_total, "Allocate new storage")
10905 iv_names = self._CreateNewStorage(self.target_node)
10907 # Step: for each lv, detach+rename*2+attach
10908 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10909 for dev, old_lvs, new_lvs in iv_names.itervalues():
10910 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10912 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10914 result.Raise("Can't detach drbd from local storage on node"
10915 " %s for device %s" % (self.target_node, dev.iv_name))
10917 #cfg.Update(instance)
10919 # ok, we created the new LVs, so now we know we have the needed
10920 # storage; as such, we proceed on the target node to rename
10921 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10922 # using the assumption that logical_id == physical_id (which in
10923 # turn is the unique_id on that node)
10925 # FIXME(iustin): use a better name for the replaced LVs
10926 temp_suffix = int(time.time())
10927 ren_fn = lambda d, suff: (d.physical_id[0],
10928 d.physical_id[1] + "_replaced-%s" % suff)
10930 # Build the rename list based on what LVs exist on the node
10931 rename_old_to_new = []
10932 for to_ren in old_lvs:
10933 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10934 if not result.fail_msg and result.payload:
10936 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10938 self.lu.LogInfo("Renaming the old LVs on the target node")
10939 result = self.rpc.call_blockdev_rename(self.target_node,
10941 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10943 # Now we rename the new LVs to the old LVs
10944 self.lu.LogInfo("Renaming the new LVs on the target node")
10945 rename_new_to_old = [(new, old.physical_id)
10946 for old, new in zip(old_lvs, new_lvs)]
10947 result = self.rpc.call_blockdev_rename(self.target_node,
10949 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10951 # Intermediate steps of in memory modifications
10952 for old, new in zip(old_lvs, new_lvs):
10953 new.logical_id = old.logical_id
10954 self.cfg.SetDiskID(new, self.target_node)
10956 # We need to modify old_lvs so that removal later removes the
10957 # right LVs, not the newly added ones; note that old_lvs is a
10959 for disk in old_lvs:
10960 disk.logical_id = ren_fn(disk, temp_suffix)
10961 self.cfg.SetDiskID(disk, self.target_node)
10963 # Now that the new lvs have the old name, we can add them to the device
10964 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10965 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10967 msg = result.fail_msg
10968 if msg:
10969 for new_lv in new_lvs:
10970 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10971 new_lv).fail_msg
10972 if msg2:
10973 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10974 hint=("cleanup manually the unused logical"
10976 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10978 cstep = itertools.count(5)
10980 if self.early_release:
10981 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10982 self._RemoveOldStorage(self.target_node, iv_names)
10983 # TODO: Check if releasing locks early still makes sense
10984 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10986 # Release all resource locks except those used by the instance
10987 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10988 keep=self.node_secondary_ip.keys())
10990 # Release all node locks while waiting for sync
10991 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10993 # TODO: Can the instance lock be downgraded here? Take the optional disk
10994 # shutdown in the caller into consideration.
10997 # This can fail as the old devices are degraded and _WaitForSync
10998 # computes a combined result over all disks, so we don't check its return value
10999 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11000 _WaitForSync(self.lu, self.instance)
11002 # Check all devices manually
11003 self._CheckDevices(self.instance.primary_node, iv_names)
11005 # Step: remove old storage
11006 if not self.early_release:
11007 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11008 self._RemoveOldStorage(self.target_node, iv_names)
11010 def _ExecDrbd8Secondary(self, feedback_fn):
11011 """Replace the secondary node for DRBD 8.
11013 The algorithm for replace is quite complicated:
11014 - for all disks of the instance:
11015 - create new LVs on the new node with same names
11016 - shutdown the drbd device on the old secondary
11017 - disconnect the drbd network on the primary
11018 - create the drbd device on the new secondary
11019 - network attach the drbd on the primary, using an artifice:
11020 the drbd code for Attach() will connect to the network if it
11021 finds a device which is connected to the good local disks but
11022 not network enabled
11023 - wait for sync across all devices
11024 - remove all disks from the old secondary
11026 Failures are not very well handled.
11031 pnode = self.instance.primary_node
11033 # Step: check device activation
11034 self.lu.LogStep(1, steps_total, "Check device existence")
11035 self._CheckDisksExistence([self.instance.primary_node])
11036 self._CheckVolumeGroup([self.instance.primary_node])
11038 # Step: check other node consistency
11039 self.lu.LogStep(2, steps_total, "Check peer consistency")
11040 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11042 # Step: create new storage
11043 self.lu.LogStep(3, steps_total, "Allocate new storage")
11044 for idx, dev in enumerate(self.instance.disks):
11045 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11046 (self.new_node, idx))
11047 # we pass force_create=True to force LVM creation
11048 for new_lv in dev.children:
11049 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11050 _GetInstanceInfoText(self.instance), False)
11052 # Step 4: DRBD minors and DRBD setup changes
11053 # after this, we must manually remove the drbd minors on both the
11054 # error and the success paths
11055 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11056 minors = self.cfg.AllocateDRBDMinor([self.new_node
11057 for dev in self.instance.disks],
11058 self.instance.name)
11059 logging.debug("Allocated minors %r", minors)
11062 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11063 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11064 (self.new_node, idx))
11065 # create new devices on new_node; note that we create two IDs:
11066 # one without port, so the drbd will be activated without
11067 # networking information on the new node at this stage, and one
11068 # with network, for the latter activation in step 4
11069 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11070 if self.instance.primary_node == o_node1:
11073 assert self.instance.primary_node == o_node2, "Three-node instance?"
11076 new_alone_id = (self.instance.primary_node, self.new_node, None,
11077 p_minor, new_minor, o_secret)
11078 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11079 p_minor, new_minor, o_secret)
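# For reference, a DRBD8 logical_id is the 6-tuple
# (node_A, node_B, port, minor_A, minor_B, secret) unpacked above;
# new_alone_id carries None instead of the port so the device first comes up
# without networking, while new_net_id keeps the original port for the later
# network attach.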
11081 iv_names[idx] = (dev, dev.children, new_net_id)
11082 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11084 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11085 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11086 logical_id=new_alone_id,
11087 children=dev.children,
11089 params=drbd_params)
11091 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11092 _GetInstanceInfoText(self.instance), False)
11093 except errors.GenericError:
11094 self.cfg.ReleaseDRBDMinors(self.instance.name)
11097 # We have new devices, shutdown the drbd on the old secondary
11098 for idx, dev in enumerate(self.instance.disks):
11099 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11100 self.cfg.SetDiskID(dev, self.target_node)
11101 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11103 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11104 "node: %s" % (idx, msg),
11105 hint=("Please cleanup this device manually as"
11106 " soon as possible"))
11108 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11109 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11110 self.instance.disks)[pnode]
11112 msg = result.fail_msg
11114 # detaches didn't succeed (unlikely)
11115 self.cfg.ReleaseDRBDMinors(self.instance.name)
11116 raise errors.OpExecError("Can't detach the disks from the network on"
11117 " old node: %s" % (msg,))
11119 # if we managed to detach at least one, we update all the disks of
11120 # the instance to point to the new secondary
11121 self.lu.LogInfo("Updating instance configuration")
11122 for dev, _, new_logical_id in iv_names.itervalues():
11123 dev.logical_id = new_logical_id
11124 self.cfg.SetDiskID(dev, self.instance.primary_node)
11126 self.cfg.Update(self.instance, feedback_fn)
11128 # Release all node locks (the configuration has been updated)
11129 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11131 # and now perform the drbd attach
11132 self.lu.LogInfo("Attaching primary drbds to new secondary"
11133 " (standalone => connected)")
11134 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11136 self.node_secondary_ip,
11137 self.instance.disks,
11138 self.instance.name,
11140 for to_node, to_result in result.items():
11141 msg = to_result.fail_msg
11143 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11145 hint=("please do a gnt-instance info to see the"
11146 " status of disks"))
11148 cstep = itertools.count(5)
11150 if self.early_release:
11151 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11152 self._RemoveOldStorage(self.target_node, iv_names)
11153 # TODO: Check if releasing locks early still makes sense
11154 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11156 # Release all resource locks except those used by the instance
11157 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11158 keep=self.node_secondary_ip.keys())
11160 # TODO: Can the instance lock be downgraded here? Take the optional disk
11161 # shutdown in the caller into consideration.
11164 # This can fail as the old devices are degraded and _WaitForSync
11165 # computes a combined result over all disks, so we don't check its return value
11166 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11167 _WaitForSync(self.lu, self.instance)
11169 # Check all devices manually
11170 self._CheckDevices(self.instance.primary_node, iv_names)
11172 # Step: remove old storage
11173 if not self.early_release:
11174 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11175 self._RemoveOldStorage(self.target_node, iv_names)
11178 class LURepairNodeStorage(NoHooksLU):
11179 """Repairs the volume group on a node.
11184 def CheckArguments(self):
11185 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11187 storage_type = self.op.storage_type
11189 if (constants.SO_FIX_CONSISTENCY not in
11190 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11191 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11192 " repaired" % storage_type,
11193 errors.ECODE_INVAL)
11195 def ExpandNames(self):
11196 self.needed_locks = {
11197 locking.LEVEL_NODE: [self.op.node_name],
11200 def _CheckFaultyDisks(self, instance, node_name):
11201 """Ensure faulty disks abort the opcode or at least warn."""
11203 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11205 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11206 " node '%s'" % (instance.name, node_name),
11207 errors.ECODE_STATE)
11208 except errors.OpPrereqError, err:
11209 if self.op.ignore_consistency:
11210 self.proc.LogWarning(str(err.args[0]))
11214 def CheckPrereq(self):
11215 """Check prerequisites.
11218 # Check whether any instance on this node has faulty disks
11219 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11220 if inst.admin_state != constants.ADMINST_UP:
11222 check_nodes = set(inst.all_nodes)
11223 check_nodes.discard(self.op.node_name)
11224 for inst_node_name in check_nodes:
11225 self._CheckFaultyDisks(inst, inst_node_name)
11227 def Exec(self, feedback_fn):
11228 feedback_fn("Repairing storage unit '%s' on %s ..." %
11229 (self.op.name, self.op.node_name))
11231 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11232 result = self.rpc.call_storage_execute(self.op.node_name,
11233 self.op.storage_type, st_args,
11235 constants.SO_FIX_CONSISTENCY)
11236 result.Raise("Failed to repair storage unit '%s' on %s" %
11237 (self.op.name, self.op.node_name))
11240 class LUNodeEvacuate(NoHooksLU):
11241 """Evacuates instances off a list of nodes.
11246 _MODE2IALLOCATOR = {
11247 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11248 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11249 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11251 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11252 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11253 constants.IALLOCATOR_NEVAC_MODES)
11255 def CheckArguments(self):
11256 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11258 def ExpandNames(self):
11259 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11261 if self.op.remote_node is not None:
11262 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11263 assert self.op.remote_node
11265 if self.op.remote_node == self.op.node_name:
11266 raise errors.OpPrereqError("Can not use evacuated node as a new"
11267 " secondary node", errors.ECODE_INVAL)
11269 if self.op.mode != constants.NODE_EVAC_SEC:
11270 raise errors.OpPrereqError("Without the use of an iallocator only"
11271 " secondary instances can be evacuated",
11272 errors.ECODE_INVAL)
11275 self.share_locks = _ShareAll()
11276 self.needed_locks = {
11277 locking.LEVEL_INSTANCE: [],
11278 locking.LEVEL_NODEGROUP: [],
11279 locking.LEVEL_NODE: [],
11282 # Determine nodes (via group) optimistically, needs verification once locks
11283 # have been acquired
11284 self.lock_nodes = self._DetermineNodes()
11286 def _DetermineNodes(self):
11287 """Gets the list of nodes to operate on.
11290 if self.op.remote_node is None:
11291 # Iallocator will choose any node(s) in the same group
11292 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11294 group_nodes = frozenset([self.op.remote_node])
11296 # Determine nodes to be locked
11297 return set([self.op.node_name]) | group_nodes
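# Example (hypothetical cluster): evacuating "node2", a member of group
# {node1, node2, node3}, via an iallocator locks all three nodes, while an
# explicit remote node, say "node4", narrows the set to {node2, node4}.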
11299 def _DetermineInstances(self):
11300 """Builds list of instances to operate on.
11303 assert self.op.mode in constants.NODE_EVAC_MODES
11305 if self.op.mode == constants.NODE_EVAC_PRI:
11306 # Primary instances only
11307 inst_fn = _GetNodePrimaryInstances
11308 assert self.op.remote_node is None, \
11309 "Evacuating primary instances requires iallocator"
11310 elif self.op.mode == constants.NODE_EVAC_SEC:
11311 # Secondary instances only
11312 inst_fn = _GetNodeSecondaryInstances
11315 assert self.op.mode == constants.NODE_EVAC_ALL
11316 inst_fn = _GetNodeInstances
11317 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11319 raise errors.OpPrereqError("Due to an issue with the iallocator"
11320 " interface it is not possible to evacuate"
11321 " all instances at once; specify explicitly"
11322 " whether to evacuate primary or secondary"
11324 errors.ECODE_INVAL)
11326 return inst_fn(self.cfg, self.op.node_name)
11328 def DeclareLocks(self, level):
11329 if level == locking.LEVEL_INSTANCE:
11330 # Lock instances optimistically, needs verification once node and group
11331 # locks have been acquired
11332 self.needed_locks[locking.LEVEL_INSTANCE] = \
11333 set(i.name for i in self._DetermineInstances())
11335 elif level == locking.LEVEL_NODEGROUP:
11336 # Lock node groups for all potential target nodes optimistically, needs
11337 # verification once nodes have been acquired
11338 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11339 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11341 elif level == locking.LEVEL_NODE:
11342 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11344 def CheckPrereq(self):
11346 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11347 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11348 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11350 need_nodes = self._DetermineNodes()
11352 if not owned_nodes.issuperset(need_nodes):
11353 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11354 " locks were acquired, current nodes are"
11355 " are '%s', used to be '%s'; retry the"
11357 (self.op.node_name,
11358 utils.CommaJoin(need_nodes),
11359 utils.CommaJoin(owned_nodes)),
11360 errors.ECODE_STATE)
11362 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11363 if owned_groups != wanted_groups:
11364 raise errors.OpExecError("Node groups changed since locks were acquired,"
11365 " current groups are '%s', used to be '%s';"
11366 " retry the operation" %
11367 (utils.CommaJoin(wanted_groups),
11368 utils.CommaJoin(owned_groups)))
11370 # Determine affected instances
11371 self.instances = self._DetermineInstances()
11372 self.instance_names = [i.name for i in self.instances]
11374 if set(self.instance_names) != owned_instances:
11375 raise errors.OpExecError("Instances on node '%s' changed since locks"
11376 " were acquired, current instances are '%s',"
11377 " used to be '%s'; retry the operation" %
11378 (self.op.node_name,
11379 utils.CommaJoin(self.instance_names),
11380 utils.CommaJoin(owned_instances)))
11382 if self.instance_names:
11383 self.LogInfo("Evacuating instances from node '%s': %s",
11385 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11387 self.LogInfo("No instances to evacuate from node '%s'",
11390 if self.op.remote_node is not None:
11391 for i in self.instances:
11392 if i.primary_node == self.op.remote_node:
11393 raise errors.OpPrereqError("Node %s is the primary node of"
11394 " instance %s, cannot use it as"
11396 (self.op.remote_node, i.name),
11397 errors.ECODE_INVAL)
11399 def Exec(self, feedback_fn):
11400 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11402 if not self.instance_names:
11403 # No instances to evacuate
11406 elif self.op.iallocator is not None:
11407 # TODO: Implement relocation to other group
11408 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11409 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11410 instances=list(self.instance_names))
11412 ial.Run(self.op.iallocator)
11414 if not ial.success:
11415 raise errors.OpPrereqError("Can't compute node evacuation using"
11416 " iallocator '%s': %s" %
11417 (self.op.iallocator, ial.info),
11418 errors.ECODE_NORES)
11420 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11422 elif self.op.remote_node is not None:
11423 assert self.op.mode == constants.NODE_EVAC_SEC
11425 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11426 remote_node=self.op.remote_node,
11428 mode=constants.REPLACE_DISK_CHG,
11429 early_release=self.op.early_release)]
11430 for instance_name in self.instance_names
11434 raise errors.ProgrammerError("No iallocator or remote node")
11436 return ResultWithJobs(jobs)
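# Note: the "jobs" value built above is a list of per-instance jobs, each job
# itself a list of opcodes -- a single OpInstanceReplaceDisks per instance in
# the explicit remote-node case, or whatever the iallocator proposed.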
11439 def _SetOpEarlyRelease(early_release, op):
11440 """Sets C{early_release} flag on opcodes if available.
11444 op.early_release = early_release
11445 except AttributeError:
11446 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11451 def _NodeEvacDest(use_nodes, group, nodes):
11452 """Returns group or nodes depending on caller's choice.
11456 return utils.CommaJoin(nodes)
11461 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11462 """Unpacks the result of change-group and node-evacuate iallocator requests.
11464 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11465 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11467 @type lu: L{LogicalUnit}
11468 @param lu: Logical unit instance
11469 @type alloc_result: tuple/list
11470 @param alloc_result: Result from iallocator
11471 @type early_release: bool
11472 @param early_release: Whether to release locks early if possible
11473 @type use_nodes: bool
11474 @param use_nodes: Whether to display node names instead of groups
11477 (moved, failed, jobs) = alloc_result
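# The iallocator result for these modes is expected to unpack into:
#   moved  - list of (instance_name, target_group, target_nodes) tuples
#   failed - list of (instance_name, failure_reason) tuples
#   jobs   - list of jobs, each a list of serialized opcodes
# (shapes inferred from the handling below).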
11480 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11481 for (name, reason) in failed)
11482 lu.LogWarning("Unable to evacuate instances %s", failreason)
11483 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11486 lu.LogInfo("Instances to be moved: %s",
11487 utils.CommaJoin("%s (to %s)" %
11488 (name, _NodeEvacDest(use_nodes, group, nodes))
11489 for (name, group, nodes) in moved))
11491 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11492 map(opcodes.OpCode.LoadOpCode, ops))
11496 class LUInstanceGrowDisk(LogicalUnit):
11497 """Grow a disk of an instance.
11500 HPATH = "disk-grow"
11501 HTYPE = constants.HTYPE_INSTANCE
11504 def ExpandNames(self):
11505 self._ExpandAndLockInstance()
11506 self.needed_locks[locking.LEVEL_NODE] = []
11507 self.needed_locks[locking.LEVEL_NODE_RES] = []
11508 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11509 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11511 def DeclareLocks(self, level):
11512 if level == locking.LEVEL_NODE:
11513 self._LockInstancesNodes()
11514 elif level == locking.LEVEL_NODE_RES:
11516 self.needed_locks[locking.LEVEL_NODE_RES] = \
11517 self.needed_locks[locking.LEVEL_NODE][:]
11519 def BuildHooksEnv(self):
11520 """Build hooks env.
11522 This runs on the master, the primary and all the secondaries.
11526 "DISK": self.op.disk,
11527 "AMOUNT": self.op.amount,
11529 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11532 def BuildHooksNodes(self):
11533 """Build hooks nodes.
11536 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11539 def CheckPrereq(self):
11540 """Check prerequisites.
11542 This checks that the instance is in the cluster.
11545 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11546 assert instance is not None, \
11547 "Cannot retrieve locked instance %s" % self.op.instance_name
11548 nodenames = list(instance.all_nodes)
11549 for node in nodenames:
11550 _CheckNodeOnline(self, node)
11552 self.instance = instance
11554 if instance.disk_template not in constants.DTS_GROWABLE:
11555 raise errors.OpPrereqError("Instance's disk layout does not support"
11556 " growing", errors.ECODE_INVAL)
11558 self.disk = instance.FindDisk(self.op.disk)
11560 if instance.disk_template not in (constants.DT_FILE,
11561 constants.DT_SHARED_FILE,
11563 # TODO: check the free disk space for file, when that feature will be
11565 _CheckNodesFreeDiskPerVG(self, nodenames,
11566 self.disk.ComputeGrowth(self.op.amount))
11568 def Exec(self, feedback_fn):
11569 """Execute disk grow.
11572 instance = self.instance
11575 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11576 assert (self.owned_locks(locking.LEVEL_NODE) ==
11577 self.owned_locks(locking.LEVEL_NODE_RES))
11579 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11581 raise errors.OpExecError("Cannot activate block device to grow")
11583 feedback_fn("Growing disk %s of instance '%s' by %s" %
11584 (self.op.disk, instance.name,
11585 utils.FormatUnit(self.op.amount, "h")))
11587 # First run all grow ops in dry-run mode
11588 for node in instance.all_nodes:
11589 self.cfg.SetDiskID(disk, node)
11590 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11591 result.Raise("Grow request failed to node %s" % node)
11593 # We know that (as far as we can test) operations across different
11594 # nodes will succeed, time to run it for real
11595 for node in instance.all_nodes:
11596 self.cfg.SetDiskID(disk, node)
11597 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11598 result.Raise("Grow request failed to node %s" % node)
11600 # TODO: Rewrite code to work properly
11601 # DRBD goes into sync mode for a short amount of time after executing the
11602 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11603 # calling "resize" in sync mode fails. Sleeping for a short amount of
11604 # time is a work-around.
11607 disk.RecordGrow(self.op.amount)
11608 self.cfg.Update(instance, feedback_fn)
11610 # Changes have been recorded, release node lock
11611 _ReleaseLocks(self, locking.LEVEL_NODE)
11613 # Downgrade lock while waiting for sync
11614 self.glm.downgrade(locking.LEVEL_INSTANCE)
11616 if self.op.wait_for_sync:
11617 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11619 self.proc.LogWarning("Disk sync-ing has not returned a good"
11620 " status; please check the instance")
11621 if instance.admin_state != constants.ADMINST_UP:
11622 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11623 elif instance.admin_state != constants.ADMINST_UP:
11624 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11625 " not supposed to be running because no wait for"
11626 " sync mode was requested")
11628 assert self.owned_locks(locking.LEVEL_NODE_RES)
11629 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11632 class LUInstanceQueryData(NoHooksLU):
11633 """Query runtime instance data.
11638 def ExpandNames(self):
11639 self.needed_locks = {}
11641 # Use locking if requested or when non-static information is wanted
11642 if not (self.op.static or self.op.use_locking):
11643 self.LogWarning("Non-static data requested, locks need to be acquired")
11644 self.op.use_locking = True
11646 if self.op.instances or not self.op.use_locking:
11647 # Expand instance names right here
11648 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11650 # Will use acquired locks
11651 self.wanted_names = None
11653 if self.op.use_locking:
11654 self.share_locks = _ShareAll()
11656 if self.wanted_names is None:
11657 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11659 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11661 self.needed_locks[locking.LEVEL_NODE] = []
11662 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11664 def DeclareLocks(self, level):
11665 if self.op.use_locking and level == locking.LEVEL_NODE:
11666 self._LockInstancesNodes()
11668 def CheckPrereq(self):
11669 """Check prerequisites.
11671 This only checks the optional instance list against the existing names.
11674 if self.wanted_names is None:
11675 assert self.op.use_locking, "Locking was not used"
11676 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11678 self.wanted_instances = \
11679 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11681 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11682 """Returns the status of a block device
11685 if self.op.static or not node:
11688 self.cfg.SetDiskID(dev, node)
11690 result = self.rpc.call_blockdev_find(node, dev)
11694 result.Raise("Can't compute disk status for %s" % instance_name)
11696 status = result.payload
11700 return (status.dev_path, status.major, status.minor,
11701 status.sync_percent, status.estimated_time,
11702 status.is_degraded, status.ldisk_status)
11704 def _ComputeDiskStatus(self, instance, snode, dev):
11705 """Compute block device status.
11708 if dev.dev_type in constants.LDS_DRBD:
11709 # we change the snode then (otherwise we use the one passed in)
11710 if dev.logical_id[0] == instance.primary_node:
11711 snode = dev.logical_id[1]
11713 snode = dev.logical_id[0]
11715 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11716 instance.name, dev)
11717 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11720 dev_children = map(compat.partial(self._ComputeDiskStatus,
11727 "iv_name": dev.iv_name,
11728 "dev_type": dev.dev_type,
11729 "logical_id": dev.logical_id,
11730 "physical_id": dev.physical_id,
11731 "pstatus": dev_pstatus,
11732 "sstatus": dev_sstatus,
11733 "children": dev_children,
11738 def Exec(self, feedback_fn):
11739 """Gather and return data"""
11742 cluster = self.cfg.GetClusterInfo()
11744 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11745 for i in self.wanted_instances)
11746 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11747 if self.op.static or pnode.offline:
11748 remote_state = None
11750 self.LogWarning("Primary node %s is marked offline, returning static"
11751 " information only for instance %s" %
11752 (pnode.name, instance.name))
11754 remote_info = self.rpc.call_instance_info(instance.primary_node,
11756 instance.hypervisor)
11757 remote_info.Raise("Error checking node %s" % instance.primary_node)
11758 remote_info = remote_info.payload
11759 if remote_info and "state" in remote_info:
11760 remote_state = "up"
11762 if instance.admin_state == constants.ADMINST_UP:
11763 remote_state = "down"
11765 remote_state = instance.admin_state
11767 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11770 result[instance.name] = {
11771 "name": instance.name,
11772 "config_state": instance.admin_state,
11773 "run_state": remote_state,
11774 "pnode": instance.primary_node,
11775 "snodes": instance.secondary_nodes,
11777 # this happens to be the same format used for hooks
11778 "nics": _NICListToTuple(self, instance.nics),
11779 "disk_template": instance.disk_template,
11781 "hypervisor": instance.hypervisor,
11782 "network_port": instance.network_port,
11783 "hv_instance": instance.hvparams,
11784 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11785 "be_instance": instance.beparams,
11786 "be_actual": cluster.FillBE(instance),
11787 "os_instance": instance.osparams,
11788 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11789 "serial_no": instance.serial_no,
11790 "mtime": instance.mtime,
11791 "ctime": instance.ctime,
11792 "uuid": instance.uuid,
11798 def PrepareContainerMods(mods, private_fn):
11799 """Prepares a list of container modifications by adding a private data field.
11801 @type mods: list of tuples; (operation, index, parameters)
11802 @param mods: List of modifications
11803 @type private_fn: callable or None
11804 @param private_fn: Callable for constructing a private data field for a modification
11809 if private_fn is None:
11814 return [(op, idx, params, fn()) for (op, idx, params) in mods]
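# Minimal usage sketch (hypothetical values): for
#   mods = [(constants.DDM_ADD, -1, {"size": 1024})]
# PrepareContainerMods(mods, None) yields
#   [(constants.DDM_ADD, -1, {"size": 1024}, None)]
# i.e. each modification simply gains a trailing private-data slot.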
11817 #: Type description for changes as returned by L{ApplyContainerMods}'s
11819 _TApplyContModsCbChanges = \
11820 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11821 ht.TNonEmptyString,
11826 def ApplyContainerMods(kind, container, chgdesc, mods,
11827 create_fn, modify_fn, remove_fn):
11828 """Applies descriptions in C{mods} to C{container}.
11831 @param kind: One-word item description
11832 @type container: list
11833 @param container: Container to modify
11834 @type chgdesc: None or list
11835 @param chgdesc: List of applied changes
11837 @param mods: Modifications as returned by L{PrepareContainerMods}
11838 @type create_fn: callable
11839 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11840 receives absolute item index, parameters and private data object as added
11841 by L{PrepareContainerMods}, returns tuple containing new item and changes
11843 @type modify_fn: callable
11844 @param modify_fn: Callback for modifying an existing item
11845 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11846 and private data object as added by L{PrepareContainerMods}, returns
11848 @type remove_fn: callable
11849 @param remove_fn: Callback on removing item; receives absolute item index,
11850 item and private data object as added by L{PrepareContainerMods}
11853 for (op, idx, params, private) in mods:
11856 absidx = len(container) - 1
11858 raise IndexError("Not accepting negative indices other than -1")
11859 elif idx > len(container):
11860 raise IndexError("Got %s index %s, but there are only %s" %
11861 (kind, idx, len(container)))
11867 if op == constants.DDM_ADD:
11868 # Calculate where item will be added
11870 addidx = len(container)
11874 if create_fn is None:
11877 (item, changes) = create_fn(addidx, params, private)
11880 container.append(item)
11883 assert idx <= len(container)
11884 # list.insert does so before the specified index
11885 container.insert(idx, item)
11887 # Retrieve existing item
11889 item = container[absidx]
11891 raise IndexError("Invalid %s index %s" % (kind, idx))
11893 if op == constants.DDM_REMOVE:
11896 if remove_fn is not None:
11897 remove_fn(absidx, item, private)
11899 changes = [("%s/%s" % (kind, absidx), "remove")]
11901 assert container[absidx] == item
11902 del container[absidx]
11903 elif op == constants.DDM_MODIFY:
11904 if modify_fn is not None:
11905 changes = modify_fn(absidx, item, params, private)
11907 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11909 assert _TApplyContModsCbChanges(changes)
11911 if not (chgdesc is None or changes is None):
11912 chgdesc.extend(changes)
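# Rough usage sketch (simplified, hypothetical container): applying
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, -1, {})], None)
# to a two-element container via
#   ApplyContainerMods("disk", container, chgdesc, mods, None, None, None)
# removes the last item and appends ("disk/1", "remove") to chgdesc.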
11915 def _UpdateIvNames(base_index, disks):
11916 """Updates the C{iv_name} attribute of disks.
11918 @type disks: list of L{objects.Disk}
11921 for (idx, disk) in enumerate(disks):
11922 disk.iv_name = "disk/%s" % (base_index + idx, )
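# E.g. with base_index=1, a two-element disk list is relabelled to "disk/1"
# and "disk/2".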
11925 class _InstNicModPrivate:
11926 """Data structure for network interface modifications.
11928 Used by L{LUInstanceSetParams}.
11931 def __init__(self):
11936 class LUInstanceSetParams(LogicalUnit):
11937 """Modifies an instances's parameters.
11940 HPATH = "instance-modify"
11941 HTYPE = constants.HTYPE_INSTANCE
11945 def _UpgradeDiskNicMods(kind, mods, verify_fn):
11946 assert ht.TList(mods)
11947 assert not mods or len(mods[0]) in (2, 3)
11949 if mods and len(mods[0]) == 2:
11953 for op, params in mods:
11954 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11955 result.append((op, -1, params))
11959 raise errors.OpPrereqError("Only one %s add or remove operation is"
11960 " supported at a time" % kind,
11961 errors.ECODE_INVAL)
11963 result.append((constants.DDM_MODIFY, op, params))
11965 assert verify_fn(result)
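# Upgrade sketch (hypothetical mods): the legacy two-element form
#   [(constants.DDM_ADD, {"size": 1024})] -> [(constants.DDM_ADD, -1, {"size": 1024})]
# while an index-keyed entry such as (0, {"mode": "ro"}) becomes
#   (constants.DDM_MODIFY, 0, {"mode": "ro"}).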
11972 def _CheckMods(kind, mods, key_types, item_fn):
11973 """Ensures requested disk/NIC modifications are valid.
11976 for (op, _, params) in mods:
11977 assert ht.TDict(params)
11979 utils.ForceDictType(params, key_types)
11981 if op == constants.DDM_REMOVE:
11983 raise errors.OpPrereqError("No settings should be passed when"
11984 " removing a %s" % kind,
11985 errors.ECODE_INVAL)
11986 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11987 item_fn(op, params)
11989 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11992 def _VerifyDiskModification(op, params):
11993 """Verifies a disk modification.
11996 if op == constants.DDM_ADD:
11997 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11998 if mode not in constants.DISK_ACCESS_SET:
11999 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12000 errors.ECODE_INVAL)
12002 size = params.get(constants.IDISK_SIZE, None)
12004 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12005 constants.IDISK_SIZE, errors.ECODE_INVAL)
12009 except (TypeError, ValueError), err:
12010 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12011 errors.ECODE_INVAL)
12013 params[constants.IDISK_SIZE] = size
12015 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12016 raise errors.OpPrereqError("Disk size change not possible, use"
12017 " grow-disk", errors.ECODE_INVAL)
12020 def _VerifyNicModification(op, params):
12021 """Verifies a network interface modification.
12024 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12025 ip = params.get(constants.INIC_IP, None)
12028 elif ip.lower() == constants.VALUE_NONE:
12029 params[constants.INIC_IP] = None
12030 elif not netutils.IPAddress.IsValid(ip):
12031 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12032 errors.ECODE_INVAL)
12034 bridge = params.get("bridge", None)
12035 link = params.get(constants.INIC_LINK, None)
12036 if bridge and link:
12037 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12038 " at the same time", errors.ECODE_INVAL)
12039 elif bridge and bridge.lower() == constants.VALUE_NONE:
12040 params["bridge"] = None
12041 elif link and link.lower() == constants.VALUE_NONE:
12042 params[constants.INIC_LINK] = None
12044 if op == constants.DDM_ADD:
12045 macaddr = params.get(constants.INIC_MAC, None)
12046 if macaddr is None:
12047 params[constants.INIC_MAC] = constants.VALUE_AUTO
12049 if constants.INIC_MAC in params:
12050 macaddr = params[constants.INIC_MAC]
12051 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12052 macaddr = utils.NormalizeAndValidateMac(macaddr)
12054 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12055 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12056 " modifying an existing NIC",
12057 errors.ECODE_INVAL)
12059 def CheckArguments(self):
12060 if not (self.op.nics or self.op.disks or self.op.disk_template or
12061 self.op.hvparams or self.op.beparams or self.op.os_name or
12062 self.op.offline is not None or self.op.runtime_mem):
12063 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12065 if self.op.hvparams:
12066 _CheckGlobalHvParams(self.op.hvparams)
12069 self._UpgradeDiskNicMods("disk", self.op.disks,
12070 opcodes.OpInstanceSetParams.TestDiskModifications)
12072 self._UpgradeDiskNicMods("NIC", self.op.nics,
12073 opcodes.OpInstanceSetParams.TestNicModifications)
12075 # Check disk modifications
12076 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12077 self._VerifyDiskModification)
12079 if self.op.disks and self.op.disk_template is not None:
12080 raise errors.OpPrereqError("Disk template conversion and other disk"
12081 " changes not supported at the same time",
12082 errors.ECODE_INVAL)
12084 if (self.op.disk_template and
12085 self.op.disk_template in constants.DTS_INT_MIRROR and
12086 self.op.remote_node is None):
12087 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12088 " one requires specifying a secondary node",
12089 errors.ECODE_INVAL)
12091 # Check NIC modifications
12092 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12093 self._VerifyNicModification)
12095 def ExpandNames(self):
12096 self._ExpandAndLockInstance()
12097 # Can't even acquire node locks in shared mode as upcoming changes in
12098 # Ganeti 2.6 will start to modify the node object on disk conversion
12099 self.needed_locks[locking.LEVEL_NODE] = []
12100 self.needed_locks[locking.LEVEL_NODE_RES] = []
12101 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12103 def DeclareLocks(self, level):
12104 # TODO: Acquire group lock in shared mode (disk parameters)
12105 if level == locking.LEVEL_NODE:
12106 self._LockInstancesNodes()
12107 if self.op.disk_template and self.op.remote_node:
12108 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12109 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12110 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12112 self.needed_locks[locking.LEVEL_NODE_RES] = \
12113 self.needed_locks[locking.LEVEL_NODE][:]
12115 def BuildHooksEnv(self):
12116 """Build hooks env.
12118 This runs on the master, primary and secondaries.
12122 if constants.BE_MINMEM in self.be_new:
12123 args["minmem"] = self.be_new[constants.BE_MINMEM]
12124 if constants.BE_MAXMEM in self.be_new:
12125 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12126 if constants.BE_VCPUS in self.be_new:
12127 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12128 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12129 # information at all.
12131 if self._new_nics is not None:
12134 for nic in self._new_nics:
12135 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12136 mode = nicparams[constants.NIC_MODE]
12137 link = nicparams[constants.NIC_LINK]
12138 nics.append((nic.ip, nic.mac, mode, link))
12140 args["nics"] = nics
12142 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12143 if self.op.disk_template:
12144 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12145 if self.op.runtime_mem:
12146 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12150 def BuildHooksNodes(self):
12151 """Build hooks nodes.
12154 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12157 def _PrepareNicModification(self, params, private, old_ip, old_params,
12159 update_params_dict = dict([(key, params[key])
12160 for key in constants.NICS_PARAMETERS
12163 if "bridge" in params:
12164 update_params_dict[constants.NIC_LINK] = params["bridge"]
12166 new_params = _GetUpdatedParams(old_params, update_params_dict)
12167 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12169 new_filled_params = cluster.SimpleFillNIC(new_params)
12170 objects.NIC.CheckParameterSyntax(new_filled_params)
12172 new_mode = new_filled_params[constants.NIC_MODE]
12173 if new_mode == constants.NIC_MODE_BRIDGED:
12174 bridge = new_filled_params[constants.NIC_LINK]
12175 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12177 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12179 self.warn.append(msg)
12181 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12183 elif new_mode == constants.NIC_MODE_ROUTED:
12184 ip = params.get(constants.INIC_IP, old_ip)
12186 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12187 " on a routed NIC", errors.ECODE_INVAL)
12189 if constants.INIC_MAC in params:
12190 mac = params[constants.INIC_MAC]
12192 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12193 errors.ECODE_INVAL)
12194 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12195 # otherwise generate the MAC address
12196 params[constants.INIC_MAC] = \
12197 self.cfg.GenerateMAC(self.proc.GetECId())
12199 # or validate/reserve the current one
12201 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12202 except errors.ReservationError:
12203 raise errors.OpPrereqError("MAC address '%s' already in use"
12204 " in cluster" % mac,
12205 errors.ECODE_NOTUNIQUE)
12207 private.params = new_params
12208 private.filled = new_filled_params
12210 return (None, None)
12212 def CheckPrereq(self):
12213 """Check prerequisites.
12215 This only checks the instance list against the existing names.
12218 # checking the new params on the primary/secondary nodes
12220 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12221 cluster = self.cluster = self.cfg.GetClusterInfo()
12222 assert self.instance is not None, \
12223 "Cannot retrieve locked instance %s" % self.op.instance_name
12224 pnode = instance.primary_node
12225 nodelist = list(instance.all_nodes)
12226 pnode_info = self.cfg.GetNodeInfo(pnode)
12227 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12229 # Prepare disk/NIC modifications
12230 self.diskmod = PrepareContainerMods(self.op.disks, None)
12231 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12234 if self.op.os_name and not self.op.force:
12235 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12236 self.op.force_variant)
12237 instance_os = self.op.os_name
12239 instance_os = instance.os
12241 assert not (self.op.disk_template and self.op.disks), \
12242 "Can't modify disk template and apply disk changes at the same time"
12244 if self.op.disk_template:
12245 if instance.disk_template == self.op.disk_template:
12246 raise errors.OpPrereqError("Instance already has disk template %s" %
12247 instance.disk_template, errors.ECODE_INVAL)
12249 if (instance.disk_template,
12250 self.op.disk_template) not in self._DISK_CONVERSIONS:
12251 raise errors.OpPrereqError("Unsupported disk template conversion from"
12252 " %s to %s" % (instance.disk_template,
12253 self.op.disk_template),
12254 errors.ECODE_INVAL)
12255 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12256 msg="cannot change disk template")
12257 if self.op.disk_template in constants.DTS_INT_MIRROR:
12258 if self.op.remote_node == pnode:
12259 raise errors.OpPrereqError("Given new secondary node %s is the same"
12260 " as the primary node of the instance" %
12261 self.op.remote_node, errors.ECODE_STATE)
12262 _CheckNodeOnline(self, self.op.remote_node)
12263 _CheckNodeNotDrained(self, self.op.remote_node)
12264 # FIXME: here we assume that the old instance type is DT_PLAIN
12265 assert instance.disk_template == constants.DT_PLAIN
12266 disks = [{constants.IDISK_SIZE: d.size,
12267 constants.IDISK_VG: d.logical_id[0]}
12268 for d in instance.disks]
12269 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12270 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12272 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12273 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12274 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12275 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12276 ignore=self.op.ignore_ipolicy)
12277 if pnode_info.group != snode_info.group:
12278 self.LogWarning("The primary and secondary nodes are in two"
12279 " different node groups; the disk parameters"
12280 " from the first disk's node group will be"
12283 # hvparams processing
12284 if self.op.hvparams:
12285 hv_type = instance.hypervisor
12286 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12287 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12288 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12291 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12292 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12293 self.hv_proposed = self.hv_new = hv_new # the new actual values
12294 self.hv_inst = i_hvdict # the new dict (without defaults)
12296 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12298 self.hv_new = self.hv_inst = {}
12300 # beparams processing
12301 if self.op.beparams:
12302 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12304 objects.UpgradeBeParams(i_bedict)
12305 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12306 be_new = cluster.SimpleFillBE(i_bedict)
12307 self.be_proposed = self.be_new = be_new # the new actual values
12308 self.be_inst = i_bedict # the new dict (without defaults)
12310 self.be_new = self.be_inst = {}
12311 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12312 be_old = cluster.FillBE(instance)
12314 # CPU param validation -- checking every time a parameter is
12315 # changed, to cover all cases where either CPU mask or vcpus have been changed
12317 if (constants.BE_VCPUS in self.be_proposed and
12318 constants.HV_CPU_MASK in self.hv_proposed):
12320 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12321 # Verify mask is consistent with number of vCPUs. Can skip this
12322 # test if only 1 entry in the CPU mask, which means same mask
12323 # is applied to all vCPUs.
12324 if (len(cpu_list) > 1 and
12325 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12326 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12328 (self.be_proposed[constants.BE_VCPUS],
12329 self.hv_proposed[constants.HV_CPU_MASK]),
12330 errors.ECODE_INVAL)
12332 # Only perform this test if a new CPU mask is given
12333 if constants.HV_CPU_MASK in self.hv_new:
12334 # Calculate the largest CPU number requested
12335 max_requested_cpu = max(map(max, cpu_list))
12336 # Check that all of the instance's nodes have enough physical CPUs to
12337 # satisfy the requested CPU mask
12338 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12339 max_requested_cpu + 1, instance.hypervisor)
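# In other words: a multi-entry mask must provide exactly one entry per
# configured vCPU (a single entry applies to all vCPUs and skips the length
# check above), and the highest CPU number referenced must exist on all of
# the instance's nodes.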
12341 # osparams processing
12342 if self.op.osparams:
12343 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12344 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12345 self.os_inst = i_osdict # the new dict (without defaults)
12351 #TODO(dynmem): do the appropriate check involving MINMEM
12352 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12353 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12354 mem_check_list = [pnode]
12355 if be_new[constants.BE_AUTO_BALANCE]:
12356 # either we changed auto_balance to yes or it was from before
12357 mem_check_list.extend(instance.secondary_nodes)
12358 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12359 instance.hypervisor)
12360 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12361 [instance.hypervisor])
12362 pninfo = nodeinfo[pnode]
12363 msg = pninfo.fail_msg
12365 # Assume the primary node is unreachable and go ahead
12366 self.warn.append("Can't get info from primary node %s: %s" %
12369 (_, _, (pnhvinfo, )) = pninfo.payload
12370 if not isinstance(pnhvinfo.get("memory_free", None), int):
12371 self.warn.append("Node data from primary node %s doesn't contain"
12372 " free memory information" % pnode)
12373 elif instance_info.fail_msg:
12374 self.warn.append("Can't get instance runtime information: %s" %
12375 instance_info.fail_msg)
12377 if instance_info.payload:
12378 current_mem = int(instance_info.payload["memory"])
12380 # Assume instance not running
12381 # (there is a slight race condition here, but it's not very
12382 # probable, and we have no other way to check)
12383 # TODO: Describe race condition
12385 #TODO(dynmem): do the appropriate check involving MINMEM
12386 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12387 pnhvinfo["memory_free"])
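# Illustrative arithmetic (made-up numbers): raising maxmem to 4096 MB while
# the instance currently uses 1024 MB and the primary node reports 2048 MB
# free gives miss_mem = 4096 - 1024 - 2048 = 1024 MB, which aborts the change
# below.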
12389 raise errors.OpPrereqError("This change will prevent the instance"
12390 " from starting, due to %d MB of memory"
12391 " missing on its primary node" %
12393 errors.ECODE_NORES)
12395 if be_new[constants.BE_AUTO_BALANCE]:
12396 for node, nres in nodeinfo.items():
12397 if node not in instance.secondary_nodes:
12399 nres.Raise("Can't get info from secondary node %s" % node,
12400 prereq=True, ecode=errors.ECODE_STATE)
12401 (_, _, (nhvinfo, )) = nres.payload
12402 if not isinstance(nhvinfo.get("memory_free", None), int):
12403 raise errors.OpPrereqError("Secondary node %s didn't return free"
12404 " memory information" % node,
12405 errors.ECODE_STATE)
12406 #TODO(dynmem): do the appropriate check involving MINMEM
12407 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12408 raise errors.OpPrereqError("This change will prevent the instance"
12409 " from failover to its secondary node"
12410 " %s, due to not enough memory" % node,
12411 errors.ECODE_STATE)
12413 if self.op.runtime_mem:
12414 remote_info = self.rpc.call_instance_info(instance.primary_node,
12416 instance.hypervisor)
12417 remote_info.Raise("Error checking node %s" % instance.primary_node)
12418 if not remote_info.payload: # not running already
12419 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12420 errors.ECODE_STATE)
12422 current_memory = remote_info.payload["memory"]
12423 if (not self.op.force and
12424 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12425 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12426 raise errors.OpPrereqError("Instance %s must have memory between %d"
12427 " and %d MB of memory unless --force is"
12428 " given" % (instance.name,
12429 self.be_proposed[constants.BE_MINMEM],
12430 self.be_proposed[constants.BE_MAXMEM]),
12431 errors.ECODE_INVAL)
12433 if self.op.runtime_mem > current_memory:
12434 _CheckNodeFreeMemory(self, instance.primary_node,
12435 "ballooning memory for instance %s" %
12437 self.op.runtime_mem - current_memory,
12438 instance.hypervisor)
12440 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12441 raise errors.OpPrereqError("Disk operations not supported for"
12442 " diskless instances",
12443 errors.ECODE_INVAL)
12445 def _PrepareNicCreate(_, params, private):
12446 return self._PrepareNicModification(params, private, None, {},
12449 def _PrepareNicMod(_, nic, params, private):
12450 return self._PrepareNicModification(params, private, nic.ip,
12451 nic.nicparams, cluster, pnode)
12453 # Verify NIC changes (operating on copy)
12454 nics = instance.nics[:]
12455 ApplyContainerMods("NIC", nics, None, self.nicmod,
12456 _PrepareNicCreate, _PrepareNicMod, None)
12457 if len(nics) > constants.MAX_NICS:
12458 raise errors.OpPrereqError("Instance has too many network interfaces"
12459 " (%d), cannot add more" % constants.MAX_NICS,
12460 errors.ECODE_STATE)
12462 # Verify disk changes (operating on a copy)
12463 disks = instance.disks[:]
12464 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12465 if len(disks) > constants.MAX_DISKS:
12466 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12467 " more" % constants.MAX_DISKS,
12468 errors.ECODE_STATE)
12470 if self.op.offline is not None:
12471 if self.op.offline:
12472 msg = "can't change to offline"
12474 msg = "can't change to online"
12475 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12477 # Pre-compute NIC changes (necessary to use result in hooks)
12478 self._nic_chgdesc = []
12480 # Operate on copies as this is still in prereq
12481 nics = [nic.Copy() for nic in instance.nics]
12482 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12483 self._CreateNewNic, self._ApplyNicMods, None)
12484 self._new_nics = nics
12486 self._new_nics = None
12488 def _ConvertPlainToDrbd(self, feedback_fn):
12489 """Converts an instance from plain to drbd.
12492 feedback_fn("Converting template to drbd")
12493 instance = self.instance
12494 pnode = instance.primary_node
12495 snode = self.op.remote_node
12497 assert instance.disk_template == constants.DT_PLAIN
12499 # create a fake disk info for _GenerateDiskTemplate
12500 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12501 constants.IDISK_VG: d.logical_id[0]}
12502 for d in instance.disks]
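# disk_info mimics the disk specifications normally passed at instance
# creation, e.g. (illustrative values)
#   [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
#     constants.IDISK_VG: "xenvg"}]
# which is enough for _GenerateDiskTemplate to build matching DRBD8 disks.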
12503 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12504 instance.name, pnode, [snode],
12505 disk_info, None, None, 0, feedback_fn,
12507 info = _GetInstanceInfoText(instance)
12508 feedback_fn("Creating aditional volumes...")
12509 # first, create the missing data and meta devices
12510 for disk in new_disks:
12511 # unfortunately this is... not too nice
12512 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12514 for child in disk.children:
12515 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12516 # at this stage, all new LVs have been created, so we can rename the old ones
12518 feedback_fn("Renaming original volumes...")
12519 rename_list = [(o, n.children[0].logical_id)
12520 for (o, n) in zip(instance.disks, new_disks)]
12521 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12522 result.Raise("Failed to rename original LVs")
12524 feedback_fn("Initializing DRBD devices...")
12525 # all child devices are in place, we can now create the DRBD devices
12526 for disk in new_disks:
12527 for node in [pnode, snode]:
12528 f_create = node == pnode
12529 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12531 # at this point, the instance has been modified
12532 instance.disk_template = constants.DT_DRBD8
12533 instance.disks = new_disks
12534 self.cfg.Update(instance, feedback_fn)
12536 # Release node locks while waiting for sync
12537 _ReleaseLocks(self, locking.LEVEL_NODE)
12539 # disks are created, waiting for sync
12540 disk_abort = not _WaitForSync(self, instance,
12541 oneshot=not self.op.wait_for_sync)
12543 raise errors.OpExecError("There are some degraded disks for"
12544 " this instance, please cleanup manually")
12546 # Node resource locks will be released by caller
12548 def _ConvertDrbdToPlain(self, feedback_fn):
12549 """Converts an instance from drbd to plain.
12552 instance = self.instance
12554 assert len(instance.secondary_nodes) == 1
12555 assert instance.disk_template == constants.DT_DRBD8
12557 pnode = instance.primary_node
12558 snode = instance.secondary_nodes[0]
12559 feedback_fn("Converting template to plain")
12561 old_disks = instance.disks
12562 new_disks = [d.children[0] for d in old_disks]
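# For DRBD8 disks the first child is the data LV and the second the metadata
# LV (see the removal of disk.children[1] below), so keeping children[0]
# preserves the instance data as a plain LV.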
12564 # copy over size and mode
12565 for parent, child in zip(old_disks, new_disks):
12566 child.size = parent.size
12567 child.mode = parent.mode
12569 # update instance structure
12570 instance.disks = new_disks
12571 instance.disk_template = constants.DT_PLAIN
12572 self.cfg.Update(instance, feedback_fn)
12574 # Release locks in case removing disks takes a while
12575 _ReleaseLocks(self, locking.LEVEL_NODE)
12577 feedback_fn("Removing volumes on the secondary node...")
12578 for disk in old_disks:
12579 self.cfg.SetDiskID(disk, snode)
12580 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12582 self.LogWarning("Could not remove block device %s on node %s,"
12583 " continuing anyway: %s", disk.iv_name, snode, msg)
12585 feedback_fn("Removing unneeded volumes on the primary node...")
12586 for idx, disk in enumerate(old_disks):
12587 meta = disk.children[1]
12588 self.cfg.SetDiskID(meta, pnode)
12589 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12591 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12592 " continuing anyway: %s", idx, pnode, msg)
12594 # these are DRBD disks, return their ports to the pool
12595 for disk in old_disks:
12596 tcp_port = disk.logical_id[2]
12597 self.cfg.AddTcpUdpPort(tcp_port)
12599 # Node resource locks will be released by caller
12601 def _CreateNewDisk(self, idx, params, _):
12602 """Creates a new disk.
12605 instance = self.instance
12608 if instance.disk_template in constants.DTS_FILEBASED:
12609 (file_driver, file_path) = instance.disks[0].logical_id
12610 file_path = os.path.dirname(file_path)
12612 file_driver = file_path = None
12615 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12616 instance.primary_node, instance.secondary_nodes,
12617 [params], file_path, file_driver, idx,
12618 self.Log, self.diskparams)[0]
12620 info = _GetInstanceInfoText(instance)
12622 logging.info("Creating volume %s for instance %s",
12623 disk.iv_name, instance.name)
12624 # Note: this needs to be kept in sync with _CreateDisks
12626 for node in instance.all_nodes:
12627 f_create = (node == instance.primary_node)
12629 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12630 except errors.OpExecError, err:
12631 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12632 disk.iv_name, disk, node, err)
12635 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12639 def _ModifyDisk(idx, disk, params, _):
12640 """Modifies a disk.
12643 disk.mode = params[constants.IDISK_MODE]
12646 ("disk.mode/%d" % idx, disk.mode),
12649 def _RemoveDisk(self, idx, root, _):
12653 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12654 self.cfg.SetDiskID(disk, node)
12655 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12657 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12658 " continuing anyway", idx, node, msg)
12660 # if this is a DRBD disk, return its port to the pool
12661 if root.dev_type in constants.LDS_DRBD:
12662 self.cfg.AddTcpUdpPort(root.logical_id[2])
12665 def _CreateNewNic(idx, params, private):
12666 """Creates data structure for a new network interface.
12669 mac = params[constants.INIC_MAC]
12670 ip = params.get(constants.INIC_IP, None)
12671 nicparams = private.params
12673 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12675 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12676 (mac, ip, private.filled[constants.NIC_MODE],
12677 private.filled[constants.NIC_LINK])),
12681 def _ApplyNicMods(idx, nic, params, private):
12682 """Modifies a network interface.
12687 for key in [constants.INIC_MAC, constants.INIC_IP]:
12689 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12690 setattr(nic, key, params[key])
12693 nic.nicparams = private.params
12695 for (key, val) in params.items():
12696 changes.append(("nic.%s/%d" % (key, idx), val))
12700 def Exec(self, feedback_fn):
12701 """Modifies an instance.
12703 All parameters take effect only at the next restart of the instance.
12706 # Process here the warnings from CheckPrereq, as we don't have a
12707 # feedback_fn there.
12708 # TODO: Replace with self.LogWarning
12709 for warn in self.warn:
12710 feedback_fn("WARNING: %s" % warn)
12712 assert ((self.op.disk_template is None) ^
12713 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12714 "Not owning any node resource locks"
12716 result = []
12717 instance = self.instance
12720 if self.op.runtime_mem:
12721 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12722 instance,
12723 self.op.runtime_mem)
12724 rpcres.Raise("Cannot modify instance runtime memory")
12725 result.append(("runtime_memory", self.op.runtime_mem))
12727 # Apply disk changes
12728 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12729 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12730 _UpdateIvNames(0, instance.disks)
12732 if self.op.disk_template:
12734 check_nodes = set(instance.all_nodes)
12735 if self.op.remote_node:
12736 check_nodes.add(self.op.remote_node)
12737 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12738 owned = self.owned_locks(level)
12739 assert not (check_nodes - owned), \
12740 ("Not owning the correct locks, owning %r, expected at least %r" %
12741 (owned, check_nodes))
12743 r_shut = _ShutdownInstanceDisks(self, instance)
12744 if not r_shut:
12745 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12746 " proceed with disk template conversion")
12747 mode = (instance.disk_template, self.op.disk_template)
12748 try:
12749 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12750 except:
12751 self.cfg.ReleaseDRBDMinors(instance.name)
12752 raise
12753 result.append(("disk_template", self.op.disk_template))
12755 assert instance.disk_template == self.op.disk_template, \
12756 ("Expected disk template '%s', found '%s'" %
12757 (self.op.disk_template, instance.disk_template))
12759 # Release node and resource locks if there are any (they might already have
12760 # been released during disk conversion)
12761 _ReleaseLocks(self, locking.LEVEL_NODE)
12762 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12764 # Apply NIC changes
12765 if self._new_nics is not None:
12766 instance.nics = self._new_nics
12767 result.extend(self._nic_chgdesc)
12770 if self.op.hvparams:
12771 instance.hvparams = self.hv_inst
12772 for key, val in self.op.hvparams.iteritems():
12773 result.append(("hv/%s" % key, val))
12776 if self.op.beparams:
12777 instance.beparams = self.be_inst
12778 for key, val in self.op.beparams.iteritems():
12779 result.append(("be/%s" % key, val))
12782 if self.op.os_name:
12783 instance.os = self.op.os_name
12786 if self.op.osparams:
12787 instance.osparams = self.os_inst
12788 for key, val in self.op.osparams.iteritems():
12789 result.append(("os/%s" % key, val))
12791 if self.op.offline is None:
12792 # Ignore, no change requested
12793 pass
12794 elif self.op.offline:
12795 # Mark instance as offline
12796 self.cfg.MarkInstanceOffline(instance.name)
12797 result.append(("admin_state", constants.ADMINST_OFFLINE))
12798 else:
12799 # Mark instance as online, but stopped
12800 self.cfg.MarkInstanceDown(instance.name)
12801 result.append(("admin_state", constants.ADMINST_DOWN))
12803 self.cfg.Update(instance, feedback_fn)
12805 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12806 self.owned_locks(locking.LEVEL_NODE)), \
12807 "All node locks should have been released by now"
12811 _DISK_CONVERSIONS = {
12812 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12813 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
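# Illustrative sketch (not part of the original source): the dispatch table
# above maps a (current_template, requested_template) pair to its handler,
# so the conversion in Exec reduces to a single dictionary lookup, e.g.:
#
#   mode = (instance.disk_template, self.op.disk_template)
#   conversion_fn = self._DISK_CONVERSIONS[mode]
#   conversion_fn(self, feedback_fn)
#
# Pairs not listed here are expected to be rejected earlier, during the
# prerequisite checks.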
12817 class LUInstanceChangeGroup(LogicalUnit):
12818 HPATH = "instance-change-group"
12819 HTYPE = constants.HTYPE_INSTANCE
12822 def ExpandNames(self):
12823 self.share_locks = _ShareAll()
12824 self.needed_locks = {
12825 locking.LEVEL_NODEGROUP: [],
12826 locking.LEVEL_NODE: [],
12829 self._ExpandAndLockInstance()
12831 if self.op.target_groups:
12832 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12833 self.op.target_groups)
12834 else:
12835 self.req_target_uuids = None
12837 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12839 def DeclareLocks(self, level):
12840 if level == locking.LEVEL_NODEGROUP:
12841 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12843 if self.req_target_uuids:
12844 lock_groups = set(self.req_target_uuids)
12846 # Lock all groups used by instance optimistically; this requires going
12847 # via the node before it's locked, requiring verification later on
12848 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12849 lock_groups.update(instance_groups)
12850 else:
12851 # No target groups, need to lock all of them
12852 lock_groups = locking.ALL_SET
12854 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12856 elif level == locking.LEVEL_NODE:
12857 if self.req_target_uuids:
12858 # Lock all nodes used by instances
12859 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12860 self._LockInstancesNodes()
12862 # Lock all nodes in all potential target groups
12863 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12864 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12865 member_nodes = [node_name
12866 for group in lock_groups
12867 for node_name in self.cfg.GetNodeGroup(group).members]
12868 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12869 else:
12870 # Lock all nodes as all groups are potential targets
12871 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
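# Illustrative sketch (assumption): once both locking levels have been
# declared, needed_locks typically ends up shaped like
#
#   self.needed_locks = {
#     locking.LEVEL_NODEGROUP: set(["<group-uuid-a>", "<group-uuid-b>"]),
#     locking.LEVEL_NODE: ["node1.example.com", "node2.example.com"],
#   }
#
# with LEVEL_NODE possibly being locking.ALL_SET when no target groups were
# requested.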
12873 def CheckPrereq(self):
12874 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12875 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12876 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12878 assert (self.req_target_uuids is None or
12879 owned_groups.issuperset(self.req_target_uuids))
12880 assert owned_instances == set([self.op.instance_name])
12882 # Get instance information
12883 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12885 # Check if node groups for locked instance are still correct
12886 assert owned_nodes.issuperset(self.instance.all_nodes), \
12887 ("Instance %s's nodes changed while we kept the lock" %
12888 self.op.instance_name)
12890 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12891 owned_groups)
12893 if self.req_target_uuids:
12894 # User requested specific target groups
12895 self.target_uuids = self.req_target_uuids
12896 else:
12897 # All groups except those used by the instance are potential targets
12898 self.target_uuids = owned_groups - inst_groups
12900 conflicting_groups = self.target_uuids & inst_groups
12901 if conflicting_groups:
12902 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12903 " used by the instance '%s'" %
12904 (utils.CommaJoin(conflicting_groups),
12905 self.op.instance_name),
12906 errors.ECODE_INVAL)
12908 if not self.target_uuids:
12909 raise errors.OpPrereqError("There are no possible target groups",
12910 errors.ECODE_INVAL)
12912 def BuildHooksEnv(self):
12913 """Build hooks env.
12916 assert self.target_uuids
12918 env = {
12919 "TARGET_GROUPS": " ".join(self.target_uuids),
12920 }
12922 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12924 return env
12926 def BuildHooksNodes(self):
12927 """Build hooks nodes.
12930 mn = self.cfg.GetMasterNode()
12931 return ([mn], [mn])
12933 def Exec(self, feedback_fn):
12934 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12936 assert instances == [self.op.instance_name], "Instance not locked"
12938 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12939 instances=instances, target_groups=list(self.target_uuids))
12941 ial.Run(self.op.iallocator)
12943 if not ial.success:
12944 raise errors.OpPrereqError("Can't compute solution for changing group of"
12945 " instance '%s' using iallocator '%s': %s" %
12946 (self.op.instance_name, self.op.iallocator,
12947 ial.info),
12948 errors.ECODE_NORES)
12950 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12952 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12953 " instance '%s'", len(jobs), self.op.instance_name)
12955 return ResultWithJobs(jobs)
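# Illustrative sketch (assumption): the jobs computed by _LoadNodeEvacResult
# form a list of job definitions, each one itself a list of opcodes, e.g.
#
#   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1.example.com")]]
#
# so each inner list becomes one follow-up job for this group change.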
12958 class LUBackupQuery(NoHooksLU):
12959 """Query the exports list
12964 def ExpandNames(self):
12965 self.needed_locks = {}
12966 self.share_locks[locking.LEVEL_NODE] = 1
12967 if not self.op.nodes:
12968 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12969 else:
12970 self.needed_locks[locking.LEVEL_NODE] = \
12971 _GetWantedNodes(self, self.op.nodes)
12973 def Exec(self, feedback_fn):
12974 """Compute the list of all the exported system images.
12977 @return: a dictionary with the structure node->(export-list)
12978 where export-list is a list of the instances exported on
12982 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12983 rpcresult = self.rpc.call_export_list(self.nodes)
12984 result = {}
12985 for node in rpcresult:
12986 if rpcresult[node].fail_msg:
12987 result[node] = False
12988 else:
12989 result[node] = rpcresult[node].payload
12991 return result
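# Illustrative sketch (assumption): the mapping returned above could look like
#
#   {"node1.example.com": ["backup-of-inst1.example.com"],
#    "node2.example.com": False}
#
# where False marks a node whose export list could not be queried.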
12994 class LUBackupPrepare(NoHooksLU):
12995 """Prepares an instance for an export and returns useful information.
13000 def ExpandNames(self):
13001 self._ExpandAndLockInstance()
13003 def CheckPrereq(self):
13004 """Check prerequisites.
13007 instance_name = self.op.instance_name
13009 self.instance = self.cfg.GetInstanceInfo(instance_name)
13010 assert self.instance is not None, \
13011 "Cannot retrieve locked instance %s" % self.op.instance_name
13012 _CheckNodeOnline(self, self.instance.primary_node)
13014 self._cds = _GetClusterDomainSecret()
13016 def Exec(self, feedback_fn):
13017 """Prepares an instance for an export.
13020 instance = self.instance
13022 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13023 salt = utils.GenerateSecret(8)
13025 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13026 result = self.rpc.call_x509_cert_create(instance.primary_node,
13027 constants.RIE_CERT_VALIDITY)
13028 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13030 (name, cert_pem) = result.payload
13032 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13033 cert_pem)
13035 return {
13036 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13037 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13038 salt),
13039 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13040 }
13042 return None
13045 class LUBackupExport(LogicalUnit):
13046 """Export an instance to an image in the cluster.
13049 HPATH = "instance-export"
13050 HTYPE = constants.HTYPE_INSTANCE
13053 def CheckArguments(self):
13054 """Check the arguments.
13057 self.x509_key_name = self.op.x509_key_name
13058 self.dest_x509_ca_pem = self.op.destination_x509_ca
13060 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13061 if not self.x509_key_name:
13062 raise errors.OpPrereqError("Missing X509 key name for encryption",
13063 errors.ECODE_INVAL)
13065 if not self.dest_x509_ca_pem:
13066 raise errors.OpPrereqError("Missing destination X509 CA",
13067 errors.ECODE_INVAL)
13069 def ExpandNames(self):
13070 self._ExpandAndLockInstance()
13072 # Lock all nodes for local exports
13073 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13074 # FIXME: lock only instance primary and destination node
13076 # Sad but true, for now we have to lock all nodes, as we don't know where
13077 # the previous export might be, and in this LU we search for it and
13078 # remove it from its current node. In the future we could fix this by:
13079 # - making a tasklet to search (share-lock all), then create the
13080 # new one, then one to remove, after
13081 # - removing the removal operation altogether
13082 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13084 def DeclareLocks(self, level):
13085 """Last minute lock declaration."""
13086 # All nodes are locked anyway, so nothing to do here.
13088 def BuildHooksEnv(self):
13089 """Build hooks env.
13091 This will run on the master, primary node and target node.
13095 "EXPORT_MODE": self.op.mode,
13096 "EXPORT_NODE": self.op.target_node,
13097 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13098 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13099 # TODO: Generic function for boolean env variables
13100 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13103 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13107 def BuildHooksNodes(self):
13108 """Build hooks nodes.
13111 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13113 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13114 nl.append(self.op.target_node)
13116 return (nl, nl)
13118 def CheckPrereq(self):
13119 """Check prerequisites.
13121 This checks that the instance and node names are valid.
13124 instance_name = self.op.instance_name
13126 self.instance = self.cfg.GetInstanceInfo(instance_name)
13127 assert self.instance is not None, \
13128 "Cannot retrieve locked instance %s" % self.op.instance_name
13129 _CheckNodeOnline(self, self.instance.primary_node)
13131 if (self.op.remove_instance and
13132 self.instance.admin_state == constants.ADMINST_UP and
13133 not self.op.shutdown):
13134 raise errors.OpPrereqError("Can not remove instance without shutting it"
13137 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13138 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13139 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13140 assert self.dst_node is not None
13142 _CheckNodeOnline(self, self.dst_node.name)
13143 _CheckNodeNotDrained(self, self.dst_node.name)
13146 self.dest_disk_info = None
13147 self.dest_x509_ca = None
13149 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13150 self.dst_node = None
13152 if len(self.op.target_node) != len(self.instance.disks):
13153 raise errors.OpPrereqError(("Received destination information for %s"
13154 " disks, but instance %s has %s disks") %
13155 (len(self.op.target_node), instance_name,
13156 len(self.instance.disks)),
13157 errors.ECODE_INVAL)
13159 cds = _GetClusterDomainSecret()
13161 # Check X509 key name
13162 try:
13163 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13164 except (TypeError, ValueError), err:
13165 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13167 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13168 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13169 errors.ECODE_INVAL)
13171 # Load and verify CA
13172 try:
13173 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13174 except OpenSSL.crypto.Error, err:
13175 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13176 (err, ), errors.ECODE_INVAL)
13178 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13179 if errcode is not None:
13180 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13181 (msg, ), errors.ECODE_INVAL)
13183 self.dest_x509_ca = cert
13185 # Verify target information
13186 disk_info = []
13187 for idx, disk_data in enumerate(self.op.target_node):
13188 try:
13189 (host, port, magic) = \
13190 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13191 except errors.GenericError, err:
13192 raise errors.OpPrereqError("Target info for disk %s: %s" %
13193 (idx, err), errors.ECODE_INVAL)
13195 disk_info.append((host, port, magic))
13197 assert len(disk_info) == len(self.op.target_node)
13198 self.dest_disk_info = disk_info
13200 else:
13201 raise errors.ProgrammerError("Unhandled export mode %r" %
13202 self.op.mode)
13204 # instance disk type verification
13205 # TODO: Implement export support for file-based disks
13206 for disk in self.instance.disks:
13207 if disk.dev_type == constants.LD_FILE:
13208 raise errors.OpPrereqError("Export not supported for instances with"
13209 " file-based disks", errors.ECODE_INVAL)
13211 def _CleanupExports(self, feedback_fn):
13212 """Removes exports of current instance from all other nodes.
13214 If an instance in a cluster with nodes A..D was exported to node C, its
13215 exports will be removed from the nodes A, B and D.
13218 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13220 nodelist = self.cfg.GetNodeList()
13221 nodelist.remove(self.dst_node.name)
13223 # on one-node clusters nodelist will be empty after the removal
13224 # if we proceed the backup would be removed because OpBackupQuery
13225 # substitutes an empty list with the full cluster node list.
13226 iname = self.instance.name
13228 feedback_fn("Removing old exports for instance %s" % iname)
13229 exportlist = self.rpc.call_export_list(nodelist)
13230 for node in exportlist:
13231 if exportlist[node].fail_msg:
13232 continue
13233 if iname in exportlist[node].payload:
13234 msg = self.rpc.call_export_remove(node, iname).fail_msg
13235 if msg:
13236 self.LogWarning("Could not remove older export for instance %s"
13237 " on node %s: %s", iname, node, msg)
13239 def Exec(self, feedback_fn):
13240 """Export an instance to an image in the cluster.
13243 assert self.op.mode in constants.EXPORT_MODES
13245 instance = self.instance
13246 src_node = instance.primary_node
13248 if self.op.shutdown:
13249 # shutdown the instance, but not the disks
13250 feedback_fn("Shutting down instance %s" % instance.name)
13251 result = self.rpc.call_instance_shutdown(src_node, instance,
13252 self.op.shutdown_timeout)
13253 # TODO: Maybe ignore failures if ignore_remove_failures is set
13254 result.Raise("Could not shutdown instance %s on"
13255 " node %s" % (instance.name, src_node))
13257 # set the disks ID correctly since call_instance_start needs the
13258 # correct drbd minor to create the symlinks
13259 for disk in instance.disks:
13260 self.cfg.SetDiskID(disk, src_node)
13262 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13264 if activate_disks:
13265 # Activate the instance disks if we're exporting a stopped instance
13266 feedback_fn("Activating disks for %s" % instance.name)
13267 _StartInstanceDisks(self, instance, None)
13270 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13271 instance)
13273 helper.CreateSnapshots()
13275 if (self.op.shutdown and
13276 instance.admin_state == constants.ADMINST_UP and
13277 not self.op.remove_instance):
13278 assert not activate_disks
13279 feedback_fn("Starting instance %s" % instance.name)
13280 result = self.rpc.call_instance_start(src_node,
13281 (instance, None, None), False)
13282 msg = result.fail_msg
13283 if msg:
13284 feedback_fn("Failed to start instance: %s" % msg)
13285 _ShutdownInstanceDisks(self, instance)
13286 raise errors.OpExecError("Could not start instance: %s" % msg)
13288 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13289 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13290 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13291 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13292 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13294 (key_name, _, _) = self.x509_key_name
13296 dest_ca_pem = \
13297 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13298 self.dest_x509_ca)
13300 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13301 key_name, dest_ca_pem,
13302 timeouts)
13306 # Check for backwards compatibility
13307 assert len(dresults) == len(instance.disks)
13308 assert compat.all(isinstance(i, bool) for i in dresults), \
13309 "Not all results are boolean: %r" % dresults
13313 feedback_fn("Deactivating disks for %s" % instance.name)
13314 _ShutdownInstanceDisks(self, instance)
13316 if not (compat.all(dresults) and fin_resu):
13317 failures = []
13318 if not fin_resu:
13319 failures.append("export finalization")
13320 if not compat.all(dresults):
13321 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13322 if not dsk)
13323 failures.append("disk export: disk(s) %s" % fdsk)
13325 raise errors.OpExecError("Export failed, errors in %s" %
13326 utils.CommaJoin(failures))
13328 # At this point, the export was successful, we can cleanup/finish
13330 # Remove instance if requested
13331 if self.op.remove_instance:
13332 feedback_fn("Removing instance %s" % instance.name)
13333 _RemoveInstance(self, feedback_fn, instance,
13334 self.op.ignore_remove_failures)
13336 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13337 self._CleanupExports(feedback_fn)
13339 return fin_resu, dresults
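# Illustrative sketch (assumption): the LU result is the pair checked above,
# e.g.
#
#   (fin_resu, dresults) == (True, [True, True])
#
# one boolean for export finalization plus one boolean per instance disk.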
13342 class LUBackupRemove(NoHooksLU):
13343 """Remove exports related to the named instance.
13348 def ExpandNames(self):
13349 self.needed_locks = {}
13350 # We need all nodes to be locked in order for RemoveExport to work, but we
13351 # don't need to lock the instance itself, as nothing will happen to it (and
13352 # we can remove exports also for a removed instance)
13353 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13355 def Exec(self, feedback_fn):
13356 """Remove any export.
13359 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13360 # If the instance was not found we'll try with the name that was passed in.
13361 # This will only work if it was an FQDN, though.
13362 fqdn_warn = False
13363 if not instance_name:
13364 fqdn_warn = True
13365 instance_name = self.op.instance_name
13367 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13368 exportlist = self.rpc.call_export_list(locked_nodes)
13369 found = False
13370 for node in exportlist:
13371 msg = exportlist[node].fail_msg
13372 if msg:
13373 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13374 continue
13375 if instance_name in exportlist[node].payload:
13376 found = True
13377 result = self.rpc.call_export_remove(node, instance_name)
13378 msg = result.fail_msg
13379 if msg:
13380 logging.error("Could not remove export for instance %s"
13381 " on node %s: %s", instance_name, node, msg)
13383 if fqdn_warn and not found:
13384 feedback_fn("Export not found. If trying to remove an export belonging"
13385 " to a deleted instance please use its Fully Qualified"
13386 " Domain Name.")
13389 class LUGroupAdd(LogicalUnit):
13390 """Logical unit for creating node groups.
13393 HPATH = "group-add"
13394 HTYPE = constants.HTYPE_GROUP
13397 def ExpandNames(self):
13398 # We need the new group's UUID here so that we can create and acquire the
13399 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13400 # that it should not check whether the UUID exists in the configuration.
13401 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13402 self.needed_locks = {}
13403 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13405 def CheckPrereq(self):
13406 """Check prerequisites.
13408 This checks that the given group name is not an existing node group
13412 try:
13413 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13414 except errors.OpPrereqError:
13415 pass
13416 else:
13417 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13418 " node group (UUID: %s)" %
13419 (self.op.group_name, existing_uuid),
13420 errors.ECODE_EXISTS)
13422 if self.op.ndparams:
13423 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13425 if self.op.hv_state:
13426 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13428 self.new_hv_state = None
13430 if self.op.disk_state:
13431 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13433 self.new_disk_state = None
13435 if self.op.diskparams:
13436 for templ in constants.DISK_TEMPLATES:
13437 if templ not in self.op.diskparams:
13438 self.op.diskparams[templ] = {}
13439 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13440 else:
13441 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13443 if self.op.ipolicy:
13444 cluster = self.cfg.GetClusterInfo()
13445 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13446 try:
13447 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13448 except errors.ConfigurationError, err:
13449 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13450 errors.ECODE_INVAL)
13452 def BuildHooksEnv(self):
13453 """Build hooks env.
13457 "GROUP_NAME": self.op.group_name,
13460 def BuildHooksNodes(self):
13461 """Build hooks nodes.
13464 mn = self.cfg.GetMasterNode()
13465 return ([mn], [mn])
13467 def Exec(self, feedback_fn):
13468 """Add the node group to the cluster.
13471 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13472 uuid=self.group_uuid,
13473 alloc_policy=self.op.alloc_policy,
13474 ndparams=self.op.ndparams,
13475 diskparams=self.op.diskparams,
13476 ipolicy=self.op.ipolicy,
13477 hv_state_static=self.new_hv_state,
13478 disk_state_static=self.new_disk_state)
13480 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13481 del self.remove_locks[locking.LEVEL_NODEGROUP]
13484 class LUGroupAssignNodes(NoHooksLU):
13485 """Logical unit for assigning nodes to groups.
13490 def ExpandNames(self):
13491 # These raise errors.OpPrereqError on their own:
13492 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13493 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13495 # We want to lock all the affected nodes and groups. We have readily
13496 # available the list of nodes, and the *destination* group. To gather the
13497 # list of "source" groups, we need to fetch node information later on.
13498 self.needed_locks = {
13499 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13500 locking.LEVEL_NODE: self.op.nodes,
13503 def DeclareLocks(self, level):
13504 if level == locking.LEVEL_NODEGROUP:
13505 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13507 # Try to get all affected nodes' groups without having the group or node
13508 # lock yet. Needs verification later in the code flow.
13509 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13511 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13513 def CheckPrereq(self):
13514 """Check prerequisites.
13517 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13518 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13519 frozenset(self.op.nodes))
13521 expected_locks = (set([self.group_uuid]) |
13522 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13523 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13524 if actual_locks != expected_locks:
13525 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13526 " current groups are '%s', used to be '%s'" %
13527 (utils.CommaJoin(expected_locks),
13528 utils.CommaJoin(actual_locks)))
13530 self.node_data = self.cfg.GetAllNodesInfo()
13531 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13532 instance_data = self.cfg.GetAllInstancesInfo()
13534 if self.group is None:
13535 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13536 (self.op.group_name, self.group_uuid))
13538 (new_splits, previous_splits) = \
13539 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13540 for node in self.op.nodes],
13541 self.node_data, instance_data)
13543 if new_splits:
13544 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13546 if not self.op.force:
13547 raise errors.OpExecError("The following instances get split by this"
13548 " change and --force was not given: %s" %
13549 fmt_new_splits)
13550 else:
13551 self.LogWarning("This operation will split the following instances: %s",
13552 fmt_new_splits)
13554 if previous_splits:
13555 self.LogWarning("In addition, these already-split instances continue"
13556 " to be split across groups: %s",
13557 utils.CommaJoin(utils.NiceSort(previous_splits)))
13559 def Exec(self, feedback_fn):
13560 """Assign nodes to a new group.
13563 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13565 self.cfg.AssignGroupNodes(mods)
13567 @staticmethod
13568 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13569 """Check for split instances after a node assignment.
13571 This method considers a series of node assignments as an atomic operation,
13572 and returns information about split instances after applying the set of
13575 In particular, it returns information about newly split instances, and
13576 instances that were already split, and remain so after the change.
13578 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13581 @type changes: list of (node_name, new_group_uuid) pairs.
13582 @param changes: list of node assignments to consider.
13583 @param node_data: a dict with data for all nodes
13584 @param instance_data: a dict with all instances to consider
13585 @rtype: a two-tuple
13586 @return: a list of instances that were previously okay and result split as a
13587 consequence of this change, and a list of instances that were previously
13588 split and this change does not fix.
13591 changed_nodes = dict((node, group) for node, group in changes
13592 if node_data[node].group != group)
13594 all_split_instances = set()
13595 previously_split_instances = set()
13597 def InstanceNodes(instance):
13598 return [instance.primary_node] + list(instance.secondary_nodes)
13600 for inst in instance_data.values():
13601 if inst.disk_template not in constants.DTS_INT_MIRROR:
13602 continue
13604 instance_nodes = InstanceNodes(inst)
13606 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13607 previously_split_instances.add(inst.name)
13609 if len(set(changed_nodes.get(node, node_data[node].group)
13610 for node in instance_nodes)) > 1:
13611 all_split_instances.add(inst.name)
13613 return (list(all_split_instances - previously_split_instances),
13614 list(previously_split_instances & all_split_instances))
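# Illustrative sketch (assumption): for a DRBD instance "inst1.example.com"
# currently placed on "node1" and "node2", both in group "g1", reassigning
# only "node2" to group "g2" would make the check report it as newly split:
#
#   changes = [("node2", "g2-uuid")]
#   (new, previous) = lu.CheckAssignmentForSplitInstances(
#       changes, lu.cfg.GetAllNodesInfo(), lu.cfg.GetAllInstancesInfo())
#   # new == ["inst1.example.com"], previous == []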
13617 class _GroupQuery(_QueryBase):
13618 FIELDS = query.GROUP_FIELDS
13620 def ExpandNames(self, lu):
13621 lu.needed_locks = {}
13623 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13624 self._cluster = lu.cfg.GetClusterInfo()
13625 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13627 if not self.names:
13628 self.wanted = [name_to_uuid[name]
13629 for name in utils.NiceSort(name_to_uuid.keys())]
13630 else:
13631 # Accept names to be either names or UUIDs.
13632 missing = []
13633 self.wanted = []
13634 all_uuid = frozenset(self._all_groups.keys())
13636 for name in self.names:
13637 if name in all_uuid:
13638 self.wanted.append(name)
13639 elif name in name_to_uuid:
13640 self.wanted.append(name_to_uuid[name])
13641 else:
13642 missing.append(name)
13644 if missing:
13645 raise errors.OpPrereqError("Some groups do not exist: %s" %
13646 utils.CommaJoin(missing),
13647 errors.ECODE_NOENT)
13649 def DeclareLocks(self, lu, level):
13650 pass
13652 def _GetQueryData(self, lu):
13653 """Computes the list of node groups and their attributes.
13656 do_nodes = query.GQ_NODE in self.requested_data
13657 do_instances = query.GQ_INST in self.requested_data
13659 group_to_nodes = None
13660 group_to_instances = None
13662 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13663 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13664 # latter GetAllInstancesInfo() is not enough, for we have to go through
13665 # instance->node. Hence, we will need to process nodes even if we only need
13666 # instance information.
13667 if do_nodes or do_instances:
13668 all_nodes = lu.cfg.GetAllNodesInfo()
13669 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13671 node_to_group = {}
13672 for node in all_nodes.values():
13673 if node.group in group_to_nodes:
13674 group_to_nodes[node.group].append(node.name)
13675 node_to_group[node.name] = node.group
13677 if do_instances:
13678 all_instances = lu.cfg.GetAllInstancesInfo()
13679 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13681 for instance in all_instances.values():
13682 node = instance.primary_node
13683 if node in node_to_group:
13684 group_to_instances[node_to_group[node]].append(instance.name)
13686 if not do_nodes:
13687 # Do not pass on node information if it was not requested.
13688 group_to_nodes = None
13690 return query.GroupQueryData(self._cluster,
13691 [self._all_groups[uuid]
13692 for uuid in self.wanted],
13693 group_to_nodes, group_to_instances)
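# Illustrative sketch (assumption): with both GQ_NODE and GQ_INST requested,
# the two mappings passed to GroupQueryData are keyed by the group UUIDs in
# self.wanted, e.g.
#
#   group_to_nodes == {"g1-uuid": ["node1.example.com", "node2.example.com"]}
#   group_to_instances == {"g1-uuid": ["inst1.example.com"]}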
13696 class LUGroupQuery(NoHooksLU):
13697 """Logical unit for querying node groups.
13702 def CheckArguments(self):
13703 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13704 self.op.output_fields, False)
13706 def ExpandNames(self):
13707 self.gq.ExpandNames(self)
13709 def DeclareLocks(self, level):
13710 self.gq.DeclareLocks(self, level)
13712 def Exec(self, feedback_fn):
13713 return self.gq.OldStyleQuery(self)
13716 class LUGroupSetParams(LogicalUnit):
13717 """Modifies the parameters of a node group.
13720 HPATH = "group-modify"
13721 HTYPE = constants.HTYPE_GROUP
13724 def CheckArguments(self):
13725 all_changes = [
13726 self.op.ndparams,
13727 self.op.diskparams,
13728 self.op.alloc_policy,
13729 self.op.hv_state,
13730 self.op.disk_state,
13731 self.op.ipolicy,
13732 ]
13734 if all_changes.count(None) == len(all_changes):
13735 raise errors.OpPrereqError("Please pass at least one modification",
13736 errors.ECODE_INVAL)
13738 def ExpandNames(self):
13739 # This raises errors.OpPrereqError on its own:
13740 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13742 self.needed_locks = {
13743 locking.LEVEL_INSTANCE: [],
13744 locking.LEVEL_NODEGROUP: [self.group_uuid],
13747 self.share_locks[locking.LEVEL_INSTANCE] = 1
13749 def DeclareLocks(self, level):
13750 if level == locking.LEVEL_INSTANCE:
13751 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13753 # Lock instances optimistically, needs verification once group lock has
13755 self.needed_locks[locking.LEVEL_INSTANCE] = \
13756 self.cfg.GetNodeGroupInstances(self.group_uuid)
13758 def CheckPrereq(self):
13759 """Check prerequisites.
13762 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13764 # Check if locked instances are still correct
13765 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13767 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13768 cluster = self.cfg.GetClusterInfo()
13770 if self.group is None:
13771 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13772 (self.op.group_name, self.group_uuid))
13774 if self.op.ndparams:
13775 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13776 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13777 self.new_ndparams = new_ndparams
13779 if self.op.diskparams:
13780 self.new_diskparams = dict()
13781 for templ in constants.DISK_TEMPLATES:
13782 if templ not in self.op.diskparams:
13783 self.op.diskparams[templ] = {}
13784 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13785 self.op.diskparams[templ])
13786 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13787 self.new_diskparams[templ] = new_templ_params
13789 if self.op.hv_state:
13790 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13791 self.group.hv_state_static)
13793 if self.op.disk_state:
13794 self.new_disk_state = \
13795 _MergeAndVerifyDiskState(self.op.disk_state,
13796 self.group.disk_state_static)
13798 if self.op.ipolicy:
13799 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13803 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13804 inst_filter = lambda inst: inst.name in owned_instances
13805 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13806 violations = \
13807 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13808 self.group),
13809 new_ipolicy, instances)
13811 if violations:
13812 self.LogWarning("After the ipolicy change the following instances"
13813 " violate them: %s",
13814 utils.CommaJoin(violations))
13816 def BuildHooksEnv(self):
13817 """Build hooks env.
13821 "GROUP_NAME": self.op.group_name,
13822 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13825 def BuildHooksNodes(self):
13826 """Build hooks nodes.
13829 mn = self.cfg.GetMasterNode()
13830 return ([mn], [mn])
13832 def Exec(self, feedback_fn):
13833 """Modifies the node group.
13838 if self.op.ndparams:
13839 self.group.ndparams = self.new_ndparams
13840 result.append(("ndparams", str(self.group.ndparams)))
13842 if self.op.diskparams:
13843 self.group.diskparams = self.new_diskparams
13844 result.append(("diskparams", str(self.group.diskparams)))
13846 if self.op.alloc_policy:
13847 self.group.alloc_policy = self.op.alloc_policy
13849 if self.op.hv_state:
13850 self.group.hv_state_static = self.new_hv_state
13852 if self.op.disk_state:
13853 self.group.disk_state_static = self.new_disk_state
13855 if self.op.ipolicy:
13856 self.group.ipolicy = self.new_ipolicy
13858 self.cfg.Update(self.group, feedback_fn)
13862 class LUGroupRemove(LogicalUnit):
13863 HPATH = "group-remove"
13864 HTYPE = constants.HTYPE_GROUP
13867 def ExpandNames(self):
13868 # This raises errors.OpPrereqError on its own:
13869 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13870 self.needed_locks = {
13871 locking.LEVEL_NODEGROUP: [self.group_uuid],
13874 def CheckPrereq(self):
13875 """Check prerequisites.
13877 This checks that the given group name exists as a node group, that it is
13878 empty (i.e., contains no nodes), and that it is not the last group of the
13879 cluster.
13882 # Verify that the group is empty.
13883 group_nodes = [node.name
13884 for node in self.cfg.GetAllNodesInfo().values()
13885 if node.group == self.group_uuid]
13888 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13890 (self.op.group_name,
13891 utils.CommaJoin(utils.NiceSort(group_nodes))),
13892 errors.ECODE_STATE)
13894 # Verify the cluster would not be left group-less.
13895 if len(self.cfg.GetNodeGroupList()) == 1:
13896 raise errors.OpPrereqError("Group '%s' is the only group,"
13897 " cannot be removed" %
13898 self.op.group_name,
13899 errors.ECODE_STATE)
13901 def BuildHooksEnv(self):
13902 """Build hooks env.
13906 "GROUP_NAME": self.op.group_name,
13909 def BuildHooksNodes(self):
13910 """Build hooks nodes.
13913 mn = self.cfg.GetMasterNode()
13914 return ([mn], [mn])
13916 def Exec(self, feedback_fn):
13917 """Remove the node group.
13920 try:
13921 self.cfg.RemoveNodeGroup(self.group_uuid)
13922 except errors.ConfigurationError:
13923 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13924 (self.op.group_name, self.group_uuid))
13926 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13929 class LUGroupRename(LogicalUnit):
13930 HPATH = "group-rename"
13931 HTYPE = constants.HTYPE_GROUP
13934 def ExpandNames(self):
13935 # This raises errors.OpPrereqError on its own:
13936 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13938 self.needed_locks = {
13939 locking.LEVEL_NODEGROUP: [self.group_uuid],
13942 def CheckPrereq(self):
13943 """Check prerequisites.
13945 Ensures requested new name is not yet used.
13948 try:
13949 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13950 except errors.OpPrereqError:
13951 pass
13952 else:
13953 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13954 " node group (UUID: %s)" %
13955 (self.op.new_name, new_name_uuid),
13956 errors.ECODE_EXISTS)
13958 def BuildHooksEnv(self):
13959 """Build hooks env.
13963 "OLD_NAME": self.op.group_name,
13964 "NEW_NAME": self.op.new_name,
13967 def BuildHooksNodes(self):
13968 """Build hooks nodes.
13971 mn = self.cfg.GetMasterNode()
13973 all_nodes = self.cfg.GetAllNodesInfo()
13974 all_nodes.pop(mn, None)
13976 run_nodes = [mn]
13977 run_nodes.extend(node.name for node in all_nodes.values()
13978 if node.group == self.group_uuid)
13980 return (run_nodes, run_nodes)
13982 def Exec(self, feedback_fn):
13983 """Rename the node group.
13986 group = self.cfg.GetNodeGroup(self.group_uuid)
13989 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13990 (self.op.group_name, self.group_uuid))
13992 group.name = self.op.new_name
13993 self.cfg.Update(group, feedback_fn)
13995 return self.op.new_name
13998 class LUGroupEvacuate(LogicalUnit):
13999 HPATH = "group-evacuate"
14000 HTYPE = constants.HTYPE_GROUP
14003 def ExpandNames(self):
14004 # This raises errors.OpPrereqError on its own:
14005 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14007 if self.op.target_groups:
14008 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14009 self.op.target_groups)
14010 else:
14011 self.req_target_uuids = []
14013 if self.group_uuid in self.req_target_uuids:
14014 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14015 " as a target group (targets are %s)" %
14016 (self.op.group_name,
14017 utils.CommaJoin(self.req_target_uuids)),
14018 errors.ECODE_INVAL)
14020 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14022 self.share_locks = _ShareAll()
14023 self.needed_locks = {
14024 locking.LEVEL_INSTANCE: [],
14025 locking.LEVEL_NODEGROUP: [],
14026 locking.LEVEL_NODE: [],
14029 def DeclareLocks(self, level):
14030 if level == locking.LEVEL_INSTANCE:
14031 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14033 # Lock instances optimistically, needs verification once node and group
14034 # locks have been acquired
14035 self.needed_locks[locking.LEVEL_INSTANCE] = \
14036 self.cfg.GetNodeGroupInstances(self.group_uuid)
14038 elif level == locking.LEVEL_NODEGROUP:
14039 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14041 if self.req_target_uuids:
14042 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14044 # Lock all groups used by instances optimistically; this requires going
14045 # via the node before it's locked, requiring verification later on
14046 lock_groups.update(group_uuid
14047 for instance_name in
14048 self.owned_locks(locking.LEVEL_INSTANCE)
14049 for group_uuid in
14050 self.cfg.GetInstanceNodeGroups(instance_name))
14051 else:
14052 # No target groups, need to lock all of them
14053 lock_groups = locking.ALL_SET
14055 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14057 elif level == locking.LEVEL_NODE:
14058 # This will only lock the nodes in the group to be evacuated which
14059 # contain actual instances
14060 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14061 self._LockInstancesNodes()
14063 # Lock all nodes in group to be evacuated and target groups
14064 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14065 assert self.group_uuid in owned_groups
14066 member_nodes = [node_name
14067 for group in owned_groups
14068 for node_name in self.cfg.GetNodeGroup(group).members]
14069 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14071 def CheckPrereq(self):
14072 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14073 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14074 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14076 assert owned_groups.issuperset(self.req_target_uuids)
14077 assert self.group_uuid in owned_groups
14079 # Check if locked instances are still correct
14080 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14082 # Get instance information
14083 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14085 # Check if node groups for locked instances are still correct
14086 for instance_name in owned_instances:
14087 inst = self.instances[instance_name]
14088 assert owned_nodes.issuperset(inst.all_nodes), \
14089 "Instance %s's nodes changed while we kept the lock" % instance_name
14091 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
14092 owned_groups)
14094 assert self.group_uuid in inst_groups, \
14095 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
14097 if self.req_target_uuids:
14098 # User requested specific target groups
14099 self.target_uuids = self.req_target_uuids
14100 else:
14101 # All groups except the one to be evacuated are potential targets
14102 self.target_uuids = [group_uuid for group_uuid in owned_groups
14103 if group_uuid != self.group_uuid]
14105 if not self.target_uuids:
14106 raise errors.OpPrereqError("There are no possible target groups",
14107 errors.ECODE_INVAL)
14109 def BuildHooksEnv(self):
14110 """Build hooks env.
14114 "GROUP_NAME": self.op.group_name,
14115 "TARGET_GROUPS": " ".join(self.target_uuids),
14118 def BuildHooksNodes(self):
14119 """Build hooks nodes.
14122 mn = self.cfg.GetMasterNode()
14124 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14126 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14128 return (run_nodes, run_nodes)
14130 def Exec(self, feedback_fn):
14131 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14133 assert self.group_uuid not in self.target_uuids
14135 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14136 instances=instances, target_groups=self.target_uuids)
14138 ial.Run(self.op.iallocator)
14140 if not ial.success:
14141 raise errors.OpPrereqError("Can't compute group evacuation using"
14142 " iallocator '%s': %s" %
14143 (self.op.iallocator, ial.info),
14144 errors.ECODE_NORES)
14146 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14148 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14149 len(jobs), self.op.group_name)
14151 return ResultWithJobs(jobs)
14154 class TagsLU(NoHooksLU): # pylint: disable=W0223
14155 """Generic tags LU.
14157 This is an abstract class which is the parent of all the other tags LUs.
14160 def ExpandNames(self):
14161 self.group_uuid = None
14162 self.needed_locks = {}
14163 if self.op.kind == constants.TAG_NODE:
14164 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14165 self.needed_locks[locking.LEVEL_NODE] = self.op.name
14166 elif self.op.kind == constants.TAG_INSTANCE:
14167 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14168 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14169 elif self.op.kind == constants.TAG_NODEGROUP:
14170 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14172 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14173 # not possible to acquire the BGL based on opcode parameters)
14175 def CheckPrereq(self):
14176 """Check prerequisites.
14179 if self.op.kind == constants.TAG_CLUSTER:
14180 self.target = self.cfg.GetClusterInfo()
14181 elif self.op.kind == constants.TAG_NODE:
14182 self.target = self.cfg.GetNodeInfo(self.op.name)
14183 elif self.op.kind == constants.TAG_INSTANCE:
14184 self.target = self.cfg.GetInstanceInfo(self.op.name)
14185 elif self.op.kind == constants.TAG_NODEGROUP:
14186 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14188 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14189 str(self.op.kind), errors.ECODE_INVAL)
14192 class LUTagsGet(TagsLU):
14193 """Returns the tags of a given object.
14198 def ExpandNames(self):
14199 TagsLU.ExpandNames(self)
14201 # Share locks as this is only a read operation
14202 self.share_locks = _ShareAll()
14204 def Exec(self, feedback_fn):
14205 """Returns the tag list.
14208 return list(self.target.GetTags())
14211 class LUTagsSearch(NoHooksLU):
14212 """Searches the tags for a given pattern.
14217 def ExpandNames(self):
14218 self.needed_locks = {}
14220 def CheckPrereq(self):
14221 """Check prerequisites.
14223 This checks the pattern passed for validity by compiling it.
14226 try:
14227 self.re = re.compile(self.op.pattern)
14228 except re.error, err:
14229 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14230 (self.op.pattern, err), errors.ECODE_INVAL)
14232 def Exec(self, feedback_fn):
14233 """Returns the tag list.
14237 tgts = [("/cluster", cfg.GetClusterInfo())]
14238 ilist = cfg.GetAllInstancesInfo().values()
14239 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14240 nlist = cfg.GetAllNodesInfo().values()
14241 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14242 tgts.extend(("/nodegroup/%s" % n.name, n)
14243 for n in cfg.GetAllNodeGroupsInfo().values())
14244 results = []
14245 for path, target in tgts:
14246 for tag in target.GetTags():
14247 if self.re.search(tag):
14248 results.append((path, tag))
14249 return results
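# Illustrative sketch (assumption): searching for the pattern "^db" might
# return
#
#   [("/instances/db1.example.com", "dbserver"), ("/cluster", "dbfarm")]
#
# i.e. one (path, tag) pair per matching tag across cluster, nodes, node
# groups and instances.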
14252 class LUTagsSet(TagsLU):
14253 """Sets a tag on a given object.
14258 def CheckPrereq(self):
14259 """Check prerequisites.
14261 This checks the type and length of the tag name and value.
14264 TagsLU.CheckPrereq(self)
14265 for tag in self.op.tags:
14266 objects.TaggableObject.ValidateTag(tag)
14268 def Exec(self, feedback_fn):
14272 try:
14273 for tag in self.op.tags:
14274 self.target.AddTag(tag)
14275 except errors.TagError, err:
14276 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14277 self.cfg.Update(self.target, feedback_fn)
14280 class LUTagsDel(TagsLU):
14281 """Delete a list of tags from a given object.
14286 def CheckPrereq(self):
14287 """Check prerequisites.
14289 This checks that we have the given tag.
14292 TagsLU.CheckPrereq(self)
14293 for tag in self.op.tags:
14294 objects.TaggableObject.ValidateTag(tag)
14295 del_tags = frozenset(self.op.tags)
14296 cur_tags = self.target.GetTags()
14298 diff_tags = del_tags - cur_tags
14299 if diff_tags:
14300 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14301 raise errors.OpPrereqError("Tag(s) %s not found" %
14302 (utils.CommaJoin(diff_names), ),
14303 errors.ECODE_NOENT)
14305 def Exec(self, feedback_fn):
14306 """Remove the tag from the object.
14309 for tag in self.op.tags:
14310 self.target.RemoveTag(tag)
14311 self.cfg.Update(self.target, feedback_fn)
14314 class LUTestDelay(NoHooksLU):
14315 """Sleep for a specified amount of time.
14317 This LU sleeps on the master and/or nodes for a specified amount of
14323 def ExpandNames(self):
14324 """Expand names and set required locks.
14326 This expands the node list, if any.
14329 self.needed_locks = {}
14330 if self.op.on_nodes:
14331 # _GetWantedNodes can be used here, but is not always appropriate to use
14332 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14333 # more information.
14334 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14335 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14337 def _TestDelay(self):
14338 """Do the actual sleep.
14341 if self.op.on_master:
14342 if not utils.TestDelay(self.op.duration):
14343 raise errors.OpExecError("Error during master delay test")
14344 if self.op.on_nodes:
14345 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14346 for node, node_result in result.items():
14347 node_result.Raise("Failure during rpc call to node %s" % node)
14349 def Exec(self, feedback_fn):
14350 """Execute the test delay opcode, with the wanted repetitions.
14353 if self.op.repeat == 0:
14354 self._TestDelay()
14355 else:
14356 top_value = self.op.repeat - 1
14357 for i in range(self.op.repeat):
14358 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14359 self._TestDelay()
14362 class LUTestJqueue(NoHooksLU):
14363 """Utility LU to test some aspects of the job queue.
14368 # Must be lower than default timeout for WaitForJobChange to see whether it
14369 # notices changed jobs
14370 _CLIENT_CONNECT_TIMEOUT = 20.0
14371 _CLIENT_CONFIRM_TIMEOUT = 60.0
14374 def _NotifyUsingSocket(cls, cb, errcls):
14375 """Opens a Unix socket and waits for another program to connect.
14378 @param cb: Callback to send socket name to client
14379 @type errcls: class
14380 @param errcls: Exception class to use for errors
14383 # Using a temporary directory as there's no easy way to create temporary
14384 # sockets without writing a custom loop around tempfile.mktemp and
14386 tmpdir = tempfile.mkdtemp()
14388 tmpsock = utils.PathJoin(tmpdir, "sock")
14390 logging.debug("Creating temporary socket at %s", tmpsock)
14391 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14396 # Send details to client
14399 # Wait for client to connect before continuing
14400 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14401 try:
14402 (conn, _) = sock.accept()
14403 except socket.error, err:
14404 raise errcls("Client didn't connect in time (%s)" % err)
14408 # Remove as soon as client is connected
14409 shutil.rmtree(tmpdir)
14411 # Wait for client to close
14414 # pylint: disable=E1101
14415 # Instance of '_socketobject' has no ... member
14416 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14418 except socket.error, err:
14419 raise errcls("Client failed to confirm notification (%s)" % err)
14423 def _SendNotification(self, test, arg, sockname):
14424 """Sends a notification to the client.
14427 @param test: Test name
14428 @param arg: Test argument (depends on test)
14429 @type sockname: string
14430 @param sockname: Socket path
14433 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14435 def _Notify(self, prereq, test, arg):
14436 """Notifies the client of a test.
14439 @param prereq: Whether this is a prereq-phase test
14441 @param test: Test name
14442 @param arg: Test argument (depends on test)
14445 if prereq:
14446 errcls = errors.OpPrereqError
14447 else:
14448 errcls = errors.OpExecError
14450 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14451 test, arg),
14452 errcls)
14454 def CheckArguments(self):
14455 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14456 self.expandnames_calls = 0
14458 def ExpandNames(self):
14459 checkargs_calls = getattr(self, "checkargs_calls", 0)
14460 if checkargs_calls < 1:
14461 raise errors.ProgrammerError("CheckArguments was not called")
14463 self.expandnames_calls += 1
14465 if self.op.notify_waitlock:
14466 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14468 self.LogInfo("Expanding names")
14470 # Get lock on master node (just to get a lock, not for a particular reason)
14471 self.needed_locks = {
14472 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14475 def Exec(self, feedback_fn):
14476 if self.expandnames_calls < 1:
14477 raise errors.ProgrammerError("ExpandNames was not called")
14479 if self.op.notify_exec:
14480 self._Notify(False, constants.JQT_EXEC, None)
14482 self.LogInfo("Executing")
14484 if self.op.log_messages:
14485 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14486 for idx, msg in enumerate(self.op.log_messages):
14487 self.LogInfo("Sending log message %s", idx + 1)
14488 feedback_fn(constants.JQT_MSGPREFIX + msg)
14489 # Report how many test messages have been sent
14490 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14493 raise errors.OpExecError("Opcode failure was requested")
14498 class IAllocator(object):
14499 """IAllocator framework.
14501 An IAllocator instance has three sets of attributes:
14502 - cfg that is needed to query the cluster
14503 - input data (all members of the _KEYS class attribute are required)
14504 - four buffer attributes (in|out_data|text), that represent the
14505 input (to the external script) in text and data structure format,
14506 and the output from it, again in two formats
14507 - the result variables from the script (success, info, nodes) for
14511 # pylint: disable=R0902
14512 # lots of instance attributes
14514 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14515 self.cfg = cfg
14516 self.rpc = rpc_runner
14517 # init buffer variables
14518 self.in_text = self.out_text = self.in_data = self.out_data = None
14519 # init all input fields so that pylint is happy
14520 self.mode = mode
14521 self.memory = self.disks = self.disk_template = self.spindle_usage = None
14522 self.os = self.tags = self.nics = self.vcpus = None
14523 self.hypervisor = None
14524 self.relocate_from = None
14526 self.instances = None
14527 self.evac_mode = None
14528 self.target_groups = []
14530 self.required_nodes = None
14531 # init result fields
14532 self.success = self.info = self.result = None
14534 try:
14535 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14536 except KeyError:
14537 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14538 " IAllocator" % self.mode)
14540 keyset = [n for (n, _) in keydata]
14542 for key in kwargs:
14543 if key not in keyset:
14544 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14545 " IAllocator" % key)
14546 setattr(self, key, kwargs[key])
14548 for key in keyset:
14549 if key not in kwargs:
14550 raise errors.ProgrammerError("Missing input parameter '%s' to"
14551 " IAllocator" % key)
14552 self._BuildInputData(compat.partial(fn, self), keydata)
14554 def _ComputeClusterData(self):
14555 """Compute the generic allocator input data.
14557 This is the data that is independent of the actual operation.
14560 cfg = self.cfg
14561 cluster_info = cfg.GetClusterInfo()
14563 data = {
14564 "version": constants.IALLOCATOR_VERSION,
14565 "cluster_name": cfg.GetClusterName(),
14566 "cluster_tags": list(cluster_info.GetTags()),
14567 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14568 "ipolicy": cluster_info.ipolicy,
14569 }
14570 ninfo = cfg.GetAllNodesInfo()
14571 iinfo = cfg.GetAllInstancesInfo().values()
14572 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14575 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14577 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14578 hypervisor_name = self.hypervisor
14579 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14580 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14582 hypervisor_name = cluster_info.primary_hypervisor
14584 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14585 [hypervisor_name])
14586 node_iinfo = \
14587 self.rpc.call_all_instances_info(node_list,
14588 cluster_info.enabled_hypervisors)
14590 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14592 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14593 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14594 i_list, config_ndata)
14595 assert len(data["nodes"]) == len(ninfo), \
14596 "Incomplete node data computed"
14598 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14600 self.in_data = data
14602 @staticmethod
14603 def _ComputeNodeGroupData(cfg):
14604 """Compute node groups data.
14607 cluster = cfg.GetClusterInfo()
14608 ng = dict((guuid, {
14609 "name": gdata.name,
14610 "alloc_policy": gdata.alloc_policy,
14611 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14612 })
14613 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14615 return ng
14617 @staticmethod
14618 def _ComputeBasicNodeData(cfg, node_cfg):
14619 """Compute global node data.
14622 @returns: a dict of name: (node dict, node config)
14625 # fill in static (config-based) values
14626 node_results = dict((ninfo.name, {
14627 "tags": list(ninfo.GetTags()),
14628 "primary_ip": ninfo.primary_ip,
14629 "secondary_ip": ninfo.secondary_ip,
14630 "offline": ninfo.offline,
14631 "drained": ninfo.drained,
14632 "master_candidate": ninfo.master_candidate,
14633 "group": ninfo.group,
14634 "master_capable": ninfo.master_capable,
14635 "vm_capable": ninfo.vm_capable,
14636 "ndparams": cfg.GetNdParams(ninfo),
14638 for ninfo in node_cfg.values())
14640 return node_results
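# Illustrative entry of the returned dict (not from the original code), keyed
# by node name and carrying only configuration-derived values:
#
#   node_results["node1.example.com"] == {
#     "tags": [], "primary_ip": "192.0.2.1", "secondary_ip": "198.51.100.1",
#     "offline": False, "drained": False, "master_candidate": True,
#     "group": "<group uuid>", "master_capable": True, "vm_capable": True,
#     "ndparams": {...},
#   }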
14642 @staticmethod
14643 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14644                             node_results):
14645 """Compute the dynamic node data on top of the basic (config) node data.
14647 @param node_results: the basic node structures as filled from the config
14649 """
14650 #TODO(dynmem): compute the right data on MAX and MIN memory
14651 # make a copy of the current dict
14652 node_results = dict(node_results)
14653 for nname, nresult in node_data.items():
14654 assert nname in node_results, "Missing basic data for node %s" % nname
14655 ninfo = node_cfg[nname]
14657 if not (ninfo.offline or ninfo.drained):
14658 nresult.Raise("Can't get data for node %s" % nname)
14659 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14660                         nname)
14661 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14663 for attr in ["memory_total", "memory_free", "memory_dom0",
14664 "vg_size", "vg_free", "cpu_total"]:
14665 if attr not in remote_info:
14666 raise errors.OpExecError("Node '%s' didn't return attribute"
14667 " '%s'" % (nname, attr))
14668 if not isinstance(remote_info[attr], int):
14669 raise errors.OpExecError("Node '%s' returned invalid value"
14670                          " for '%s': %s" %
14671                          (nname, attr, remote_info[attr]))
14672 # compute memory used by primary instances
14673 i_p_mem = i_p_up_mem = 0
14674 for iinfo, beinfo in i_list:
14675 if iinfo.primary_node == nname:
14676 i_p_mem += beinfo[constants.BE_MAXMEM]
14677 if iinfo.name not in node_iinfo[nname].payload:
14678 i_used_mem = 0
14679 else:
14680 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14681 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14682 remote_info["memory_free"] -= max(0, i_mem_diff)
14684 if iinfo.admin_state == constants.ADMINST_UP:
14685 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14687 # compute memory used by instances
14688 pnr_dyn = {
14689 "total_memory": remote_info["memory_total"],
14690 "reserved_memory": remote_info["memory_dom0"],
14691 "free_memory": remote_info["memory_free"],
14692 "total_disk": remote_info["vg_size"],
14693 "free_disk": remote_info["vg_free"],
14694 "total_cpus": remote_info["cpu_total"],
14695 "i_pri_memory": i_p_mem,
14696 "i_pri_up_memory": i_p_up_mem,
14698 pnr_dyn.update(node_results[nname])
14699 node_results[nname] = pnr_dyn
14701 return node_results
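# Worked example with invented numbers (not from the original source): if a
# node reports memory_free=4096 and hosts a stopped primary instance with
# BE_MAXMEM=1024 that currently uses 0 MiB, the loop above subtracts the
# difference, so the reported "free_memory" becomes 4096 - 1024 = 3072.  Note
# that the static fields win on key conflicts, because
# pnr_dyn.update(node_results[nname]) is applied after the dynamic dict is
# built.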
14703 @staticmethod
14704 def _ComputeInstanceData(cluster_info, i_list):
14705 """Compute global instance data.
14707 """
14708 instance_data = {}
14709 for iinfo, beinfo in i_list:
14710 nic_data = []
14711 for nic in iinfo.nics:
14712 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14713 nic_dict = {
14714 "mac": nic.mac,
14715 "ip": nic.ip,
14716 "mode": filled_params[constants.NIC_MODE],
14717 "link": filled_params[constants.NIC_LINK],
14718 }
14719 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14720 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14721 nic_data.append(nic_dict)
14722 pir = {
14723 "tags": list(iinfo.GetTags()),
14724 "admin_state": iinfo.admin_state,
14725 "vcpus": beinfo[constants.BE_VCPUS],
14726 "memory": beinfo[constants.BE_MAXMEM],
14727 "spindle_usage": beinfo[constants.BE_SPINDLE_USAGE],
14729 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14731 "disks": [{constants.IDISK_SIZE: dsk.size,
14732 constants.IDISK_MODE: dsk.mode}
14733 for dsk in iinfo.disks],
14734 "disk_template": iinfo.disk_template,
14735 "hypervisor": iinfo.hypervisor,
14737 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14739 instance_data[iinfo.name] = pir
14741 return instance_data
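# Illustrative shape of one instance entry (not part of the original module);
# the concrete values are placeholders:
#
#   instance_data["inst1.example.com"] == {
#     "tags": [], "admin_state": constants.ADMINST_UP, "vcpus": 1,
#     "memory": 1024, "spindle_usage": 1,
#     "nodes": ["node1.example.com", "node2.example.com"],
#     "nics": [{"mac": "...", "ip": None, "mode": "bridged", "link": "br0"}],
#     "disks": [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw"}],
#     "disk_template": constants.DT_DRBD8, "hypervisor": constants.HT_KVM,
#     "disk_space_total": ...,  # filled in by _ComputeDiskSize
#   }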
14743 def _AddNewInstance(self):
14744 """Add new instance data to allocator structure.
14746 This in combination with _ComputeClusterData will create the
14747 correct structure needed as input for the allocator.
14749 The checks for the completeness of the opcode must have already been
14750 done.
14752 """
14753 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14755 if self.disk_template in constants.DTS_INT_MIRROR:
14756 self.required_nodes = 2
14757 else:
14758 self.required_nodes = 1
14760 request = {
14761 "name": self.name,
14762 "disk_template": self.disk_template,
14765 "vcpus": self.vcpus,
14766 "memory": self.memory,
14767 "spindle_usage": self.spindle_usage,
14768 "disks": self.disks,
14769 "disk_space_total": disk_space,
14771 "required_nodes": self.required_nodes,
14772 "hypervisor": self.hypervisor,
14777 def _AddRelocateInstance(self):
14778 """Add relocate instance data to allocator structure.
14780 This in combination with _ComputeClusterData will create the
14781 correct structure needed as input for the allocator.
14783 The checks for the completeness of the opcode must have already been
14784 done.
14786 """
14787 instance = self.cfg.GetInstanceInfo(self.name)
14788 if instance is None:
14789 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14790 " IAllocator" % self.name)
14792 if instance.disk_template not in constants.DTS_MIRRORED:
14793 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14794 errors.ECODE_INVAL)
14796 if instance.disk_template in constants.DTS_INT_MIRROR and \
14797 len(instance.secondary_nodes) != 1:
14798 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14799 errors.ECODE_STATE)
14801 self.required_nodes = 1
14802 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14803 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14805 request = {
14806 "name": self.name,
14807 "disk_space_total": disk_space,
14808 "required_nodes": self.required_nodes,
14809 "relocate_from": self.relocate_from,
14810 }
14812 return request
14813 def _AddNodeEvacuate(self):
14814 """Get data for node-evacuate requests.
14818 "instances": self.instances,
14819 "evac_mode": self.evac_mode,
14822 def _AddChangeGroup(self):
14823 """Get data for node-evacuate requests.
14827 "instances": self.instances,
14828 "target_groups": self.target_groups,
14831 def _BuildInputData(self, fn, keydata):
14832 """Build input data structures.
14835 self._ComputeClusterData()
14838 request["type"] = self.mode
14839 for keyname, keytype in keydata:
14840 if keyname not in request:
14841 raise errors.ProgrammerError("Request parameter %s is missing" %
14842                              keyname)
14843 val = request[keyname]
14844 if not keytype(val):
14845 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14846 " validation, value %s, expected"
14847 " type %s" % (keyname, val, keytype))
14848 self.in_data["request"] = request
14850 self.in_text = serializer.Dump(self.in_data)
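# Sketch of the serialized request envelope (illustrative, not from the
# original source): for a relocation, the JSON handed to the script contains,
# next to the cluster data, something like:
#
#   "request": {
#     "type": constants.IALLOCATOR_MODE_RELOC,
#     "name": "inst1.example.com",
#     "relocate_from": ["node2.example.com"],
#     "disk_space_total": 10368,   # e.g. one 10 GiB disk plus DRBD metadata
#     "required_nodes": 1,
#   }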
14852 _STRING_LIST = ht.TListOf(ht.TString)
14853 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14854 # pylint: disable=E1101
14855 # Class '...' has no 'OP_ID' member
14856 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14857 opcodes.OpInstanceMigrate.OP_ID,
14858 opcodes.OpInstanceReplaceDisks.OP_ID])
14859 })))
14861 _NEVAC_MOVED = \
14862 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14863 ht.TItems([ht.TNonEmptyString,
14864 ht.TNonEmptyString,
14865 ht.TListOf(ht.TNonEmptyString),
14866 ])))
14867 _NEVAC_FAILED = \
14868 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14869 ht.TItems([ht.TNonEmptyString,
14870 ht.TMaybeString,
14871 ])))
14872 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14873 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
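# Example of a value accepted by _NEVAC_RESULT (made up for illustration): a
# list of moved instances, a list of failed instances, and per-instance job
# sets built from the allowed opcodes:
#
#   [
#     [["inst1", "group2", ["node3", "node4"]]],    # moved
#     [["inst2", "disk template not supported"]],   # failed
#     [[{"OP_ID": "OP_INSTANCE_MIGRATE", "instance_name": "inst1"}]],  # jobs
#   ]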
14875 _MODE_DATA = {
14876 constants.IALLOCATOR_MODE_ALLOC:
14877 (_AddNewInstance, [
14879 ("name", ht.TString),
14880 ("memory", ht.TInt),
14881 ("spindle_usage", ht.TInt),
14882 ("disks", ht.TListOf(ht.TDict)),
14883 ("disk_template", ht.TString),
14884 ("os", ht.TString),
14885 ("tags", _STRING_LIST),
14886 ("nics", ht.TListOf(ht.TDict)),
14887 ("vcpus", ht.TInt),
14888 ("hypervisor", ht.TString),
14890 constants.IALLOCATOR_MODE_RELOC:
14891 (_AddRelocateInstance,
14892 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14894 constants.IALLOCATOR_MODE_NODE_EVAC:
14895 (_AddNodeEvacuate, [
14896 ("instances", _STRING_LIST),
14897 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14899 constants.IALLOCATOR_MODE_CHG_GROUP:
14900 (_AddChangeGroup, [
14901 ("instances", _STRING_LIST),
14902 ("target_groups", _STRING_LIST),
14906 def Run(self, name, validate=True, call_fn=None):
14907 """Run an instance allocator and return the results.
14910 if call_fn is None:
14911 call_fn = self.rpc.call_iallocator_runner
14913 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14914 result.Raise("Failure while running the iallocator script")
14916 self.out_text = result.payload
14917 if validate:
14918 self._ValidateResult()
14920 def _ValidateResult(self):
14921 """Process the allocator results.
14923 This will process and if successful save the result in
14924 self.out_data and the other parameters.
14926 """
14927 try:
14928 rdict = serializer.Load(self.out_text)
14929 except Exception, err:
14930 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14932 if not isinstance(rdict, dict):
14933 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14935 # TODO: remove backwards compatibility in later versions
14936 if "nodes" in rdict and "result" not in rdict:
14937 rdict["result"] = rdict["nodes"]
14938 del rdict["nodes"]
14940 for key in "success", "info", "result":
14941 if key not in rdict:
14942 raise errors.OpExecError("Can't parse iallocator results:"
14943 " missing key '%s'" % key)
14944 setattr(self, key, rdict[key])
14946 if not self._result_check(self.result):
14947 raise errors.OpExecError("Iallocator returned invalid result,"
14948 " expected %s, got %s" %
14949 (self._result_check, self.result),
14950 errors.ECODE_INVAL)
14952 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14953 assert self.relocate_from is not None
14954 assert self.required_nodes == 1
14956 node2group = dict((name, ndata["group"])
14957 for (name, ndata) in self.in_data["nodes"].items())
14959 fn = compat.partial(self._NodesToGroups, node2group,
14960 self.in_data["nodegroups"])
14962 instance = self.cfg.GetInstanceInfo(self.name)
14963 request_groups = fn(self.relocate_from + [instance.primary_node])
14964 result_groups = fn(rdict["result"] + [instance.primary_node])
14966 if self.success and not set(result_groups).issubset(request_groups):
14967 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14968 " differ from original groups (%s)" %
14969 (utils.CommaJoin(result_groups),
14970 utils.CommaJoin(request_groups)))
14972 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14973 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14975 self.out_data = rdict
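# Illustrative output (not from the original source) that passes validation
# for an allocation request: the script must return a JSON object with at
# least "success", "info" and "result", e.g.
#
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node3.example.com"]}
#
# For relocations the nodes in "result" must additionally stay within the
# node groups of the original nodes, as checked above.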
14977 @staticmethod
14978 def _NodesToGroups(node2group, groups, nodes):
14979 """Returns a list of unique group names for a list of nodes.
14981 @type node2group: dict
14982 @param node2group: Map from node name to group UUID
14984 @param groups: Group information
14986 @param nodes: Node names
14988 """
14989 result = set()
14991 for node in nodes:
14992 try:
14993 group_uuid = node2group[node]
14994 except KeyError:
14995 # Ignore unknown node
14996 pass
14997 else:
14998 try:
14999 group = groups[group_uuid]
15000 except KeyError:
15001 # Can't find group, let's use UUID
15002 group_name = group_uuid
15003 else:
15004 group_name = group["name"]
15006 result.add(group_name)
15008 return sorted(result)
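# Usage sketch with made-up data (not part of the original module):
#
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}
#   IAllocator._NodesToGroups(node2group, groups, ["node1", "node2", "ghost"])
#   => ["default", "uuid-b"]   # unknown node ignored, missing group -> UUID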
15011 class LUTestAllocator(NoHooksLU):
15012 """Run allocator tests.
15014 This LU runs the allocator tests.
15016 """
15017 def CheckPrereq(self):
15018 """Check prerequisites.
15020 This checks the opcode parameters depending on the direction and mode of
15021 the test.
15022 """
15023 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15024 for attr in ["memory", "disks", "disk_template",
15025 "os", "tags", "nics", "vcpus"]:
15026 if not hasattr(self.op, attr):
15027 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15028 attr, errors.ECODE_INVAL)
15029 iname = self.cfg.ExpandInstanceName(self.op.name)
15030 if iname is not None:
15031 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15032 iname, errors.ECODE_EXISTS)
15033 if not isinstance(self.op.nics, list):
15034 raise errors.OpPrereqError("Invalid parameter 'nics'",
15035 errors.ECODE_INVAL)
15036 if not isinstance(self.op.disks, list):
15037 raise errors.OpPrereqError("Invalid parameter 'disks'",
15038 errors.ECODE_INVAL)
15039 for row in self.op.disks:
15040 if (not isinstance(row, dict) or
15041 constants.IDISK_SIZE not in row or
15042 not isinstance(row[constants.IDISK_SIZE], int) or
15043 constants.IDISK_MODE not in row or
15044 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15045 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15046 " parameter", errors.ECODE_INVAL)
15047 if self.op.hypervisor is None:
15048 self.op.hypervisor = self.cfg.GetHypervisorType()
15049 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15050 fname = _ExpandInstanceName(self.cfg, self.op.name)
15051 self.op.name = fname
15052 self.relocate_from = \
15053 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15054 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15055 constants.IALLOCATOR_MODE_NODE_EVAC):
15056 if not self.op.instances:
15057 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15058 self.op.instances = _GetWantedInstances(self, self.op.instances)
15059 else:
15060 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15061 self.op.mode, errors.ECODE_INVAL)
15063 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15064 if self.op.allocator is None:
15065 raise errors.OpPrereqError("Missing allocator name",
15066 errors.ECODE_INVAL)
15067 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15068 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15069 self.op.direction, errors.ECODE_INVAL)
15071 def Exec(self, feedback_fn):
15072 """Run the allocator test.
15075 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15076 ial = IAllocator(self.cfg, self.rpc,
15077 mode=self.op.mode,
15078 name=self.op.name,
15079 memory=self.op.memory,
15080 disks=self.op.disks,
15081 disk_template=self.op.disk_template,
15085 vcpus=self.op.vcpus,
15086 hypervisor=self.op.hypervisor,
15088 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15089 ial = IAllocator(self.cfg, self.rpc,
15090 mode=self.op.mode,
15091 name=self.op.name,
15092 relocate_from=list(self.relocate_from),
15093 )
15094 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15095 ial = IAllocator(self.cfg, self.rpc,
15096 mode=self.op.mode,
15097 instances=self.op.instances,
15098 target_groups=self.op.target_groups)
15099 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15100 ial = IAllocator(self.cfg, self.rpc,
15101 mode=self.op.mode,
15102 instances=self.op.instances,
15103 evac_mode=self.op.evac_mode)
15105 raise errors.ProgrammerError("Uncatched mode %s in"
15106 " LUTestAllocator.Exec", self.op.mode)
15108 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15109 result = ial.in_text
15110 else:
15111 ial.Run(self.op.allocator, validate=False)
15112 result = ial.out_text
15114 return result
15116 #: Query type implementations
15117 _QUERY_IMPL = {
15118 constants.QR_INSTANCE: _InstanceQuery,
15119 constants.QR_NODE: _NodeQuery,
15120 constants.QR_GROUP: _GroupQuery,
15121 constants.QR_OS: _OsQuery,
15122 }
15124 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15127 def _GetQueryImplementation(name):
15128 """Returns the implemtnation for a query type.
15130 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15134 return _QUERY_IMPL[name]
15136 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15137 errors.ECODE_INVAL)
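# Illustrative dispatch (not in the original module); the filter and the field
# names below are hypothetical examples:
#
#   impl = _GetQueryImplementation(constants.QR_NODE)
#   query = impl(qlang.MakeSimpleFilter("name", ["node1.example.com"]),
#                ["name", "pinst_cnt"], False)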