# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
_DRBD_META_SIZE = 128

INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
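

# Illustrative sketch only (not part of the original module): how an LU's
# Exec method can hand follow-up jobs back to mcpu. The opcode used below
# is an arbitrary example; a real LU returns whatever follow-up work it needs.
def _ExampleExecReturningJobs():
  """Sketch of an Exec-style return value using L{ResultWithJobs}.

  """
  jobs = [[opcodes.OpInstanceStartup(instance_name="instance1.example.com")]]
  # Extra keyword arguments are kept in the C{other} attribute
  return ResultWithJobs(jobs, warning="instance is started asynchronously")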


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing it here is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level, omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"; that prefix is added by the hooks runner. The hooks
      runner will extend the environment with additional variables. If no
      environment should be defined, an empty dictionary should be returned
      (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks on.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
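

# Illustrative sketch only (not part of the original module): a minimal
# concurrent LU showing the usual two-step locking pattern described above.
# The class is hypothetical; a real LU would also implement CheckPrereq,
# Exec and the hooks methods.
class _ExampleInstanceLU(LogicalUnit): # pylint: disable=W0223
  """Sketch of an instance-level LU using _ExpandAndLockInstance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Expands self.op.instance_name and declares the instance lock
    self._ExpandAndLockInstance()
    # Node locks are computed later, once the instance lock is owned
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()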


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
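

# Illustrative sketch only (not part of the original module): the smallest
# possible tasklet, showing the interface an LU-embedded tasklet must offer.
class _ExampleNoopTasklet(Tasklet):
  """Sketch of a no-op tasklet.

  """
  def CheckPrereq(self):
    # Nothing to verify; a real tasklet would raise errors.OpPrereqError here
    pass

  def Exec(self, feedback_fn):
    feedback_fn("noop tasklet executed on behalf of LU %s" % self.lu)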
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
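

# Illustrative sketch only (not part of the original module): demonstrates
# the merge semantics of _GetUpdatedParams. The parameter names used here
# are made up for the example.
def _ExampleGetUpdatedParams():
  """Sketch showing how C{constants.VALUE_DEFAULT} removes a key.

  """
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  update = {"root_path": "/dev/vda2", "kernel_path": constants.VALUE_DEFAULT}
  # Result: {"root_path": "/dev/vda2"}; "kernel_path" reverts to its default
  # at parameter fill time since it's no longer overridden
  return _GetUpdatedParams(old, update)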


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if not value or value == [constants.VALUE_DEFAULT]:
        if group_policy:
          ipolicy.pop(key, None)
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      elif key in constants.IPOLICY_PARAMETERS:
        # FIXME: we assume all such values are float
        try:
          ipolicy[key] = float(value)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid value for attribute"
                                     " '%s': '%s', error: %s" %
                                     (key, value, err), errors.ECODE_INVAL)
      else:
        # FIXME: we assume all others are lists; this should be redone
        # similarly to the above
        ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
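

# Illustrative sketch only (not part of the original module): shows how
# _UpdateAndVerifySubDict merges per-key sub-dicts and type-checks the
# result. The sub-dict keys are made up for the example.
def _ExampleUpdateAndVerifySubDict():
  """Sketch of a verified sub-dict merge.

  """
  base = {"xen-pvm": {"mem_total": 4096}}
  updates = {"xen-pvm": {"mem_node": 2048}}
  type_check = {
    "mem_total": constants.VTYPE_INT,
    "mem_node": constants.VTYPE_INT,
    }
  # Result: {"xen-pvm": {"mem_total": 4096, "mem_node": 2048}}
  return _UpdateAndVerifySubDict(base, updates, type_check)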


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass
  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
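

# Illustrative sketch only (not part of the original module): the typical
# pattern for _ReleaseLocks. After an LU has narrowed down which nodes it
# really needs, it releases every other node lock it still owns.
def _ExampleReleaseUnneededNodeLocks(lu, keep_nodes):
  """Sketch releasing all node locks except C{keep_nodes}.

  @param lu: calling LogicalUnit, assumed to own node locks
  @param keep_nodes: names of the node locks to retain

  """
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=keep_nodes)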


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name
      as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _ComputeMinMaxSpec(name, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an error message if the value does not meet the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None
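

# Illustrative sketch only (not part of the original module): checks a value
# against a toy ipolicy. 512 lies below the assumed minimum of 1024, so a
# violation message is returned; a value within [1024, 8192] yields None.
def _ExampleComputeMinMaxSpec():
  """Sketch of a single min/max spec check.

  """
  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 1024},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 8192},
    }
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 512)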


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  return filter(None,
                (_compute_fn(name, ipolicy, value)
                 for (name, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A set of instances which violate the new ipolicy but did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances which violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to
    # a warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" % (node.name,
                     utils.CommaJoin(dangling_instances.get(node.name,
                                                            ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a"
                  " non-existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
1960 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1961 """Verifies the status of a node group.
1964 HPATH = "cluster-verify"
1965 HTYPE = constants.HTYPE_CLUSTER
1968 _HOOKS_INDENT_RE = re.compile("^", re.M)
1970 class NodeImage(object):
1971 """A class representing the logical and physical status of a node.
1973 @type name: string
1974 @ivar name: the node name to which this object refers
1975 @ivar volumes: a structure as returned from
1976 L{ganeti.backend.GetVolumeList} (runtime)
1977 @ivar instances: a list of running instances (runtime)
1978 @ivar pinst: list of configured primary instances (config)
1979 @ivar sinst: list of configured secondary instances (config)
1980 @ivar sbp: dictionary of {primary-node: list of instances} for all
1981 instances for which this node is secondary (config)
1982 @ivar mfree: free memory, as reported by hypervisor (runtime)
1983 @ivar dfree: free disk, as reported by the node (runtime)
1984 @ivar offline: the offline status (config)
1985 @type rpc_fail: boolean
1986 @ivar rpc_fail: whether the RPC verify call failed (overall,
1987 not whether the individual keys were correct) (runtime)
1988 @type lvm_fail: boolean
1989 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1990 @type hyp_fail: boolean
1991 @ivar hyp_fail: whether the RPC call didn't return the instance list
1992 @type ghost: boolean
1993 @ivar ghost: whether this is a known node or not (config)
1994 @type os_fail: boolean
1995 @ivar os_fail: whether the RPC call didn't return valid OS data
1996 @type oslist: list
1997 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1998 @type vm_capable: boolean
1999 @ivar vm_capable: whether the node can host instances
2001 """
2002 def __init__(self, offline=False, name=None, vm_capable=True):
2003 self.name = name
2004 self.volumes = {}
2005 self.instances = []
2006 self.pinst = []
2007 self.sinst = []
2008 self.sbp = {}
2009 self.mfree = 0
2010 self.dfree = 0
2011 self.offline = offline
2012 self.vm_capable = vm_capable
2013 self.rpc_fail = False
2014 self.lvm_fail = False
2015 self.hyp_fail = False
2016 self.ghost = False
2017 self.os_fail = False
2018 self.oslist = {}
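# Editor's sketch of typical use (hypothetical values): the config phase
# fills pinst/sinst/sbp, the RPC phase fills the runtime fields, e.g.:
#   nimg = self.NodeImage(offline=False, name="node1.example.com")
#   nimg.pinst.append("inst1.example.com")          # config data
#   nimg.sbp["node2.example.com"] = ["inst2.example.com"]
#   nimg.mfree = 2048                               # runtime, from RPC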
2020 def ExpandNames(self):
2021 # This raises errors.OpPrereqError on its own:
2022 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2024 # Get instances in node group; this is unsafe and needs verification later
2025 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2027 self.needed_locks = {
2028 locking.LEVEL_INSTANCE: inst_names,
2029 locking.LEVEL_NODEGROUP: [self.group_uuid],
2030 locking.LEVEL_NODE: [],
2031 }
2033 self.share_locks = _ShareAll()
2035 def DeclareLocks(self, level):
2036 if level == locking.LEVEL_NODE:
2037 # Get members of node group; this is unsafe and needs verification later
2038 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2040 all_inst_info = self.cfg.GetAllInstancesInfo()
2042 # In Exec(), we warn about mirrored instances that have primary and
2043 # secondary living in separate node groups. To fully verify that
2044 # volumes for these instances are healthy, we will need to do an
2045 # extra call to their secondaries. We ensure here those nodes will
2046 # be locked.
2047 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2048 # Important: access only the instances whose lock is owned
2049 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2050 nodes.update(all_inst_info[inst].secondary_nodes)
2052 self.needed_locks[locking.LEVEL_NODE] = nodes
2054 def CheckPrereq(self):
2055 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2056 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2058 group_nodes = set(self.group_info.members)
2059 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2061 unlocked_nodes = \
2062 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2064 unlocked_instances = \
2065 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2068 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2069 utils.CommaJoin(unlocked_nodes))
2071 if unlocked_instances:
2072 raise errors.OpPrereqError("Missing lock for instances: %s" %
2073 utils.CommaJoin(unlocked_instances))
2075 self.all_node_info = self.cfg.GetAllNodesInfo()
2076 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2078 self.my_node_names = utils.NiceSort(group_nodes)
2079 self.my_inst_names = utils.NiceSort(group_instances)
2081 self.my_node_info = dict((name, self.all_node_info[name])
2082 for name in self.my_node_names)
2084 self.my_inst_info = dict((name, self.all_inst_info[name])
2085 for name in self.my_inst_names)
2087 # We detect here the nodes that will need the extra RPC calls for verifying
2088 # split LV volumes; they should be locked.
2089 extra_lv_nodes = set()
2091 for inst in self.my_inst_info.values():
2092 if inst.disk_template in constants.DTS_INT_MIRROR:
2093 group = self.my_node_info[inst.primary_node].group
2094 for nname in inst.secondary_nodes:
2095 if self.all_node_info[nname].group != group:
2096 extra_lv_nodes.add(nname)
2098 unlocked_lv_nodes = \
2099 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2101 if unlocked_lv_nodes:
2102 raise errors.OpPrereqError("these nodes could be locked: %s" %
2103 utils.CommaJoin(unlocked_lv_nodes))
2104 self.extra_lv_nodes = list(extra_lv_nodes)
2106 def _VerifyNode(self, ninfo, nresult):
2107 """Perform some basic validation on data returned from a node.
2109 - check the result data structure is well formed and has all the
2110 mandatory fields
2111 - check ganeti version
2113 @type ninfo: L{objects.Node}
2114 @param ninfo: the node to check
2115 @param nresult: the results from the node
2116 @rtype: boolean
2117 @return: whether overall this call was successful (and we can expect
2118 reasonable values in the response)
2120 """
2121 node = ninfo.name
2122 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2124 # main result, nresult should be a non-empty dict
2125 test = not nresult or not isinstance(nresult, dict)
2126 _ErrorIf(test, constants.CV_ENODERPC, node,
2127 "unable to verify node: no data returned")
2131 # compares ganeti version
2132 local_version = constants.PROTOCOL_VERSION
2133 remote_version = nresult.get("version", None)
2134 test = not (remote_version and
2135 isinstance(remote_version, (list, tuple)) and
2136 len(remote_version) == 2)
2137 _ErrorIf(test, constants.CV_ENODERPC, node,
2138 "connection to node returned invalid data")
2142 test = local_version != remote_version[0]
2143 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2144 "incompatible protocol versions: master %s,"
2145 " node %s", local_version, remote_version[0])
2149 # node seems compatible, we can actually try to look into its results
2151 # full package version
2152 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2153 constants.CV_ENODEVERSION, node,
2154 "software version mismatch: master %s, node %s",
2155 constants.RELEASE_VERSION, remote_version[1],
2156 code=self.ETYPE_WARNING)
2158 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2159 if ninfo.vm_capable and isinstance(hyp_result, dict):
2160 for hv_name, hv_result in hyp_result.iteritems():
2161 test = hv_result is not None
2162 _ErrorIf(test, constants.CV_ENODEHV, node,
2163 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2165 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2166 if ninfo.vm_capable and isinstance(hvp_result, list):
2167 for item, hv_name, hv_result in hvp_result:
2168 _ErrorIf(True, constants.CV_ENODEHV, node,
2169 "hypervisor %s parameter verify failure (source %s): %s",
2170 hv_name, item, hv_result)
2172 test = nresult.get(constants.NV_NODESETUP,
2173 ["Missing NODESETUP results"])
2174 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2175 "; ".join(test))
2177 return True
2179 def _VerifyNodeTime(self, ninfo, nresult,
2180 nvinfo_starttime, nvinfo_endtime):
2181 """Check the node time.
2183 @type ninfo: L{objects.Node}
2184 @param ninfo: the node to check
2185 @param nresult: the remote results for the node
2186 @param nvinfo_starttime: the start time of the RPC call
2187 @param nvinfo_endtime: the end time of the RPC call
2189 """
2190 node = ninfo.name
2191 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2193 ntime = nresult.get(constants.NV_TIME, None)
2194 try:
2195 ntime_merged = utils.MergeTime(ntime)
2196 except (ValueError, TypeError):
2197 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2198 return
2200 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2201 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2202 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2203 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2207 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2208 "Node time diverges by at least %s from master node time",
2211 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2212 """Check the node LVM results.
2214 @type ninfo: L{objects.Node}
2215 @param ninfo: the node to check
2216 @param nresult: the remote results for the node
2217 @param vg_name: the configured VG name
2221 """
2222 node = ninfo.name
2224 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2226 # checks vg existence and size > 20G
2227 vglist = nresult.get(constants.NV_VGLIST, None)
2229 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2231 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2232 constants.MIN_VG_SIZE)
2233 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2236 pvlist = nresult.get(constants.NV_PVLIST, None)
2237 test = pvlist is None
2238 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2239 if not test:
2240 # check that ':' is not present in PV names, since it's a
2241 # special character for lvcreate (denotes the range of PEs to
2242 # use on the PV)
2243 for _, pvname, owner_vg in pvlist:
2244 test = ":" in pvname
2245 _ErrorIf(test, constants.CV_ENODELVM, node,
2246 "Invalid character ':' in PV '%s' of VG '%s'",
2249 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2250 """Check the node bridges.
2252 @type ninfo: L{objects.Node}
2253 @param ninfo: the node to check
2254 @param nresult: the remote results for the node
2255 @param bridges: the expected list of bridges
2257 """
2258 if not bridges:
2259 return
2261 node = ninfo.name
2262 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2264 missing = nresult.get(constants.NV_BRIDGES, None)
2265 test = not isinstance(missing, list)
2266 _ErrorIf(test, constants.CV_ENODENET, node,
2267 "did not return valid bridge information")
2268 if not test:
2269 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2270 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2272 def _VerifyNodeUserScripts(self, ninfo, nresult):
2273 """Check the results of user scripts presence and executability on the node
2275 @type ninfo: L{objects.Node}
2276 @param ninfo: the node to check
2277 @param nresult: the remote results for the node
2280 """
2281 node = ninfo.name
2282 test = constants.NV_USERSCRIPTS not in nresult
2283 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2284 "did not return user scripts information")
2286 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2287 if not test:
2288 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2289 "user scripts not present or not executable: %s" %
2290 utils.CommaJoin(sorted(broken_scripts)))
2292 def _VerifyNodeNetwork(self, ninfo, nresult):
2293 """Check the node network connectivity results.
2295 @type ninfo: L{objects.Node}
2296 @param ninfo: the node to check
2297 @param nresult: the remote results for the node
2299 """
2300 node = ninfo.name
2301 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2303 test = constants.NV_NODELIST not in nresult
2304 _ErrorIf(test, constants.CV_ENODESSH, node,
2305 "node hasn't returned node ssh connectivity data")
2306 if not test:
2307 if nresult[constants.NV_NODELIST]:
2308 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2309 _ErrorIf(True, constants.CV_ENODESSH, node,
2310 "ssh communication with node '%s': %s", a_node, a_msg)
2312 test = constants.NV_NODENETTEST not in nresult
2313 _ErrorIf(test, constants.CV_ENODENET, node,
2314 "node hasn't returned node tcp connectivity data")
2315 if not test:
2316 if nresult[constants.NV_NODENETTEST]:
2317 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2318 for anode in nlist:
2319 _ErrorIf(True, constants.CV_ENODENET, node,
2320 "tcp communication with node '%s': %s",
2321 anode, nresult[constants.NV_NODENETTEST][anode])
2323 test = constants.NV_MASTERIP not in nresult
2324 _ErrorIf(test, constants.CV_ENODENET, node,
2325 "node hasn't returned node master IP reachability data")
2326 if not test:
2327 if not nresult[constants.NV_MASTERIP]:
2328 if node == self.master_node:
2329 msg = "the master node cannot reach the master IP (not configured?)"
2331 msg = "cannot reach the master IP"
2332 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2334 def _VerifyInstance(self, instance, instanceconfig, node_image,
2335 diskstatus):
2336 """Verify an instance.
2338 This function checks to see if the required block devices are
2339 available on the instance's node.
2341 """
2342 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2343 node_current = instanceconfig.primary_node
2345 node_vol_should = {}
2346 instanceconfig.MapLVsByNode(node_vol_should)
2348 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2349 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2350 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2352 for node in node_vol_should:
2353 n_img = node_image[node]
2354 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2355 # ignore missing volumes on offline or broken nodes
2356 continue
2357 for volume in node_vol_should[node]:
2358 test = volume not in n_img.volumes
2359 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2360 "volume %s missing on node %s", volume, node)
2362 if instanceconfig.admin_state == constants.ADMINST_UP:
2363 pri_img = node_image[node_current]
2364 test = instance not in pri_img.instances and not pri_img.offline
2365 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2366 "instance not running on its primary node %s",
2369 diskdata = [(nname, success, status, idx)
2370 for (nname, disks) in diskstatus.items()
2371 for idx, (success, status) in enumerate(disks)]
2373 for nname, success, bdev_status, idx in diskdata:
2374 # the 'ghost node' construction in Exec() ensures that we have a
2375 # node here
2376 snode = node_image[nname]
2377 bad_snode = snode.ghost or snode.offline
2378 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2379 not success and not bad_snode,
2380 constants.CV_EINSTANCEFAULTYDISK, instance,
2381 "couldn't retrieve status for disk/%s on %s: %s",
2382 idx, nname, bdev_status)
2383 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2384 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2385 constants.CV_EINSTANCEFAULTYDISK, instance,
2386 "disk/%s on %s is faulty", idx, nname)
2388 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2389 """Verify if there are any unknown volumes in the cluster.
2391 The .os, .swap and backup volumes are ignored. All other volumes are
2392 reported as unknown.
2394 @type reserved: L{ganeti.utils.FieldSet}
2395 @param reserved: a FieldSet of reserved volume names
2397 """
2398 for node, n_img in node_image.items():
2399 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2400 # skip non-healthy nodes
2401 continue
2402 for volume in n_img.volumes:
2403 test = ((node not in node_vol_should or
2404 volume not in node_vol_should[node]) and
2405 not reserved.Matches(volume))
2406 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2407 "volume %s is unknown", volume)
2409 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2410 """Verify N+1 Memory Resilience.
2412 Check that if one single node dies we can still start all the
2413 instances it was primary for.
2415 """
2416 cluster_info = self.cfg.GetClusterInfo()
2417 for node, n_img in node_image.items():
2418 # This code checks that every node which is now listed as
2419 # secondary has enough memory to host all instances it is
2420 # supposed to should a single other node in the cluster fail.
2421 # FIXME: not ready for failover to an arbitrary node
2422 # FIXME: does not support file-backed instances
2423 # WARNING: we currently take into account down instances as well
2424 # as up ones, considering that even if they're down someone
2425 # might want to start them even in the event of a node failure.
2426 if n_img.offline:
2427 # we're skipping offline nodes from the N+1 warning, since
2428 # most likely we don't have good memory information from them;
2429 # we already list instances living on such nodes, and that's
2430 # enough warning
2431 continue
2432 #TODO(dynmem): use MINMEM for checking
2433 #TODO(dynmem): also consider ballooning out other instances
2434 for prinode, instances in n_img.sbp.items():
2435 needed_mem = 0
2436 for instance in instances:
2437 bep = cluster_info.FillBE(instance_cfg[instance])
2438 if bep[constants.BE_AUTO_BALANCE]:
2439 needed_mem += bep[constants.BE_MAXMEM]
2440 test = n_img.mfree < needed_mem
2441 self._ErrorIf(test, constants.CV_ENODEN1, node,
2442 "not enough memory to accomodate instance failovers"
2443 " should node %s fail (%dMiB needed, %dMiB available)",
2444 prinode, needed_mem, n_img.mfree)
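# Worked example (editor's illustration, hypothetical numbers): if this
# node is secondary for two auto-balanced instances of prinode with
# BE_MAXMEM 2048 and 4096 MiB, needed_mem is 6144 MiB; an mfree of only
# 4096 MiB would then trigger the CV_ENODEN1 error above.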
2446 @classmethod
2447 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2448 (files_all, files_opt, files_mc, files_vm)):
2449 """Verifies file checksums collected from all nodes.
2451 @param errorif: Callback for reporting errors
2452 @param nodeinfo: List of L{objects.Node} objects
2453 @param master_node: Name of master node
2454 @param all_nvinfo: RPC results
2456 """
2457 # Define functions determining which nodes to consider for a file
2458 files2nodefn = [
2459 (files_all, None),
2460 (files_mc, lambda node: (node.master_candidate or
2461 node.name == master_node)),
2462 (files_vm, lambda node: node.vm_capable),
2463 ]
2465 # Build mapping from filename to list of nodes which should have the file
2466 nodefiles = {}
2467 for (files, fn) in files2nodefn:
2468 if fn is None:
2469 filenodes = nodeinfo
2470 else:
2471 filenodes = filter(fn, nodeinfo)
2472 nodefiles.update((filename,
2473 frozenset(map(operator.attrgetter("name"), filenodes)))
2474 for filename in files)
2476 assert set(nodefiles) == (files_all | files_mc | files_vm)
2478 fileinfo = dict((filename, {}) for filename in nodefiles)
2479 ignore_nodes = set()
2481 for node in nodeinfo:
2482 if node.offline:
2483 ignore_nodes.add(node.name)
2484 continue
2486 nresult = all_nvinfo[node.name]
2488 if nresult.fail_msg or not nresult.payload:
2489 node_files = None
2490 else:
2491 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2493 test = not (node_files and isinstance(node_files, dict))
2494 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2495 "Node did not return file checksum data")
2496 if test:
2497 ignore_nodes.add(node.name)
2498 continue
2500 # Build per-checksum mapping from filename to nodes having it
2501 for (filename, checksum) in node_files.items():
2502 assert filename in nodefiles
2503 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2505 for (filename, checksums) in fileinfo.items():
2506 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2508 # Nodes having the file
2509 with_file = frozenset(node_name
2510 for nodes in fileinfo[filename].values()
2511 for node_name in nodes) - ignore_nodes
2513 expected_nodes = nodefiles[filename] - ignore_nodes
2515 # Nodes missing file
2516 missing_file = expected_nodes - with_file
2518 if filename in files_opt:
2520 errorif(missing_file and missing_file != expected_nodes,
2521 constants.CV_ECLUSTERFILECHECK, None,
2522 "File %s is optional, but it must exist on all or no"
2523 " nodes (not found on %s)",
2524 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2525 else:
2526 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2527 "File %s is missing from node(s) %s", filename,
2528 utils.CommaJoin(utils.NiceSort(missing_file)))
2530 # Warn if a node has a file it shouldn't
2531 unexpected = with_file - expected_nodes
2532 errorif(unexpected,
2533 constants.CV_ECLUSTERFILECHECK, None,
2534 "File %s should not exist on node(s) %s",
2535 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2537 # See if there are multiple versions of the file
2538 test = len(checksums) > 1
2540 variants = ["variant %s on %s" %
2541 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2542 for (idx, (checksum, nodes)) in
2543 enumerate(sorted(checksums.items()))]
2544 else:
2545 variants = []
2547 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2548 "File %s found with %s different checksums (%s)",
2549 filename, len(checksums), "; ".join(variants))
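# Editor's sketch of the structure being checked (hypothetical values):
#   fileinfo["/var/lib/ganeti/config.data"] = {
#     "0123456789abcdef0123": set(["node1", "node2"]),
#     "fedcba98765432109876": set(["node3"]),
#   }
# Two checksums for the same file yield the "2 different checksums" error.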
2551 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2552 drbd_map):
2553 """Verifies the node DRBD status.
2555 @type ninfo: L{objects.Node}
2556 @param ninfo: the node to check
2557 @param nresult: the remote results for the node
2558 @param instanceinfo: the dict of instances
2559 @param drbd_helper: the configured DRBD usermode helper
2560 @param drbd_map: the DRBD map as returned by
2561 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2563 """
2564 node = ninfo.name
2565 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2567 if drbd_helper:
2568 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2569 test = (helper_result is None)
2570 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2571 "no drbd usermode helper returned")
2572 if helper_result:
2573 status, payload = helper_result
2574 test = not status
2575 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2576 "drbd usermode helper check unsuccessful: %s", payload)
2577 test = status and (payload != drbd_helper)
2578 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2579 "wrong drbd usermode helper: %s", payload)
2581 # compute the DRBD minors
2582 node_drbd = {}
2583 for minor, instance in drbd_map[node].items():
2584 test = instance not in instanceinfo
2585 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2586 "ghost instance '%s' in temporary DRBD map", instance)
2587 # ghost instance should not be running, but otherwise we
2588 # don't give double warnings (both ghost instance and
2589 # unallocated minor in use)
2590 if test:
2591 node_drbd[minor] = (instance, False)
2592 else:
2593 instance = instanceinfo[instance]
2594 node_drbd[minor] = (instance.name,
2595 instance.admin_state == constants.ADMINST_UP)
2597 # and now check them
2598 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2599 test = not isinstance(used_minors, (tuple, list))
2600 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2601 "cannot parse drbd status file: %s", str(used_minors))
2602 if test:
2603 # we cannot check drbd status
2604 return
2606 for minor, (iname, must_exist) in node_drbd.items():
2607 test = minor not in used_minors and must_exist
2608 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2609 "drbd minor %d of instance %s is not active", minor, iname)
2610 for minor in used_minors:
2611 test = minor not in node_drbd
2612 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2613 "unallocated drbd minor %d is in use", minor)
2615 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2616 """Builds the node OS structures.
2618 @type ninfo: L{objects.Node}
2619 @param ninfo: the node to check
2620 @param nresult: the remote results for the node
2621 @param nimg: the node image object
2623 """
2624 node = ninfo.name
2625 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2627 remote_os = nresult.get(constants.NV_OSLIST, None)
2628 test = (not isinstance(remote_os, list) or
2629 not compat.all(isinstance(v, list) and len(v) == 7
2630 for v in remote_os))
2632 _ErrorIf(test, constants.CV_ENODEOS, node,
2633 "node hasn't returned valid OS data")
2642 for (name, os_path, status, diagnose,
2643 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2645 if name not in os_dict:
2646 os_dict[name] = []
2648 # parameters is a list of lists instead of list of tuples due to
2649 # JSON lacking a real tuple type, fix it:
2650 parameters = [tuple(v) for v in parameters]
2651 os_dict[name].append((os_path, status, diagnose,
2652 set(variants), set(parameters), set(api_ver)))
2654 nimg.oslist = os_dict
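# Editor's sketch of the resulting map (hypothetical values):
#   nimg.oslist = {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                                   set(["default"]),
#                                   set([("dhcp", "use DHCP")]),
#                                   set([20]))]}
# More than one entry in the list means a duplicate OS definition, which
# _VerifyNodeOS flags as shadowing the first one.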
2656 def _VerifyNodeOS(self, ninfo, nimg, base):
2657 """Verifies the node OS list.
2659 @type ninfo: L{objects.Node}
2660 @param ninfo: the node to check
2661 @param nimg: the node image object
2662 @param base: the 'template' node we match against (e.g. from the master)
2664 """
2665 node = ninfo.name
2666 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2668 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2670 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2671 for os_name, os_data in nimg.oslist.items():
2672 assert os_data, "Empty OS status for OS %s?!" % os_name
2673 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2674 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2675 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2676 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2677 "OS '%s' has multiple entries (first one shadows the rest): %s",
2678 os_name, utils.CommaJoin([v[0] for v in os_data]))
2679 # comparisons with the 'base' image
2680 test = os_name not in base.oslist
2681 _ErrorIf(test, constants.CV_ENODEOS, node,
2682 "Extra OS %s not present on reference node (%s)",
2686 assert base.oslist[os_name], "Base node has empty OS status?"
2687 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2688 if not b_status:
2689 # base OS is invalid, skipping
2690 continue
2691 for kind, a, b in [("API version", f_api, b_api),
2692 ("variants list", f_var, b_var),
2693 ("parameters", beautify_params(f_param),
2694 beautify_params(b_param))]:
2695 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2696 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2697 kind, os_name, base.name,
2698 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2700 # check any missing OSes
2701 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2702 _ErrorIf(missing, constants.CV_ENODEOS, node,
2703 "OSes present on reference node %s but missing on this node: %s",
2704 base.name, utils.CommaJoin(missing))
2706 def _VerifyOob(self, ninfo, nresult):
2707 """Verifies out of band functionality of a node.
2709 @type ninfo: L{objects.Node}
2710 @param ninfo: the node to check
2711 @param nresult: the remote results for the node
2713 """
2714 node = ninfo.name
2715 # We just have to verify the paths on master and/or master candidates
2716 # as the oob helper is invoked on the master
2717 if ((ninfo.master_candidate or ninfo.master_capable) and
2718 constants.NV_OOB_PATHS in nresult):
2719 for path_result in nresult[constants.NV_OOB_PATHS]:
2720 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2722 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2723 """Verifies and updates the node volume data.
2725 This function will update a L{NodeImage}'s internal structures
2726 with data from the remote call.
2728 @type ninfo: L{objects.Node}
2729 @param ninfo: the node to check
2730 @param nresult: the remote results for the node
2731 @param nimg: the node image object
2732 @param vg_name: the configured VG name
2734 """
2735 node = ninfo.name
2736 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2738 nimg.lvm_fail = True
2739 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2740 if vg_name is None:
2741 pass
2742 elif isinstance(lvdata, basestring):
2743 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2744 utils.SafeEncode(lvdata))
2745 elif not isinstance(lvdata, dict):
2746 _ErrorIf(True, constants.CV_ENODELVM, node,
2747 "rpc call to node failed (lvlist)")
2748 else:
2749 nimg.volumes = lvdata
2750 nimg.lvm_fail = False
2752 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2753 """Verifies and updates the node instance list.
2755 If the listing was successful, then updates this node's instance
2756 list. Otherwise, it marks the RPC call as failed for the instance
2757 list.
2759 @type ninfo: L{objects.Node}
2760 @param ninfo: the node to check
2761 @param nresult: the remote results for the node
2762 @param nimg: the node image object
2764 """
2765 idata = nresult.get(constants.NV_INSTANCELIST, None)
2766 test = not isinstance(idata, list)
2767 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2768 "rpc call to node failed (instancelist): %s",
2769 utils.SafeEncode(str(idata)))
2770 if test:
2771 nimg.hyp_fail = True
2772 else:
2773 nimg.instances = idata
2775 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2776 """Verifies and computes a node information map
2778 @type ninfo: L{objects.Node}
2779 @param ninfo: the node to check
2780 @param nresult: the remote results for the node
2781 @param nimg: the node image object
2782 @param vg_name: the configured VG name
2784 """
2785 node = ninfo.name
2786 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2788 # try to read free memory (from the hypervisor)
2789 hv_info = nresult.get(constants.NV_HVINFO, None)
2790 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2791 _ErrorIf(test, constants.CV_ENODEHV, node,
2792 "rpc call to node failed (hvinfo)")
2793 if not test:
2794 try:
2795 nimg.mfree = int(hv_info["memory_free"])
2796 except (ValueError, TypeError):
2797 _ErrorIf(True, constants.CV_ENODERPC, node,
2798 "node returned invalid nodeinfo, check hypervisor")
2800 # FIXME: devise a free space model for file based instances as well
2801 if vg_name is not None:
2802 test = (constants.NV_VGLIST not in nresult or
2803 vg_name not in nresult[constants.NV_VGLIST])
2804 _ErrorIf(test, constants.CV_ENODELVM, node,
2805 "node didn't return data for the volume group '%s'"
2806 " - it is either missing or broken", vg_name)
2807 if not test:
2808 try:
2809 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2810 except (ValueError, TypeError):
2811 _ErrorIf(True, constants.CV_ENODERPC, node,
2812 "node returned invalid LVM info, check LVM status")
2814 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2815 """Gets per-disk status information for all instances.
2817 @type nodelist: list of strings
2818 @param nodelist: Node names
2819 @type node_image: dict of (name, L{objects.Node})
2820 @param node_image: Node objects
2821 @type instanceinfo: dict of (name, L{objects.Instance})
2822 @param instanceinfo: Instance objects
2823 @rtype: {instance: {node: [(success, payload)]}}
2824 @return: a dictionary of per-instance dictionaries with nodes as
2825 keys and disk information as values; the disk information is a
2826 list of tuples (success, payload)
2828 """
2829 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2831 node_disks = {}
2832 node_disks_devonly = {}
2833 diskless_instances = set()
2834 diskless = constants.DT_DISKLESS
2836 for nname in nodelist:
2837 node_instances = list(itertools.chain(node_image[nname].pinst,
2838 node_image[nname].sinst))
2839 diskless_instances.update(inst for inst in node_instances
2840 if instanceinfo[inst].disk_template == diskless)
2841 disks = [(inst, disk)
2842 for inst in node_instances
2843 for disk in instanceinfo[inst].disks]
2845 if not disks:
2846 # No need to collect data
2847 continue
2849 node_disks[nname] = disks
2851 # Creating copies as SetDiskID below will modify the objects and that can
2852 # lead to incorrect data returned from nodes
2853 devonly = [dev.Copy() for (_, dev) in disks]
2855 for dev in devonly:
2856 self.cfg.SetDiskID(dev, nname)
2858 node_disks_devonly[nname] = devonly
2860 assert len(node_disks) == len(node_disks_devonly)
2862 # Collect data from all nodes with disks
2863 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2864 node_disks_devonly)
2866 assert len(result) == len(node_disks)
2868 instdisk = {}
2870 for (nname, nres) in result.items():
2871 disks = node_disks[nname]
2873 if nres.offline:
2874 # No data from this node
2875 data = len(disks) * [(False, "node offline")]
2876 else:
2877 msg = nres.fail_msg
2878 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2879 "while getting disk information: %s", msg)
2880 if msg:
2881 # No data from this node
2882 data = len(disks) * [(False, msg)]
2883 else:
2884 data = []
2885 for idx, i in enumerate(nres.payload):
2886 if isinstance(i, (tuple, list)) and len(i) == 2:
2887 data.append(i)
2888 else:
2889 logging.warning("Invalid result from node %s, entry %d: %s",
2891 data.append((False, "Invalid result from the remote node"))
2893 for ((inst, _), status) in zip(disks, data):
2894 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2896 # Add empty entries for diskless instances.
2897 for inst in diskless_instances:
2898 assert inst not in instdisk
2899 instdisk[inst] = {}
2901 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2902 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2903 compat.all(isinstance(s, (tuple, list)) and
2904 len(s) == 2 for s in statuses)
2905 for inst, nnames in instdisk.items()
2906 for nname, statuses in nnames.items())
2907 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2909 return instdisk
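# Editor's illustration of the return value (hypothetical names), matching
# the docstring above:
#   {"inst1.example.com": {"node1": [(True, st0), (True, st1)],
#                          "node2": [(True, st0), (True, st1)]}}
# i.e. one (success, payload) pair per disk index on each instance node,
# and an empty dict for diskless instances.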
2911 @staticmethod
2912 def _SshNodeSelector(group_uuid, all_nodes):
2913 """Create endless iterators for all potential SSH check hosts.
2916 nodes = [node for node in all_nodes
2917 if (node.group != group_uuid and
2918 not node.offline)]
2919 keyfunc = operator.attrgetter("group")
2921 return map(itertools.cycle,
2922 [sorted(map(operator.attrgetter("name"), names))
2923 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2924 keyfunc)])
2926 @classmethod
2927 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2928 """Choose which nodes should talk to which other nodes.
2930 We will make nodes contact all nodes in their group, and one node from
2931 every other group.
2933 @warning: This algorithm has a known issue if one node group is much
2934 smaller than others (e.g. just one node). In such a case all other
2935 nodes will talk to the single node.
2937 """
2938 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2939 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2941 return (online_nodes,
2942 dict((name, sorted([i.next() for i in sel]))
2943 for name in online_nodes))
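# Editor's illustration (hypothetical groups): with other groups g2=[C]
# and g3=[D, E], and online group nodes A and B, the result is roughly
#   (["A", "B"], {"A": ["C", "D"], "B": ["C", "E"]})
# i.e. each online node is told to contact one node of every other group,
# and the shared cycles spread that load across the foreign groups.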
2945 def BuildHooksEnv(self):
2946 """Build hooks env.
2948 Cluster-Verify hooks are run in the post phase only; their failure is
2949 logged in the verify output and makes the verification fail.
2951 """
2952 env = {
2953 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2956 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2957 for node in self.my_node_info.values())
2959 return env
2961 def BuildHooksNodes(self):
2962 """Build hooks nodes.
2965 return ([], self.my_node_names)
2967 def Exec(self, feedback_fn):
2968 """Verify integrity of the node group, performing various test on nodes.
2971 # This method has too many local variables. pylint: disable=R0914
2972 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2974 if not self.my_node_names:
2976 feedback_fn("* Empty node group, skipping verification")
2979 self.bad = False
2980 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2981 verbose = self.op.verbose
2982 self._feedback_fn = feedback_fn
2984 vg_name = self.cfg.GetVGName()
2985 drbd_helper = self.cfg.GetDRBDHelper()
2986 cluster = self.cfg.GetClusterInfo()
2987 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2988 hypervisors = cluster.enabled_hypervisors
2989 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2991 i_non_redundant = [] # Non redundant instances
2992 i_non_a_balanced = [] # Non auto-balanced instances
2993 i_offline = 0 # Count of offline instances
2994 n_offline = 0 # Count of offline nodes
2995 n_drained = 0 # Count of nodes being drained
2996 node_vol_should = {}
2998 # FIXME: verify OS list
3001 filemap = _ComputeAncillaryFiles(cluster, False)
3003 # do local checksums
3004 master_node = self.master_node = self.cfg.GetMasterNode()
3005 master_ip = self.cfg.GetMasterIP()
3007 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3009 user_scripts = []
3010 if self.cfg.GetUseExternalMipScript():
3011 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3013 node_verify_param = {
3014 constants.NV_FILELIST:
3015 utils.UniqueSequence(filename
3016 for files in filemap
3017 for filename in files),
3018 constants.NV_NODELIST:
3019 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3020 self.all_node_info.values()),
3021 constants.NV_HYPERVISOR: hypervisors,
3022 constants.NV_HVPARAMS:
3023 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3024 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3025 for node in node_data_list
3026 if not node.offline],
3027 constants.NV_INSTANCELIST: hypervisors,
3028 constants.NV_VERSION: None,
3029 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3030 constants.NV_NODESETUP: None,
3031 constants.NV_TIME: None,
3032 constants.NV_MASTERIP: (master_node, master_ip),
3033 constants.NV_OSLIST: None,
3034 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3035 constants.NV_USERSCRIPTS: user_scripts,
3036 }
3038 if vg_name is not None:
3039 node_verify_param[constants.NV_VGLIST] = None
3040 node_verify_param[constants.NV_LVLIST] = vg_name
3041 node_verify_param[constants.NV_PVLIST] = [vg_name]
3042 node_verify_param[constants.NV_DRBDLIST] = None
3044 if drbd_helper:
3045 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3048 # FIXME: this needs to be changed per node-group, not cluster-wide
3049 bridges = set()
3050 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3051 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3052 bridges.add(default_nicpp[constants.NIC_LINK])
3053 for instance in self.my_inst_info.values():
3054 for nic in instance.nics:
3055 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3056 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3057 bridges.add(full_nic[constants.NIC_LINK])
3059 if bridges:
3060 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3062 # Build our expected cluster state
3063 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3064 name=node.name,
3065 vm_capable=node.vm_capable))
3066 for node in node_data_list)
3068 # Gather OOB paths
3069 oob_paths = []
3070 for node in self.all_node_info.values():
3071 path = _SupportsOob(self.cfg, node)
3072 if path and path not in oob_paths:
3073 oob_paths.append(path)
3075 if oob_paths:
3076 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3078 for instance in self.my_inst_names:
3079 inst_config = self.my_inst_info[instance]
3081 for nname in inst_config.all_nodes:
3082 if nname not in node_image:
3083 gnode = self.NodeImage(name=nname)
3084 gnode.ghost = (nname not in self.all_node_info)
3085 node_image[nname] = gnode
3087 inst_config.MapLVsByNode(node_vol_should)
3089 pnode = inst_config.primary_node
3090 node_image[pnode].pinst.append(instance)
3092 for snode in inst_config.secondary_nodes:
3093 nimg = node_image[snode]
3094 nimg.sinst.append(instance)
3095 if pnode not in nimg.sbp:
3096 nimg.sbp[pnode] = []
3097 nimg.sbp[pnode].append(instance)
3099 # At this point, we have the in-memory data structures complete,
3100 # except for the runtime information, which we'll gather next
3102 # Due to the way our RPC system works, exact response times cannot be
3103 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3104 # time before and after executing the request, we can at least have a time
3105 # window.
3106 nvinfo_starttime = time.time()
3107 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3108 node_verify_param,
3109 self.cfg.GetClusterName())
3110 nvinfo_endtime = time.time()
3112 if self.extra_lv_nodes and vg_name is not None:
3113 extra_lv_nvinfo = \
3114 self.rpc.call_node_verify(self.extra_lv_nodes,
3115 {constants.NV_LVLIST: vg_name},
3116 self.cfg.GetClusterName())
3117 else:
3118 extra_lv_nvinfo = {}
3120 all_drbd_map = self.cfg.ComputeDRBDMap()
3122 feedback_fn("* Gathering disk information (%s nodes)" %
3123 len(self.my_node_names))
3124 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3125 self.my_inst_info)
3127 feedback_fn("* Verifying configuration file consistency")
3129 # If not all nodes are being checked, we need to make sure the master node
3130 # and a non-checked vm_capable node are in the list.
3131 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3132 if absent_nodes:
3133 vf_nvinfo = all_nvinfo.copy()
3134 vf_node_info = list(self.my_node_info.values())
3135 additional_nodes = []
3136 if master_node not in self.my_node_info:
3137 additional_nodes.append(master_node)
3138 vf_node_info.append(self.all_node_info[master_node])
3139 # Add the first vm_capable node we find which is not included
3140 for node in absent_nodes:
3141 nodeinfo = self.all_node_info[node]
3142 if nodeinfo.vm_capable and not nodeinfo.offline:
3143 additional_nodes.append(node)
3144 vf_node_info.append(self.all_node_info[node])
3146 key = constants.NV_FILELIST
3147 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3148 {key: node_verify_param[key]},
3149 self.cfg.GetClusterName()))
3150 else:
3151 vf_nvinfo = all_nvinfo
3152 vf_node_info = self.my_node_info.values()
3154 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3156 feedback_fn("* Verifying node status")
3158 refos_img = None
3160 for node_i in node_data_list:
3161 node = node_i.name
3162 nimg = node_image[node]
3166 feedback_fn("* Skipping offline node %s" % (node,))
3170 if node == master_node:
3171 ntype = "master"
3172 elif node_i.master_candidate:
3173 ntype = "master candidate"
3174 elif node_i.drained:
3175 ntype = "drained"
3176 n_drained += 1
3177 else:
3178 ntype = "regular"
3179 if verbose:
3180 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3182 msg = all_nvinfo[node].fail_msg
3183 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3186 nimg.rpc_fail = True
3187 continue
3189 nresult = all_nvinfo[node].payload
3191 nimg.call_ok = self._VerifyNode(node_i, nresult)
3192 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3193 self._VerifyNodeNetwork(node_i, nresult)
3194 self._VerifyNodeUserScripts(node_i, nresult)
3195 self._VerifyOob(node_i, nresult)
3197 if nimg.vm_capable:
3198 self._VerifyNodeLVM(node_i, nresult, vg_name)
3199 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3200 all_drbd_map)
3202 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3203 self._UpdateNodeInstances(node_i, nresult, nimg)
3204 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3205 self._UpdateNodeOS(node_i, nresult, nimg)
3207 if not nimg.os_fail:
3208 if refos_img is None:
3209 refos_img = nimg
3210 self._VerifyNodeOS(node_i, nimg, refos_img)
3211 self._VerifyNodeBridges(node_i, nresult, bridges)
3213 # Check whether all running instances are primary for the node. (This
3214 # can no longer be done from _VerifyInstance below, since some of the
3215 # wrong instances could be from other node groups.)
3216 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3218 for inst in non_primary_inst:
3219 # FIXME: investigate best way to handle offline insts
3220 if inst.admin_state == constants.ADMINST_OFFLINE:
3222 feedback_fn("* Skipping offline instance %s" % inst.name)
3225 test = inst in self.all_inst_info
3226 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3227 "instance should not run on node %s", node_i.name)
3228 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3229 "node is running unknown instance %s", inst)
3231 for node, result in extra_lv_nvinfo.items():
3232 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3233 node_image[node], vg_name)
3235 feedback_fn("* Verifying instance status")
3236 for instance in self.my_inst_names:
3238 feedback_fn("* Verifying instance %s" % instance)
3239 inst_config = self.my_inst_info[instance]
3240 self._VerifyInstance(instance, inst_config, node_image,
3241 instdisk[instance])
3242 inst_nodes_offline = []
3244 pnode = inst_config.primary_node
3245 pnode_img = node_image[pnode]
3246 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3247 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3248 " primary node failed", instance)
3250 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3251 pnode_img.offline,
3252 constants.CV_EINSTANCEBADNODE, instance,
3253 "instance is marked as running and lives on offline node %s",
3254 inst_config.primary_node)
3256 # If the instance is non-redundant we cannot survive losing its primary
3257 # node, so we are not N+1 compliant. On the other hand we have no disk
3258 # templates with more than one secondary so that situation is not well
3259 # supported either.
3260 # FIXME: does not support file-backed instances
3261 if not inst_config.secondary_nodes:
3262 i_non_redundant.append(instance)
3264 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3265 constants.CV_EINSTANCELAYOUT,
3266 instance, "instance has multiple secondary nodes: %s",
3267 utils.CommaJoin(inst_config.secondary_nodes),
3268 code=self.ETYPE_WARNING)
3270 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3271 pnode = inst_config.primary_node
3272 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3273 instance_groups = {}
3275 for node in instance_nodes:
3276 instance_groups.setdefault(self.all_node_info[node].group,
3277 []).append(node)
3279 pretty_list = [
3280 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3281 # Sort so that we always list the primary node first.
3282 for group, nodes in sorted(instance_groups.items(),
3283 key=lambda (_, nodes): pnode in nodes,
3284 reverse=True)]
3286 self._ErrorIf(len(instance_groups) > 1,
3287 constants.CV_EINSTANCESPLITGROUPS,
3288 instance, "instance has primary and secondary nodes in"
3289 " different groups: %s", utils.CommaJoin(pretty_list),
3290 code=self.ETYPE_WARNING)
3292 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3293 i_non_a_balanced.append(instance)
3295 for snode in inst_config.secondary_nodes:
3296 s_img = node_image[snode]
3297 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3298 snode, "instance %s, connection to secondary node failed",
3302 inst_nodes_offline.append(snode)
3304 # warn that the instance lives on offline nodes
3305 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3306 "instance has offline secondary node(s) %s",
3307 utils.CommaJoin(inst_nodes_offline))
3308 # ... or ghost/non-vm_capable nodes
3309 for node in inst_config.all_nodes:
3310 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3311 instance, "instance lives on ghost node %s", node)
3312 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3313 instance, "instance lives on non-vm_capable node %s", node)
3315 feedback_fn("* Verifying orphan volumes")
3316 reserved = utils.FieldSet(*cluster.reserved_lvs)
3318 # We will get spurious "unknown volume" warnings if any node of this group
3319 # is secondary for an instance whose primary is in another group. To avoid
3320 # them, we find these instances and add their volumes to node_vol_should.
3321 for inst in self.all_inst_info.values():
3322 for secondary in inst.secondary_nodes:
3323 if (secondary in self.my_node_info
3324 and inst.name not in self.my_inst_info):
3325 inst.MapLVsByNode(node_vol_should)
3328 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3330 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3331 feedback_fn("* Verifying N+1 Memory redundancy")
3332 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3334 feedback_fn("* Other Notes")
3336 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3337 % len(i_non_redundant))
3339 if i_non_a_balanced:
3340 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3341 % len(i_non_a_balanced))
3344 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3347 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3350 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3354 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3355 """Analyze the post-hooks' result
3357 This method analyses the hook result, handles it, and sends some
3358 nicely-formatted feedback back to the user.
3360 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3361 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3362 @param hooks_results: the results of the multi-node hooks rpc call
3363 @param feedback_fn: function used to send feedback back to the caller
3364 @param lu_result: previous Exec result
3365 @return: the new Exec result, based on the previous result
3367 """
3369 # We only really run POST phase hooks, only for non-empty groups,
3370 # and are only interested in their results
3371 if not self.my_node_names:
3372 # empty node group
3373 pass
3374 elif phase == constants.HOOKS_PHASE_POST:
3375 # Used to change hooks' output to proper indentation
3376 feedback_fn("* Hooks Results")
3377 assert hooks_results, "invalid result from hooks"
3379 for node_name in hooks_results:
3380 res = hooks_results[node_name]
3381 msg = res.fail_msg
3382 test = msg and not res.offline
3383 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3384 "Communication failure in hooks execution: %s", msg)
3385 if res.offline or msg:
3386 # No need to investigate payload if node is offline or gave
3387 # an error.
3388 continue
3389 for script, hkr, output in res.payload:
3390 test = hkr == constants.HKR_FAIL
3391 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3392 "Script %s failed, output:", script)
3394 output = self._HOOKS_INDENT_RE.sub(" ", output)
3395 feedback_fn("%s" % output)
3401 class LUClusterVerifyDisks(NoHooksLU):
3402 """Verifies the cluster disks status.
3407 def ExpandNames(self):
3408 self.share_locks = _ShareAll()
3409 self.needed_locks = {
3410 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3411 }
3413 def Exec(self, feedback_fn):
3414 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3416 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3417 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3418 for group in group_names])
3421 class LUGroupVerifyDisks(NoHooksLU):
3422 """Verifies the status of all disks in a node group.
3427 def ExpandNames(self):
3428 # Raises errors.OpPrereqError on its own if group can't be found
3429 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3431 self.share_locks = _ShareAll()
3432 self.needed_locks = {
3433 locking.LEVEL_INSTANCE: [],
3434 locking.LEVEL_NODEGROUP: [],
3435 locking.LEVEL_NODE: [],
3436 }
3438 def DeclareLocks(self, level):
3439 if level == locking.LEVEL_INSTANCE:
3440 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3442 # Lock instances optimistically, needs verification once node and group
3443 # locks have been acquired
3444 self.needed_locks[locking.LEVEL_INSTANCE] = \
3445 self.cfg.GetNodeGroupInstances(self.group_uuid)
3447 elif level == locking.LEVEL_NODEGROUP:
3448 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3450 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3451 set([self.group_uuid] +
3452 # Lock all groups used by instances optimistically; this requires
3453 # going via the node before it's locked, requiring verification
3454 # later on
3455 [group_uuid
3456 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3457 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3459 elif level == locking.LEVEL_NODE:
3460 # This will only lock the nodes in the group to be verified which contain
3461 # actual instances
3462 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3463 self._LockInstancesNodes()
3465 # Lock all nodes in group to be verified
3466 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3467 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3468 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3470 def CheckPrereq(self):
3471 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3472 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3473 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3475 assert self.group_uuid in owned_groups
3477 # Check if locked instances are still correct
3478 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3480 # Get instance information
3481 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3483 # Check if node groups for locked instances are still correct
3484 for (instance_name, inst) in self.instances.items():
3485 assert owned_nodes.issuperset(inst.all_nodes), \
3486 "Instance %s's nodes changed while we kept the lock" % instance_name
3488 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3489 owned_groups)
3491 assert self.group_uuid in inst_groups, \
3492 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3494 def Exec(self, feedback_fn):
3495 """Verify integrity of cluster disks.
3497 @rtype: tuple of three items
3498 @return: a tuple of (dict of node-to-node_error, list of instances
3499 which need activate-disks, dict of instance: (node, volume) for
3500 missing volumes
3502 """
3503 res_nodes = {}
3504 res_instances = set()
3505 res_missing = {}
3507 nv_dict = _MapInstanceDisksToNodes([inst
3508 for inst in self.instances.values()
3509 if inst.admin_state == constants.ADMINST_UP])
3511 if nv_dict:
3512 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3513 set(self.cfg.GetVmCapableNodeList()))
3515 node_lvs = self.rpc.call_lv_list(nodes, [])
3517 for (node, node_res) in node_lvs.items():
3518 if node_res.offline:
3519 continue
3521 msg = node_res.fail_msg
3523 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3524 res_nodes[node] = msg
3527 for lv_name, (_, _, lv_online) in node_res.payload.items():
3528 inst = nv_dict.pop((node, lv_name), None)
3529 if not (lv_online or inst is None):
3530 res_instances.add(inst)
3532 # any leftover items in nv_dict are missing LVs, let's arrange the data
3533 # better
3534 for key, inst in nv_dict.iteritems():
3535 res_missing.setdefault(inst, []).append(list(key))
3537 return (res_nodes, list(res_instances), res_missing)
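# Editor's illustration of the result (hypothetical values):
#   ({"node3": "Error enumerating LVs ..."},            # node errors
#    ["inst1.example.com"],                             # need activate-disks
#    {"inst2.example.com": [["node1", "xenvg/disk0"]]}) # missing volumes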
3540 class LUClusterRepairDiskSizes(NoHooksLU):
3541 """Verifies the cluster disks sizes.
3546 def ExpandNames(self):
3547 if self.op.instances:
3548 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3549 self.needed_locks = {
3550 locking.LEVEL_NODE_RES: [],
3551 locking.LEVEL_INSTANCE: self.wanted_names,
3552 }
3553 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3554 else:
3555 self.wanted_names = None
3556 self.needed_locks = {
3557 locking.LEVEL_NODE_RES: locking.ALL_SET,
3558 locking.LEVEL_INSTANCE: locking.ALL_SET,
3559 }
3560 self.share_locks = {
3561 locking.LEVEL_NODE_RES: 1,
3562 locking.LEVEL_INSTANCE: 0,
3563 }
3565 def DeclareLocks(self, level):
3566 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3567 self._LockInstancesNodes(primary_only=True, level=level)
3569 def CheckPrereq(self):
3570 """Check prerequisites.
3572 This only checks the optional instance list against the existing names.
3574 """
3575 if self.wanted_names is None:
3576 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3578 self.wanted_instances = \
3579 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3581 def _EnsureChildSizes(self, disk):
3582 """Ensure children of the disk have the needed disk size.
3584 This is valid mainly for DRBD8 and fixes an issue where the
3585 children have smaller disk size.
3587 @param disk: an L{ganeti.objects.Disk} object
3589 """
3590 if disk.dev_type == constants.LD_DRBD8:
3591 assert disk.children, "Empty children for DRBD8?"
3592 fchild = disk.children[0]
3593 mismatch = fchild.size < disk.size
3595 self.LogInfo("Child disk has size %d, parent %d, fixing",
3596 fchild.size, disk.size)
3597 fchild.size = disk.size
3599 # and we recurse on this child only, not on the metadev
3600 return self._EnsureChildSizes(fchild) or mismatch
3601 else:
3602 return False
3604 def Exec(self, feedback_fn):
3605 """Verify the size of cluster disks.
3608 # TODO: check child disks too
3609 # TODO: check differences in size between primary/secondary nodes
3610 per_node_disks = {}
3611 for instance in self.wanted_instances:
3612 pnode = instance.primary_node
3613 if pnode not in per_node_disks:
3614 per_node_disks[pnode] = []
3615 for idx, disk in enumerate(instance.disks):
3616 per_node_disks[pnode].append((instance, idx, disk))
3618 assert not (frozenset(per_node_disks.keys()) -
3619 self.owned_locks(locking.LEVEL_NODE_RES)), \
3620 "Not owning correct locks"
3621 assert not self.owned_locks(locking.LEVEL_NODE)
3623 changed = []
3624 for node, dskl in per_node_disks.items():
3625 newl = [v[2].Copy() for v in dskl]
3626 for dsk in newl:
3627 self.cfg.SetDiskID(dsk, node)
3628 result = self.rpc.call_blockdev_getsize(node, newl)
3629 if result.fail_msg:
3630 self.LogWarning("Failure in blockdev_getsize call to node"
3631 " %s, ignoring", node)
3633 if len(result.payload) != len(dskl):
3634 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3635 " result.payload=%s", node, len(dskl), result.payload)
3636 self.LogWarning("Invalid result from node %s, ignoring node results",
3639 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3640 if size is None:
3641 self.LogWarning("Disk %d of instance %s did not return size"
3642 " information, ignoring", idx, instance.name)
3644 if not isinstance(size, (int, long)):
3645 self.LogWarning("Disk %d of instance %s did not return valid"
3646 " size information, ignoring", idx, instance.name)
3649 if size != disk.size:
3650 self.LogInfo("Disk %d of instance %s has mismatched size,"
3651 " correcting: recorded %d, actual %d", idx,
3652 instance.name, disk.size, size)
3653 disk.size = size
3654 self.cfg.Update(instance, feedback_fn)
3655 changed.append((instance.name, idx, size))
3656 if self._EnsureChildSizes(disk):
3657 self.cfg.Update(instance, feedback_fn)
3658 changed.append((instance.name, idx, disk.size))
3660 return changed
3662 class LUClusterRename(LogicalUnit):
3663 """Rename the cluster.
3666 HPATH = "cluster-rename"
3667 HTYPE = constants.HTYPE_CLUSTER
3669 def BuildHooksEnv(self):
3670 """Build hooks env.
3672 """
3673 return {
3674 "OP_TARGET": self.cfg.GetClusterName(),
3675 "NEW_NAME": self.op.name,
3678 def BuildHooksNodes(self):
3679 """Build hooks nodes.
3682 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3684 def CheckPrereq(self):
3685 """Verify that the passed name is a valid one.
3688 hostname = netutils.GetHostname(name=self.op.name,
3689 family=self.cfg.GetPrimaryIPFamily())
3691 new_name = hostname.name
3692 self.ip = new_ip = hostname.ip
3693 old_name = self.cfg.GetClusterName()
3694 old_ip = self.cfg.GetMasterIP()
3695 if new_name == old_name and new_ip == old_ip:
3696 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3697 " cluster has changed",
3699 if new_ip != old_ip:
3700 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3701 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3702 " reachable on the network" %
3703 new_ip, errors.ECODE_NOTUNIQUE)
3705 self.op.name = new_name
3707 def Exec(self, feedback_fn):
3708 """Rename the cluster.
3711 clustername = self.op.name
3712 new_ip = self.ip
3714 # shutdown the master IP
3715 master_params = self.cfg.GetMasterNetworkParameters()
3716 ems = self.cfg.GetUseExternalMipScript()
3717 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3718 master_params, ems)
3719 result.Raise("Could not disable the master role")
3722 cluster = self.cfg.GetClusterInfo()
3723 cluster.cluster_name = clustername
3724 cluster.master_ip = new_ip
3725 self.cfg.Update(cluster, feedback_fn)
3727 # update the known hosts file
3728 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3729 node_list = self.cfg.GetOnlineNodeList()
3731 node_list.remove(master_params.name)
3734 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3736 master_params.ip = new_ip
3737 result = self.rpc.call_node_activate_master_ip(master_params.name,
3739 msg = result.fail_msg
3741 self.LogWarning("Could not re-enable the master role on"
3742 " the master, please restart manually: %s", msg)
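
# A minimal sketch (hypothetical callables) of the shutdown/update/restart
# pattern used by LUClusterRename.Exec above: the master IP must stay down
# while the new name/IP is written, and must be brought back up even if the
# configuration update fails.
def _ExampleRenamePattern(deactivate_fn, update_fn, activate_fn):
  deactivate_fn()
  try:
    update_fn()
  finally:
    # always attempt to restore the master IP, even on update failure
    activate_fn()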
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
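
# Illustrative sketch only; the real check lives in netutils and its exact
# bounds may differ. Validating a CIDR netmask amounts to a range test on the
# prefix length against the maximum for the address family:
def _ExampleValidateNetmask(netmask, ipv6=False):
  max_prefix = 128 if ipv6 else 32
  return isinstance(netmask, int) and 0 < netmask <= max_prefix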
class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 1,
      locking.LEVEL_NODEGROUP: 1,
      }
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)
3880 self.cluster = cluster = self.cfg.GetClusterInfo()
3881 # validate params changes
3882 if self.op.beparams:
3883 objects.UpgradeBeParams(self.op.beparams)
3884 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3885 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3887 if self.op.ndparams:
3888 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3889 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3891 # TODO: we need a more general way to handle resetting
3892 # cluster-level parameters to default values
3893 if self.new_ndparams["oob_program"] == "":
3894 self.new_ndparams["oob_program"] = \
3895 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3897 if self.op.hv_state:
3898 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3899 self.cluster.hv_state_static)
3900 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3901 for hv, values in new_hv_state.items())
3903 if self.op.disk_state:
3904 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3905 self.cluster.disk_state_static)
3906 self.new_disk_state = \
3907 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3908 for name, values in svalues.items()))
3909 for storage, svalues in new_disk_state.items())
    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)

      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
      for group in self.cfg.GetAllNodeGroupsInfo().values():
        instances = frozenset([inst for inst in all_instances
                               if compat.any(node in group.members
                                             for node in inst.all_nodes)])
        new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
        new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                                                   group),
                                            new_ipolicy, instances)
        violations.update(new)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(violations))
    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))
    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)
    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)
        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])
    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
4026 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4027 # either the enabled list has changed, or the parameters have, validate
4028 for hv_name, hv_params in self.new_hvparams.items():
4029 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4030 (self.op.enabled_hypervisors and
4031 hv_name in self.op.enabled_hypervisors)):
4032 # either this is a new hypervisor, or its parameters have changed
4033 hv_class = hypervisor.GetHypervisor(hv_name)
4034 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4035 hv_class.CheckParameterSyntax(hv_params)
4036 _CheckHVParams(self, node_list, hv_name, hv_params)
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)
    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.X_OK)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)
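
  # Illustrative note, not part of the LU: the SimpleFill*/FillDict calls in
  # CheckPrereq above all implement the same overlay, roughly
  #
  #   ret = defaults.copy()
  #   ret.update(custom)
  #
  # so a submitted value always overrides the cluster default, while omitted
  # keys keep their current defaults.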
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
4086 if self.op.enabled_hypervisors is not None:
4087 self.cluster.hvparams = self.new_hvparams
4088 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4089 if self.op.beparams:
4090 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4091 if self.op.nicparams:
4092 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
4095 if self.op.osparams:
4096 self.cluster.osparams = self.new_osp
4097 if self.op.ndparams:
4098 self.cluster.ndparams = self.new_ndparams
4099 if self.op.diskparams:
4100 self.cluster.diskparams = self.new_diskparams
4101 if self.op.hv_state:
4102 self.cluster.hv_state_static = self.new_hv_state
4103 if self.op.disk_state:
4104 self.cluster.disk_state_static = self.new_disk_state
4106 if self.op.candidate_pool_size is not None:
4107 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4108 # we need to update the pool size here, otherwise the save will fail
4109 _AdjustCandidatePool(self, [])
4111 if self.op.maintain_node_health is not None:
4112 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4113 feedback_fn("Note: CONFD was disabled at build time, node health"
4114 " maintenance is not useful (still enabling it)")
4115 self.cluster.maintain_node_health = self.op.maintain_node_health
4117 if self.op.prealloc_wipe_disks is not None:
4118 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4120 if self.op.add_uids is not None:
4121 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4123 if self.op.remove_uids is not None:
4124 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4126 if self.op.uid_pool is not None:
4127 self.cluster.uid_pool = self.op.uid_pool
4129 if self.op.default_iallocator is not None:
4130 self.cluster.default_iallocator = self.op.default_iallocator
4132 if self.op.reserved_lvs is not None:
4133 self.cluster.reserved_lvs = self.op.reserved_lvs
4135 if self.op.use_external_mip_script is not None:
4136 self.cluster.use_external_mip_script = self.op.use_external_mip_script
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
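
    # Example (hypothetical values) of the modification lists handled by
    # helper_os() above; each entry is a (DDM_ADD | DDM_REMOVE, name) pair:
    #
    #   [(constants.DDM_ADD, "debootstrap"), (constants.DDM_REMOVE, "old-os")]
    #
    # would append "debootstrap" to, and drop "old-os" from, the target list.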
    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev
    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)
      self.cluster.master_netmask = self.op.master_netmask
4187 self.cfg.Update(self.cluster, feedback_fn)
    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
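
# Illustrative sketch of the disjointness invariant asserted above: the file
# categories do not overlap exactly when the size of their union equals the
# sum of their sizes.
def _ExampleCategoriesAreDisjoint(*filesets):
  union = set()
  for fileset in filesets:
    union |= fileset
  return len(union) == sum(len(fileset) for fileset in filesets)

# _ExampleCategoriesAreDisjoint({"a"}, {"b"}) is True; with {"a"}, {"a"} it
# is False, which would trip the assertion in _ComputeAncillaryFiles.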
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
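
# Usage sketch: redistribution is triggered after configuration changes and
# during node addition, where the new node is not yet part of the config:
#
#   _RedistributeAncillaryFiles(lu)
#   _RedistributeAncillaryFiles(lu, additional_nodes=[node],
#                               additional_vm=True)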
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
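
# Illustrative sketch (hypothetical poll_fn) of the polling strategy used by
# _WaitForSync above: contact failures are retried a bounded number of times,
# and a "done but degraded" result gets a few short grace retries so that a
# transient degradation is not reported as a stable one.
def _ExamplePollUntilSynced(poll_fn, max_failures=10, grace_retries=10):
  failures = 0
  while True:
    status = poll_fn()  # returns "synced", "degraded" or None on RPC failure
    if status is None:
      failures += 1
      if failures >= max_failures:
        raise RuntimeError("mirror status unavailable")
      time.sleep(6)
    elif status == "degraded" and grace_retries > 0:
      grace_retries -= 1
      time.sleep(1)
    else:
      return status == "synced"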
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
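
# Usage sketch (hypothetical call site): ldisk=True restricts the test to the
# local storage status, while ldisk=False tests the overall is_degraded flag:
#
#   if not _CheckDiskConsistency(self, dev, node, on_primary=True, ldisk=True):
#     raise errors.OpExecError("disk %s is not consistent" % dev.iv_name)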
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)
  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

      if (self.op.command == constants.OOB_POWER_ON and
          idx < len(self.nodes) - 1):
        time.sleep(self.op.power_delay)

    return ret
  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
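
# Example payload shapes (hypothetical values) accepted by _CheckPayload
# above, one per command family:
#
#   health       -> [["disk0", "OK"], ["psu1", "CRITICAL"]]  (list of pairs)
#   power-status -> {"powered": True}                        (a dict)
#   power-on/off/cycle -> None                               (no payload)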
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass
  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os
  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
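
# Example (hypothetical fields/names) of the filter built by _BuildFilter
# above: requesting fields ["name"] for names ["lenny"] yields roughly
#
#   [OP_AND, <name filter for "lenny">,
#            [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#                     [OP_NOT, [OP_TRUE, "blacklisted"]],
#                     [OP_TRUE, "valid"]]]
#
# i.e. hidden, blacklisted and invalid OSes are filtered out unless the
# corresponding field was explicitly requested.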
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass
  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
5013 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5014 self.op.output_fields, self.op.use_locking)
5016 def ExpandNames(self):
5017 self.nq.ExpandNames(self)
5019 def DeclareLocks(self, level):
5020 self.nq.DeclareLocks(self, level)
5022 def Exec(self, feedback_fn):
5023 return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(str(val))

        result.append(out)

    return result
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5575 def Exec(self, feedback_fn):
5576 """Adds the new node to the cluster.
5579 new_node = self.new_node
5580 node = new_node.name
5582 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5585 # We adding a new node so we assume it's powered
5586 new_node.powered = True
5588 # for re-adds, reset the offline/drained/master-candidate flags;
5589 # we need to reset here, otherwise offline would prevent RPC calls
5590 # later in the procedure; this also means that if the re-add
5591 # fails, we are left with a non-offlined, broken node
5593 new_node.drained = new_node.offline = False # pylint: disable=W0201
5594 self.LogInfo("Readding a node, the offline/drained flags were reset")
5595 # if we demote the node, we do cleanup later in the procedure
5596 new_node.master_candidate = self.master_candidate
5597 if self.changed_primary_ip:
5598 new_node.primary_ip = self.op.primary_ip
5600 # copy the master/vm_capable flags
5601 for attr in self._NFLAGS:
5602 setattr(new_node, attr, getattr(self.op, attr))
5604 # notify the user about any possible mc promotion
5605 if new_node.master_candidate:
5606 self.LogInfo("Node will be a master candidate")
5608 if self.op.ndparams:
5609 new_node.ndparams = self.op.ndparams
5611 new_node.ndparams = {}
5613 if self.op.hv_state:
5614 new_node.hv_state_static = self.new_hv_state
5616 if self.op.disk_state:
5617 new_node.disk_state_static = self.new_disk_state
5619 # check connectivity
5620 result = self.rpc.call_version([node])[node]
5621 result.Raise("Can't get version information from node %s" % node)
5622 if constants.PROTOCOL_VERSION == result.payload:
5623 logging.info("Communication to node %s fine, sw version %s match",
5624 node, result.payload)
5626 raise errors.OpExecError("Version mismatch master version %s,"
5627 " node version %s" %
5628 (constants.PROTOCOL_VERSION, result.payload))
5630 # Add node to our /etc/hosts, and add key to known_hosts
5631 if self.cfg.GetClusterInfo().modify_etc_hosts:
5632 master_node = self.cfg.GetMasterNode()
5633 result = self.rpc.call_etc_hosts_modify(master_node,
5634 constants.ETC_HOSTS_ADD,
5637 result.Raise("Can't update hosts file with new host data")
5639 if new_node.secondary_ip != new_node.primary_ip:
5640 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5643 node_verify_list = [self.cfg.GetMasterNode()]
5644 node_verify_param = {
5645 constants.NV_NODELIST: ([node], {}),
5646 # TODO: do a node-net-test as well?
5649 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5650 self.cfg.GetClusterName())
5651 for verifier in node_verify_list:
5652 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5653 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5655 for failed in nl_payload:
5656 feedback_fn("ssh/hostname verification failed"
5657 " (checking from %s): %s" %
5658 (verifier, nl_payload[failed]))
5659 raise errors.OpExecError("ssh/hostname verification failed")
5662 _RedistributeAncillaryFiles(self)
5663 self.context.ReaddNode(new_node)
5664 # make sure we redistribute the config
5665 self.cfg.Update(new_node, feedback_fn)
5666 # and make sure the new node will not have old files around
5667 if not new_node.master_candidate:
5668 result = self.rpc.call_node_demote_from_mc(new_node.name)
5669 msg = result.fail_msg
5670 if msg:
5671 self.LogWarning("Node failed to demote itself from master"
5672 " candidate status: %s" % msg)
5673 else:
5674 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5675 additional_vm=self.op.vm_capable)
5676 self.context.AddNode(new_node, self.proc.GetECId())
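# Illustrative example (hostnames invented): the verification step above
# asks the master to SSH-check only the new node, so a failure comes back
# in the NV_NODELIST payload as a mapping like
#   {"node9.example.com": "ssh verification failed"}
# and each entry is reported via feedback_fn before the add is aborted.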
5679 class LUNodeSetParams(LogicalUnit):
5680 """Modifies the parameters of a node.
5682 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5683 to the node role (as _ROLE_*)
5684 @cvar _R2F: a dictionary from node role to tuples of flags
5685 @cvar _FLAGS: a list of attribute names corresponding to the flags
5687 """
5688 HPATH = "node-modify"
5689 HTYPE = constants.HTYPE_NODE
5690 REQ_BGL = False
5691 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5692 _F2R = {
5693 (True, False, False): _ROLE_CANDIDATE,
5694 (False, True, False): _ROLE_DRAINED,
5695 (False, False, True): _ROLE_OFFLINE,
5696 (False, False, False): _ROLE_REGULAR,
5697 }
5698 _R2F = dict((v, k) for k, v in _F2R.items())
5699 _FLAGS = ["master_candidate", "drained", "offline"]
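# Worked example of the role tables: _F2R[(True, False, False)] is
# _ROLE_CANDIDATE and _R2F[_ROLE_OFFLINE] is (False, False, True); the
# tuple positions line up with _FLAGS, i.e. (master_candidate, drained,
# offline).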
5701 def CheckArguments(self):
5702 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5703 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5704 self.op.master_capable, self.op.vm_capable,
5705 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5706 self.op.disk_state]
5707 if all_mods.count(None) == len(all_mods):
5708 raise errors.OpPrereqError("Please pass at least one modification",
5709 errors.ECODE_INVAL)
5710 if all_mods.count(True) > 1:
5711 raise errors.OpPrereqError("Can't set the node into more than one"
5712 " state at the same time",
5713 errors.ECODE_INVAL)
5715 # Boolean value that tells us whether we might be demoting from MC
5716 self.might_demote = (self.op.master_candidate == False or
5717 self.op.offline == True or
5718 self.op.drained == True or
5719 self.op.master_capable == False)
5721 if self.op.secondary_ip:
5722 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5723 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5724 " address" % self.op.secondary_ip,
5727 self.lock_all = self.op.auto_promote and self.might_demote
5728 self.lock_instances = self.op.secondary_ip is not None
5730 def _InstanceFilter(self, instance):
5731 """Filter for getting affected instances.
5734 return (instance.disk_template in constants.DTS_INT_MIRROR and
5735 self.op.node_name in instance.all_nodes)
5737 def ExpandNames(self):
5738 if self.lock_all:
5739 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5740 else:
5741 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5743 # Since modifying a node can have severe effects on currently running
5744 # operations, the resource lock is at least acquired in shared mode
5745 self.needed_locks[locking.LEVEL_NODE_RES] = \
5746 self.needed_locks[locking.LEVEL_NODE]
5748 # Get node resource and instance locks in shared mode; they are not used
5749 # for anything but read-only access
5750 self.share_locks[locking.LEVEL_NODE_RES] = 1
5751 self.share_locks[locking.LEVEL_INSTANCE] = 1
5753 if self.lock_instances:
5754 self.needed_locks[locking.LEVEL_INSTANCE] = \
5755 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5757 def BuildHooksEnv(self):
5758 """Build hooks env.
5760 This runs on the master node.
5762 """
5763 return {
5764 "OP_TARGET": self.op.node_name,
5765 "MASTER_CANDIDATE": str(self.op.master_candidate),
5766 "OFFLINE": str(self.op.offline),
5767 "DRAINED": str(self.op.drained),
5768 "MASTER_CAPABLE": str(self.op.master_capable),
5769 "VM_CAPABLE": str(self.op.vm_capable),
5770 }
5772 def BuildHooksNodes(self):
5773 """Build hooks nodes.
5775 """
5776 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5777 return (nl, nl)
5779 def CheckPrereq(self):
5780 """Check prerequisites.
5782 This only checks the instance list against the existing names.
5785 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5787 if self.lock_instances:
5788 affected_instances = \
5789 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5791 # Verify instance locks
5792 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5793 wanted_instances = frozenset(affected_instances.keys())
5794 if wanted_instances - owned_instances:
5795 raise errors.OpPrereqError("Instances affected by changing node %s's"
5796 " secondary IP address have changed since"
5797 " locks were acquired, wanted '%s', have"
5798 " '%s'; retry the operation" %
5800 utils.CommaJoin(wanted_instances),
5801 utils.CommaJoin(owned_instances)),
5804 affected_instances = None
5806 if (self.op.master_candidate is not None or
5807 self.op.drained is not None or
5808 self.op.offline is not None):
5809 # we can't change the master's node flags
5810 if self.op.node_name == self.cfg.GetMasterNode():
5811 raise errors.OpPrereqError("The master role can be changed"
5812 " only via master-failover",
5815 if self.op.master_candidate and not node.master_capable:
5816 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5817 " it a master candidate" % node.name,
5820 if self.op.vm_capable == False:
5821 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5823 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5824 " the vm_capable flag" % node.name,
5827 if node.master_candidate and self.might_demote and not self.lock_all:
5828 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5829 # check if after removing the current node, we're missing master
5830 # candidates
5831 (mc_remaining, mc_should, _) = \
5832 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5833 if mc_remaining < mc_should:
5834 raise errors.OpPrereqError("Not enough master candidates, please"
5835 " pass auto promote option to allow"
5836 " promotion", errors.ECODE_STATE)
5838 self.old_flags = old_flags = (node.master_candidate,
5839 node.drained, node.offline)
5840 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5841 self.old_role = old_role = self._F2R[old_flags]
5843 # Check for ineffective changes
5844 for attr in self._FLAGS:
5845 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5846 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5847 setattr(self.op, attr, None)
5849 # Past this point, any flag change to False means a transition
5850 # away from the respective state, as only real changes are kept
5852 # TODO: We might query the real power state if it supports OOB
5853 if _SupportsOob(self.cfg, node):
5854 if self.op.offline is False and not (node.powered or
5855 self.op.powered == True):
5856 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5857 " offline status can be reset") %
5858 self.op.node_name)
5859 elif self.op.powered is not None:
5860 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5861 " as it does not support out-of-band"
5862 " handling") % self.op.node_name)
5864 # If we're being deofflined/drained, we'll MC ourself if needed
5865 if (self.op.drained == False or self.op.offline == False or
5866 (self.op.master_capable and not node.master_capable)):
5867 if _DecideSelfPromotion(self):
5868 self.op.master_candidate = True
5869 self.LogInfo("Auto-promoting node to master candidate")
5871 # If we're no longer master capable, we'll demote ourselves from MC
5872 if self.op.master_capable == False and node.master_candidate:
5873 self.LogInfo("Demoting from master candidate")
5874 self.op.master_candidate = False
5876 # Compute new role
5877 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5878 if self.op.master_candidate:
5879 new_role = self._ROLE_CANDIDATE
5880 elif self.op.drained:
5881 new_role = self._ROLE_DRAINED
5882 elif self.op.offline:
5883 new_role = self._ROLE_OFFLINE
5884 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5885 # False is still in new flags, which means we're un-setting (the
5886 # offline) flag
5887 new_role = self._ROLE_REGULAR
5888 else: # no new flags, nothing, keep old role
5889 new_role = old_role
5891 self.new_role = new_role
5893 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5894 # Trying to transition out of offline status
5895 # TODO: Use standard RPC runner, but make sure it works when the node is
5896 # still marked offline
5897 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5898 if result.fail_msg:
5899 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5900 " to report its version: %s" %
5901 (node.name, result.fail_msg),
5902 errors.ECODE_ENVIRON)
5903 else:
5904 self.LogWarning("Transitioning node from offline to online state"
5905 " without using re-add. Please make sure the node"
5906 " is healthy!")
5908 if self.op.secondary_ip:
5909 # Ok even without locking, because this can't be changed by any LU
5910 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5911 master_singlehomed = master.secondary_ip == master.primary_ip
5912 if master_singlehomed and self.op.secondary_ip:
5913 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5914 " homed cluster", errors.ECODE_INVAL)
5916 assert not (frozenset(affected_instances) -
5917 self.owned_locks(locking.LEVEL_INSTANCE))
5919 if node.offline:
5920 if affected_instances:
5921 raise errors.OpPrereqError("Cannot change secondary IP address:"
5922 " offline node has instances (%s)"
5923 " configured to use it" %
5924 utils.CommaJoin(affected_instances.keys()))
5925 else:
5926 # On online nodes, check that no instances are running, and that
5927 # the node has the new ip and we can reach it.
5928 for instance in affected_instances.values():
5929 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5930 msg="cannot change secondary ip")
5932 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5933 if master.name != node.name:
5934 # check reachability from master secondary ip to new secondary ip
5935 if not netutils.TcpPing(self.op.secondary_ip,
5936 constants.DEFAULT_NODED_PORT,
5937 source=master.secondary_ip):
5938 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5939 " based ping to node daemon port",
5940 errors.ECODE_ENVIRON)
5942 if self.op.ndparams:
5943 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5944 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5945 self.new_ndparams = new_ndparams
5947 if self.op.hv_state:
5948 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5949 self.node.hv_state_static)
5951 if self.op.disk_state:
5952 self.new_disk_state = \
5953 _MergeAndVerifyDiskState(self.op.disk_state,
5954 self.node.disk_state_static)
5956 def Exec(self, feedback_fn):
5957 """Modifies a node.
5959 """
5960 node = self.node
5961 old_role = self.old_role
5962 new_role = self.new_role
5964 result = []
5966 if self.op.ndparams:
5967 node.ndparams = self.new_ndparams
5969 if self.op.powered is not None:
5970 node.powered = self.op.powered
5972 if self.op.hv_state:
5973 node.hv_state_static = self.new_hv_state
5975 if self.op.disk_state:
5976 node.disk_state_static = self.new_disk_state
5978 for attr in ["master_capable", "vm_capable"]:
5979 val = getattr(self.op, attr)
5980 if val is not None:
5981 setattr(node, attr, val)
5982 result.append((attr, str(val)))
5984 if new_role != old_role:
5985 # Tell the node to demote itself, if no longer MC and not offline
5986 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5987 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5988 if msg:
5989 self.LogWarning("Node failed to demote itself: %s", msg)
5991 new_flags = self._R2F[new_role]
5992 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5993 if of != nf:
5994 result.append((desc, str(nf)))
5995 (node.master_candidate, node.drained, node.offline) = new_flags
5997 # we locked all nodes, we adjust the CP before updating this node
5998 if self.lock_all:
5999 _AdjustCandidatePool(self, [node.name])
6001 if self.op.secondary_ip:
6002 node.secondary_ip = self.op.secondary_ip
6003 result.append(("secondary_ip", self.op.secondary_ip))
6005 # this will trigger configuration file update, if needed
6006 self.cfg.Update(node, feedback_fn)
6008 # this will trigger job queue propagation or cleanup if the mc
6009 # flag changed
6010 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6011 self.context.ReaddNode(node)
6013 return result
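# Illustrative example (values invented): the returned change list pairs
# attribute names with their new values, e.g.
#   [("master_candidate", "False"), ("drained", "True")]
# which the caller can render as feedback for the node-modify operation.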
6016 class LUNodePowercycle(NoHooksLU):
6017 """Powercycles a node.
6022 def CheckArguments(self):
6023 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6024 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6025 raise errors.OpPrereqError("The node is the master and the force"
6026 " parameter was not set",
6029 def ExpandNames(self):
6030 """Locking for PowercycleNode.
6032 This is a last-resort option and shouldn't block on other
6033 jobs. Therefore, we grab no locks.
6035 """
6036 self.needed_locks = {}
6038 def Exec(self, feedback_fn):
6039 """Reboot the node.
6041 """
6042 result = self.rpc.call_node_powercycle(self.op.node_name,
6043 self.cfg.GetHypervisorType())
6044 result.Raise("Failed to schedule the reboot")
6045 return result.payload
6048 class LUClusterQuery(NoHooksLU):
6049 """Query cluster configuration.
6051 """
6052 REQ_BGL = False
6054 def ExpandNames(self):
6055 self.needed_locks = {}
6057 def Exec(self, feedback_fn):
6058 """Return cluster config.
6061 cluster = self.cfg.GetClusterInfo()
6064 # Filter just for enabled hypervisors
6065 for os_name, hv_dict in cluster.os_hvp.items():
6066 os_hvp[os_name] = {}
6067 for hv_name, hv_params in hv_dict.items():
6068 if hv_name in cluster.enabled_hypervisors:
6069 os_hvp[os_name][hv_name] = hv_params
6071 # Convert ip_family to ip_version
6072 primary_ip_version = constants.IP4_VERSION
6073 if cluster.primary_ip_family == netutils.IP6Address.family:
6074 primary_ip_version = constants.IP6_VERSION
6076 result = {
6077 "software_version": constants.RELEASE_VERSION,
6078 "protocol_version": constants.PROTOCOL_VERSION,
6079 "config_version": constants.CONFIG_VERSION,
6080 "os_api_version": max(constants.OS_API_VERSIONS),
6081 "export_version": constants.EXPORT_VERSION,
6082 "architecture": (platform.architecture()[0], platform.machine()),
6083 "name": cluster.cluster_name,
6084 "master": cluster.master_node,
6085 "default_hypervisor": cluster.primary_hypervisor,
6086 "enabled_hypervisors": cluster.enabled_hypervisors,
6087 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6088 for hypervisor_name in cluster.enabled_hypervisors]),
6089 "os_hvp": os_hvp,
6090 "beparams": cluster.beparams,
6091 "osparams": cluster.osparams,
6092 "ipolicy": cluster.ipolicy,
6093 "nicparams": cluster.nicparams,
6094 "ndparams": cluster.ndparams,
6095 "candidate_pool_size": cluster.candidate_pool_size,
6096 "master_netdev": cluster.master_netdev,
6097 "master_netmask": cluster.master_netmask,
6098 "use_external_mip_script": cluster.use_external_mip_script,
6099 "volume_group_name": cluster.volume_group_name,
6100 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6101 "file_storage_dir": cluster.file_storage_dir,
6102 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6103 "maintain_node_health": cluster.maintain_node_health,
6104 "ctime": cluster.ctime,
6105 "mtime": cluster.mtime,
6106 "uuid": cluster.uuid,
6107 "tags": list(cluster.GetTags()),
6108 "uid_pool": cluster.uid_pool,
6109 "default_iallocator": cluster.default_iallocator,
6110 "reserved_lvs": cluster.reserved_lvs,
6111 "primary_ip_version": primary_ip_version,
6112 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6113 "hidden_os": cluster.hidden_os,
6114 "blacklisted_os": cluster.blacklisted_os,
6120 class LUClusterConfigQuery(NoHooksLU):
6121 """Return configuration values.
6125 _FIELDS_DYNAMIC = utils.FieldSet()
6126 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6127 "watcher_pause", "volume_group_name")
6129 def CheckArguments(self):
6130 _CheckOutputFields(static=self._FIELDS_STATIC,
6131 dynamic=self._FIELDS_DYNAMIC,
6132 selected=self.op.output_fields)
6134 def ExpandNames(self):
6135 self.needed_locks = {}
6137 def Exec(self, feedback_fn):
6138 """Dump a representation of the cluster config to the standard output.
6142 for field in self.op.output_fields:
6143 if field == "cluster_name":
6144 entry = self.cfg.GetClusterName()
6145 elif field == "master_node":
6146 entry = self.cfg.GetMasterNode()
6147 elif field == "drain_flag":
6148 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6149 elif field == "watcher_pause":
6150 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6151 elif field == "volume_group_name":
6152 entry = self.cfg.GetVGName()
6153 else:
6154 raise errors.ParameterError(field)
6155 values.append(entry)
6157 return values
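# Illustrative example (values invented): the result preserves the order
# of the requested fields, so output_fields=["cluster_name", "drain_flag"]
# yields something like ["cluster.example.com", False].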
6159 class LUInstanceActivateDisks(NoHooksLU):
6160 """Bring up an instance's disks.
6162 """
6163 REQ_BGL = False
6165 def ExpandNames(self):
6166 self._ExpandAndLockInstance()
6167 self.needed_locks[locking.LEVEL_NODE] = []
6168 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6170 def DeclareLocks(self, level):
6171 if level == locking.LEVEL_NODE:
6172 self._LockInstancesNodes()
6174 def CheckPrereq(self):
6175 """Check prerequisites.
6177 This checks that the instance is in the cluster.
6180 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6181 assert self.instance is not None, \
6182 "Cannot retrieve locked instance %s" % self.op.instance_name
6183 _CheckNodeOnline(self, self.instance.primary_node)
6185 def Exec(self, feedback_fn):
6186 """Activate the disks.
6189 disks_ok, disks_info = \
6190 _AssembleInstanceDisks(self, self.instance,
6191 ignore_size=self.op.ignore_size)
6193 raise errors.OpExecError("Cannot activate block devices")
6198 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6199 ignore_size=False):
6200 """Prepare the block devices for an instance.
6202 This sets up the block devices on all nodes.
6204 @type lu: L{LogicalUnit}
6205 @param lu: the logical unit on whose behalf we execute
6206 @type instance: L{objects.Instance}
6207 @param instance: the instance for whose disks we assemble
6208 @type disks: list of L{objects.Disk} or None
6209 @param disks: which disks to assemble (or all, if None)
6210 @type ignore_secondaries: boolean
6211 @param ignore_secondaries: if true, errors on secondary nodes
6212 won't result in an error return from the function
6213 @type ignore_size: boolean
6214 @param ignore_size: if true, the current known size of the disk
6215 will not be used during the disk activation, useful for cases
6216 when the size is wrong
6217 @return: False if the operation failed, otherwise a list of
6218 (host, instance_visible_name, node_visible_name)
6219 with the mapping from node devices to instance devices
6221 """
6222 device_info = []
6223 disks_ok = True
6224 iname = instance.name
6225 disks = _ExpandCheckDisks(instance, disks)
6227 # With the two passes mechanism we try to reduce the window of
6228 # opportunity for the race condition of switching DRBD to primary
6229 # before handshaking occurred, but we do not eliminate it
6231 # The proper fix would be to wait (with some limits) until the
6232 # connection has been made and drbd transitions from WFConnection
6233 # into any other network-connected state (Connected, SyncTarget,
6234 # SyncSource, etc.)
6236 # 1st pass, assemble on all nodes in secondary mode
6237 for idx, inst_disk in enumerate(disks):
6238 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6239 if ignore_size:
6240 node_disk = node_disk.Copy()
6241 node_disk.UnsetSize()
6242 lu.cfg.SetDiskID(node_disk, node)
6243 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6244 msg = result.fail_msg
6245 if msg:
6246 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6247 " (is_primary=False, pass=1): %s",
6248 inst_disk.iv_name, node, msg)
6249 if not ignore_secondaries:
6250 disks_ok = False
6252 # FIXME: race condition on drbd migration to primary
6254 # 2nd pass, do only the primary node
6255 for idx, inst_disk in enumerate(disks):
6256 dev_path = None
6258 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6259 if node != instance.primary_node:
6260 continue
6261 if ignore_size:
6262 node_disk = node_disk.Copy()
6263 node_disk.UnsetSize()
6264 lu.cfg.SetDiskID(node_disk, node)
6265 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6266 msg = result.fail_msg
6267 if msg:
6268 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6269 " (is_primary=True, pass=2): %s",
6270 inst_disk.iv_name, node, msg)
6271 disks_ok = False
6272 else:
6273 dev_path = result.payload
6275 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6277 # leave the disks configured for the primary node
6278 # this is a workaround that would be fixed better by
6279 # improving the logical/physical id handling
6280 for disk in disks:
6281 lu.cfg.SetDiskID(disk, instance.primary_node)
6283 return disks_ok, device_info
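# Illustrative example (node and device names invented): on success this
# returns (True, [("node1.example.com", "disk/0", "/dev/drbd0"), ...]),
# one (node, iv_name, device_path) triple per instance disk, taken from
# the primary node in the second pass.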
6286 def _StartInstanceDisks(lu, instance, force):
6287 """Start the disks of an instance.
6290 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6291 ignore_secondaries=force)
6293 _ShutdownInstanceDisks(lu, instance)
6294 if force is not None and not force:
6295 lu.proc.LogWarning("", hint="If the message above refers to a"
6296 " secondary node,"
6297 " you can retry the operation using '--force'.")
6298 raise errors.OpExecError("Disk consistency error")
6301 class LUInstanceDeactivateDisks(NoHooksLU):
6302 """Shutdown an instance's disks.
6304 """
6305 REQ_BGL = False
6307 def ExpandNames(self):
6308 self._ExpandAndLockInstance()
6309 self.needed_locks[locking.LEVEL_NODE] = []
6310 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6312 def DeclareLocks(self, level):
6313 if level == locking.LEVEL_NODE:
6314 self._LockInstancesNodes()
6316 def CheckPrereq(self):
6317 """Check prerequisites.
6319 This checks that the instance is in the cluster.
6322 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6323 assert self.instance is not None, \
6324 "Cannot retrieve locked instance %s" % self.op.instance_name
6326 def Exec(self, feedback_fn):
6327 """Deactivate the disks
6330 instance = self.instance
6332 _ShutdownInstanceDisks(self, instance)
6334 _SafeShutdownInstanceDisks(self, instance)
6337 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6338 """Shutdown block devices of an instance.
6340 This function checks if an instance is running, before calling
6341 _ShutdownInstanceDisks.
6343 """
6344 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6345 _ShutdownInstanceDisks(lu, instance, disks=disks)
6348 def _ExpandCheckDisks(instance, disks):
6349 """Return the instance disks selected by the disks list
6351 @type disks: list of L{objects.Disk} or None
6352 @param disks: selected disks
6353 @rtype: list of L{objects.Disk}
6354 @return: selected instance disks to act on
6356 """
6357 if disks is None:
6358 return instance.disks
6359 else:
6360 if not set(disks).issubset(instance.disks):
6361 raise errors.ProgrammerError("Can only act on disks belonging to the"
6362 " target instance")
6364 return disks
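# Illustrative usage: _ExpandCheckDisks(instance, None) selects every disk
# of the instance, while a subset such as [instance.disks[0]] restricts
# the operation to that disk; passing a disk the instance does not own
# raises ProgrammerError.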
6366 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6367 """Shutdown block devices of an instance.
6369 This does the shutdown on all nodes of the instance.
6371 If ignore_primary is true, errors on the primary node are
6372 ignored.
6374 """
6375 all_result = True
6376 disks = _ExpandCheckDisks(instance, disks)
6378 for disk in disks:
6379 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6380 lu.cfg.SetDiskID(top_disk, node)
6381 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6382 msg = result.fail_msg
6383 if msg:
6384 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6385 disk.iv_name, node, msg)
6386 if ((node == instance.primary_node and not ignore_primary) or
6387 (node != instance.primary_node and not result.offline)):
6388 all_result = False
6390 return all_result
6392 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6393 """Checks if a node has enough free memory.
6395 This function checks if a given node has the needed amount of free
6396 memory. In case the node has less memory or we cannot get the
6397 information from the node, this function raises an OpPrereqError
6398 exception.
6400 @type lu: C{LogicalUnit}
6401 @param lu: a logical unit from which we get configuration data
6402 @type node: C{str}
6403 @param node: the node to check
6404 @type reason: C{str}
6405 @param reason: string to use in the error message
6406 @type requested: C{int}
6407 @param requested: the amount of memory in MiB to check for
6408 @type hypervisor_name: C{str}
6409 @param hypervisor_name: the hypervisor to ask for memory stats
6410 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6411 we cannot check the node
6413 """
6414 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6415 nodeinfo[node].Raise("Can't get data from node %s" % node,
6416 prereq=True, ecode=errors.ECODE_ENVIRON)
6417 (_, _, (hv_info, )) = nodeinfo[node].payload
6419 free_mem = hv_info.get("memory_free", None)
6420 if not isinstance(free_mem, int):
6421 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6422 " was '%s'" % (node, free_mem),
6423 errors.ECODE_ENVIRON)
6424 if requested > free_mem:
6425 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6426 " needed %s MiB, available %s MiB" %
6427 (node, reason, requested, free_mem),
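# Illustrative usage (values invented): before starting an instance that
# needs 1024 MiB on a node running the "kvm" hypervisor:
#   _CheckNodeFreeMemory(self, "node1.example.com",
#                        "starting instance inst1", 1024, "kvm")
# raises OpPrereqError unless the node reports at least 1024 MiB free.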
6431 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6432 """Checks if nodes have enough free disk space in the all VGs.
6434 This function check if all given nodes have the needed amount of
6435 free disk. In case any node has less disk or we cannot get the
6436 information from the node, this function raise an OpPrereqError
6439 @type lu: C{LogicalUnit}
6440 @param lu: a logical unit from which we get configuration data
6441 @type nodenames: C{list}
6442 @param nodenames: the list of node names to check
6443 @type req_sizes: C{dict}
6444 @param req_sizes: the hash of vg and corresponding amount of disk in
6445 MiB to check for
6446 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6447 or we cannot check the node
6449 """
6450 for vg, req_size in req_sizes.items():
6451 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
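# Illustrative example (VG names invented): req_sizes maps each volume
# group to the space it must provide, e.g.
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 2048, "ssdvg": 512})
# checks both nodes for 2048 MiB free in "xenvg" and 512 MiB in "ssdvg".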
6454 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6455 """Checks if nodes have enough free disk space in the specified VG.
6457 This function checks if all given nodes have the needed amount of
6458 free disk. In case any node has less disk or we cannot get the
6459 information from the node, this function raises an OpPrereqError
6460 exception.
6462 @type lu: C{LogicalUnit}
6463 @param lu: a logical unit from which we get configuration data
6464 @type nodenames: C{list}
6465 @param nodenames: the list of node names to check
6466 @type vg: C{str}
6467 @param vg: the volume group to check
6468 @type requested: C{int}
6469 @param requested: the amount of disk in MiB to check for
6470 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6471 or we cannot check the node
6473 """
6474 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6475 for node in nodenames:
6476 info = nodeinfo[node]
6477 info.Raise("Cannot get current information from node %s" % node,
6478 prereq=True, ecode=errors.ECODE_ENVIRON)
6479 (_, (vg_info, ), _) = info.payload
6480 vg_free = vg_info.get("vg_free", None)
6481 if not isinstance(vg_free, int):
6482 raise errors.OpPrereqError("Can't compute free disk space on node"
6483 " %s for vg %s, result was '%s'" %
6484 (node, vg, vg_free), errors.ECODE_ENVIRON)
6485 if requested > vg_free:
6486 raise errors.OpPrereqError("Not enough disk space on target node %s"
6487 " vg %s: required %d MiB, available %d MiB" %
6488 (node, vg, requested, vg_free),
6492 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6493 """Checks if nodes have enough physical CPUs
6495 This function checks if all given nodes have the needed number of
6496 physical CPUs. In case any node has less CPUs or we cannot get the
6497 information from the node, this function raises an OpPrereqError
6500 @type lu: C{LogicalUnit}
6501 @param lu: a logical unit from which we get configuration data
6502 @type nodenames: C{list}
6503 @param nodenames: the list of node names to check
6504 @type requested: C{int}
6505 @param requested: the minimum acceptable number of physical CPUs
6506 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6507 or we cannot check the node
6509 """
6510 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6511 for node in nodenames:
6512 info = nodeinfo[node]
6513 info.Raise("Cannot get current information from node %s" % node,
6514 prereq=True, ecode=errors.ECODE_ENVIRON)
6515 (_, _, (hv_info, )) = info.payload
6516 num_cpus = hv_info.get("cpu_total", None)
6517 if not isinstance(num_cpus, int):
6518 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6519 " on node %s, result was '%s'" %
6520 (node, num_cpus), errors.ECODE_ENVIRON)
6521 if requested > num_cpus:
6522 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6523 "required" % (node, num_cpus, requested),
6527 class LUInstanceStartup(LogicalUnit):
6528 """Starts an instance.
6530 """
6531 HPATH = "instance-start"
6532 HTYPE = constants.HTYPE_INSTANCE
6533 REQ_BGL = False
6535 def CheckArguments(self):
6536 # extra beparams
6537 if self.op.beparams:
6538 # fill the beparams dict
6539 objects.UpgradeBeParams(self.op.beparams)
6540 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6542 def ExpandNames(self):
6543 self._ExpandAndLockInstance()
6544 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6546 def DeclareLocks(self, level):
6547 if level == locking.LEVEL_NODE_RES:
6548 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6550 def BuildHooksEnv(self):
6551 """Build hooks env.
6553 This runs on master, primary and secondary nodes of the instance.
6555 """
6556 env = {
6557 "FORCE": self.op.force,
6558 }
6560 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6562 return env
6564 def BuildHooksNodes(self):
6565 """Build hooks nodes.
6567 """
6568 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6569 return (nl, nl)
6571 def CheckPrereq(self):
6572 """Check prerequisites.
6574 This checks that the instance is in the cluster.
6577 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6578 assert self.instance is not None, \
6579 "Cannot retrieve locked instance %s" % self.op.instance_name
6582 if self.op.hvparams:
6583 # check hypervisor parameter syntax (locally)
6584 cluster = self.cfg.GetClusterInfo()
6585 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6586 filled_hvp = cluster.FillHV(instance)
6587 filled_hvp.update(self.op.hvparams)
6588 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6589 hv_type.CheckParameterSyntax(filled_hvp)
6590 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6592 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6594 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6596 if self.primary_offline and self.op.ignore_offline_nodes:
6597 self.proc.LogWarning("Ignoring offline primary node")
6599 if self.op.hvparams or self.op.beparams:
6600 self.proc.LogWarning("Overridden parameters are ignored")
6601 else:
6602 _CheckNodeOnline(self, instance.primary_node)
6604 bep = self.cfg.GetClusterInfo().FillBE(instance)
6605 bep.update(self.op.beparams)
6607 # check bridges existence
6608 _CheckInstanceBridgesExist(self, instance)
6610 remote_info = self.rpc.call_instance_info(instance.primary_node,
6611 instance.name,
6612 instance.hypervisor)
6613 remote_info.Raise("Error checking node %s" % instance.primary_node,
6614 prereq=True, ecode=errors.ECODE_ENVIRON)
6615 if not remote_info.payload: # not running already
6616 _CheckNodeFreeMemory(self, instance.primary_node,
6617 "starting instance %s" % instance.name,
6618 bep[constants.BE_MINMEM], instance.hypervisor)
6620 def Exec(self, feedback_fn):
6621 """Start the instance.
6624 instance = self.instance
6625 force = self.op.force
6627 if not self.op.no_remember:
6628 self.cfg.MarkInstanceUp(instance.name)
6630 if self.primary_offline:
6631 assert self.op.ignore_offline_nodes
6632 self.proc.LogInfo("Primary node offline, marked instance as started")
6633 else:
6634 node_current = instance.primary_node
6636 _StartInstanceDisks(self, instance, force)
6638 result = \
6639 self.rpc.call_instance_start(node_current,
6640 (instance, self.op.hvparams,
6641 self.op.beparams),
6642 self.op.startup_paused)
6643 msg = result.fail_msg
6644 if msg:
6645 _ShutdownInstanceDisks(self, instance)
6646 raise errors.OpExecError("Could not start instance: %s" % msg)
6649 class LUInstanceReboot(LogicalUnit):
6650 """Reboot an instance.
6652 """
6653 HPATH = "instance-reboot"
6654 HTYPE = constants.HTYPE_INSTANCE
6655 REQ_BGL = False
6657 def ExpandNames(self):
6658 self._ExpandAndLockInstance()
6660 def BuildHooksEnv(self):
6661 """Build hooks env.
6663 This runs on master, primary and secondary nodes of the instance.
6665 """
6666 env = {
6667 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6668 "REBOOT_TYPE": self.op.reboot_type,
6669 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6670 }
6672 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6674 return env
6676 def BuildHooksNodes(self):
6677 """Build hooks nodes.
6679 """
6680 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6681 return (nl, nl)
6683 def CheckPrereq(self):
6684 """Check prerequisites.
6686 This checks that the instance is in the cluster.
6689 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6690 assert self.instance is not None, \
6691 "Cannot retrieve locked instance %s" % self.op.instance_name
6692 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6693 _CheckNodeOnline(self, instance.primary_node)
6695 # check bridges existence
6696 _CheckInstanceBridgesExist(self, instance)
6698 def Exec(self, feedback_fn):
6699 """Reboot the instance.
6702 instance = self.instance
6703 ignore_secondaries = self.op.ignore_secondaries
6704 reboot_type = self.op.reboot_type
6706 remote_info = self.rpc.call_instance_info(instance.primary_node,
6707 instance.name,
6708 instance.hypervisor)
6709 remote_info.Raise("Error checking node %s" % instance.primary_node)
6710 instance_running = bool(remote_info.payload)
6712 node_current = instance.primary_node
6714 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6715 constants.INSTANCE_REBOOT_HARD]:
6716 for disk in instance.disks:
6717 self.cfg.SetDiskID(disk, node_current)
6718 result = self.rpc.call_instance_reboot(node_current, instance,
6719 reboot_type,
6720 self.op.shutdown_timeout)
6721 result.Raise("Could not reboot instance")
6722 else:
6723 if instance_running:
6724 result = self.rpc.call_instance_shutdown(node_current, instance,
6725 self.op.shutdown_timeout)
6726 result.Raise("Could not shutdown instance for full reboot")
6727 _ShutdownInstanceDisks(self, instance)
6728 else:
6729 self.LogInfo("Instance %s was already stopped, starting now",
6730 instance.name)
6731 _StartInstanceDisks(self, instance, ignore_secondaries)
6732 result = self.rpc.call_instance_start(node_current,
6733 (instance, None, None), False)
6734 msg = result.fail_msg
6735 if msg:
6736 _ShutdownInstanceDisks(self, instance)
6737 raise errors.OpExecError("Could not start instance for"
6738 " full reboot: %s" % msg)
6740 self.cfg.MarkInstanceUp(instance.name)
6743 class LUInstanceShutdown(LogicalUnit):
6744 """Shutdown an instance.
6746 """
6747 HPATH = "instance-stop"
6748 HTYPE = constants.HTYPE_INSTANCE
6749 REQ_BGL = False
6751 def ExpandNames(self):
6752 self._ExpandAndLockInstance()
6754 def BuildHooksEnv(self):
6755 """Build hooks env.
6757 This runs on master, primary and secondary nodes of the instance.
6759 """
6760 env = _BuildInstanceHookEnvByObject(self, self.instance)
6761 env["TIMEOUT"] = self.op.timeout
6762 return env
6764 def BuildHooksNodes(self):
6765 """Build hooks nodes.
6767 """
6768 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6769 return (nl, nl)
6771 def CheckPrereq(self):
6772 """Check prerequisites.
6774 This checks that the instance is in the cluster.
6777 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6778 assert self.instance is not None, \
6779 "Cannot retrieve locked instance %s" % self.op.instance_name
6781 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6783 self.primary_offline = \
6784 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6786 if self.primary_offline and self.op.ignore_offline_nodes:
6787 self.proc.LogWarning("Ignoring offline primary node")
6788 else:
6789 _CheckNodeOnline(self, self.instance.primary_node)
6791 def Exec(self, feedback_fn):
6792 """Shutdown the instance.
6795 instance = self.instance
6796 node_current = instance.primary_node
6797 timeout = self.op.timeout
6799 if not self.op.no_remember:
6800 self.cfg.MarkInstanceDown(instance.name)
6802 if self.primary_offline:
6803 assert self.op.ignore_offline_nodes
6804 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6805 else:
6806 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6807 msg = result.fail_msg
6808 if msg:
6809 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6811 _ShutdownInstanceDisks(self, instance)
6814 class LUInstanceReinstall(LogicalUnit):
6815 """Reinstall an instance.
6817 """
6818 HPATH = "instance-reinstall"
6819 HTYPE = constants.HTYPE_INSTANCE
6820 REQ_BGL = False
6822 def ExpandNames(self):
6823 self._ExpandAndLockInstance()
6825 def BuildHooksEnv(self):
6826 """Build hooks env.
6828 This runs on master, primary and secondary nodes of the instance.
6830 """
6831 return _BuildInstanceHookEnvByObject(self, self.instance)
6833 def BuildHooksNodes(self):
6834 """Build hooks nodes.
6836 """
6837 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6838 return (nl, nl)
6840 def CheckPrereq(self):
6841 """Check prerequisites.
6843 This checks that the instance is in the cluster and is not running.
6846 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6847 assert instance is not None, \
6848 "Cannot retrieve locked instance %s" % self.op.instance_name
6849 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6850 " offline, cannot reinstall")
6851 for node in instance.secondary_nodes:
6852 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6853 " cannot reinstall")
6855 if instance.disk_template == constants.DT_DISKLESS:
6856 raise errors.OpPrereqError("Instance '%s' has no disks" %
6857 self.op.instance_name,
6858 errors.ECODE_INVAL)
6859 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6861 if self.op.os_type is not None:
6862 # OS verification
6863 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6864 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6865 instance_os = self.op.os_type
6866 else:
6867 instance_os = instance.os
6869 nodelist = list(instance.all_nodes)
6871 if self.op.osparams:
6872 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6873 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6874 self.os_inst = i_osdict # the new dict (without defaults)
6875 else:
6876 self.os_inst = {}
6878 self.instance = instance
6880 def Exec(self, feedback_fn):
6881 """Reinstall the instance.
6884 inst = self.instance
6886 if self.op.os_type is not None:
6887 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6888 inst.os = self.op.os_type
6889 # Write to configuration
6890 self.cfg.Update(inst, feedback_fn)
6892 _StartInstanceDisks(self, inst, None)
6893 try:
6894 feedback_fn("Running the instance OS create scripts...")
6895 # FIXME: pass debug option from opcode to backend
6896 result = self.rpc.call_instance_os_add(inst.primary_node,
6897 (inst, self.os_inst), True,
6898 self.op.debug_level)
6899 result.Raise("Could not install OS for instance %s on node %s" %
6900 (inst.name, inst.primary_node))
6901 finally:
6902 _ShutdownInstanceDisks(self, inst)
6905 class LUInstanceRecreateDisks(LogicalUnit):
6906 """Recreate an instance's missing disks.
6908 """
6909 HPATH = "instance-recreate-disks"
6910 HTYPE = constants.HTYPE_INSTANCE
6911 REQ_BGL = False
6913 _MODIFYABLE = frozenset([
6914 constants.IDISK_SIZE,
6915 constants.IDISK_MODE,
6916 ])
6918 # New or changed disk parameters may have different semantics
6919 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6920 constants.IDISK_ADOPT,
6922 # TODO: Implement support changing VG while recreating
6923 constants.IDISK_VG,
6924 constants.IDISK_METAVG,
6925 ]))
6927 def CheckArguments(self):
6928 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6929 # Normalize and convert deprecated list of disk indices
6930 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
6932 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6933 if duplicates:
6934 raise errors.OpPrereqError("Some disks have been specified more than"
6935 " once: %s" % utils.CommaJoin(duplicates),
6936 errors.ECODE_INVAL)
6938 for (idx, params) in self.op.disks:
6939 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6940 unsupported = frozenset(params.keys()) - self._MODIFYABLE
6941 if unsupported:
6942 raise errors.OpPrereqError("Parameters for disk %s try to change"
6943 " unmodifiable parameter(s): %s" %
6944 (idx, utils.CommaJoin(unsupported)),
6945 errors.ECODE_INVAL)
6947 def ExpandNames(self):
6948 self._ExpandAndLockInstance()
6949 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6950 if self.op.nodes:
6951 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6952 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6953 else:
6954 self.needed_locks[locking.LEVEL_NODE] = []
6955 self.needed_locks[locking.LEVEL_NODE_RES] = []
6957 def DeclareLocks(self, level):
6958 if level == locking.LEVEL_NODE:
6959 # if we replace the nodes, we only need to lock the old primary,
6960 # otherwise we need to lock all nodes for disk re-creation
6961 primary_only = bool(self.op.nodes)
6962 self._LockInstancesNodes(primary_only=primary_only)
6963 elif level == locking.LEVEL_NODE_RES:
6964 # Copy node locks
6965 self.needed_locks[locking.LEVEL_NODE_RES] = \
6966 self.needed_locks[locking.LEVEL_NODE][:]
6968 def BuildHooksEnv(self):
6969 """Build hooks env.
6971 This runs on master, primary and secondary nodes of the instance.
6973 """
6974 return _BuildInstanceHookEnvByObject(self, self.instance)
6976 def BuildHooksNodes(self):
6977 """Build hooks nodes.
6979 """
6980 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6981 return (nl, nl)
6983 def CheckPrereq(self):
6984 """Check prerequisites.
6986 This checks that the instance is in the cluster and is not running.
6989 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6990 assert instance is not None, \
6991 "Cannot retrieve locked instance %s" % self.op.instance_name
6993 if len(self.op.nodes) != len(instance.all_nodes):
6994 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6995 " %d replacement nodes were specified" %
6996 (instance.name, len(instance.all_nodes),
6997 len(self.op.nodes)),
6999 assert instance.disk_template != constants.DT_DRBD8 or \
7000 len(self.op.nodes) == 2
7001 assert instance.disk_template != constants.DT_PLAIN or \
7002 len(self.op.nodes) == 1
7003 primary_node = self.op.nodes[0]
7005 primary_node = instance.primary_node
7006 _CheckNodeOnline(self, primary_node)
7008 if instance.disk_template == constants.DT_DISKLESS:
7009 raise errors.OpPrereqError("Instance '%s' has no disks" %
7010 self.op.instance_name, errors.ECODE_INVAL)
7012 # if we replace nodes *and* the old primary is offline, we don't
7013 # check
7014 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7015 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7016 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7017 if not (self.op.nodes and old_pnode.offline):
7018 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7019 msg="cannot recreate disks")
7021 if self.op.disks:
7022 self.disks = dict(self.op.disks)
7023 else:
7024 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7026 maxidx = max(self.disks.keys())
7027 if maxidx >= len(instance.disks):
7028 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7029 errors.ECODE_INVAL)
7031 if (self.op.nodes and
7032 sorted(self.disks.keys()) != range(len(instance.disks))):
7033 raise errors.OpPrereqError("Can't recreate disks partially and"
7034 " change the nodes at the same time",
7035 errors.ECODE_INVAL)
7037 self.instance = instance
7039 def Exec(self, feedback_fn):
7040 """Recreate the disks.
7042 """
7043 instance = self.instance
7045 assert (self.owned_locks(locking.LEVEL_NODE) ==
7046 self.owned_locks(locking.LEVEL_NODE_RES))
7048 to_skip = []
7049 mods = [] # keeps track of needed changes
7051 for idx, disk in enumerate(instance.disks):
7052 try:
7053 changes = self.disks[idx]
7054 except KeyError:
7055 # Disk should not be recreated
7056 to_skip.append(idx)
7057 continue
7059 # update secondaries for disks, if needed
7060 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7061 # need to update the nodes and minors
7062 assert len(self.op.nodes) == 2
7063 assert len(disk.logical_id) == 6 # otherwise disk internals
7064 # have changed
7065 (_, _, old_port, _, _, old_secret) = disk.logical_id
7066 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7067 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7068 new_minors[0], new_minors[1], old_secret)
7069 assert len(disk.logical_id) == len(new_id)
7070 else:
7071 new_id = None
7073 mods.append((idx, new_id, changes))
7075 # now that we have passed all asserts above, we can apply the mods
7076 # in a single run (to avoid partial changes)
7077 for idx, new_id, changes in mods:
7078 disk = instance.disks[idx]
7079 if new_id is not None:
7080 assert disk.dev_type == constants.LD_DRBD8
7081 disk.logical_id = new_id
7082 if changes:
7083 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7084 mode=changes.get(constants.IDISK_MODE, None))
7086 # change primary node, if needed
7087 if self.op.nodes:
7088 instance.primary_node = self.op.nodes[0]
7089 self.LogWarning("Changing the instance's nodes, you will have to"
7090 " remove any disks left on the older nodes manually")
7092 if self.op.nodes:
7093 self.cfg.Update(instance, feedback_fn)
7095 _CreateDisks(self, instance, to_skip=to_skip)
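# Illustrative example (values invented): for a two-disk DRBD instance
# moved to ["nodeA", "nodeB"], mods could look like
#   [(0, ("nodeA", "nodeB", 11000, 0, 1, "secret0"), {}),
#    (1, ("nodeA", "nodeB", 11001, 2, 3, "secret1"),
#     {constants.IDISK_SIZE: 2048})]
# i.e. one (index, new_logical_id, changes) tuple per recreated disk.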
7098 class LUInstanceRename(LogicalUnit):
7099 """Rename an instance.
7101 """
7102 HPATH = "instance-rename"
7103 HTYPE = constants.HTYPE_INSTANCE
7105 def CheckArguments(self):
7106 """Check arguments.
7108 """
7109 if self.op.ip_check and not self.op.name_check:
7110 # TODO: make the ip check more flexible and not depend on the name check
7111 raise errors.OpPrereqError("IP address check requires a name check",
7112 errors.ECODE_INVAL)
7114 def BuildHooksEnv(self):
7115 """Build hooks env.
7117 This runs on master, primary and secondary nodes of the instance.
7119 """
7120 env = _BuildInstanceHookEnvByObject(self, self.instance)
7121 env["INSTANCE_NEW_NAME"] = self.op.new_name
7122 return env
7124 def BuildHooksNodes(self):
7125 """Build hooks nodes.
7127 """
7128 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7129 return (nl, nl)
7131 def CheckPrereq(self):
7132 """Check prerequisites.
7134 This checks that the instance is in the cluster and is not running.
7137 self.op.instance_name = _ExpandInstanceName(self.cfg,
7138 self.op.instance_name)
7139 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7140 assert instance is not None
7141 _CheckNodeOnline(self, instance.primary_node)
7142 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7143 msg="cannot rename")
7144 self.instance = instance
7146 new_name = self.op.new_name
7147 if self.op.name_check:
7148 hostname = netutils.GetHostname(name=new_name)
7149 if hostname.name != new_name:
7150 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7151 hostname.name)
7152 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7153 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7154 " same as given hostname '%s'") %
7155 (hostname.name, self.op.new_name),
7156 errors.ECODE_INVAL)
7157 new_name = self.op.new_name = hostname.name
7158 if (self.op.ip_check and
7159 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7160 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7161 (hostname.ip, new_name),
7162 errors.ECODE_NOTUNIQUE)
7164 instance_list = self.cfg.GetInstanceList()
7165 if new_name in instance_list and new_name != instance.name:
7166 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7167 new_name, errors.ECODE_EXISTS)
7169 def Exec(self, feedback_fn):
7170 """Rename the instance.
7173 inst = self.instance
7174 old_name = inst.name
7176 rename_file_storage = False
7177 if (inst.disk_template in constants.DTS_FILEBASED and
7178 self.op.new_name != inst.name):
7179 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7180 rename_file_storage = True
7182 self.cfg.RenameInstance(inst.name, self.op.new_name)
7183 # Change the instance lock. This is definitely safe while we hold the BGL.
7184 # Otherwise the new lock would have to be added in acquired mode.
7185 assert self.REQ_BGL
7186 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7187 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7189 # re-read the instance from the configuration after rename
7190 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7192 if rename_file_storage:
7193 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7194 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7195 old_file_storage_dir,
7196 new_file_storage_dir)
7197 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7198 " (but the instance has been renamed in Ganeti)" %
7199 (inst.primary_node, old_file_storage_dir,
7200 new_file_storage_dir))
7202 _StartInstanceDisks(self, inst, None)
7203 try:
7204 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7205 old_name, self.op.debug_level)
7206 msg = result.fail_msg
7207 if msg:
7208 msg = ("Could not run OS rename script for instance %s on node %s"
7209 " (but the instance has been renamed in Ganeti): %s" %
7210 (inst.name, inst.primary_node, msg))
7211 self.proc.LogWarning(msg)
7212 finally:
7213 _ShutdownInstanceDisks(self, inst)
7215 return inst.name
7218 class LUInstanceRemove(LogicalUnit):
7219 """Remove an instance.
7221 """
7222 HPATH = "instance-remove"
7223 HTYPE = constants.HTYPE_INSTANCE
7224 REQ_BGL = False
7226 def ExpandNames(self):
7227 self._ExpandAndLockInstance()
7228 self.needed_locks[locking.LEVEL_NODE] = []
7229 self.needed_locks[locking.LEVEL_NODE_RES] = []
7230 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7232 def DeclareLocks(self, level):
7233 if level == locking.LEVEL_NODE:
7234 self._LockInstancesNodes()
7235 elif level == locking.LEVEL_NODE_RES:
7236 # Copy node locks
7237 self.needed_locks[locking.LEVEL_NODE_RES] = \
7238 self.needed_locks[locking.LEVEL_NODE][:]
7240 def BuildHooksEnv(self):
7241 """Build hooks env.
7243 This runs on master, primary and secondary nodes of the instance.
7245 """
7246 env = _BuildInstanceHookEnvByObject(self, self.instance)
7247 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7248 return env
7250 def BuildHooksNodes(self):
7251 """Build hooks nodes.
7254 nl = [self.cfg.GetMasterNode()]
7255 nl_post = list(self.instance.all_nodes) + nl
7256 return (nl, nl_post)
7258 def CheckPrereq(self):
7259 """Check prerequisites.
7261 This checks that the instance is in the cluster.
7264 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7265 assert self.instance is not None, \
7266 "Cannot retrieve locked instance %s" % self.op.instance_name
7268 def Exec(self, feedback_fn):
7269 """Remove the instance.
7272 instance = self.instance
7273 logging.info("Shutting down instance %s on node %s",
7274 instance.name, instance.primary_node)
7276 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7277 self.op.shutdown_timeout)
7278 msg = result.fail_msg
7279 if msg:
7280 if self.op.ignore_failures:
7281 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7282 else:
7283 raise errors.OpExecError("Could not shutdown instance %s on"
7284 " node %s: %s" %
7285 (instance.name, instance.primary_node, msg))
7287 assert (self.owned_locks(locking.LEVEL_NODE) ==
7288 self.owned_locks(locking.LEVEL_NODE_RES))
7289 assert not (set(instance.all_nodes) -
7290 self.owned_locks(locking.LEVEL_NODE)), \
7291 "Not owning correct locks"
7293 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7296 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7297 """Utility function to remove an instance.
7300 logging.info("Removing block devices for instance %s", instance.name)
7302 if not _RemoveDisks(lu, instance):
7303 if not ignore_failures:
7304 raise errors.OpExecError("Can't remove instance's disks")
7305 feedback_fn("Warning: can't remove instance's disks")
7307 logging.info("Removing instance %s out of cluster config", instance.name)
7309 lu.cfg.RemoveInstance(instance.name)
7311 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7312 "Instance lock removal conflict"
7314 # Remove lock for the instance
7315 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7318 class LUInstanceQuery(NoHooksLU):
7319 """Logical unit for querying instances.
7321 """
7322 # pylint: disable=W0142
7323 REQ_BGL = False
7325 def CheckArguments(self):
7326 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7327 self.op.output_fields, self.op.use_locking)
7329 def ExpandNames(self):
7330 self.iq.ExpandNames(self)
7332 def DeclareLocks(self, level):
7333 self.iq.DeclareLocks(self, level)
7335 def Exec(self, feedback_fn):
7336 return self.iq.OldStyleQuery(self)
7339 class LUInstanceFailover(LogicalUnit):
7340 """Failover an instance.
7342 """
7343 HPATH = "instance-failover"
7344 HTYPE = constants.HTYPE_INSTANCE
7345 REQ_BGL = False
7347 def CheckArguments(self):
7348 """Check the arguments.
7351 self.iallocator = getattr(self.op, "iallocator", None)
7352 self.target_node = getattr(self.op, "target_node", None)
7354 def ExpandNames(self):
7355 self._ExpandAndLockInstance()
7357 if self.op.target_node is not None:
7358 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7360 self.needed_locks[locking.LEVEL_NODE] = []
7361 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7363 ignore_consistency = self.op.ignore_consistency
7364 shutdown_timeout = self.op.shutdown_timeout
7365 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7366 cleanup=False,
7367 failover=True,
7368 ignore_consistency=ignore_consistency,
7369 shutdown_timeout=shutdown_timeout,
7370 ignore_ipolicy=self.op.ignore_ipolicy)
7371 self.tasklets = [self._migrater]
7373 def DeclareLocks(self, level):
7374 if level == locking.LEVEL_NODE:
7375 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7376 if instance.disk_template in constants.DTS_EXT_MIRROR:
7377 if self.op.target_node is None:
7378 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7380 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7381 self.op.target_node]
7382 del self.recalculate_locks[locking.LEVEL_NODE]
7383 else:
7384 self._LockInstancesNodes()
7386 def BuildHooksEnv(self):
7387 """Build hooks env.
7389 This runs on master, primary and secondary nodes of the instance.
7391 """
7392 instance = self._migrater.instance
7393 source_node = instance.primary_node
7394 target_node = self.op.target_node
7395 env = {
7396 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7397 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7398 "OLD_PRIMARY": source_node,
7399 "NEW_PRIMARY": target_node,
7402 if instance.disk_template in constants.DTS_INT_MIRROR:
7403 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7404 env["NEW_SECONDARY"] = source_node
7406 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7408 env.update(_BuildInstanceHookEnvByObject(self, instance))
7412 def BuildHooksNodes(self):
7413 """Build hooks nodes.
7416 instance = self._migrater.instance
7417 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7418 return (nl, nl + [instance.primary_node])
7421 class LUInstanceMigrate(LogicalUnit):
7422 """Migrate an instance.
7424 This is migration without shutting down, compared to the failover,
7425 which is done with shutdown.
7427 """
7428 HPATH = "instance-migrate"
7429 HTYPE = constants.HTYPE_INSTANCE
7430 REQ_BGL = False
7432 def ExpandNames(self):
7433 self._ExpandAndLockInstance()
7435 if self.op.target_node is not None:
7436 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7438 self.needed_locks[locking.LEVEL_NODE] = []
7439 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7441 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7442 cleanup=self.op.cleanup,
7443 failover=False,
7444 fallback=self.op.allow_failover,
7445 ignore_ipolicy=self.op.ignore_ipolicy)
7446 self.tasklets = [self._migrater]
7448 def DeclareLocks(self, level):
7449 if level == locking.LEVEL_NODE:
7450 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7451 if instance.disk_template in constants.DTS_EXT_MIRROR:
7452 if self.op.target_node is None:
7453 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7455 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7456 self.op.target_node]
7457 del self.recalculate_locks[locking.LEVEL_NODE]
7458 else:
7459 self._LockInstancesNodes()
7461 def BuildHooksEnv(self):
7462 """Build hooks env.
7464 This runs on master, primary and secondary nodes of the instance.
7466 """
7467 instance = self._migrater.instance
7468 source_node = instance.primary_node
7469 target_node = self.op.target_node
7470 env = _BuildInstanceHookEnvByObject(self, instance)
7471 env.update({
7472 "MIGRATE_LIVE": self._migrater.live,
7473 "MIGRATE_CLEANUP": self.op.cleanup,
7474 "OLD_PRIMARY": source_node,
7475 "NEW_PRIMARY": target_node,
7478 if instance.disk_template in constants.DTS_INT_MIRROR:
7479 env["OLD_SECONDARY"] = target_node
7480 env["NEW_SECONDARY"] = source_node
7482 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7486 def BuildHooksNodes(self):
7487 """Build hooks nodes.
7490 instance = self._migrater.instance
7491 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7492 return (nl, nl + [instance.primary_node])
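# Illustrative example (node names invented): for a DRBD instance with
# primary "nodeA" and secondary "nodeB" being migrated to "nodeB", hooks
# see OLD_PRIMARY="nodeA", NEW_PRIMARY="nodeB", OLD_SECONDARY="nodeB" and
# NEW_SECONDARY="nodeA"; for internally mirrored templates the two roles
# simply swap.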
7495 class LUInstanceMove(LogicalUnit):
7496 """Move an instance by data-copying.
7498 """
7499 HPATH = "instance-move"
7500 HTYPE = constants.HTYPE_INSTANCE
7501 REQ_BGL = False
7503 def ExpandNames(self):
7504 self._ExpandAndLockInstance()
7505 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7506 self.op.target_node = target_node
7507 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7508 self.needed_locks[locking.LEVEL_NODE_RES] = []
7509 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7511 def DeclareLocks(self, level):
7512 if level == locking.LEVEL_NODE:
7513 self._LockInstancesNodes(primary_only=True)
7514 elif level == locking.LEVEL_NODE_RES:
7515 # Copy node locks
7516 self.needed_locks[locking.LEVEL_NODE_RES] = \
7517 self.needed_locks[locking.LEVEL_NODE][:]
7519 def BuildHooksEnv(self):
7520 """Build hooks env.
7522 This runs on master, primary and secondary nodes of the instance.
7524 """
7525 env = {
7526 "TARGET_NODE": self.op.target_node,
7527 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7528 }
7529 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7530 return env
7532 def BuildHooksNodes(self):
7533 """Build hooks nodes.
7536 nl = [
7537 self.cfg.GetMasterNode(),
7538 self.instance.primary_node,
7539 self.op.target_node,
7540 ]
7541 return (nl, nl)
7543 def CheckPrereq(self):
7544 """Check prerequisites.
7546 This checks that the instance is in the cluster.
7549 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7550 assert self.instance is not None, \
7551 "Cannot retrieve locked instance %s" % self.op.instance_name
7553 node = self.cfg.GetNodeInfo(self.op.target_node)
7554 assert node is not None, \
7555 "Cannot retrieve locked node %s" % self.op.target_node
7557 self.target_node = target_node = node.name
7559 if target_node == instance.primary_node:
7560 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7561 (instance.name, target_node),
7562 errors.ECODE_STATE)
7564 bep = self.cfg.GetClusterInfo().FillBE(instance)
7566 for idx, dsk in enumerate(instance.disks):
7567 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7568 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7569 " cannot copy" % idx, errors.ECODE_STATE)
7571 _CheckNodeOnline(self, target_node)
7572 _CheckNodeNotDrained(self, target_node)
7573 _CheckNodeVmCapable(self, target_node)
7574 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7575 self.cfg.GetNodeGroup(node.group))
7576 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7577 ignore=self.op.ignore_ipolicy)
7579 if instance.admin_state == constants.ADMINST_UP:
7580 # check memory requirements on the secondary node
7581 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7582 instance.name, bep[constants.BE_MAXMEM],
7583 instance.hypervisor)
7584 else:
7585 self.LogInfo("Not checking memory on the secondary node as"
7586 " instance will not be started")
7588 # check bridge existence
7589 _CheckInstanceBridgesExist(self, instance, node=target_node)
7591 def Exec(self, feedback_fn):
7592 """Move an instance.
7594 The move is done by shutting it down on its present node, copying
7595 the data over (slow) and starting it on the new node.
7598 instance = self.instance
7600 source_node = instance.primary_node
7601 target_node = self.target_node
7603 self.LogInfo("Shutting down instance %s on source node %s",
7604 instance.name, source_node)
7606 assert (self.owned_locks(locking.LEVEL_NODE) ==
7607 self.owned_locks(locking.LEVEL_NODE_RES))
7609 result = self.rpc.call_instance_shutdown(source_node, instance,
7610 self.op.shutdown_timeout)
7611 msg = result.fail_msg
7612 if msg:
7613 if self.op.ignore_consistency:
7614 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7615 " Proceeding anyway. Please make sure node"
7616 " %s is down. Error details: %s",
7617 instance.name, source_node, source_node, msg)
7618 else:
7619 raise errors.OpExecError("Could not shutdown instance %s on"
7620 " node %s: %s" %
7621 (instance.name, source_node, msg))
7623 # create the target disks
7624 try:
7625 _CreateDisks(self, instance, target_node=target_node)
7626 except errors.OpExecError:
7627 self.LogWarning("Device creation failed, reverting...")
7628 try:
7629 _RemoveDisks(self, instance, target_node=target_node)
7630 finally:
7631 self.cfg.ReleaseDRBDMinors(instance.name)
7632 raise
7634 cluster_name = self.cfg.GetClusterInfo().cluster_name
7636 errs = []
7637 # activate, get path, copy the data over
7638 for idx, disk in enumerate(instance.disks):
7639 self.LogInfo("Copying data for disk %d", idx)
7640 result = self.rpc.call_blockdev_assemble(target_node, disk,
7641 instance.name, True, idx)
7642 if result.fail_msg:
7643 self.LogWarning("Can't assemble newly created disk %d: %s",
7644 idx, result.fail_msg)
7645 errs.append(result.fail_msg)
7646 break
7647 dev_path = result.payload
7648 result = self.rpc.call_blockdev_export(source_node, disk,
7649 target_node, dev_path,
7650 cluster_name)
7651 if result.fail_msg:
7652 self.LogWarning("Can't copy data over for disk %d: %s",
7653 idx, result.fail_msg)
7654 errs.append(result.fail_msg)
7655 break
7658 self.LogWarning("Some disks failed to copy, aborting")
7660 _RemoveDisks(self, instance, target_node=target_node)
7662 self.cfg.ReleaseDRBDMinors(instance.name)
7663 raise errors.OpExecError("Errors during disk copy: %s" %
7666 instance.primary_node = target_node
7667 self.cfg.Update(instance, feedback_fn)
7669 self.LogInfo("Removing the disks on the original node")
7670 _RemoveDisks(self, instance, target_node=source_node)
7672 # Only start the instance if it's marked as up
7673 if instance.admin_state == constants.ADMINST_UP:
7674 self.LogInfo("Starting instance %s on node %s",
7675 instance.name, target_node)
7677 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7678 ignore_secondaries=True)
7679 if not disks_ok:
7680 _ShutdownInstanceDisks(self, instance)
7681 raise errors.OpExecError("Can't activate the instance's disks")
7683 result = self.rpc.call_instance_start(target_node,
7684 (instance, None, None), False)
7685 msg = result.fail_msg
7686 if msg:
7687 _ShutdownInstanceDisks(self, instance)
7688 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7689 (instance.name, target_node, msg))
7692 class LUNodeMigrate(LogicalUnit):
7693 """Migrate all instances from a node.
7696 HPATH = "node-migrate"
7697 HTYPE = constants.HTYPE_NODE
7700 def CheckArguments(self):
7701 pass
7703 def ExpandNames(self):
7704 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7706 self.share_locks = _ShareAll()
7707 self.needed_locks = {
7708 locking.LEVEL_NODE: [self.op.node_name],
7709 }
7711 def BuildHooksEnv(self):
7712 """Build hooks env.
7714 This runs on the master, the primary and all the secondaries.
7717 return {
7718 "NODE_NAME": self.op.node_name,
7719 }
7721 def BuildHooksNodes(self):
7722 """Build hooks nodes.
7725 nl = [self.cfg.GetMasterNode()]
7727 return (nl, nl)
7728 def CheckPrereq(self):
7729 pass
7731 def Exec(self, feedback_fn):
7732 # Prepare jobs for migration instances
7733 jobs = [
7734 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7735 mode=self.op.mode,
7736 live=self.op.live,
7737 iallocator=self.op.iallocator,
7738 target_node=self.op.target_node,
7739 ignore_ipolicy=self.op.ignore_ipolicy)]
7740 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7741 ]
7743 # TODO: Run iallocator in this opcode and pass correct placement options to
7744 # OpInstanceMigrate. Since other jobs can modify the cluster between
7745 # running the iallocator and the actual migration, a good consistency model
7746 # will have to be found.
7748 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7749 frozenset([self.op.node_name]))
7751 return ResultWithJobs(jobs)
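# Illustrative sketch (hypothetical instance names): the ResultWithJobs value
# returned above wraps one single-opcode job per primary instance, e.g.
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#   ]
# Submitting separate jobs instead of one multi-opcode job lets each
# migration be scheduled, retried or fail independently of the others.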
7754 class TLMigrateInstance(Tasklet):
7755 """Tasklet class for instance migration.
7758 @ivar live: whether the migration will be done live or non-live;
7759 this variable is initalized only after CheckPrereq has run
7760 @type cleanup: boolean
7761 @ivar cleanup: Wheater we cleanup from a failed migration
7762 @type iallocator: string
7763 @ivar iallocator: The iallocator used to determine target_node
7764 @type target_node: string
7765 @ivar target_node: If given, the target_node to reallocate the instance to
7766 @type failover: boolean
7767 @ivar failover: Whether operation results in failover or migration
7768 @type fallback: boolean
7769 @ivar fallback: Whether fallback to failover is allowed if migration not
7770 possible
7771 @type ignore_consistency: boolean
7772 @ivar ignore_consistency: Whether we should ignore consistency between source
7773 and target node
7774 @type shutdown_timeout: int
7775 @ivar shutdown_timeout: In case of failover, the timeout for the instance shutdown
7776 @type ignore_ipolicy: bool
7777 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7782 _MIGRATION_POLL_INTERVAL = 1 # seconds
7783 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7785 def __init__(self, lu, instance_name, cleanup=False,
7786 failover=False, fallback=False,
7787 ignore_consistency=False,
7788 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7789 ignore_ipolicy=False):
7790 """Initializes this class.
7793 Tasklet.__init__(self, lu)
7796 self.instance_name = instance_name
7797 self.cleanup = cleanup
7798 self.live = False # will be overridden later
7799 self.failover = failover
7800 self.fallback = fallback
7801 self.ignore_consistency = ignore_consistency
7802 self.shutdown_timeout = shutdown_timeout
7803 self.ignore_ipolicy = ignore_ipolicy
7805 def CheckPrereq(self):
7806 """Check prerequisites.
7808 This checks that the instance is in the cluster.
7811 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7812 instance = self.cfg.GetInstanceInfo(instance_name)
7813 assert instance is not None
7814 self.instance = instance
7815 cluster = self.cfg.GetClusterInfo()
7817 if (not self.cleanup and
7818 not instance.admin_state == constants.ADMINST_UP and
7819 not self.failover and self.fallback):
7820 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7821 " switching to failover")
7822 self.failover = True
7824 if instance.disk_template not in constants.DTS_MIRRORED:
7825 if self.failover:
7826 text = "failovers"
7827 else:
7828 text = "migrations"
7829 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7830 " %s" % (instance.disk_template, text),
7831 errors.ECODE_STATE)
7833 if instance.disk_template in constants.DTS_EXT_MIRROR:
7834 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7836 if self.lu.op.iallocator:
7837 self._RunAllocator()
7838 else:
7839 # We set self.target_node as it is required by
7840 # BuildHooksEnv
7841 self.target_node = self.lu.op.target_node
7843 # Check that the target node is correct in terms of instance policy
7844 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7845 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7846 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7847 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7848 ignore=self.ignore_ipolicy)
7850 # self.target_node is already populated, either directly or by the
7851 # iallocator run
7852 target_node = self.target_node
7853 if self.target_node == instance.primary_node:
7854 raise errors.OpPrereqError("Cannot migrate instance %s"
7855 " to its primary (%s)" %
7856 (instance.name, instance.primary_node))
7858 if len(self.lu.tasklets) == 1:
7859 # It is safe to release locks only when we're the only tasklet
7860 # in the LU
7861 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7862 keep=[instance.primary_node, self.target_node])
7864 else:
7865 secondary_nodes = instance.secondary_nodes
7866 if not secondary_nodes:
7867 raise errors.ConfigurationError("No secondary node but using"
7868 " %s disk template" %
7869 instance.disk_template)
7870 target_node = secondary_nodes[0]
7871 if self.lu.op.iallocator or (self.lu.op.target_node and
7872 self.lu.op.target_node != target_node):
7873 if self.failover:
7874 text = "failed over"
7875 else:
7876 text = "migrated"
7877 raise errors.OpPrereqError("Instances with disk template %s cannot"
7878 " be %s to arbitrary nodes"
7879 " (neither an iallocator nor a target"
7880 " node can be passed)" %
7881 (instance.disk_template, text),
7882 errors.ECODE_INVAL)
7883 nodeinfo = self.cfg.GetNodeInfo(target_node)
7884 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7885 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7886 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7887 ignore=self.ignore_ipolicy)
7889 i_be = cluster.FillBE(instance)
7891 # check memory requirements on the secondary node
7892 if (not self.cleanup and
7893 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7894 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7895 instance.name, i_be[constants.BE_MAXMEM],
7896 instance.hypervisor)
7897 else:
7898 self.lu.LogInfo("Not checking memory on the secondary node as"
7899 " instance will not be started")
7901 # check if failover must be forced instead of migration
7902 if (not self.cleanup and not self.failover and
7903 i_be[constants.BE_ALWAYS_FAILOVER]):
7904 if self.fallback:
7905 self.lu.LogInfo("Instance configured to always failover; fallback"
7906 " to failover")
7907 self.failover = True
7908 else:
7909 raise errors.OpPrereqError("This instance has been configured to"
7910 " always failover, please allow failover",
7911 errors.ECODE_STATE)
7913 # check bridge existence
7914 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7916 if not self.cleanup:
7917 _CheckNodeNotDrained(self.lu, target_node)
7918 if not self.failover:
7919 result = self.rpc.call_instance_migratable(instance.primary_node,
7920 instance)
7921 if result.fail_msg and self.fallback:
7922 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7923 " failover")
7924 self.failover = True
7925 else:
7926 result.Raise("Can't migrate, please use failover",
7927 prereq=True, ecode=errors.ECODE_STATE)
7929 assert not (self.failover and self.cleanup)
7931 if not self.failover:
7932 if self.lu.op.live is not None and self.lu.op.mode is not None:
7933 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7934 " parameters are accepted",
7936 if self.lu.op.live is not None:
7938 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7940 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7941 # reset the 'live' parameter to None so that repeated
7942 # invocations of CheckPrereq do not raise an exception
7943 self.lu.op.live = None
7944 elif self.lu.op.mode is None:
7945 # read the default value from the hypervisor
7946 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7947 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7949 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7950 else:
7951 # Failover is never live
7952 self.live = False
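# Summary of the live/mode resolution above (illustrative, not extra logic):
#   op.live is True,  op.mode unset -> mode = constants.HT_MIGRATION_LIVE
#   op.live is False, op.mode unset -> mode = constants.HT_MIGRATION_NONLIVE
#   both unset                      -> mode read from the hypervisor's
#                                      HV_MIGRATION_MODE parameter
#   both set                        -> OpPrereqError, they are exclusive
# For failovers, self.live is unconditionally False.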
7954 def _RunAllocator(self):
7955 """Run the allocator based on input opcode.
7958 # FIXME: add a self.ignore_ipolicy option
7959 ial = IAllocator(self.cfg, self.rpc,
7960 mode=constants.IALLOCATOR_MODE_RELOC,
7961 name=self.instance_name,
7962 # TODO See why hail breaks with a single node below
7963 relocate_from=[self.instance.primary_node,
7964 self.instance.primary_node],
7965 )
7967 ial.Run(self.lu.op.iallocator)
7969 if not ial.success:
7970 raise errors.OpPrereqError("Can't compute nodes using"
7971 " iallocator '%s': %s" %
7972 (self.lu.op.iallocator, ial.info),
7973 errors.ECODE_NORES)
7974 if len(ial.result) != ial.required_nodes:
7975 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7976 " of nodes (%s), required %s" %
7977 (self.lu.op.iallocator, len(ial.result),
7978 ial.required_nodes), errors.ECODE_FAULT)
7979 self.target_node = ial.result[0]
7980 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7981 self.instance_name, self.lu.op.iallocator,
7982 utils.CommaJoin(ial.result))
7984 def _WaitUntilSync(self):
7985 """Poll with custom rpc for disk sync.
7987 This uses our own step-based rpc call.
7990 self.feedback_fn("* wait until resync is done")
7994 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7996 self.instance.disks)
7998 for node, nres in result.items():
7999 nres.Raise("Cannot resync disks on node %s" % node)
8000 node_done, node_percent = nres.payload
8001 all_done = all_done and node_done
8002 if node_percent is not None:
8003 min_percent = min(min_percent, node_percent)
8005 if min_percent < 100:
8006 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8009 def _EnsureSecondary(self, node):
8010 """Demote a node to secondary.
8013 self.feedback_fn("* switching node %s to secondary mode" % node)
8015 for dev in self.instance.disks:
8016 self.cfg.SetDiskID(dev, node)
8018 result = self.rpc.call_blockdev_close(node, self.instance.name,
8019 self.instance.disks)
8020 result.Raise("Cannot change disk to secondary on node %s" % node)
8022 def _GoStandalone(self):
8023 """Disconnect from the network.
8026 self.feedback_fn("* changing into standalone mode")
8027 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8028 self.instance.disks)
8029 for node, nres in result.items():
8030 nres.Raise("Cannot disconnect disks node %s" % node)
8032 def _GoReconnect(self, multimaster):
8033 """Reconnect to the network.
8036 if multimaster:
8037 msg = "dual-master"
8038 else:
8039 msg = "single-master"
8040 self.feedback_fn("* changing disks into %s mode" % msg)
8041 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8042 self.instance.disks,
8043 self.instance.name, multimaster)
8044 for node, nres in result.items():
8045 nres.Raise("Cannot change disks config on node %s" % node)
8047 def _ExecCleanup(self):
8048 """Try to cleanup after a failed migration.
8050 The cleanup is done by:
8051 - check that the instance is running only on one node
8052 (and update the config if needed)
8053 - change disks on its secondary node to secondary
8054 - wait until disks are fully synchronized
8055 - disconnect from the network
8056 - change disks into single-master mode
8057 - wait again until disks are fully synchronized
8060 instance = self.instance
8061 target_node = self.target_node
8062 source_node = self.source_node
8064 # check running on only one node
8065 self.feedback_fn("* checking where the instance actually runs"
8066 " (if this hangs, the hypervisor might be in"
8068 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8069 for node, result in ins_l.items():
8070 result.Raise("Can't contact node %s" % node)
8072 runningon_source = instance.name in ins_l[source_node].payload
8073 runningon_target = instance.name in ins_l[target_node].payload
8075 if runningon_source and runningon_target:
8076 raise errors.OpExecError("Instance seems to be running on two nodes,"
8077 " or the hypervisor is confused; you will have"
8078 " to ensure manually that it runs only on one"
8079 " and restart this operation")
8081 if not (runningon_source or runningon_target):
8082 raise errors.OpExecError("Instance does not seem to be running at all;"
8083 " in this case it's safer to repair by"
8084 " running 'gnt-instance stop' to ensure disk"
8085 " shutdown, and then restarting it")
8087 if runningon_target:
8088 # the migration has actually succeeded, we need to update the config
8089 self.feedback_fn("* instance running on secondary node (%s),"
8090 " updating config" % target_node)
8091 instance.primary_node = target_node
8092 self.cfg.Update(instance, self.feedback_fn)
8093 demoted_node = source_node
8094 else:
8095 self.feedback_fn("* instance confirmed to be running on its"
8096 " primary node (%s)" % source_node)
8097 demoted_node = target_node
8099 if instance.disk_template in constants.DTS_INT_MIRROR:
8100 self._EnsureSecondary(demoted_node)
8101 try:
8102 self._WaitUntilSync()
8103 except errors.OpExecError:
8104 # we ignore errors here, since if the device is standalone, it
8105 # won't be able to sync
8106 pass
8107 self._GoStandalone()
8108 self._GoReconnect(False)
8109 self._WaitUntilSync()
8111 self.feedback_fn("* done")
8113 def _RevertDiskStatus(self):
8114 """Try to revert the disk status after a failed migration.
8117 target_node = self.target_node
8118 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8119 return
8121 try:
8122 self._EnsureSecondary(target_node)
8123 self._GoStandalone()
8124 self._GoReconnect(False)
8125 self._WaitUntilSync()
8126 except errors.OpExecError, err:
8127 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8128 " please try to recover the instance manually;"
8129 " error '%s'" % str(err))
8131 def _AbortMigration(self):
8132 """Call the hypervisor code to abort a started migration.
8135 instance = self.instance
8136 target_node = self.target_node
8137 source_node = self.source_node
8138 migration_info = self.migration_info
8140 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8141 instance,
8142 migration_info,
8143 False)
8144 abort_msg = abort_result.fail_msg
8145 if abort_msg:
8146 logging.error("Aborting migration failed on target node %s: %s",
8147 target_node, abort_msg)
8148 # Don't raise an exception here, as we still have to try to revert the
8149 # disk status, even if this step failed.
8151 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8152 instance, False, self.live)
8153 abort_msg = abort_result.fail_msg
8154 if abort_msg:
8155 logging.error("Aborting migration failed on source node %s: %s",
8156 source_node, abort_msg)
8158 def _ExecMigration(self):
8159 """Migrate an instance.
8161 The migrate is done by:
8162 - change the disks into dual-master mode
8163 - wait until disks are fully synchronized again
8164 - migrate the instance
8165 - change disks on the new secondary node (the old primary) to secondary
8166 - wait until disks are fully synchronized
8167 - change disks into single-master mode
8170 instance = self.instance
8171 target_node = self.target_node
8172 source_node = self.source_node
8174 # Check for hypervisor version mismatch and warn the user.
8175 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8176 None, [self.instance.hypervisor])
8177 for ninfo in nodeinfo.values():
8178 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8179 ninfo.node)
8180 (_, _, (src_info, )) = nodeinfo[source_node].payload
8181 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8183 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8184 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8185 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8186 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8187 if src_version != dst_version:
8188 self.feedback_fn("* warning: hypervisor version mismatch between"
8189 " source (%s) and target (%s) node" %
8190 (src_version, dst_version))
8192 self.feedback_fn("* checking disk consistency between source and target")
8193 for dev in instance.disks:
8194 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8195 raise errors.OpExecError("Disk %s is degraded or not fully"
8196 " synchronized on target node,"
8197 " aborting migration" % dev.iv_name)
8199 # First get the migration information from the remote node
8200 result = self.rpc.call_migration_info(source_node, instance)
8201 msg = result.fail_msg
8202 if msg:
8203 log_err = ("Failed fetching source migration information from %s: %s" %
8204 (source_node, msg))
8205 logging.error(log_err)
8206 raise errors.OpExecError(log_err)
8208 self.migration_info = migration_info = result.payload
8210 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8211 # Then switch the disks to master/master mode
8212 self._EnsureSecondary(target_node)
8213 self._GoStandalone()
8214 self._GoReconnect(True)
8215 self._WaitUntilSync()
8217 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8218 result = self.rpc.call_accept_instance(target_node,
8219 instance,
8220 migration_info,
8221 self.nodes_ip[target_node])
8223 msg = result.fail_msg
8224 if msg:
8225 logging.error("Instance pre-migration failed, trying to revert"
8226 " disk status: %s", msg)
8227 self.feedback_fn("Pre-migration failed, aborting")
8228 self._AbortMigration()
8229 self._RevertDiskStatus()
8230 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8231 (instance.name, msg))
8233 self.feedback_fn("* migrating instance to %s" % target_node)
8234 result = self.rpc.call_instance_migrate(source_node, instance,
8235 self.nodes_ip[target_node],
8236 self.live)
8237 msg = result.fail_msg
8238 if msg:
8239 logging.error("Instance migration failed, trying to revert"
8240 " disk status: %s", msg)
8241 self.feedback_fn("Migration failed, aborting")
8242 self._AbortMigration()
8243 self._RevertDiskStatus()
8244 raise errors.OpExecError("Could not migrate instance %s: %s" %
8245 (instance.name, msg))
8247 self.feedback_fn("* starting memory transfer")
8248 last_feedback = time.time()
8249 while True:
8250 result = self.rpc.call_instance_get_migration_status(source_node,
8251 instance)
8252 msg = result.fail_msg
8253 ms = result.payload # MigrationStatus instance
8254 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8255 logging.error("Instance migration failed, trying to revert"
8256 " disk status: %s", msg)
8257 self.feedback_fn("Migration failed, aborting")
8258 self._AbortMigration()
8259 self._RevertDiskStatus()
8260 raise errors.OpExecError("Could not migrate instance %s: %s" %
8261 (instance.name, msg))
8263 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8264 self.feedback_fn("* memory transfer complete")
8265 break
8267 if (utils.TimeoutExpired(last_feedback,
8268 self._MIGRATION_FEEDBACK_INTERVAL) and
8269 ms.transferred_ram is not None):
8270 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8271 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8272 last_feedback = time.time()
8274 time.sleep(self._MIGRATION_POLL_INTERVAL)
8276 result = self.rpc.call_instance_finalize_migration_src(source_node,
8277 instance,
8278 True,
8279 self.live)
8280 msg = result.fail_msg
8281 if msg:
8282 logging.error("Instance migration succeeded, but finalization failed"
8283 " on the source node: %s", msg)
8284 raise errors.OpExecError("Could not finalize instance migration: %s" %
8285 msg)
8287 instance.primary_node = target_node
8289 # distribute new instance config to the other nodes
8290 self.cfg.Update(instance, self.feedback_fn)
8292 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8293 instance,
8294 migration_info,
8295 True)
8296 msg = result.fail_msg
8297 if msg:
8298 logging.error("Instance migration succeeded, but finalization failed"
8299 " on the target node: %s", msg)
8300 raise errors.OpExecError("Could not finalize instance migration: %s" %
8301 msg)
8303 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8304 self._EnsureSecondary(source_node)
8305 self._WaitUntilSync()
8306 self._GoStandalone()
8307 self._GoReconnect(False)
8308 self._WaitUntilSync()
8310 # If the instance's disk template is `rbd' and there was a successful
8311 # migration, unmap the device from the source node.
8312 if self.instance.disk_template == constants.DT_RBD:
8313 disks = _ExpandCheckDisks(instance, instance.disks)
8314 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8316 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8317 msg = result.fail_msg
8319 logging.error("Migration was successful, but couldn't unmap the"
8320 " block device %s on source node %s: %s",
8321 disk.iv_name, source_node, msg)
8322 logging.error("You need to unmap the device %s manually on %s",
8323 disk.iv_name, source_node)
8325 self.feedback_fn("* done")
8327 def _ExecFailover(self):
8328 """Failover an instance.
8330 The failover is done by shutting it down on its present node and
8331 starting it on the secondary.
8334 instance = self.instance
8335 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8337 source_node = instance.primary_node
8338 target_node = self.target_node
8340 if instance.admin_state == constants.ADMINST_UP:
8341 self.feedback_fn("* checking disk consistency between source and target")
8342 for dev in instance.disks:
8343 # for drbd, these are drbd over lvm
8344 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8345 if primary_node.offline:
8346 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8348 (primary_node.name, dev.iv_name, target_node))
8349 elif not self.ignore_consistency:
8350 raise errors.OpExecError("Disk %s is degraded on target node,"
8351 " aborting failover" % dev.iv_name)
8353 self.feedback_fn("* not checking disk consistency as instance is not"
8356 self.feedback_fn("* shutting down instance on source node")
8357 logging.info("Shutting down instance %s on node %s",
8358 instance.name, source_node)
8360 result = self.rpc.call_instance_shutdown(source_node, instance,
8361 self.shutdown_timeout)
8362 msg = result.fail_msg
8363 if msg:
8364 if self.ignore_consistency or primary_node.offline:
8365 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8366 " proceeding anyway; please make sure node"
8367 " %s is down; error details: %s",
8368 instance.name, source_node, source_node, msg)
8369 else:
8370 raise errors.OpExecError("Could not shutdown instance %s on"
8371 " node %s: %s" %
8372 (instance.name, source_node, msg))
8374 self.feedback_fn("* deactivating the instance's disks on source node")
8375 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8376 raise errors.OpExecError("Can't shut down the instance's disks")
8378 instance.primary_node = target_node
8379 # distribute new instance config to the other nodes
8380 self.cfg.Update(instance, self.feedback_fn)
8382 # Only start the instance if it's marked as up
8383 if instance.admin_state == constants.ADMINST_UP:
8384 self.feedback_fn("* activating the instance's disks on target node %s" %
8386 logging.info("Starting instance %s on node %s",
8387 instance.name, target_node)
8389 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8390 ignore_secondaries=True)
8391 if not disks_ok:
8392 _ShutdownInstanceDisks(self.lu, instance)
8393 raise errors.OpExecError("Can't activate the instance's disks")
8395 self.feedback_fn("* starting the instance on the target node %s" %
8397 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8399 msg = result.fail_msg
8401 _ShutdownInstanceDisks(self.lu, instance)
8402 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8403 (instance.name, target_node, msg))
8405 def Exec(self, feedback_fn):
8406 """Perform the migration.
8409 self.feedback_fn = feedback_fn
8410 self.source_node = self.instance.primary_node
8412 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8413 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8414 self.target_node = self.instance.secondary_nodes[0]
8415 # Otherwise self.target_node has been populated either
8416 # directly, or through an iallocator.
8418 self.all_nodes = [self.source_node, self.target_node]
8419 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8420 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8423 feedback_fn("Failover instance %s" % self.instance.name)
8424 self._ExecFailover()
8426 feedback_fn("Migrating instance %s" % self.instance.name)
8429 return self._ExecCleanup()
8431 return self._ExecMigration()
8434 def _CreateBlockDev(lu, node, instance, device, force_create,
8435 info, force_open):
8436 """Create a tree of block devices on a given node.
8438 If this device type has to be created on secondaries, create it and
8439 all its children.
8441 If not, just recurse to children keeping the same 'force' value.
8443 @param lu: the lu on whose behalf we execute
8444 @param node: the node on which to create the device
8445 @type instance: L{objects.Instance}
8446 @param instance: the instance which owns the device
8447 @type device: L{objects.Disk}
8448 @param device: the device to create
8449 @type force_create: boolean
8450 @param force_create: whether to force creation of this device; this
8451 will be changed to True whenever we find a device which has
8452 the CreateOnSecondary() attribute
8453 @param info: the extra 'metadata' we should attach to the device
8454 (this will be represented as a LVM tag)
8455 @type force_open: boolean
8456 @param force_open: this parameter will be passed to the
8457 L{backend.BlockdevCreate} function where it specifies
8458 whether we run on primary or not, and it affects both
8459 the child assembly and the device's own Open() execution
8462 if device.CreateOnSecondary():
8463 force_create = True
8465 if device.children:
8466 for child in device.children:
8467 _CreateBlockDev(lu, node, instance, child, force_create,
8468 info, force_open)
8470 if not force_create:
8471 return
8473 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
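# Illustrative walk-through, assuming a DRBD8 disk as built by
# _GenerateDRBD8Branch further down: on the secondary node the caller passes
# force_create=False, but LD_DRBD8 devices answer CreateOnSecondary()=True,
# so force_create flips to True, the two LD_LV children (data and meta
# volumes) are created first, and the DRBD device is then created on top.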
8476 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8477 """Create a single block device on a given node.
8479 This will not recurse over children of the device, so they must be
8480 created in advance.
8482 @param lu: the lu on whose behalf we execute
8483 @param node: the node on which to create the device
8484 @type instance: L{objects.Instance}
8485 @param instance: the instance which owns the device
8486 @type device: L{objects.Disk}
8487 @param device: the device to create
8488 @param info: the extra 'metadata' we should attach to the device
8489 (this will be represented as a LVM tag)
8490 @type force_open: boolean
8491 @param force_open: this parameter will be passed to the
8492 L{backend.BlockdevCreate} function where it specifies
8493 whether we run on primary or not, and it affects both
8494 the child assembly and the device's own Open() execution
8497 lu.cfg.SetDiskID(device, node)
8498 result = lu.rpc.call_blockdev_create(node, device, device.size,
8499 instance.name, force_open, info)
8500 result.Raise("Can't create block device %s on"
8501 " node %s for instance %s" % (device, node, instance.name))
8502 if device.physical_id is None:
8503 device.physical_id = result.payload
8506 def _GenerateUniqueNames(lu, exts):
8507 """Generate a suitable LV name.
8509 This will generate a logical volume name for the given instance.
8512 results = []
8513 for val in exts:
8514 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8515 results.append("%s%s" % (new_id, val))
8517 return results
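# Example with hypothetical generated IDs: _GenerateUniqueNames(lu,
# [".disk0", ".disk1"]) could return
#   ["0b5f4d1e-....disk0", "9c2aa9af-....disk1"]
# GenerateUniqueID is called once per extension, so every LV name gets its
# own cluster-wide unique identifier.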
8519 def _ComputeLDParams(disk_template, disk_params):
8520 """Computes Logical Disk parameters from Disk Template parameters.
8522 @type disk_template: string
8523 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8524 @type disk_params: dict
8525 @param disk_params: disk template parameters; dict(template_name -> parameters)
8526 @rtype: list(dict)
8527 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8528 contains the LD parameters of the node. The tree is flattened in-order.
8531 if disk_template not in constants.DISK_TEMPLATES:
8532 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8535 dt_params = disk_params[disk_template]
8536 if disk_template == constants.DT_DRBD8:
8538 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8539 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8540 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8541 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8542 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8543 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8544 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8545 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8546 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8547 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8548 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8549 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8550 }
8552 drbd_params = \
8553 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8554 drbd_params)
8556 result.append(drbd_params)
8558 # data LV
8559 data_params = {
8560 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8561 }
8562 data_params = \
8563 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8564 data_params)
8565 result.append(data_params)
8567 # metadata LV
8568 meta_params = {
8569 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8570 }
8571 meta_params = \
8572 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8573 meta_params)
8574 result.append(meta_params)
8576 elif (disk_template == constants.DT_FILE or
8577 disk_template == constants.DT_SHARED_FILE):
8578 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8580 elif disk_template == constants.DT_PLAIN:
8581 params = {
8582 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8583 }
8584 params = \
8585 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8586 params)
8587 result.append(params)
8589 elif disk_template == constants.DT_BLOCK:
8590 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8592 elif disk_template == constants.DT_RBD:
8593 params = {
8594 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8595 }
8596 params = \
8597 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8598 params)
8599 result.append(params)
8601 return result
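# Example of the in-order flattening: for constants.DT_DRBD8 the returned
# list is [drbd_params, data_params, meta_params], one dict for the DRBD
# device followed by one per underlying LV, which is exactly the triple
# unpacked by _GenerateDiskTemplate below; the single-level templates
# (plain, file, blockdev, rbd) yield a one-element list.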
8604 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8605 iv_name, p_minor, s_minor, drbd_params, data_params,
8606 meta_params):
8607 """Generate a drbd8 device complete with its children.
8610 assert len(vgnames) == len(names) == 2
8611 port = lu.cfg.AllocatePort()
8612 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8614 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8615 logical_id=(vgnames[0], names[0]),
8616 params=data_params)
8617 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8618 logical_id=(vgnames[1], names[1]),
8619 params=meta_params)
8620 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8621 logical_id=(primary, secondary, port,
8622 p_minor, s_minor,
8623 shared_secret),
8624 children=[dev_data, dev_meta],
8625 iv_name=iv_name, params=drbd_params)
8626 return drbd_dev
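# Resulting device tree (sketch; sizes in MiB):
#   LD_DRBD8(size=size, logical_id=(primary, secondary, port,
#                                   p_minor, s_minor, shared_secret))
#     child: LD_LV(size=size,           logical_id=(vgnames[0], names[0]))  # data
#     child: LD_LV(size=DRBD_META_SIZE, logical_id=(vgnames[1], names[1]))  # meta
# The DRBD logical_id carries both node names, the allocated TCP port and
# the shared secret used for the DRBD handshake.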
8629 def _GenerateDiskTemplate(lu, template_name,
8630 instance_name, primary_node,
8631 secondary_nodes, disk_info,
8632 file_storage_dir, file_driver,
8633 base_index, feedback_fn, disk_params):
8634 """Generate the entire disk layout for a given template type.
8637 #TODO: compute space requirements
8639 vgname = lu.cfg.GetVGName()
8640 disk_count = len(disk_info)
8641 disks = []
8642 ld_params = _ComputeLDParams(template_name, disk_params)
8643 if template_name == constants.DT_DISKLESS:
8644 pass
8645 elif template_name == constants.DT_PLAIN:
8646 if len(secondary_nodes) != 0:
8647 raise errors.ProgrammerError("Wrong template configuration")
8649 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8650 for i in range(disk_count)])
8651 for idx, disk in enumerate(disk_info):
8652 disk_index = idx + base_index
8653 vg = disk.get(constants.IDISK_VG, vgname)
8654 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8655 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8656 size=disk[constants.IDISK_SIZE],
8657 logical_id=(vg, names[idx]),
8658 iv_name="disk/%d" % disk_index,
8659 mode=disk[constants.IDISK_MODE],
8660 params=ld_params[0])
8661 disks.append(disk_dev)
8662 elif template_name == constants.DT_DRBD8:
8663 drbd_params, data_params, meta_params = ld_params
8664 if len(secondary_nodes) != 1:
8665 raise errors.ProgrammerError("Wrong template configuration")
8666 remote_node = secondary_nodes[0]
8667 minors = lu.cfg.AllocateDRBDMinor(
8668 [primary_node, remote_node] * len(disk_info), instance_name)
8670 names = []
8671 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8672 for i in range(disk_count)]):
8673 names.append(lv_prefix + "_data")
8674 names.append(lv_prefix + "_meta")
8675 for idx, disk in enumerate(disk_info):
8676 disk_index = idx + base_index
8677 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8678 data_vg = disk.get(constants.IDISK_VG, vgname)
8679 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8680 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8681 disk[constants.IDISK_SIZE],
8683 names[idx * 2:idx * 2 + 2],
8684 "disk/%d" % disk_index,
8685 minors[idx * 2], minors[idx * 2 + 1],
8686 drbd_params, data_params, meta_params)
8687 disk_dev.mode = disk[constants.IDISK_MODE]
8688 disks.append(disk_dev)
8689 elif template_name == constants.DT_FILE:
8690 if len(secondary_nodes) != 0:
8691 raise errors.ProgrammerError("Wrong template configuration")
8693 opcodes.RequireFileStorage()
8695 for idx, disk in enumerate(disk_info):
8696 disk_index = idx + base_index
8697 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8698 size=disk[constants.IDISK_SIZE],
8699 iv_name="disk/%d" % disk_index,
8700 logical_id=(file_driver,
8701 "%s/disk%d" % (file_storage_dir,
8703 mode=disk[constants.IDISK_MODE],
8704 params=ld_params[0])
8705 disks.append(disk_dev)
8706 elif template_name == constants.DT_SHARED_FILE:
8707 if len(secondary_nodes) != 0:
8708 raise errors.ProgrammerError("Wrong template configuration")
8710 opcodes.RequireSharedFileStorage()
8712 for idx, disk in enumerate(disk_info):
8713 disk_index = idx + base_index
8714 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8715 size=disk[constants.IDISK_SIZE],
8716 iv_name="disk/%d" % disk_index,
8717 logical_id=(file_driver,
8718 "%s/disk%d" % (file_storage_dir,
8720 mode=disk[constants.IDISK_MODE],
8721 params=ld_params[0])
8722 disks.append(disk_dev)
8723 elif template_name == constants.DT_BLOCK:
8724 if len(secondary_nodes) != 0:
8725 raise errors.ProgrammerError("Wrong template configuration")
8727 for idx, disk in enumerate(disk_info):
8728 disk_index = idx + base_index
8729 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8730 size=disk[constants.IDISK_SIZE],
8731 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8732 disk[constants.IDISK_ADOPT]),
8733 iv_name="disk/%d" % disk_index,
8734 mode=disk[constants.IDISK_MODE],
8735 params=ld_params[0])
8736 disks.append(disk_dev)
8737 elif template_name == constants.DT_RBD:
8738 if len(secondary_nodes) != 0:
8739 raise errors.ProgrammerError("Wrong template configuration")
8741 names = _GenerateUniqueNames(lu, [".rbd.disk%d" % (base_index + i)
8742 for i in range(disk_count)])
8744 for idx, disk in enumerate(disk_info):
8745 disk_index = idx + base_index
8746 disk_dev = objects.Disk(dev_type=constants.LD_RBD,
8747 size=disk[constants.IDISK_SIZE],
8748 logical_id=("rbd", names[idx]),
8749 iv_name="disk/%d" % disk_index,
8750 mode=disk[constants.IDISK_MODE],
8751 params=ld_params[0])
8752 disks.append(disk_dev)
8755 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8759 def _GetInstanceInfoText(instance):
8760 """Compute that text that should be added to the disk's metadata.
8763 return "originstname+%s" % instance.name
8766 def _CalcEta(time_taken, written, total_size):
8767 """Calculates the ETA based on size written and total size.
8769 @param time_taken: The time taken so far
8770 @param written: amount written so far
8771 @param total_size: The total size of data to be written
8772 @return: The remaining time in seconds
8775 avg_time = time_taken / float(written)
8776 return (total_size - written) * avg_time
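# Worked example (hypothetical numbers): if 512 MiB of a 2048 MiB transfer
# were written in 60 seconds, avg_time is 60 / 512.0 ~ 0.117 s/MiB, so
#   _CalcEta(60.0, 512, 2048) == (2048 - 512) * (60.0 / 512) == 180.0
# i.e. an estimated three minutes remain; a plain linear extrapolation.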
8779 def _WipeDisks(lu, instance):
8780 """Wipes instance disks.
8782 @type lu: L{LogicalUnit}
8783 @param lu: the logical unit on whose behalf we execute
8784 @type instance: L{objects.Instance}
8785 @param instance: the instance whose disks we should wipe
8786 @return: the success of the wipe
8789 node = instance.primary_node
8791 for device in instance.disks:
8792 lu.cfg.SetDiskID(device, node)
8794 logging.info("Pause sync of instance %s disks", instance.name)
8795 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8797 for idx, success in enumerate(result.payload):
8798 if not success:
8799 logging.warn("pause-sync of instance %s for disks %d failed",
8800 instance.name, idx)
8802 try:
8803 for idx, device in enumerate(instance.disks):
8804 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8805 # MAX_WIPE_CHUNK at max
8806 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8807 constants.MIN_WIPE_CHUNK_PERCENT)
8808 # we _must_ make this an int, otherwise rounding errors will
8809 # occur
8810 wipe_chunk_size = int(wipe_chunk_size)
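# Example, assuming the stock constants (MAX_WIPE_CHUNK = 1024 MiB,
# MIN_WIPE_CHUNK_PERCENT = 10): a 5000 MiB disk is wiped in
# min(1024, 5000 / 100.0 * 10) = 500 MiB chunks, while disks of 10240 MiB
# and larger are capped at 1024 MiB per call_blockdev_wipe request.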
8812 lu.LogInfo("* Wiping disk %d", idx)
8813 logging.info("Wiping disk %d for instance %s, node %s using"
8814 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8816 offset = 0
8817 size = device.size
8818 last_output = 0
8819 start_time = time.time()
8821 while offset < size:
8822 wipe_size = min(wipe_chunk_size, size - offset)
8823 logging.debug("Wiping disk %d, offset %s, chunk %s",
8824 idx, offset, wipe_size)
8825 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8826 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8827 (idx, offset, wipe_size))
8828 now = time.time()
8829 offset += wipe_size
8830 if now - last_output >= 60:
8831 eta = _CalcEta(now - start_time, offset, size)
8832 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8833 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8834 last_output = now
8835 finally:
8836 logging.info("Resume sync of instance %s disks", instance.name)
8838 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8840 for idx, success in enumerate(result.payload):
8841 if not success:
8842 lu.LogWarning("Resume sync of disk %d failed, please have a"
8843 " look at the status and troubleshoot the issue", idx)
8844 logging.warn("resume-sync of instance %s for disks %d failed",
8845 instance.name, idx)
8848 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8849 """Create all disks for an instance.
8851 This abstracts away some work from AddInstance.
8853 @type lu: L{LogicalUnit}
8854 @param lu: the logical unit on whose behalf we execute
8855 @type instance: L{objects.Instance}
8856 @param instance: the instance whose disks we should create
8857 @type to_skip: list
8858 @param to_skip: list of indices to skip
8859 @type target_node: string
8860 @param target_node: if passed, overrides the target node for creation
8862 @return: the success of the creation
8865 info = _GetInstanceInfoText(instance)
8866 if target_node is None:
8867 pnode = instance.primary_node
8868 all_nodes = instance.all_nodes
8869 else:
8870 pnode = target_node
8871 all_nodes = [pnode]
8873 if instance.disk_template in constants.DTS_FILEBASED:
8874 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8875 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8877 result.Raise("Failed to create directory '%s' on"
8878 " node %s" % (file_storage_dir, pnode))
8880 # Note: this needs to be kept in sync with adding of disks in
8881 # LUInstanceSetParams
8882 for idx, device in enumerate(instance.disks):
8883 if to_skip and idx in to_skip:
8884 continue
8885 logging.info("Creating volume %s for instance %s",
8886 device.iv_name, instance.name)
8887 #HARDCODE
8888 for node in all_nodes:
8889 f_create = node == pnode
8890 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8893 def _RemoveDisks(lu, instance, target_node=None):
8894 """Remove all disks for an instance.
8896 This abstracts away some work from `AddInstance()` and
8897 `RemoveInstance()`. Note that in case some of the devices couldn't
8898 be removed, the removal will continue with the other ones (compare
8899 with `_CreateDisks()`).
8901 @type lu: L{LogicalUnit}
8902 @param lu: the logical unit on whose behalf we execute
8903 @type instance: L{objects.Instance}
8904 @param instance: the instance whose disks we should remove
8905 @type target_node: string
8906 @param target_node: used to override the node on which to remove the disks
8908 @return: the success of the removal
8911 logging.info("Removing block devices for instance %s", instance.name)
8914 for device in instance.disks:
8916 edata = [(target_node, device)]
8918 edata = device.ComputeNodeTree(instance.primary_node)
8919 for node, disk in edata:
8920 lu.cfg.SetDiskID(disk, node)
8921 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8922 if msg:
8923 lu.LogWarning("Could not remove block device %s on node %s,"
8924 " continuing anyway: %s", device.iv_name, node, msg)
8925 all_result = False
8927 # if this is a DRBD disk, return its port to the pool
8928 if device.dev_type in constants.LDS_DRBD:
8929 tcp_port = device.logical_id[2]
8930 lu.cfg.AddTcpUdpPort(tcp_port)
8932 if instance.disk_template == constants.DT_FILE:
8933 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8934 if target_node:
8935 tgt = target_node
8936 else:
8937 tgt = instance.primary_node
8938 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8939 if result.fail_msg:
8940 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8941 file_storage_dir, instance.primary_node, result.fail_msg)
8942 all_result = False
8944 return all_result
8947 def _ComputeDiskSizePerVG(disk_template, disks):
8948 """Compute disk size requirements in the volume group
8951 def _compute(disks, payload):
8952 """Universal algorithm.
8957 vgs[disk[constants.IDISK_VG]] = \
8958 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8962 # Required free disk space as a function of disk and swap space
8963 req_size_dict = {
8964 constants.DT_DISKLESS: {},
8965 constants.DT_PLAIN: _compute(disks, 0),
8966 # 128 MB are added for drbd metadata for each disk
8967 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8968 constants.DT_FILE: {},
8969 constants.DT_SHARED_FILE: {},
8970 }
8972 if disk_template not in req_size_dict:
8973 raise errors.ProgrammerError("Disk template '%s' size requirement"
8974 " is unknown" % disk_template)
8976 return req_size_dict[disk_template]
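# Example ("xenvg" is just an illustrative VG name): two DRBD8 disks of
# 1024 and 2048 MiB in the same VG give
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 1024},
#                          {constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 2048}])
#   == {"xenvg": 3328}
# i.e. the disk sizes plus DRBD_META_SIZE (128 MiB) of metadata per disk.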
8979 def _ComputeDiskSize(disk_template, disks):
8980 """Compute disk size requirements in the volume group
8983 # Required free disk space as a function of disk and swap space
8984 req_size_dict = {
8985 constants.DT_DISKLESS: None,
8986 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8987 # 128 MB are added for drbd metadata for each disk
8988 constants.DT_DRBD8:
8989 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8990 constants.DT_FILE: None,
8991 constants.DT_SHARED_FILE: 0,
8992 constants.DT_BLOCK: 0,
8993 constants.DT_RBD: 0,
8994 }
8996 if disk_template not in req_size_dict:
8997 raise errors.ProgrammerError("Disk template '%s' size requirement"
8998 " is unknown" % disk_template)
9000 return req_size_dict[disk_template]
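# Example with the same two disks: _ComputeDiskSize(constants.DT_DRBD8,
# [{constants.IDISK_SIZE: 1024}, {constants.IDISK_SIZE: 2048}]) returns the
# single total 3328, while DT_DISKLESS/DT_FILE return None (nothing to
# reserve in a VG) and the externally provisioned templates return 0.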
9003 def _FilterVmNodes(lu, nodenames):
9004 """Filters out non-vm_capable nodes from a list.
9006 @type lu: L{LogicalUnit}
9007 @param lu: the logical unit for which we check
9008 @type nodenames: list
9009 @param nodenames: the list of nodes on which we should check
9011 @return: the list of vm-capable nodes
9014 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9015 return [name for name in nodenames if name not in vm_nodes]
9018 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9019 """Hypervisor parameter validation.
9021 This function abstracts the hypervisor parameter validation to be
9022 used in both instance create and instance modify.
9024 @type lu: L{LogicalUnit}
9025 @param lu: the logical unit for which we check
9026 @type nodenames: list
9027 @param nodenames: the list of nodes on which we should check
9028 @type hvname: string
9029 @param hvname: the name of the hypervisor we should use
9030 @type hvparams: dict
9031 @param hvparams: the parameters which we need to check
9032 @raise errors.OpPrereqError: if the parameters are not valid
9035 nodenames = _FilterVmNodes(lu, nodenames)
9037 cluster = lu.cfg.GetClusterInfo()
9038 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9040 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9041 for node in nodenames:
9042 info = hvinfo[node]
9043 if info.offline:
9044 continue
9045 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9048 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9049 """OS parameters validation.
9051 @type lu: L{LogicalUnit}
9052 @param lu: the logical unit for which we check
9053 @type required: boolean
9054 @param required: whether the validation should fail if the OS is not
9055 found
9056 @type nodenames: list
9057 @param nodenames: the list of nodes on which we should check
9058 @type osname: string
9059 @param osname: the name of the OS we should check
9060 @type osparams: dict
9061 @param osparams: the parameters which we need to check
9062 @raise errors.OpPrereqError: if the parameters are not valid
9065 nodenames = _FilterVmNodes(lu, nodenames)
9066 result = lu.rpc.call_os_validate(nodenames, required, osname,
9067 [constants.OS_VALIDATE_PARAMETERS],
9068 osparams)
9069 for node, nres in result.items():
9070 # we don't check for offline cases since this should be run only
9071 # against the master node and/or an instance's nodes
9072 nres.Raise("OS Parameters validation failed on node %s" % node)
9073 if not nres.payload:
9074 lu.LogInfo("OS %s not found on node %s, validation skipped",
9075 osname, node)
9078 class LUInstanceCreate(LogicalUnit):
9079 """Create an instance.
9082 HPATH = "instance-add"
9083 HTYPE = constants.HTYPE_INSTANCE
9086 def CheckArguments(self):
9090 # do not require name_check to ease forward/backward compatibility
9092 if self.op.no_install and self.op.start:
9093 self.LogInfo("No-installation mode selected, disabling startup")
9094 self.op.start = False
9095 # validate/normalize the instance name
9096 self.op.instance_name = \
9097 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9099 if self.op.ip_check and not self.op.name_check:
9100 # TODO: make the ip check more flexible and not depend on the name check
9101 raise errors.OpPrereqError("Cannot do IP address check without a name"
9102 " check", errors.ECODE_INVAL)
9104 # check nics' parameter names
9105 for nic in self.op.nics:
9106 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9108 # check disks. parameter names and consistent adopt/no-adopt strategy
9109 has_adopt = has_no_adopt = False
9110 for disk in self.op.disks:
9111 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9112 if constants.IDISK_ADOPT in disk:
9113 has_adopt = True
9114 else:
9115 has_no_adopt = True
9116 if has_adopt and has_no_adopt:
9117 raise errors.OpPrereqError("Either all disks are adopted or none is",
9118 errors.ECODE_INVAL)
9119 if has_adopt:
9120 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9121 raise errors.OpPrereqError("Disk adoption is not supported for the"
9122 " '%s' disk template" %
9123 self.op.disk_template,
9124 errors.ECODE_INVAL)
9125 if self.op.iallocator is not None:
9126 raise errors.OpPrereqError("Disk adoption not allowed with an"
9127 " iallocator script", errors.ECODE_INVAL)
9128 if self.op.mode == constants.INSTANCE_IMPORT:
9129 raise errors.OpPrereqError("Disk adoption not allowed for"
9130 " instance import", errors.ECODE_INVAL)
9132 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9133 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9134 " but no 'adopt' parameter given" %
9135 self.op.disk_template,
9138 self.adopt_disks = has_adopt
9140 # instance name verification
9141 if self.op.name_check:
9142 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9143 self.op.instance_name = self.hostname1.name
9144 # used in CheckPrereq for ip ping check
9145 self.check_ip = self.hostname1.ip
9146 else:
9147 self.check_ip = None
9149 # file storage checks
9150 if (self.op.file_driver and
9151 self.op.file_driver not in constants.FILE_DRIVER):
9152 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9153 self.op.file_driver, errors.ECODE_INVAL)
9155 if self.op.disk_template == constants.DT_FILE:
9156 opcodes.RequireFileStorage()
9157 elif self.op.disk_template == constants.DT_SHARED_FILE:
9158 opcodes.RequireSharedFileStorage()
9160 ### Node/iallocator related checks
9161 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9163 if self.op.pnode is not None:
9164 if self.op.disk_template in constants.DTS_INT_MIRROR:
9165 if self.op.snode is None:
9166 raise errors.OpPrereqError("The networked disk templates need"
9167 " a mirror node", errors.ECODE_INVAL)
9169 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9171 self.op.snode = None
9173 self._cds = _GetClusterDomainSecret()
9175 if self.op.mode == constants.INSTANCE_IMPORT:
9176 # On import force_variant must be True, because if we forced it at
9177 # initial install, our only chance when importing it back is that it
9178 # works again!
9179 self.op.force_variant = True
9181 if self.op.no_install:
9182 self.LogInfo("No-installation mode has no effect during import")
9184 elif self.op.mode == constants.INSTANCE_CREATE:
9185 if self.op.os_type is None:
9186 raise errors.OpPrereqError("No guest OS specified",
9188 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9189 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9190 " installation" % self.op.os_type,
9192 if self.op.disk_template is None:
9193 raise errors.OpPrereqError("No disk template specified",
9196 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9197 # Check handshake to ensure both clusters have the same domain secret
9198 src_handshake = self.op.source_handshake
9199 if not src_handshake:
9200 raise errors.OpPrereqError("Missing source handshake",
9203 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9206 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9209 # Load and check source CA
9210 self.source_x509_ca_pem = self.op.source_x509_ca
9211 if not self.source_x509_ca_pem:
9212 raise errors.OpPrereqError("Missing source X509 CA",
9216 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9218 except OpenSSL.crypto.Error, err:
9219 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9220 (err, ), errors.ECODE_INVAL)
9222 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9223 if errcode is not None:
9224 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9227 self.source_x509_ca = cert
9229 src_instance_name = self.op.source_instance_name
9230 if not src_instance_name:
9231 raise errors.OpPrereqError("Missing source instance name",
9234 self.source_instance_name = \
9235 netutils.GetHostname(name=src_instance_name).name
9237 else:
9238 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9239 self.op.mode, errors.ECODE_INVAL)
9241 def ExpandNames(self):
9242 """ExpandNames for CreateInstance.
9244 Figure out the right locks for instance creation.
9247 self.needed_locks = {}
9249 instance_name = self.op.instance_name
9250 # this is just a preventive check, but someone might still add this
9251 # instance in the meantime, and creation will fail at lock-add time
9252 if instance_name in self.cfg.GetInstanceList():
9253 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9254 instance_name, errors.ECODE_EXISTS)
9256 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9258 if self.op.iallocator:
9259 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9260 # specifying a group on instance creation and then selecting nodes from
9261 # that group
9262 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9263 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9264 else:
9265 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9266 nodelist = [self.op.pnode]
9267 if self.op.snode is not None:
9268 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9269 nodelist.append(self.op.snode)
9270 self.needed_locks[locking.LEVEL_NODE] = nodelist
9271 # Lock resources of instance's primary and secondary nodes (copy to
9272 # prevent accidential modification)
9273 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9275 # in case of import lock the source node too
9276 if self.op.mode == constants.INSTANCE_IMPORT:
9277 src_node = self.op.src_node
9278 src_path = self.op.src_path
9280 if src_path is None:
9281 self.op.src_path = src_path = self.op.instance_name
9283 if src_node is None:
9284 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9285 self.op.src_node = None
9286 if os.path.isabs(src_path):
9287 raise errors.OpPrereqError("Importing an instance from a path"
9288 " requires a source node option",
9291 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9292 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9293 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9294 if not os.path.isabs(src_path):
9295 self.op.src_path = src_path = \
9296 utils.PathJoin(constants.EXPORT_DIR, src_path)
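
  # For illustration: creating a DRBD instance on explicitly given nodes
  # "node1" (primary) and "node2" (secondary) leaves needed_locks roughly as
  #   {locking.LEVEL_NODE: ["node1", "node2"],
  #    locking.LEVEL_NODE_RES: ["node1", "node2"]}
  # whereas with an iallocator both levels end up as locking.ALL_SET.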

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)

    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
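
  # ial.result is a list of node names: result[0] becomes the primary node
  # and, when the disk template requires a mirror (required_nodes == 2),
  # result[1] becomes the secondary.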

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
      ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info
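
  # Schematically, the export info is a serialized config file along these
  # lines (option names abbreviated, for illustration only):
  #   [export]
  #   version = 0
  #   os = ...
  #   [instance]
  #   name = ...
  #   disk0_size = ...
  #   nic0_mac = ...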

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" %
                                     " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
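
  # Resulting precedence for imports: values given in the opcode always win,
  # values recorded in the export fill the remaining gaps, and anything
  # still unset falls back to the cluster defaults once the SimpleFill*
  # helpers run in CheckPrereq.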

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
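
  # Example: if the cluster default for BE_VCPUS is 1 and the opcode also
  # specified 1, the parameter is dropped here, so the instance will track
  # future changes of the cluster default instead of pinning the value.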

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined",
                                   errors.ECODE_STATE)
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
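
  # For example, with a cluster file storage dir of
  # "/srv/ganeti/file-storage" (path shown for illustration only), an opcode
  # file_storage_dir of "mydir" and instance "inst1.example.com", the result
  # is "/srv/ganeti/file-storage/mydir/inst1.example.com".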

  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                                    ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    objects.UpgradeBeParams(self.op.beparams)
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        }
      if constants.IDISK_METAVG in disk:
        new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)
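
    # At this point every entry of self.disks is a dict of the form
    # {IDISK_SIZE: ..., IDISK_MODE: ..., IDISK_VG: ...}, optionally extended
    # with IDISK_METAVG and/or IDISK_ADOPT.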

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      }

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = _CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),
                                 errors.ECODE_VIOLATE)

    # disk parameters (not customizable at instance or node level)
    # just use the primary node parameters, ignoring the secondary.
    self.diskparams = group_info.diskparams

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))
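
      # Note: the lv_list payload maps "vg/lv" names to tuples whose first
      # field is the size in MiB and whose third field is the online flag
      # used above; only those two fields are relied upon here.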

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.diskparams)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks,
                                                              False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make
        # a connection. In some cases stopping an instance can take a long
        # time, hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
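
# The returned dict is the serialized form of objects.InstanceConsole; it
# always carries the instance name and the console kind, plus kind-specific
# fields, e.g. (illustration only):
#   {"instance": "inst1.example.com", "kind": "ssh", "host": ..., ...}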


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)

    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
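
  # In relocation mode the allocator is expected to return exactly one node
  # (ial.required_nodes == 1), hence result[0] above is the new secondary.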

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))
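
    # Summary of the roles computed above: target_node is the node whose
    # disks get replaced (or the secondary being replaced), other_node is
    # the untouched peer, and new_node is only set when changing the
    # secondary.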

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, let's verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                       new_group_info)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    # TODO: compute disk parameters
    primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
    secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
    if primary_node_info.group != secondary_node_info.group:
      self.lu.LogInfo("The instance primary and secondary nodes are in two"
                      " different node groups; the disk parameters of the"
                      " primary node's group will be applied.")

    self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
        ("Incorrect node locks, owning %s, expected %s" %
         (owned_nodes, self.node_secondary_ip.keys()))
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
              self.lu.owned_locks(locking.LEVEL_NODE_RES))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
        "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
        "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]), params=data_p)
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]), params=meta_p)

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names
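
  # The returned mapping is keyed by the disk's iv_name, e.g. (illustration)
  # {"disk/0": (dev, old_lvs, new_lvs)}; it drives the detach/rename/attach
  # loop and the final removal of the old LVs below.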

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to <name>_replaced-<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name <name>_replaced-<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)
11003 # Step 4: drbd minors and drbd setup changes
11004 # after this, we must manually remove the drbd minors on both the
11005 # error and the success paths
11006 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11007 minors = self.cfg.AllocateDRBDMinor([self.new_node
11008 for dev in self.instance.disks],
11009 self.instance.name)
11010 logging.debug("Allocated minors %r", minors)
11013 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11014 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11015 (self.new_node, idx))
11016 # create new devices on new_node; note that we create two IDs:
11017 # one without port, so the drbd will be activated without
11018 # networking information on the new node at this stage, and one
11019 # with network, for the latter activation in step 4
11020 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11021 if self.instance.primary_node == o_node1:
11022 p_minor = o_minor1
11023 else:
11024 assert self.instance.primary_node == o_node2, "Three-node instance?"
11025 p_minor = o_minor2
11027 new_alone_id = (self.instance.primary_node, self.new_node, None,
11028 p_minor, new_minor, o_secret)
11029 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11030 p_minor, new_minor, o_secret)
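# For illustration (hostnames, port, minors and secret are made up): a
# disk with logical_id ("nodeA", "nodeB", 11000, 0, 1, "secret") whose
# new secondary was allocated minor 3 would get
#   new_alone_id = ("nodeA", "nodeC", None, 0, 3, "secret")
#   new_net_id   = ("nodeA", "nodeC", 11000, 0, 3, "secret")
# where the None port in new_alone_id is what keeps the first activation
# of the device network-less.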
11032 iv_names[idx] = (dev, dev.children, new_net_id)
11033 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11034 new_net_id)
11035 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11036 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11037 logical_id=new_alone_id,
11038 children=dev.children,
11039 size=dev.size,
11040 params=drbd_params)
11041 try:
11042 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11043 _GetInstanceInfoText(self.instance), False)
11044 except errors.GenericError:
11045 self.cfg.ReleaseDRBDMinors(self.instance.name)
11046 raise
11048 # We have new devices, shutdown the drbd on the old secondary
11049 for idx, dev in enumerate(self.instance.disks):
11050 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11051 self.cfg.SetDiskID(dev, self.target_node)
11052 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11053 if msg:
11054 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11055 " node: %s" % (idx, msg),
11056 hint=("Please cleanup this device manually as"
11057 " soon as possible"))
11059 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11060 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11061 self.instance.disks)[pnode]
11063 msg = result.fail_msg
11064 if msg:
11065 # detaches didn't succeed (unlikely)
11066 self.cfg.ReleaseDRBDMinors(self.instance.name)
11067 raise errors.OpExecError("Can't detach the disks from the network on"
11068 " old node: %s" % (msg,))
11070 # if we managed to detach at least one, we update all the disks of
11071 # the instance to point to the new secondary
11072 self.lu.LogInfo("Updating instance configuration")
11073 for dev, _, new_logical_id in iv_names.itervalues():
11074 dev.logical_id = new_logical_id
11075 self.cfg.SetDiskID(dev, self.instance.primary_node)
11077 self.cfg.Update(self.instance, feedback_fn)
11079 # Release all node locks (the configuration has been updated)
11080 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11082 # and now perform the drbd attach
11083 self.lu.LogInfo("Attaching primary drbds to new secondary"
11084 " (standalone => connected)")
11085 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11086 self.new_node],
11087 self.node_secondary_ip,
11088 self.instance.disks,
11089 self.instance.name,
11090 False)
11091 for to_node, to_result in result.items():
11092 msg = to_result.fail_msg
11093 if msg:
11094 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11095 to_node, msg,
11096 hint=("please do a gnt-instance info to see the"
11097 " status of disks"))
11099 cstep = itertools.count(5)
11101 if self.early_release:
11102 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11103 self._RemoveOldStorage(self.target_node, iv_names)
11104 # TODO: Check if releasing locks early still makes sense
11105 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11107 # Release all resource locks except those used by the instance
11108 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11109 keep=self.node_secondary_ip.keys())
11111 # TODO: Can the instance lock be downgraded here? Take the optional disk
11112 # shutdown in the caller into consideration.
11115 # This can fail as the old devices are degraded and _WaitForSync
11116 # does a combined result over all disks, so we don't check its return value
11117 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11118 _WaitForSync(self.lu, self.instance)
11120 # Check all devices manually
11121 self._CheckDevices(self.instance.primary_node, iv_names)
11123 # Step: remove old storage
11124 if not self.early_release:
11125 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11126 self._RemoveOldStorage(self.target_node, iv_names)
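# For reference, this whole replacement path is driven by the
# OpInstanceReplaceDisks opcode; a change-secondary request roughly
# looks like the following sketch (instance and node names are made up):
#
#   op = opcodes.OpInstanceReplaceDisks(instance_name="inst1",
#                                       mode=constants.REPLACE_DISK_CHG,
#                                       remote_node="node3",
#                                       disks=[],
#                                       early_release=False)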
11129 class LURepairNodeStorage(NoHooksLU):
11130 """Repairs the volume group on a node.
11135 def CheckArguments(self):
11136 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11138 storage_type = self.op.storage_type
11140 if (constants.SO_FIX_CONSISTENCY not in
11141 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11142 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11143 " repaired" % storage_type,
11144 errors.ECODE_INVAL)
11146 def ExpandNames(self):
11147 self.needed_locks = {
11148 locking.LEVEL_NODE: [self.op.node_name],
11151 def _CheckFaultyDisks(self, instance, node_name):
11152 """Ensure faulty disks abort the opcode or at least warn."""
11153 try:
11154 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11155 node_name, True):
11156 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11157 " node '%s'" % (instance.name, node_name),
11158 errors.ECODE_STATE)
11159 except errors.OpPrereqError, err:
11160 if self.op.ignore_consistency:
11161 self.proc.LogWarning(str(err.args[0]))
11162 else:
11163 raise
11165 def CheckPrereq(self):
11166 """Check prerequisites.
11169 # Check whether any instance on this node has faulty disks
11170 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11171 if inst.admin_state != constants.ADMINST_UP:
11172 continue
11173 check_nodes = set(inst.all_nodes)
11174 check_nodes.discard(self.op.node_name)
11175 for inst_node_name in check_nodes:
11176 self._CheckFaultyDisks(inst, inst_node_name)
11178 def Exec(self, feedback_fn):
11179 feedback_fn("Repairing storage unit '%s' on %s ..." %
11180 (self.op.name, self.op.node_name))
11182 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11183 result = self.rpc.call_storage_execute(self.op.node_name,
11184 self.op.storage_type, st_args,
11186 constants.SO_FIX_CONSISTENCY)
11187 result.Raise("Failed to repair storage unit '%s' on %s" %
11188 (self.op.name, self.op.node_name))
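# Sketch of a matching opcode, as a client would submit it (node and
# volume group names are made up):
#
#   op = opcodes.OpRepairNodeStorage(node_name="node1",
#                                    storage_type=constants.ST_LVM_VG,
#                                    name="xenvg",
#                                    ignore_consistency=False)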
11191 class LUNodeEvacuate(NoHooksLU):
11192 """Evacuates instances off a list of nodes.
11197 _MODE2IALLOCATOR = {
11198 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11199 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11200 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11202 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11203 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11204 constants.IALLOCATOR_NEVAC_MODES)
11206 def CheckArguments(self):
11207 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11209 def ExpandNames(self):
11210 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11212 if self.op.remote_node is not None:
11213 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11214 assert self.op.remote_node
11216 if self.op.remote_node == self.op.node_name:
11217 raise errors.OpPrereqError("Can not use evacuated node as a new"
11218 " secondary node", errors.ECODE_INVAL)
11220 if self.op.mode != constants.NODE_EVAC_SEC:
11221 raise errors.OpPrereqError("Without the use of an iallocator only"
11222 " secondary instances can be evacuated",
11223 errors.ECODE_INVAL)
11226 self.share_locks = _ShareAll()
11227 self.needed_locks = {
11228 locking.LEVEL_INSTANCE: [],
11229 locking.LEVEL_NODEGROUP: [],
11230 locking.LEVEL_NODE: [],
11233 # Determine nodes (via group) optimistically, needs verification once locks
11234 # have been acquired
11235 self.lock_nodes = self._DetermineNodes()
11237 def _DetermineNodes(self):
11238 """Gets the list of nodes to operate on.
11241 if self.op.remote_node is None:
11242 # Iallocator will choose any node(s) in the same group
11243 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11244 else:
11245 group_nodes = frozenset([self.op.remote_node])
11247 # Determine nodes to be locked
11248 return set([self.op.node_name]) | group_nodes
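# Example: evacuating node1 out of a group {node1, node2, node3} via an
# iallocator locks all three nodes, while an explicit remote node (say
# node2) narrows the lock set to {node1, node2}. Names are illustrative.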
11250 def _DetermineInstances(self):
11251 """Builds list of instances to operate on.
11254 assert self.op.mode in constants.NODE_EVAC_MODES
11256 if self.op.mode == constants.NODE_EVAC_PRI:
11257 # Primary instances only
11258 inst_fn = _GetNodePrimaryInstances
11259 assert self.op.remote_node is None, \
11260 "Evacuating primary instances requires iallocator"
11261 elif self.op.mode == constants.NODE_EVAC_SEC:
11262 # Secondary instances only
11263 inst_fn = _GetNodeSecondaryInstances
11266 assert self.op.mode == constants.NODE_EVAC_ALL
11267 inst_fn = _GetNodeInstances
11268 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11270 raise errors.OpPrereqError("Due to an issue with the iallocator"
11271 " interface it is not possible to evacuate"
11272 " all instances at once; specify explicitly"
11273 " whether to evacuate primary or secondary"
11275 errors.ECODE_INVAL)
11277 return inst_fn(self.cfg, self.op.node_name)
11279 def DeclareLocks(self, level):
11280 if level == locking.LEVEL_INSTANCE:
11281 # Lock instances optimistically, needs verification once node and group
11282 # locks have been acquired
11283 self.needed_locks[locking.LEVEL_INSTANCE] = \
11284 set(i.name for i in self._DetermineInstances())
11286 elif level == locking.LEVEL_NODEGROUP:
11287 # Lock node groups for all potential target nodes optimistically, needs
11288 # verification once nodes have been acquired
11289 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11290 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11292 elif level == locking.LEVEL_NODE:
11293 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11295 def CheckPrereq(self):
11297 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11298 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11299 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11301 need_nodes = self._DetermineNodes()
11303 if not owned_nodes.issuperset(need_nodes):
11304 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11305 " locks were acquired, current nodes are"
11306 " are '%s', used to be '%s'; retry the"
11308 (self.op.node_name,
11309 utils.CommaJoin(need_nodes),
11310 utils.CommaJoin(owned_nodes)),
11311 errors.ECODE_STATE)
11313 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11314 if owned_groups != wanted_groups:
11315 raise errors.OpExecError("Node groups changed since locks were acquired,"
11316 " current groups are '%s', used to be '%s';"
11317 " retry the operation" %
11318 (utils.CommaJoin(wanted_groups),
11319 utils.CommaJoin(owned_groups)))
11321 # Determine affected instances
11322 self.instances = self._DetermineInstances()
11323 self.instance_names = [i.name for i in self.instances]
11325 if set(self.instance_names) != owned_instances:
11326 raise errors.OpExecError("Instances on node '%s' changed since locks"
11327 " were acquired, current instances are '%s',"
11328 " used to be '%s'; retry the operation" %
11329 (self.op.node_name,
11330 utils.CommaJoin(self.instance_names),
11331 utils.CommaJoin(owned_instances)))
11333 if self.instance_names:
11334 self.LogInfo("Evacuating instances from node '%s': %s",
11336 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11338 self.LogInfo("No instances to evacuate from node '%s'",
11341 if self.op.remote_node is not None:
11342 for i in self.instances:
11343 if i.primary_node == self.op.remote_node:
11344 raise errors.OpPrereqError("Node %s is the primary node of"
11345 " instance %s, cannot use it as"
11347 (self.op.remote_node, i.name),
11348 errors.ECODE_INVAL)
11350 def Exec(self, feedback_fn):
11351 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11353 if not self.instance_names:
11354 # No instances to evacuate
11355 jobs = []
11357 elif self.op.iallocator is not None:
11358 # TODO: Implement relocation to other group
11359 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11360 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11361 instances=list(self.instance_names))
11363 ial.Run(self.op.iallocator)
11365 if not ial.success:
11366 raise errors.OpPrereqError("Can't compute node evacuation using"
11367 " iallocator '%s': %s" %
11368 (self.op.iallocator, ial.info),
11369 errors.ECODE_NORES)
11371 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11373 elif self.op.remote_node is not None:
11374 assert self.op.mode == constants.NODE_EVAC_SEC
11375 jobs = [
11376 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11377 remote_node=self.op.remote_node,
11378 disks=[],
11379 mode=constants.REPLACE_DISK_CHG,
11380 early_release=self.op.early_release)]
11381 for instance_name in self.instance_names
11382 ]
11384 else:
11385 raise errors.ProgrammerError("No iallocator or remote node")
11387 return ResultWithJobs(jobs)
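# The jobs list built above holds one single-opcode job definition per
# evacuated instance, e.g. [[op_for_inst1], [op_for_inst2]], so each
# instance is handled in its own job (opcode names illustrative).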
11390 def _SetOpEarlyRelease(early_release, op):
11391 """Sets C{early_release} flag on opcodes if available.
11395 op.early_release = early_release
11396 except AttributeError:
11397 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11399 return op
11402 def _NodeEvacDest(use_nodes, group, nodes):
11403 """Returns group or nodes depending on caller's choice.
11406 if use_nodes:
11407 return utils.CommaJoin(nodes)
11408 else:
11409 return group
11412 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11413 """Unpacks the result of change-group and node-evacuate iallocator requests.
11415 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11416 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11418 @type lu: L{LogicalUnit}
11419 @param lu: Logical unit instance
11420 @type alloc_result: tuple/list
11421 @param alloc_result: Result from iallocator
11422 @type early_release: bool
11423 @param early_release: Whether to release locks early if possible
11424 @type use_nodes: bool
11425 @param use_nodes: Whether to display node names instead of groups
11428 (moved, failed, jobs) = alloc_result
11430 if failed:
11431 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11432 for (name, reason) in failed)
11433 lu.LogWarning("Unable to evacuate instances %s", failreason)
11434 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11436 if moved:
11437 lu.LogInfo("Instances to be moved: %s",
11438 utils.CommaJoin("%s (to %s)" %
11439 (name, _NodeEvacDest(use_nodes, group, nodes))
11440 for (name, group, nodes) in moved))
11442 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11443 map(opcodes.OpCode.LoadOpCode, ops))
11444 for ops in jobs]
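# Shape of alloc_result, for illustration (all values made up):
#
#   moved  = [("inst1", "uuid-of-target-group", ["node2", "node3"])]
#   failed = [("inst2", "insufficient memory")]
#   jobs   = [[serialized_opcode_dict, ...], ...]
#
# where every inner list of jobs is turned into one job of deserialized
# opcodes with the early_release flag applied.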
11447 class LUInstanceGrowDisk(LogicalUnit):
11448 """Grow a disk of an instance.
11451 HPATH = "disk-grow"
11452 HTYPE = constants.HTYPE_INSTANCE
11455 def ExpandNames(self):
11456 self._ExpandAndLockInstance()
11457 self.needed_locks[locking.LEVEL_NODE] = []
11458 self.needed_locks[locking.LEVEL_NODE_RES] = []
11459 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11460 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11462 def DeclareLocks(self, level):
11463 if level == locking.LEVEL_NODE:
11464 self._LockInstancesNodes()
11465 elif level == locking.LEVEL_NODE_RES:
11467 self.needed_locks[locking.LEVEL_NODE_RES] = \
11468 self.needed_locks[locking.LEVEL_NODE][:]
11470 def BuildHooksEnv(self):
11471 """Build hooks env.
11473 This runs on the master, the primary and all the secondaries.
11477 "DISK": self.op.disk,
11478 "AMOUNT": self.op.amount,
11480 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11483 def BuildHooksNodes(self):
11484 """Build hooks nodes.
11487 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11490 def CheckPrereq(self):
11491 """Check prerequisites.
11493 This checks that the instance is in the cluster.
11496 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11497 assert instance is not None, \
11498 "Cannot retrieve locked instance %s" % self.op.instance_name
11499 nodenames = list(instance.all_nodes)
11500 for node in nodenames:
11501 _CheckNodeOnline(self, node)
11503 self.instance = instance
11505 if instance.disk_template not in constants.DTS_GROWABLE:
11506 raise errors.OpPrereqError("Instance's disk layout does not support"
11507 " growing", errors.ECODE_INVAL)
11509 self.disk = instance.FindDisk(self.op.disk)
11511 if instance.disk_template not in (constants.DT_FILE,
11512 constants.DT_SHARED_FILE,
11514 # TODO: check the free disk space for file, when that feature will be
11516 _CheckNodesFreeDiskPerVG(self, nodenames,
11517 self.disk.ComputeGrowth(self.op.amount))
11519 def Exec(self, feedback_fn):
11520 """Execute disk grow.
11523 instance = self.instance
11524 disk = self.disk
11526 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11527 assert (self.owned_locks(locking.LEVEL_NODE) ==
11528 self.owned_locks(locking.LEVEL_NODE_RES))
11530 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11531 if not disks_ok:
11532 raise errors.OpExecError("Cannot activate block device to grow")
11534 feedback_fn("Growing disk %s of instance '%s' by %s" %
11535 (self.op.disk, instance.name,
11536 utils.FormatUnit(self.op.amount, "h")))
11538 # First run all grow ops in dry-run mode
11539 for node in instance.all_nodes:
11540 self.cfg.SetDiskID(disk, node)
11541 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11542 result.Raise("Grow request failed to node %s" % node)
11544 # We know that (as far as we can test) operations across different
11545 # nodes will succeed, time to run it for real
11546 for node in instance.all_nodes:
11547 self.cfg.SetDiskID(disk, node)
11548 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11549 result.Raise("Grow request failed to node %s" % node)
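# This is a two-phase commit of sorts: the first loop asks every node to
# validate the grow request (the last rpc argument True selects the
# dry-run), and only when all nodes agree does the second loop repeat
# the calls with the dry-run flag off, performing the actual resize.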
11551 # TODO: Rewrite code to work properly
11552 # DRBD goes into sync mode for a short amount of time after executing the
11553 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11554 # calling "resize" in sync mode fails. Sleeping for a short amount of
11555 # time is a work-around.
11556 time.sleep(5)
11558 disk.RecordGrow(self.op.amount)
11559 self.cfg.Update(instance, feedback_fn)
11561 # Changes have been recorded, release node lock
11562 _ReleaseLocks(self, locking.LEVEL_NODE)
11564 # Downgrade lock while waiting for sync
11565 self.glm.downgrade(locking.LEVEL_INSTANCE)
11567 if self.op.wait_for_sync:
11568 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11569 if disk_abort:
11570 self.proc.LogWarning("Disk sync-ing has not returned a good"
11571 " status; please check the instance")
11572 if instance.admin_state != constants.ADMINST_UP:
11573 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11574 elif instance.admin_state != constants.ADMINST_UP:
11575 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11576 " not supposed to be running because no wait for"
11577 " sync mode was requested")
11579 assert self.owned_locks(locking.LEVEL_NODE_RES)
11580 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11583 class LUInstanceQueryData(NoHooksLU):
11584 """Query runtime instance data.
11589 def ExpandNames(self):
11590 self.needed_locks = {}
11592 # Use locking if requested or when non-static information is wanted
11593 if not (self.op.static or self.op.use_locking):
11594 self.LogWarning("Non-static data requested, locks need to be acquired")
11595 self.op.use_locking = True
11597 if self.op.instances or not self.op.use_locking:
11598 # Expand instance names right here
11599 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11600 else:
11601 # Will use acquired locks
11602 self.wanted_names = None
11604 if self.op.use_locking:
11605 self.share_locks = _ShareAll()
11607 if self.wanted_names is None:
11608 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11609 else:
11610 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11612 self.needed_locks[locking.LEVEL_NODE] = []
11613 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11615 def DeclareLocks(self, level):
11616 if self.op.use_locking and level == locking.LEVEL_NODE:
11617 self._LockInstancesNodes()
11619 def CheckPrereq(self):
11620 """Check prerequisites.
11622 This only checks the optional instance list against the existing names.
11625 if self.wanted_names is None:
11626 assert self.op.use_locking, "Locking was not used"
11627 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11629 self.wanted_instances = \
11630 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11632 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11633 """Returns the status of a block device
11636 if self.op.static or not node:
11637 return None
11639 self.cfg.SetDiskID(dev, node)
11641 result = self.rpc.call_blockdev_find(node, dev)
11642 if result.offline:
11643 return None
11645 result.Raise("Can't compute disk status for %s" % instance_name)
11647 status = result.payload
11648 if status is None:
11649 return None
11651 return (status.dev_path, status.major, status.minor,
11652 status.sync_percent, status.estimated_time,
11653 status.is_degraded, status.ldisk_status)
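# Example of the tuple returned above (numbers made up): a syncing DRBD
# device could report
#   ("/dev/drbd0", 147, 0, 80.5, 120.0, False, constants.LDS_OKAY)
# i.e. (dev_path, major, minor, sync_percent, estimated_time,
# is_degraded, ldisk_status).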
11655 def _ComputeDiskStatus(self, instance, snode, dev):
11656 """Compute block device status.
11659 if dev.dev_type in constants.LDS_DRBD:
11660 # we change the snode then (otherwise we use the one passed in)
11661 if dev.logical_id[0] == instance.primary_node:
11662 snode = dev.logical_id[1]
11663 else:
11664 snode = dev.logical_id[0]
11666 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11667 instance.name, dev)
11668 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11671 dev_children = map(compat.partial(self._ComputeDiskStatus,
11678 "iv_name": dev.iv_name,
11679 "dev_type": dev.dev_type,
11680 "logical_id": dev.logical_id,
11681 "physical_id": dev.physical_id,
11682 "pstatus": dev_pstatus,
11683 "sstatus": dev_sstatus,
11684 "children": dev_children,
11689 def Exec(self, feedback_fn):
11690 """Gather and return data"""
11693 cluster = self.cfg.GetClusterInfo()
11695 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11696 for i in self.wanted_instances)
11697 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11698 if self.op.static or pnode.offline:
11699 remote_state = None
11701 self.LogWarning("Primary node %s is marked offline, returning static"
11702 " information only for instance %s" %
11703 (pnode.name, instance.name))
11705 remote_info = self.rpc.call_instance_info(instance.primary_node,
11706 instance.name,
11707 instance.hypervisor)
11708 remote_info.Raise("Error checking node %s" % instance.primary_node)
11709 remote_info = remote_info.payload
11710 if remote_info and "state" in remote_info:
11711 remote_state = "up"
11712 else:
11713 if instance.admin_state == constants.ADMINST_UP:
11714 remote_state = "down"
11715 else:
11716 remote_state = instance.admin_state
11718 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11721 result[instance.name] = {
11722 "name": instance.name,
11723 "config_state": instance.admin_state,
11724 "run_state": remote_state,
11725 "pnode": instance.primary_node,
11726 "snodes": instance.secondary_nodes,
11728 # this happens to be the same format used for hooks
11729 "nics": _NICListToTuple(self, instance.nics),
11730 "disk_template": instance.disk_template,
11732 "hypervisor": instance.hypervisor,
11733 "network_port": instance.network_port,
11734 "hv_instance": instance.hvparams,
11735 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11736 "be_instance": instance.beparams,
11737 "be_actual": cluster.FillBE(instance),
11738 "os_instance": instance.osparams,
11739 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11740 "serial_no": instance.serial_no,
11741 "mtime": instance.mtime,
11742 "ctime": instance.ctime,
11743 "uuid": instance.uuid,
11749 class LUInstanceSetParams(LogicalUnit):
11750 """Modifies an instances's parameters.
11753 HPATH = "instance-modify"
11754 HTYPE = constants.HTYPE_INSTANCE
11757 def CheckArguments(self):
11758 if not (self.op.nics or self.op.disks or self.op.disk_template or
11759 self.op.hvparams or self.op.beparams or self.op.os_name or
11760 self.op.online_inst or self.op.offline_inst or
11761 self.op.runtime_mem):
11762 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11764 if self.op.hvparams:
11765 _CheckGlobalHvParams(self.op.hvparams)
11768 disk_addremove = 0
11769 for disk_op, disk_dict in self.op.disks:
11770 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11771 if disk_op == constants.DDM_REMOVE:
11772 disk_addremove += 1
11773 continue
11774 elif disk_op == constants.DDM_ADD:
11775 disk_addremove += 1
11776 else:
11777 if not isinstance(disk_op, int):
11778 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11779 if not isinstance(disk_dict, dict):
11780 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11781 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11783 if disk_op == constants.DDM_ADD:
11784 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11785 if mode not in constants.DISK_ACCESS_SET:
11786 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11787 errors.ECODE_INVAL)
11788 size = disk_dict.get(constants.IDISK_SIZE, None)
11789 if size is None:
11790 raise errors.OpPrereqError("Required disk parameter size missing",
11791 errors.ECODE_INVAL)
11792 try:
11793 size = int(size)
11794 except (TypeError, ValueError), err:
11795 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11796 str(err), errors.ECODE_INVAL)
11797 disk_dict[constants.IDISK_SIZE] = size
11798 else:
11799 # modification of disk
11800 if constants.IDISK_SIZE in disk_dict:
11801 raise errors.OpPrereqError("Disk size change not possible, use"
11802 " grow-disk", errors.ECODE_INVAL)
11804 if disk_addremove > 1:
11805 raise errors.OpPrereqError("Only one disk add or remove operation"
11806 " supported at a time", errors.ECODE_INVAL)
11808 if self.op.disks and self.op.disk_template is not None:
11809 raise errors.OpPrereqError("Disk template conversion and other disk"
11810 " changes not supported at the same time",
11811 errors.ECODE_INVAL)
11813 if (self.op.disk_template and
11814 self.op.disk_template in constants.DTS_INT_MIRROR and
11815 self.op.remote_node is None):
11816 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11817 " one requires specifying a secondary node",
11818 errors.ECODE_INVAL)
11821 nic_addremove = 0
11822 for nic_op, nic_dict in self.op.nics:
11823 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11824 if nic_op == constants.DDM_REMOVE:
11825 nic_addremove += 1
11826 continue
11827 elif nic_op == constants.DDM_ADD:
11828 nic_addremove += 1
11829 else:
11830 if not isinstance(nic_op, int):
11831 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11832 if not isinstance(nic_dict, dict):
11833 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11834 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11836 # nic_dict should be a dict
11837 nic_ip = nic_dict.get(constants.INIC_IP, None)
11838 if nic_ip is not None:
11839 if nic_ip.lower() == constants.VALUE_NONE:
11840 nic_dict[constants.INIC_IP] = None
11842 if not netutils.IPAddress.IsValid(nic_ip):
11843 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11844 errors.ECODE_INVAL)
11846 nic_bridge = nic_dict.get("bridge", None)
11847 nic_link = nic_dict.get(constants.INIC_LINK, None)
11848 if nic_bridge and nic_link:
11849 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11850 " at the same time", errors.ECODE_INVAL)
11851 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11852 nic_dict["bridge"] = None
11853 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11854 nic_dict[constants.INIC_LINK] = None
11856 if nic_op == constants.DDM_ADD:
11857 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11858 if nic_mac is None:
11859 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11861 if constants.INIC_MAC in nic_dict:
11862 nic_mac = nic_dict[constants.INIC_MAC]
11863 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11864 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11866 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11867 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11868 " modifying an existing nic",
11869 errors.ECODE_INVAL)
11871 if nic_addremove > 1:
11872 raise errors.OpPrereqError("Only one NIC add or remove operation"
11873 " supported at a time", errors.ECODE_INVAL)
11875 def ExpandNames(self):
11876 self._ExpandAndLockInstance()
11877 # Can't even acquire node locks in shared mode as upcoming changes in
11878 # Ganeti 2.6 will start to modify the node object on disk conversion
11879 self.needed_locks[locking.LEVEL_NODE] = []
11880 self.needed_locks[locking.LEVEL_NODE_RES] = []
11881 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11883 def DeclareLocks(self, level):
11884 if level == locking.LEVEL_NODE:
11885 self._LockInstancesNodes()
11886 if self.op.disk_template and self.op.remote_node:
11887 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11888 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11889 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11891 self.needed_locks[locking.LEVEL_NODE_RES] = \
11892 self.needed_locks[locking.LEVEL_NODE][:]
11894 def BuildHooksEnv(self):
11895 """Build hooks env.
11897 This runs on the master, primary and secondaries.
11901 if constants.BE_MINMEM in self.be_new:
11902 args["minmem"] = self.be_new[constants.BE_MINMEM]
11903 if constants.BE_MAXMEM in self.be_new:
11904 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11905 if constants.BE_VCPUS in self.be_new:
11906 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11907 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11908 # information at all.
11911 nic_override = dict(self.op.nics)
11912 for idx, nic in enumerate(self.instance.nics):
11913 if idx in nic_override:
11914 this_nic_override = nic_override[idx]
11915 else:
11916 this_nic_override = {}
11917 if constants.INIC_IP in this_nic_override:
11918 ip = this_nic_override[constants.INIC_IP]
11919 else:
11920 ip = nic.ip
11921 if constants.INIC_MAC in this_nic_override:
11922 mac = this_nic_override[constants.INIC_MAC]
11923 else:
11924 mac = nic.mac
11925 if idx in self.nic_pnew:
11926 nicparams = self.nic_pnew[idx]
11927 else:
11928 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11929 mode = nicparams[constants.NIC_MODE]
11930 link = nicparams[constants.NIC_LINK]
11931 args["nics"].append((ip, mac, mode, link))
11932 if constants.DDM_ADD in nic_override:
11933 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11934 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11935 nicparams = self.nic_pnew[constants.DDM_ADD]
11936 mode = nicparams[constants.NIC_MODE]
11937 link = nicparams[constants.NIC_LINK]
11938 args["nics"].append((ip, mac, mode, link))
11939 elif constants.DDM_REMOVE in nic_override:
11940 del args["nics"][-1]
11942 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11943 if self.op.disk_template:
11944 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11945 if self.op.runtime_mem:
11946 env["RUNTIME_MEMORY"] = self.op.runtime_mem
11950 def BuildHooksNodes(self):
11951 """Build hooks nodes.
11954 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11957 def CheckPrereq(self):
11958 """Check prerequisites.
11960 This only checks the instance list against the existing names.
11963 # checking the new params on the primary/secondary nodes
11965 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11966 cluster = self.cluster = self.cfg.GetClusterInfo()
11967 assert self.instance is not None, \
11968 "Cannot retrieve locked instance %s" % self.op.instance_name
11969 pnode = instance.primary_node
11970 nodelist = list(instance.all_nodes)
11971 pnode_info = self.cfg.GetNodeInfo(pnode)
11972 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11975 if self.op.os_name and not self.op.force:
11976 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11977 self.op.force_variant)
11978 instance_os = self.op.os_name
11979 else:
11980 instance_os = instance.os
11982 if self.op.disk_template:
11983 if instance.disk_template == self.op.disk_template:
11984 raise errors.OpPrereqError("Instance already has disk template %s" %
11985 instance.disk_template, errors.ECODE_INVAL)
11987 if (instance.disk_template,
11988 self.op.disk_template) not in self._DISK_CONVERSIONS:
11989 raise errors.OpPrereqError("Unsupported disk template conversion from"
11990 " %s to %s" % (instance.disk_template,
11991 self.op.disk_template),
11992 errors.ECODE_INVAL)
11993 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11994 msg="cannot change disk template")
11995 if self.op.disk_template in constants.DTS_INT_MIRROR:
11996 if self.op.remote_node == pnode:
11997 raise errors.OpPrereqError("Given new secondary node %s is the same"
11998 " as the primary node of the instance" %
11999 self.op.remote_node, errors.ECODE_STATE)
12000 _CheckNodeOnline(self, self.op.remote_node)
12001 _CheckNodeNotDrained(self, self.op.remote_node)
12002 # FIXME: here we assume that the old instance type is DT_PLAIN
12003 assert instance.disk_template == constants.DT_PLAIN
12004 disks = [{constants.IDISK_SIZE: d.size,
12005 constants.IDISK_VG: d.logical_id[0]}
12006 for d in instance.disks]
12007 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12008 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12010 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12011 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12012 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12013 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12014 ignore=self.op.ignore_ipolicy)
12015 if pnode_info.group != snode_info.group:
12016 self.LogWarning("The primary and secondary nodes are in two"
12017 " different node groups; the disk parameters"
12018 " from the first disk's node group will be"
12021 # hvparams processing
12022 if self.op.hvparams:
12023 hv_type = instance.hypervisor
12024 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12025 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12026 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12029 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12030 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12031 self.hv_proposed = self.hv_new = hv_new # the new actual values
12032 self.hv_inst = i_hvdict # the new dict (without defaults)
12033 else:
12034 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12035 instance.hvparams)
12036 self.hv_new = self.hv_inst = {}
12038 # beparams processing
12039 if self.op.beparams:
12040 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12042 objects.UpgradeBeParams(i_bedict)
12043 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12044 be_new = cluster.SimpleFillBE(i_bedict)
12045 self.be_proposed = self.be_new = be_new # the new actual values
12046 self.be_inst = i_bedict # the new dict (without defaults)
12047 else:
12048 self.be_new = self.be_inst = {}
12049 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12050 be_old = cluster.FillBE(instance)
12052 # CPU param validation -- checking every time a parameter is
12053 # changed to cover all cases where either CPU mask or vcpus have
12054 # been changed
12055 if (constants.BE_VCPUS in self.be_proposed and
12056 constants.HV_CPU_MASK in self.hv_proposed):
12058 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12059 # Verify mask is consistent with number of vCPUs. Can skip this
12060 # test if only 1 entry in the CPU mask, which means same mask
12061 # is applied to all vCPUs.
12062 if (len(cpu_list) > 1 and
12063 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12064 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12066 (self.be_proposed[constants.BE_VCPUS],
12067 self.hv_proposed[constants.HV_CPU_MASK]),
12068 errors.ECODE_INVAL)
12070 # Only perform this test if a new CPU mask is given
12071 if constants.HV_CPU_MASK in self.hv_new:
12072 # Calculate the largest CPU number requested
12073 max_requested_cpu = max(map(max, cpu_list))
12074 # Check that all of the instance's nodes have enough physical CPUs to
12075 # satisfy the requested CPU mask
12076 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12077 max_requested_cpu + 1, instance.hypervisor)
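# Worked example of the two checks above (mask syntax assumed to follow
# utils.ParseMultiCpuMask): a mask of "1:2-3" expands to two per-vCPU
# entries, so BE_VCPUS must be 2, and its highest CPU number (3) makes
# every node need at least 4 physical CPUs.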
12079 # osparams processing
12080 if self.op.osparams:
12081 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12082 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12083 self.os_inst = i_osdict # the new dict (without defaults)
12085 self.warn = []
12089 #TODO(dynmem): do the appropriate check involving MINMEM
12090 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12091 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12092 mem_check_list = [pnode]
12093 if be_new[constants.BE_AUTO_BALANCE]:
12094 # either we changed auto_balance to yes or it was from before
12095 mem_check_list.extend(instance.secondary_nodes)
12096 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12097 instance.hypervisor)
12098 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12099 [instance.hypervisor])
12100 pninfo = nodeinfo[pnode]
12101 msg = pninfo.fail_msg
12102 if msg:
12103 # Assume the primary node is unreachable and go ahead
12104 self.warn.append("Can't get info from primary node %s: %s" %
12105 (pnode, msg))
12106 else:
12107 (_, _, (pnhvinfo, )) = pninfo.payload
12108 if not isinstance(pnhvinfo.get("memory_free", None), int):
12109 self.warn.append("Node data from primary node %s doesn't contain"
12110 " free memory information" % pnode)
12111 elif instance_info.fail_msg:
12112 self.warn.append("Can't get instance runtime information: %s" %
12113 instance_info.fail_msg)
12114 else:
12115 if instance_info.payload:
12116 current_mem = int(instance_info.payload["memory"])
12117 else:
12118 # Assume instance not running
12119 # (there is a slight race condition here, but it's not very
12120 # probable, and we have no other way to check)
12121 # TODO: Describe race condition
12122 current_mem = 0
12123 #TODO(dynmem): do the appropriate check involving MINMEM
12124 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12125 pnhvinfo["memory_free"])
12127 raise errors.OpPrereqError("This change will prevent the instance"
12128 " from starting, due to %d MB of memory"
12129 " missing on its primary node" %
12131 errors.ECODE_NORES)
12133 if be_new[constants.BE_AUTO_BALANCE]:
12134 for node, nres in nodeinfo.items():
12135 if node not in instance.secondary_nodes:
12136 continue
12137 nres.Raise("Can't get info from secondary node %s" % node,
12138 prereq=True, ecode=errors.ECODE_STATE)
12139 (_, _, (nhvinfo, )) = nres.payload
12140 if not isinstance(nhvinfo.get("memory_free", None), int):
12141 raise errors.OpPrereqError("Secondary node %s didn't return free"
12142 " memory information" % node,
12143 errors.ECODE_STATE)
12144 #TODO(dynmem): do the appropriate check involving MINMEM
12145 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12146 raise errors.OpPrereqError("This change will prevent the instance"
12147 " from failover to its secondary node"
12148 " %s, due to not enough memory" % node,
12149 errors.ECODE_STATE)
12151 if self.op.runtime_mem:
12152 remote_info = self.rpc.call_instance_info(instance.primary_node,
12153 instance.name,
12154 instance.hypervisor)
12155 remote_info.Raise("Error checking node %s" % instance.primary_node)
12156 if not remote_info.payload: # not running already
12157 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12158 errors.ECODE_STATE)
12160 current_memory = remote_info.payload["memory"]
12161 if (not self.op.force and
12162 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12163 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12164 raise errors.OpPrereqError("Instance %s must have memory between %d"
12165 " and %d MB of memory unless --force is"
12166 " given" % (instance.name,
12167 self.be_proposed[constants.BE_MINMEM],
12168 self.be_proposed[constants.BE_MAXMEM]),
12169 errors.ECODE_INVAL)
12171 if self.op.runtime_mem > current_memory:
12172 _CheckNodeFreeMemory(self, instance.primary_node,
12173 "ballooning memory for instance %s" %
12175 self.op.memory - current_memory,
12176 instance.hypervisor)
12179 self.nic_pnew = {}
12180 self.nic_pinst = {}
12181 for nic_op, nic_dict in self.op.nics:
12182 if nic_op == constants.DDM_REMOVE:
12183 if not instance.nics:
12184 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
12185 errors.ECODE_INVAL)
12187 if nic_op != constants.DDM_ADD:
12189 if not instance.nics:
12190 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
12191 " no NICs" % nic_op,
12192 errors.ECODE_INVAL)
12193 if nic_op < 0 or nic_op >= len(instance.nics):
12194 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
12196 (nic_op, len(instance.nics) - 1),
12197 errors.ECODE_INVAL)
12198 old_nic_params = instance.nics[nic_op].nicparams
12199 old_nic_ip = instance.nics[nic_op].ip
12200 else:
12201 old_nic_params = {}
12202 old_nic_ip = None
12204 update_params_dict = dict([(key, nic_dict[key])
12205 for key in constants.NICS_PARAMETERS
12206 if key in nic_dict])
12208 if "bridge" in nic_dict:
12209 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12211 new_nic_params = _GetUpdatedParams(old_nic_params,
12212 update_params_dict)
12213 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12214 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12215 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12216 self.nic_pinst[nic_op] = new_nic_params
12217 self.nic_pnew[nic_op] = new_filled_nic_params
12218 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12220 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12221 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12222 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12224 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12226 self.warn.append(msg)
12228 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12229 if new_nic_mode == constants.NIC_MODE_ROUTED:
12230 if constants.INIC_IP in nic_dict:
12231 nic_ip = nic_dict[constants.INIC_IP]
12232 else:
12233 nic_ip = old_nic_ip
12234 if nic_ip is None:
12235 raise errors.OpPrereqError("Cannot set the nic ip to None"
12236 " on a routed nic", errors.ECODE_INVAL)
12237 if constants.INIC_MAC in nic_dict:
12238 nic_mac = nic_dict[constants.INIC_MAC]
12239 if nic_mac is None:
12240 raise errors.OpPrereqError("Cannot set the nic mac to None",
12241 errors.ECODE_INVAL)
12242 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12243 # otherwise generate the mac
12244 nic_dict[constants.INIC_MAC] = \
12245 self.cfg.GenerateMAC(self.proc.GetECId())
12247 # or validate/reserve the current one
12249 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12250 except errors.ReservationError:
12251 raise errors.OpPrereqError("MAC address %s already in use"
12252 " in cluster" % nic_mac,
12253 errors.ECODE_NOTUNIQUE)
12256 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12257 raise errors.OpPrereqError("Disk operations not supported for"
12258 " diskless instances",
12259 errors.ECODE_INVAL)
12260 for disk_op, _ in self.op.disks:
12261 if disk_op == constants.DDM_REMOVE:
12262 if len(instance.disks) == 1:
12263 raise errors.OpPrereqError("Cannot remove the last disk of"
12264 " an instance", errors.ECODE_INVAL)
12265 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12266 msg="cannot remove disks")
12268 if (disk_op == constants.DDM_ADD and
12269 len(instance.disks) >= constants.MAX_DISKS):
12270 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12271 " add more" % constants.MAX_DISKS,
12272 errors.ECODE_STATE)
12273 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12275 if disk_op < 0 or disk_op >= len(instance.disks):
12276 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12278 (disk_op, len(instance.disks)),
12279 errors.ECODE_INVAL)
12281 # disabling the instance
12282 if self.op.offline_inst:
12283 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12284 msg="cannot change instance state to offline")
12286 # enabling the instance
12287 if self.op.online_inst:
12288 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12289 msg="cannot make instance go online")
12291 def _ConvertPlainToDrbd(self, feedback_fn):
12292 """Converts an instance from plain to drbd.
12295 feedback_fn("Converting template to drbd")
12296 instance = self.instance
12297 pnode = instance.primary_node
12298 snode = self.op.remote_node
12300 assert instance.disk_template == constants.DT_PLAIN
12302 # create a fake disk info for _GenerateDiskTemplate
12303 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12304 constants.IDISK_VG: d.logical_id[0]}
12305 for d in instance.disks]
12306 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12307 instance.name, pnode, [snode],
12308 disk_info, None, None, 0, feedback_fn,
12310 info = _GetInstanceInfoText(instance)
12311 feedback_fn("Creating aditional volumes...")
12312 # first, create the missing data and meta devices
12313 for disk in new_disks:
12314 # unfortunately this is... not too nice
12315 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12317 for child in disk.children:
12318 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12319 # at this stage, all new LVs have been created, we can rename the
12321 feedback_fn("Renaming original volumes...")
12322 rename_list = [(o, n.children[0].logical_id)
12323 for (o, n) in zip(instance.disks, new_disks)]
12324 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12325 result.Raise("Failed to rename original LVs")
12327 feedback_fn("Initializing DRBD devices...")
12328 # all child devices are in place, we can now create the DRBD devices
12329 for disk in new_disks:
12330 for node in [pnode, snode]:
12331 f_create = node == pnode
12332 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12334 # at this point, the instance has been modified
12335 instance.disk_template = constants.DT_DRBD8
12336 instance.disks = new_disks
12337 self.cfg.Update(instance, feedback_fn)
12339 # Release node locks while waiting for sync
12340 _ReleaseLocks(self, locking.LEVEL_NODE)
12342 # disks are created, waiting for sync
12343 disk_abort = not _WaitForSync(self, instance,
12344 oneshot=not self.op.wait_for_sync)
12346 raise errors.OpExecError("There are some degraded disks for"
12347 " this instance, please cleanup manually")
12349 # Node resource locks will be released by caller
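# Schematic result for a single disk (volume names are made up):
#
#   before:  LV xenvg/inst1-disk0
#   after:   DRBD8 between pnode and snode
#              data child: LV xenvg/inst1-disk0 (the renamed original)
#              meta child: newly created metadata LV
#
# which is why only the metadata volumes and the secondary's copies had
# to be created from scratch above.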
12351 def _ConvertDrbdToPlain(self, feedback_fn):
12352 """Converts an instance from drbd to plain.
12355 instance = self.instance
12357 assert len(instance.secondary_nodes) == 1
12358 assert instance.disk_template == constants.DT_DRBD8
12360 pnode = instance.primary_node
12361 snode = instance.secondary_nodes[0]
12362 feedback_fn("Converting template to plain")
12364 old_disks = instance.disks
12365 new_disks = [d.children[0] for d in old_disks]
12367 # copy over size and mode
12368 for parent, child in zip(old_disks, new_disks):
12369 child.size = parent.size
12370 child.mode = parent.mode
12372 # update instance structure
12373 instance.disks = new_disks
12374 instance.disk_template = constants.DT_PLAIN
12375 self.cfg.Update(instance, feedback_fn)
12377 # Release locks in case removing disks takes a while
12378 _ReleaseLocks(self, locking.LEVEL_NODE)
12380 feedback_fn("Removing volumes on the secondary node...")
12381 for disk in old_disks:
12382 self.cfg.SetDiskID(disk, snode)
12383 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12385 self.LogWarning("Could not remove block device %s on node %s,"
12386 " continuing anyway: %s", disk.iv_name, snode, msg)
12388 feedback_fn("Removing unneeded volumes on the primary node...")
12389 for idx, disk in enumerate(old_disks):
12390 meta = disk.children[1]
12391 self.cfg.SetDiskID(meta, pnode)
12392 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12394 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12395 " continuing anyway: %s", idx, pnode, msg)
12397 # this is a DRBD disk, return its port to the pool
12398 for disk in old_disks:
12399 tcp_port = disk.logical_id[2]
12400 self.cfg.AddTcpUdpPort(tcp_port)
12402 # Node resource locks will be released by caller
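# Note the symmetry with _ConvertPlainToDrbd: the DRBD data LVs become
# the plain disks, and the TCP ports reserved for DRBD go back to the
# cluster pool so later conversions or new instances can reuse them.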
12404 def Exec(self, feedback_fn):
12405 """Modifies an instance.
12407 All parameters take effect only at the next restart of the instance.
12410 # Process here the warnings from CheckPrereq, as we don't have a
12411 # feedback_fn there.
12412 for warn in self.warn:
12413 feedback_fn("WARNING: %s" % warn)
12415 assert ((self.op.disk_template is None) ^
12416 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12417 "Not owning any node resource locks"
12419 result = []
12420 instance = self.instance
12423 if self.op.runtime_mem:
12424 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12426 self.op.runtime_mem)
12427 rpcres.Raise("Cannot modify instance runtime memory")
12428 result.append(("runtime_memory", self.op.runtime_mem))
12431 for disk_op, disk_dict in self.op.disks:
12432 if disk_op == constants.DDM_REMOVE:
12433 # remove the last disk
12434 device = instance.disks.pop()
12435 device_idx = len(instance.disks)
12436 for node, disk in device.ComputeNodeTree(instance.primary_node):
12437 self.cfg.SetDiskID(disk, node)
12438 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12440 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12441 " continuing anyway", device_idx, node, msg)
12442 result.append(("disk/%d" % device_idx, "remove"))
12444 # if this is a DRBD disk, return its port to the pool
12445 if device.dev_type in constants.LDS_DRBD:
12446 tcp_port = device.logical_id[2]
12447 self.cfg.AddTcpUdpPort(tcp_port)
12448 elif disk_op == constants.DDM_ADD:
12450 if instance.disk_template in (constants.DT_FILE,
12451 constants.DT_SHARED_FILE):
12452 file_driver, file_path = instance.disks[0].logical_id
12453 file_path = os.path.dirname(file_path)
11454 else:
11455 file_driver = file_path = None
12456 disk_idx_base = len(instance.disks)
12457 new_disk = _GenerateDiskTemplate(self,
12458 instance.disk_template,
12459 instance.name, instance.primary_node,
12460 instance.secondary_nodes,
12466 self.diskparams)[0]
12467 instance.disks.append(new_disk)
12468 info = _GetInstanceInfoText(instance)
12470 logging.info("Creating volume %s for instance %s",
12471 new_disk.iv_name, instance.name)
12472 # Note: this needs to be kept in sync with _CreateDisks
12474 for node in instance.all_nodes:
12475 f_create = node == instance.primary_node
12477 _CreateBlockDev(self, node, instance, new_disk,
12478 f_create, info, f_create)
12479 except errors.OpExecError, err:
12480 self.LogWarning("Failed to create volume %s (%s) on"
12482 new_disk.iv_name, new_disk, node, err)
12483 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12484 (new_disk.size, new_disk.mode)))
12486 # change a given disk
12487 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12488 result.append(("disk.mode/%d" % disk_op,
12489 disk_dict[constants.IDISK_MODE]))
12491 if self.op.disk_template:
12493 check_nodes = set(instance.all_nodes)
12494 if self.op.remote_node:
12495 check_nodes.add(self.op.remote_node)
12496 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12497 owned = self.owned_locks(level)
12498 assert not (check_nodes - owned), \
12499 ("Not owning the correct locks, owning %r, expected at least %r" %
12500 (owned, check_nodes))
12502 r_shut = _ShutdownInstanceDisks(self, instance)
12504 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12505 " proceed with disk template conversion")
12506 mode = (instance.disk_template, self.op.disk_template)
12508 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12510 self.cfg.ReleaseDRBDMinors(instance.name)
12512 result.append(("disk_template", self.op.disk_template))
12514 assert instance.disk_template == self.op.disk_template, \
12515 ("Expected disk template '%s', found '%s'" %
12516 (self.op.disk_template, instance.disk_template))
12518 # Release node and resource locks if there are any (they might already have
12519 # been released during disk conversion)
12520 _ReleaseLocks(self, locking.LEVEL_NODE)
12521 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12524 for nic_op, nic_dict in self.op.nics:
12525 if nic_op == constants.DDM_REMOVE:
12526 # remove the last nic
12527 del instance.nics[-1]
12528 result.append(("nic.%d" % len(instance.nics), "remove"))
12529 elif nic_op == constants.DDM_ADD:
12530 # mac and bridge should be set by now
12531 mac = nic_dict[constants.INIC_MAC]
12532 ip = nic_dict.get(constants.INIC_IP, None)
12533 nicparams = self.nic_pinst[constants.DDM_ADD]
12534 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12535 instance.nics.append(new_nic)
12536 result.append(("nic.%d" % (len(instance.nics) - 1),
12537 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12538 (new_nic.mac, new_nic.ip,
12539 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12540 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12543 for key in (constants.INIC_MAC, constants.INIC_IP):
12544 if key in nic_dict:
12545 setattr(instance.nics[nic_op], key, nic_dict[key])
12546 if nic_op in self.nic_pinst:
12547 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12548 for key, val in nic_dict.iteritems():
12549 result.append(("nic.%s/%d" % (key, nic_op), val))
12552 if self.op.hvparams:
12553 instance.hvparams = self.hv_inst
12554 for key, val in self.op.hvparams.iteritems():
12555 result.append(("hv/%s" % key, val))
12558 if self.op.beparams:
12559 instance.beparams = self.be_inst
12560 for key, val in self.op.beparams.iteritems():
12561 result.append(("be/%s" % key, val))
12564 if self.op.os_name:
12565 instance.os = self.op.os_name
12568 if self.op.osparams:
12569 instance.osparams = self.os_inst
12570 for key, val in self.op.osparams.iteritems():
12571 result.append(("os/%s" % key, val))
12573 # online/offline instance
12574 if self.op.online_inst:
12575 self.cfg.MarkInstanceDown(instance.name)
12576 result.append(("admin_state", constants.ADMINST_DOWN))
12577 if self.op.offline_inst:
12578 self.cfg.MarkInstanceOffline(instance.name)
12579 result.append(("admin_state", constants.ADMINST_OFFLINE))
12581 self.cfg.Update(instance, feedback_fn)
12583 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12584 self.owned_locks(locking.LEVEL_NODE)), \
12585 "All node locks should have been released by now"
12589 _DISK_CONVERSIONS = {
12590 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12591 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12592 }
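# Exec dispatches on the (current, requested) template pair, so
# supporting another conversion means adding an entry here plus a
# matching _Convert* method with the same (self, feedback_fn) signature.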
12595 class LUInstanceChangeGroup(LogicalUnit):
12596 HPATH = "instance-change-group"
12597 HTYPE = constants.HTYPE_INSTANCE
12600 def ExpandNames(self):
12601 self.share_locks = _ShareAll()
12602 self.needed_locks = {
12603 locking.LEVEL_NODEGROUP: [],
12604 locking.LEVEL_NODE: [],
12607 self._ExpandAndLockInstance()
12609 if self.op.target_groups:
12610 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12611 self.op.target_groups)
12613 self.req_target_uuids = None
12615 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12617 def DeclareLocks(self, level):
12618 if level == locking.LEVEL_NODEGROUP:
12619 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12621 if self.req_target_uuids:
12622 lock_groups = set(self.req_target_uuids)
12624 # Lock all groups used by instance optimistically; this requires going
12625 # via the node before it's locked, requiring verification later on
12626 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12627 lock_groups.update(instance_groups)
12629 # No target groups, need to lock all of them
12630 lock_groups = locking.ALL_SET
12632 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12634 elif level == locking.LEVEL_NODE:
12635 if self.req_target_uuids:
12636 # Lock all nodes used by instances
12637 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12638 self._LockInstancesNodes()
12640 # Lock all nodes in all potential target groups
12641 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12642 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12643 member_nodes = [node_name
12644 for group in lock_groups
12645 for node_name in self.cfg.GetNodeGroup(group).members]
12646 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12647 else:
12648 # Lock all nodes as all groups are potential targets
12649 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12651 def CheckPrereq(self):
12652 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12653 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12654 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12656 assert (self.req_target_uuids is None or
12657 owned_groups.issuperset(self.req_target_uuids))
12658 assert owned_instances == set([self.op.instance_name])
12660 # Get instance information
12661 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12663 # Check if node groups for locked instance are still correct
12664 assert owned_nodes.issuperset(self.instance.all_nodes), \
12665 ("Instance %s's nodes changed while we kept the lock" %
12666 self.op.instance_name)
12668 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12669 owned_groups)
12671 if self.req_target_uuids:
12672 # User requested specific target groups
12673 self.target_uuids = self.req_target_uuids
12674 else:
12675 # All groups except those used by the instance are potential targets
12676 self.target_uuids = owned_groups - inst_groups
12678 conflicting_groups = self.target_uuids & inst_groups
12679 if conflicting_groups:
12680 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12681 " used by the instance '%s'" %
12682 (utils.CommaJoin(conflicting_groups),
12683 self.op.instance_name),
12684 errors.ECODE_INVAL)
12686 if not self.target_uuids:
12687 raise errors.OpPrereqError("There are no possible target groups",
12688 errors.ECODE_INVAL)
12690 def BuildHooksEnv(self):
12691 """Build hooks env.
12693 """
12694 assert self.target_uuids
12696 env = {
12697 "TARGET_GROUPS": " ".join(self.target_uuids),
12698 }
12700 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12702 return env
12704 def BuildHooksNodes(self):
12705 """Build hooks nodes.
12707 """
12708 mn = self.cfg.GetMasterNode()
12709 return ([mn], [mn])
12711 def Exec(self, feedback_fn):
12712 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12714 assert instances == [self.op.instance_name], "Instance not locked"
12716 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12717 instances=instances, target_groups=list(self.target_uuids))
12719 ial.Run(self.op.iallocator)
12721 if not ial.success:
12722 raise errors.OpPrereqError("Can't compute solution for changing group of"
12723 " instance '%s' using iallocator '%s': %s" %
12724 (self.op.instance_name, self.op.iallocator,
12725 ial.info),
12726 errors.ECODE_NORES)
12728 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12730 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12731 " instance '%s'", len(jobs), self.op.instance_name)
12733 return ResultWithJobs(jobs)
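# Editor's sketch (assumption about the client side, not from this module):
# the LU above is normally reached by submitting an OpInstanceChangeGroup
# opcode, roughly
#
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["group2"],
#                                      iallocator="hail")
#
# the jobs submitted via ResultWithJobs then perform the actual moves.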
12736 class LUBackupQuery(NoHooksLU):
12737 """Query the exports list
12739 """
12741 REQ_BGL = False
12742 def ExpandNames(self):
12743 self.needed_locks = {}
12744 self.share_locks[locking.LEVEL_NODE] = 1
12745 if not self.op.nodes:
12746 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12747 else:
12748 self.needed_locks[locking.LEVEL_NODE] = \
12749 _GetWantedNodes(self, self.op.nodes)
12751 def Exec(self, feedback_fn):
12752 """Compute the list of all the exported system images.
12754 @rtype: dict
12755 @return: a dictionary with the structure node->(export-list)
12756 where export-list is a list of the instances exported on
12757 that node.
12759 """
12760 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12761 rpcresult = self.rpc.call_export_list(self.nodes)
12762 result = {}
12763 for node in rpcresult:
12764 if rpcresult[node].fail_msg:
12765 result[node] = False
12766 else:
12767 result[node] = rpcresult[node].payload
12769 return result
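# Editor's example (hypothetical names): nodes whose export list could not
# be fetched are reported as False rather than a list, e.g.
#
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}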
12772 class LUBackupPrepare(NoHooksLU):
12773 """Prepares an instance for an export and returns useful information.
12775 """
12777 REQ_BGL = False
12778 def ExpandNames(self):
12779 self._ExpandAndLockInstance()
12781 def CheckPrereq(self):
12782 """Check prerequisites.
12784 """
12785 instance_name = self.op.instance_name
12787 self.instance = self.cfg.GetInstanceInfo(instance_name)
12788 assert self.instance is not None, \
12789 "Cannot retrieve locked instance %s" % self.op.instance_name
12790 _CheckNodeOnline(self, self.instance.primary_node)
12792 self._cds = _GetClusterDomainSecret()
12794 def Exec(self, feedback_fn):
12795 """Prepares an instance for an export.
12797 """
12798 instance = self.instance
12800 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12801 salt = utils.GenerateSecret(8)
12803 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12804 result = self.rpc.call_x509_cert_create(instance.primary_node,
12805 constants.RIE_CERT_VALIDITY)
12806 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12808 (name, cert_pem) = result.payload
12810 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12811 cert_pem)
12813 return {
12814 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12815 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12816 salt),
12817 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12818 }
12820 return None
12823 class LUBackupExport(LogicalUnit):
12824 """Export an instance to an image in the cluster.
12826 """
12827 HPATH = "instance-export"
12828 HTYPE = constants.HTYPE_INSTANCE
12829 REQ_BGL = False
12831 def CheckArguments(self):
12832 """Check the arguments.
12834 """
12835 self.x509_key_name = self.op.x509_key_name
12836 self.dest_x509_ca_pem = self.op.destination_x509_ca
12838 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12839 if not self.x509_key_name:
12840 raise errors.OpPrereqError("Missing X509 key name for encryption",
12841 errors.ECODE_INVAL)
12843 if not self.dest_x509_ca_pem:
12844 raise errors.OpPrereqError("Missing destination X509 CA",
12845 errors.ECODE_INVAL)
12847 def ExpandNames(self):
12848 self._ExpandAndLockInstance()
12850 # Lock all nodes for local exports
12851 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12852 # FIXME: lock only instance primary and destination node
12854 # Sad but true, for now we have do lock all nodes, as we don't know where
12855 # the previous export might be, and in this LU we search for it and
12856 # remove it from its current node. In the future we could fix this by:
12857 # - making a tasklet to search (share-lock all), then create the
12858 # new one, then one to remove, after
12859 # - removing the removal operation altogether
12860 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12862 def DeclareLocks(self, level):
12863 """Last minute lock declaration."""
12864 # All nodes are locked anyway, so nothing to do here.
12866 def BuildHooksEnv(self):
12867 """Build hooks env.
12869 This will run on the master, primary node and target node.
12871 """
12872 env = {
12873 "EXPORT_MODE": self.op.mode,
12874 "EXPORT_NODE": self.op.target_node,
12875 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12876 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12877 # TODO: Generic function for boolean env variables
12878 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12879 }
12881 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12883 return env
12885 def BuildHooksNodes(self):
12886 """Build hooks nodes.
12888 """
12889 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12891 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12892 nl.append(self.op.target_node)
12894 return (nl, nl)
12896 def CheckPrereq(self):
12897 """Check prerequisites.
12899 This checks that the instance and node names are valid.
12901 """
12902 instance_name = self.op.instance_name
12904 self.instance = self.cfg.GetInstanceInfo(instance_name)
12905 assert self.instance is not None, \
12906 "Cannot retrieve locked instance %s" % self.op.instance_name
12907 _CheckNodeOnline(self, self.instance.primary_node)
12909 if (self.op.remove_instance and
12910 self.instance.admin_state == constants.ADMINST_UP and
12911 not self.op.shutdown):
12912 raise errors.OpPrereqError("Can not remove instance without shutting it"
12913 " down before", errors.ECODE_STATE)
12915 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12916 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12917 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12918 assert self.dst_node is not None
12920 _CheckNodeOnline(self, self.dst_node.name)
12921 _CheckNodeNotDrained(self, self.dst_node.name)
12923 self._cds = None
12924 self.dest_disk_info = None
12925 self.dest_x509_ca = None
12927 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12928 self.dst_node = None
12930 if len(self.op.target_node) != len(self.instance.disks):
12931 raise errors.OpPrereqError(("Received destination information for %s"
12932 " disks, but instance %s has %s disks") %
12933 (len(self.op.target_node), instance_name,
12934 len(self.instance.disks)),
12935 errors.ECODE_INVAL)
12937 cds = _GetClusterDomainSecret()
12939 # Check X509 key name
12940 try:
12941 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12942 except (TypeError, ValueError), err:
12943 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12945 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12946 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12947 errors.ECODE_INVAL)
12949 # Load and verify CA
12950 try:
12951 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12952 except OpenSSL.crypto.Error, err:
12953 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12954 (err, ), errors.ECODE_INVAL)
12956 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12957 if errcode is not None:
12958 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12959 (msg, ), errors.ECODE_INVAL)
12961 self.dest_x509_ca = cert
12963 # Verify target information
12964 disk_info = []
12965 for idx, disk_data in enumerate(self.op.target_node):
12966 try:
12967 (host, port, magic) = \
12968 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12969 except errors.GenericError, err:
12970 raise errors.OpPrereqError("Target info for disk %s: %s" %
12971 (idx, err), errors.ECODE_INVAL)
12973 disk_info.append((host, port, magic))
12975 assert len(disk_info) == len(self.op.target_node)
12976 self.dest_disk_info = disk_info
12978 else:
12979 raise errors.ProgrammerError("Unhandled export mode %r" %
12980 self.op.mode)
12982 # instance disk type verification
12983 # TODO: Implement export support for file-based disks
12984 for disk in self.instance.disks:
12985 if disk.dev_type == constants.LD_FILE:
12986 raise errors.OpPrereqError("Export not supported for instances with"
12987 " file-based disks", errors.ECODE_INVAL)
12989 def _CleanupExports(self, feedback_fn):
12990 """Removes exports of current instance from all other nodes.
12992 If an instance in a cluster with nodes A..D was exported to node C, its
12993 exports will be removed from the nodes A, B and D.
12995 """
12996 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12998 nodelist = self.cfg.GetNodeList()
12999 nodelist.remove(self.dst_node.name)
13001 # on one-node clusters nodelist will be empty after the removal
13002 # if we proceed the backup would be removed because OpBackupQuery
13003 # substitutes an empty list with the full cluster node list.
13004 iname = self.instance.name
13005 if nodelist:
13006 feedback_fn("Removing old exports for instance %s" % iname)
13007 exportlist = self.rpc.call_export_list(nodelist)
13008 for node in exportlist:
13009 if exportlist[node].fail_msg:
13010 continue
13011 if iname in exportlist[node].payload:
13012 msg = self.rpc.call_export_remove(node, iname).fail_msg
13013 if msg:
13014 self.LogWarning("Could not remove older export for instance %s"
13015 " on node %s: %s", iname, node, msg)
13017 def Exec(self, feedback_fn):
13018 """Export an instance to an image in the cluster.
13020 """
13021 assert self.op.mode in constants.EXPORT_MODES
13023 instance = self.instance
13024 src_node = instance.primary_node
13026 if self.op.shutdown:
13027 # shutdown the instance, but not the disks
13028 feedback_fn("Shutting down instance %s" % instance.name)
13029 result = self.rpc.call_instance_shutdown(src_node, instance,
13030 self.op.shutdown_timeout)
13031 # TODO: Maybe ignore failures if ignore_remove_failures is set
13032 result.Raise("Could not shutdown instance %s on"
13033 " node %s" % (instance.name, src_node))
13035 # set the disks ID correctly since call_instance_start needs the
13036 # correct drbd minor to create the symlinks
13037 for disk in instance.disks:
13038 self.cfg.SetDiskID(disk, src_node)
13040 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13042 if activate_disks:
13043 # Activate the instance disks if we're exporting a stopped instance
13044 feedback_fn("Activating disks for %s" % instance.name)
13045 _StartInstanceDisks(self, instance, None)
13047 try:
13048 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13049 instance)
13051 helper.CreateSnapshots()
13052 try:
13053 if (self.op.shutdown and
13054 instance.admin_state == constants.ADMINST_UP and
13055 not self.op.remove_instance):
13056 assert not activate_disks
13057 feedback_fn("Starting instance %s" % instance.name)
13058 result = self.rpc.call_instance_start(src_node,
13059 (instance, None, None), False)
13060 msg = result.fail_msg
13061 if msg:
13062 feedback_fn("Failed to start instance: %s" % msg)
13063 _ShutdownInstanceDisks(self, instance)
13064 raise errors.OpExecError("Could not start instance: %s" % msg)
13066 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13067 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13068 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13069 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13070 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13072 (key_name, _, _) = self.x509_key_name
13074 dest_ca_pem = \
13075 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13076 self.dest_x509_ca)
13078 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13079 key_name, dest_ca_pem,
13080 timeouts)
13081 finally:
13082 helper.Cleanup()
13084 # Check for backwards compatibility
13085 assert len(dresults) == len(instance.disks)
13086 assert compat.all(isinstance(i, bool) for i in dresults), \
13087 "Not all results are boolean: %r" % dresults
13089 finally:
13090 if activate_disks:
13091 feedback_fn("Deactivating disks for %s" % instance.name)
13092 _ShutdownInstanceDisks(self, instance)
13094 if not (compat.all(dresults) and fin_resu):
13095 failures = []
13096 if not fin_resu:
13097 failures.append("export finalization")
13098 if not compat.all(dresults):
13099 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13100 if not dsk)
13101 failures.append("disk export: disk(s) %s" % fdsk)
13103 raise errors.OpExecError("Export failed, errors in %s" %
13104 utils.CommaJoin(failures))
13106 # At this point, the export was successful, we can cleanup/finish
13108 # Remove instance if requested
13109 if self.op.remove_instance:
13110 feedback_fn("Removing instance %s" % instance.name)
13111 _RemoveInstance(self, feedback_fn, instance,
13112 self.op.ignore_remove_failures)
13114 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13115 self._CleanupExports(feedback_fn)
13117 return fin_resu, dresults
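# Editor's note (illustrative values): the returned pair combines the
# finalization status with one boolean per disk, e.g. (True, [True, True])
# for a fully successful two-disk export; partial failures such as
# (True, [False, True]) are turned into an OpExecError above instead of
# being returned.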
13120 class LUBackupRemove(NoHooksLU):
13121 """Remove exports related to the named instance.
13123 """
13125 REQ_BGL = False
13126 def ExpandNames(self):
13127 self.needed_locks = {}
13128 # We need all nodes to be locked in order for RemoveExport to work, but we
13129 # don't need to lock the instance itself, as nothing will happen to it (and
13130 # we can remove exports also for a removed instance)
13131 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13133 def Exec(self, feedback_fn):
13134 """Remove any export.
13136 """
13137 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13138 # If the instance was not found we'll try with the name that was passed in.
13139 # This will only work if it was an FQDN, though.
13140 fqdn_warn = False
13141 if not instance_name:
13142 fqdn_warn = True
13143 instance_name = self.op.instance_name
13145 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13146 exportlist = self.rpc.call_export_list(locked_nodes)
13147 found = False
13148 for node in exportlist:
13149 msg = exportlist[node].fail_msg
13150 if msg:
13151 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13152 continue
13153 if instance_name in exportlist[node].payload:
13154 found = True
13155 result = self.rpc.call_export_remove(node, instance_name)
13156 msg = result.fail_msg
13157 if msg:
13158 logging.error("Could not remove export for instance %s"
13159 " on node %s: %s", instance_name, node, msg)
13161 if fqdn_warn and not found:
13162 feedback_fn("Export not found. If trying to remove an export belonging"
13163 " to a deleted instance please use its Fully Qualified"
13164 " Domain Name.")
13167 class LUGroupAdd(LogicalUnit):
13168 """Logical unit for creating node groups.
13170 """
13171 HPATH = "group-add"
13172 HTYPE = constants.HTYPE_GROUP
13173 REQ_BGL = False
13175 def ExpandNames(self):
13176 # We need the new group's UUID here so that we can create and acquire the
13177 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13178 # that it should not check whether the UUID exists in the configuration.
13179 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13180 self.needed_locks = {}
13181 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13183 def CheckPrereq(self):
13184 """Check prerequisites.
13186 This checks that the given group name is not an existing node group
13187 already.
13189 """
13190 try:
13191 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13192 except errors.OpPrereqError:
13193 pass
13194 else:
13195 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13196 " node group (UUID: %s)" %
13197 (self.op.group_name, existing_uuid),
13198 errors.ECODE_EXISTS)
13200 if self.op.ndparams:
13201 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13203 if self.op.hv_state:
13204 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13205 else:
13206 self.new_hv_state = None
13208 if self.op.disk_state:
13209 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13210 else:
13211 self.new_disk_state = None
13213 if self.op.diskparams:
13214 for templ in constants.DISK_TEMPLATES:
13215 if templ not in self.op.diskparams:
13216 self.op.diskparams[templ] = {}
13217 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13218 else:
13219 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13221 if self.op.ipolicy:
13222 cluster = self.cfg.GetClusterInfo()
13223 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13224 try:
13225 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13226 except errors.ConfigurationError, err:
13227 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13228 errors.ECODE_INVAL)
13230 def BuildHooksEnv(self):
13231 """Build hooks env.
13233 """
13234 return {
13235 "GROUP_NAME": self.op.group_name,
13236 }
13238 def BuildHooksNodes(self):
13239 """Build hooks nodes.
13241 """
13242 mn = self.cfg.GetMasterNode()
13243 return ([mn], [mn])
13245 def Exec(self, feedback_fn):
13246 """Add the node group to the cluster.
13248 """
13249 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13250 uuid=self.group_uuid,
13251 alloc_policy=self.op.alloc_policy,
13252 ndparams=self.op.ndparams,
13253 diskparams=self.op.diskparams,
13254 ipolicy=self.op.ipolicy,
13255 hv_state_static=self.new_hv_state,
13256 disk_state_static=self.new_disk_state)
13258 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13259 del self.remove_locks[locking.LEVEL_NODEGROUP]
13262 class LUGroupAssignNodes(NoHooksLU):
13263 """Logical unit for assigning nodes to groups.
13265 """
13267 REQ_BGL = False
13268 def ExpandNames(self):
13269 # These raise errors.OpPrereqError on their own:
13270 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13271 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13273 # We want to lock all the affected nodes and groups. We have readily
13274 # available the list of nodes, and the *destination* group. To gather the
13275 # list of "source" groups, we need to fetch node information later on.
13276 self.needed_locks = {
13277 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13278 locking.LEVEL_NODE: self.op.nodes,
13279 }
13281 def DeclareLocks(self, level):
13282 if level == locking.LEVEL_NODEGROUP:
13283 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13285 # Try to get all affected nodes' groups without having the group or node
13286 # lock yet. Needs verification later in the code flow.
13287 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13289 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13291 def CheckPrereq(self):
13292 """Check prerequisites.
13294 """
13295 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13296 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13297 frozenset(self.op.nodes))
13299 expected_locks = (set([self.group_uuid]) |
13300 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13301 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13302 if actual_locks != expected_locks:
13303 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13304 " current groups are '%s', used to be '%s'" %
13305 (utils.CommaJoin(expected_locks),
13306 utils.CommaJoin(actual_locks)))
13308 self.node_data = self.cfg.GetAllNodesInfo()
13309 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13310 instance_data = self.cfg.GetAllInstancesInfo()
13312 if self.group is None:
13313 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13314 (self.op.group_name, self.group_uuid))
13316 (new_splits, previous_splits) = \
13317 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13318 for node in self.op.nodes],
13319 self.node_data, instance_data)
13321 if new_splits:
13322 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13324 if not self.op.force:
13325 raise errors.OpExecError("The following instances get split by this"
13326 " change and --force was not given: %s" %
13327 fmt_new_splits)
13328 else:
13329 self.LogWarning("This operation will split the following instances: %s",
13330 fmt_new_splits)
13332 if previous_splits:
13333 self.LogWarning("In addition, these already-split instances continue"
13334 " to be split across groups: %s",
13335 utils.CommaJoin(utils.NiceSort(previous_splits)))
13337 def Exec(self, feedback_fn):
13338 """Assign nodes to a new group.
13340 """
13341 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13343 self.cfg.AssignGroupNodes(mods)
13345 @staticmethod
13346 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13347 """Check for split instances after a node assignment.
13349 This method considers a series of node assignments as an atomic operation,
13350 and returns information about split instances after applying the set of
13351 changes.
13353 In particular, it returns information about newly split instances, and
13354 instances that were already split, and remain so after the change.
13356 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13357 considered.
13359 @type changes: list of (node_name, new_group_uuid) pairs.
13360 @param changes: list of node assignments to consider.
13361 @param node_data: a dict with data for all nodes
13362 @param instance_data: a dict with all instances to consider
13363 @rtype: a two-tuple
13364 @return: a list of instances that were previously okay and result split as a
13365 consequence of this change, and a list of instances that were previously
13366 split and this change does not fix.
13368 """
13369 changed_nodes = dict((node, group) for node, group in changes
13370 if node_data[node].group != group)
13372 all_split_instances = set()
13373 previously_split_instances = set()
13375 def InstanceNodes(instance):
13376 return [instance.primary_node] + list(instance.secondary_nodes)
13378 for inst in instance_data.values():
13379 if inst.disk_template not in constants.DTS_INT_MIRROR:
13380 continue
13382 instance_nodes = InstanceNodes(inst)
13384 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13385 previously_split_instances.add(inst.name)
13387 if len(set(changed_nodes.get(node, node_data[node].group)
13388 for node in instance_nodes)) > 1:
13389 all_split_instances.add(inst.name)
13391 return (list(all_split_instances - previously_split_instances),
13392 list(previously_split_instances & all_split_instances))
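# Editor's example (hypothetical names): with nodes n1, n2 in group A and
# n3 in group B, a DRBD instance on (n1, n2) becomes newly split by the
# change [("n2", "B")], while an instance already on (n1, n3) is reported
# in the previously-split list because the change does not fix it.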
13395 class _GroupQuery(_QueryBase):
13396 FIELDS = query.GROUP_FIELDS
13398 def ExpandNames(self, lu):
13399 lu.needed_locks = {}
13401 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13402 self._cluster = lu.cfg.GetClusterInfo()
13403 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13405 if not self.names:
13406 self.wanted = [name_to_uuid[name]
13407 for name in utils.NiceSort(name_to_uuid.keys())]
13408 else:
13409 # Accept names to be either names or UUIDs.
13410 missing = []
13411 self.wanted = []
13412 all_uuid = frozenset(self._all_groups.keys())
13414 for name in self.names:
13415 if name in all_uuid:
13416 self.wanted.append(name)
13417 elif name in name_to_uuid:
13418 self.wanted.append(name_to_uuid[name])
13419 else:
13420 missing.append(name)
13422 if missing:
13423 raise errors.OpPrereqError("Some groups do not exist: %s" %
13424 utils.CommaJoin(missing),
13425 errors.ECODE_NOENT)
13427 def DeclareLocks(self, lu, level):
13428 pass
13430 def _GetQueryData(self, lu):
13431 """Computes the list of node groups and their attributes.
13433 """
13434 do_nodes = query.GQ_NODE in self.requested_data
13435 do_instances = query.GQ_INST in self.requested_data
13437 group_to_nodes = None
13438 group_to_instances = None
13440 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13441 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13442 # latter GetAllInstancesInfo() is not enough, for we have to go through
13443 # instance->node. Hence, we will need to process nodes even if we only need
13444 # instance information.
13445 if do_nodes or do_instances:
13446 all_nodes = lu.cfg.GetAllNodesInfo()
13447 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13448 node_to_group = {}
13450 for node in all_nodes.values():
13451 if node.group in group_to_nodes:
13452 group_to_nodes[node.group].append(node.name)
13453 node_to_group[node.name] = node.group
13455 if do_instances:
13456 all_instances = lu.cfg.GetAllInstancesInfo()
13457 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13459 for instance in all_instances.values():
13460 node = instance.primary_node
13461 if node in node_to_group:
13462 group_to_instances[node_to_group[node]].append(instance.name)
13464 if not do_nodes:
13465 # Do not pass on node information if it was not requested.
13466 group_to_nodes = None
13468 return query.GroupQueryData(self._cluster,
13469 [self._all_groups[uuid]
13470 for uuid in self.wanted],
13471 group_to_nodes, group_to_instances)
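# Editor's example (schematic, hypothetical UUIDs): when both GQ_NODE and
# GQ_INST are requested the helper maps above end up shaped like
#
#   group_to_nodes = {"uuid-a": ["node1", "node2"], "uuid-b": []}
#   group_to_instances = {"uuid-a": ["inst1"], "uuid-b": []}
#
# with instances grouped under their primary node's group.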
13474 class LUGroupQuery(NoHooksLU):
13475 """Logical unit for querying node groups.
13477 """
13479 REQ_BGL = False
13480 def CheckArguments(self):
13481 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13482 self.op.output_fields, False)
13484 def ExpandNames(self):
13485 self.gq.ExpandNames(self)
13487 def DeclareLocks(self, level):
13488 self.gq.DeclareLocks(self, level)
13490 def Exec(self, feedback_fn):
13491 return self.gq.OldStyleQuery(self)
13494 class LUGroupSetParams(LogicalUnit):
13495 """Modifies the parameters of a node group.
13497 """
13498 HPATH = "group-modify"
13499 HTYPE = constants.HTYPE_GROUP
13500 REQ_BGL = False
13502 def CheckArguments(self):
13503 all_changes = [
13504 self.op.ndparams,
13505 self.op.diskparams,
13506 self.op.alloc_policy,
13507 self.op.hv_state,
13508 self.op.disk_state,
13509 self.op.ipolicy,
13510 ]
13512 if all_changes.count(None) == len(all_changes):
13513 raise errors.OpPrereqError("Please pass at least one modification",
13514 errors.ECODE_INVAL)
13516 def ExpandNames(self):
13517 # This raises errors.OpPrereqError on its own:
13518 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13520 self.needed_locks = {
13521 locking.LEVEL_INSTANCE: [],
13522 locking.LEVEL_NODEGROUP: [self.group_uuid],
13523 }
13525 self.share_locks[locking.LEVEL_INSTANCE] = 1
13527 def DeclareLocks(self, level):
13528 if level == locking.LEVEL_INSTANCE:
13529 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13531 # Lock instances optimistically, needs verification once group lock has
13532 # been acquired
13533 self.needed_locks[locking.LEVEL_INSTANCE] = \
13534 self.cfg.GetNodeGroupInstances(self.group_uuid)
13536 def CheckPrereq(self):
13537 """Check prerequisites.
13539 """
13540 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13542 # Check if locked instances are still correct
13543 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13545 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13546 cluster = self.cfg.GetClusterInfo()
13548 if self.group is None:
13549 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13550 (self.op.group_name, self.group_uuid))
13552 if self.op.ndparams:
13553 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13554 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13555 self.new_ndparams = new_ndparams
13557 if self.op.diskparams:
13558 self.new_diskparams = dict()
13559 for templ in constants.DISK_TEMPLATES:
13560 if templ not in self.op.diskparams:
13561 self.op.diskparams[templ] = {}
13562 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13563 self.op.diskparams[templ])
13564 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13565 self.new_diskparams[templ] = new_templ_params
13567 if self.op.hv_state:
13568 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13569 self.group.hv_state_static)
13571 if self.op.disk_state:
13572 self.new_disk_state = \
13573 _MergeAndVerifyDiskState(self.op.disk_state,
13574 self.group.disk_state_static)
13576 if self.op.ipolicy:
13577 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13578 self.op.ipolicy,
13579 group_policy=True)
13581 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13582 inst_filter = lambda inst: inst.name in owned_instances
13583 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13584 violations = \
13585 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13586 self.group),
13587 new_ipolicy, instances)
13589 if violations:
13590 self.LogWarning("After the ipolicy change the following instances"
13591 " violate them: %s",
13592 utils.CommaJoin(violations))
13594 def BuildHooksEnv(self):
13595 """Build hooks env.
13597 """
13598 return {
13599 "GROUP_NAME": self.op.group_name,
13600 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13601 }
13603 def BuildHooksNodes(self):
13604 """Build hooks nodes.
13606 """
13607 mn = self.cfg.GetMasterNode()
13608 return ([mn], [mn])
13610 def Exec(self, feedback_fn):
13611 """Modifies the node group.
13613 """
13614 result = []
13616 if self.op.ndparams:
13617 self.group.ndparams = self.new_ndparams
13618 result.append(("ndparams", str(self.group.ndparams)))
13620 if self.op.diskparams:
13621 self.group.diskparams = self.new_diskparams
13622 result.append(("diskparams", str(self.group.diskparams)))
13624 if self.op.alloc_policy:
13625 self.group.alloc_policy = self.op.alloc_policy
13627 if self.op.hv_state:
13628 self.group.hv_state_static = self.new_hv_state
13630 if self.op.disk_state:
13631 self.group.disk_state_static = self.new_disk_state
13633 if self.op.ipolicy:
13634 self.group.ipolicy = self.new_ipolicy
13636 self.cfg.Update(self.group, feedback_fn)
13638 return result
13640 class LUGroupRemove(LogicalUnit):
13641 HPATH = "group-remove"
13642 HTYPE = constants.HTYPE_GROUP
13643 REQ_BGL = False
13645 def ExpandNames(self):
13646 # This will raise errors.OpPrereqError on its own:
13647 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13648 self.needed_locks = {
13649 locking.LEVEL_NODEGROUP: [self.group_uuid],
13650 }
13652 def CheckPrereq(self):
13653 """Check prerequisites.
13655 This checks that the given group name exists as a node group, that it
13656 is empty (i.e., contains no nodes), and that it is not the last group
13657 of the cluster.
13659 """
13660 # Verify that the group is empty.
13661 group_nodes = [node.name
13662 for node in self.cfg.GetAllNodesInfo().values()
13663 if node.group == self.group_uuid]
13665 if group_nodes:
13666 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13667 " nodes: %s" %
13668 (self.op.group_name,
13669 utils.CommaJoin(utils.NiceSort(group_nodes))),
13670 errors.ECODE_STATE)
13672 # Verify the cluster would not be left group-less.
13673 if len(self.cfg.GetNodeGroupList()) == 1:
13674 raise errors.OpPrereqError("Group '%s' is the only group,"
13675 " cannot be removed" %
13676 self.op.group_name,
13677 errors.ECODE_STATE)
13679 def BuildHooksEnv(self):
13680 """Build hooks env.
13682 """
13683 return {
13684 "GROUP_NAME": self.op.group_name,
13685 }
13687 def BuildHooksNodes(self):
13688 """Build hooks nodes.
13690 """
13691 mn = self.cfg.GetMasterNode()
13692 return ([mn], [mn])
13694 def Exec(self, feedback_fn):
13695 """Remove the node group.
13697 """
13698 try:
13699 self.cfg.RemoveNodeGroup(self.group_uuid)
13700 except errors.ConfigurationError:
13701 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13702 (self.op.group_name, self.group_uuid))
13704 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13707 class LUGroupRename(LogicalUnit):
13708 HPATH = "group-rename"
13709 HTYPE = constants.HTYPE_GROUP
13710 REQ_BGL = False
13712 def ExpandNames(self):
13713 # This raises errors.OpPrereqError on its own:
13714 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13716 self.needed_locks = {
13717 locking.LEVEL_NODEGROUP: [self.group_uuid],
13718 }
13720 def CheckPrereq(self):
13721 """Check prerequisites.
13723 Ensures requested new name is not yet used.
13725 """
13726 try:
13727 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13728 except errors.OpPrereqError:
13729 pass
13730 else:
13731 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13732 " node group (UUID: %s)" %
13733 (self.op.new_name, new_name_uuid),
13734 errors.ECODE_EXISTS)
13736 def BuildHooksEnv(self):
13737 """Build hooks env.
13739 """
13740 return {
13741 "OLD_NAME": self.op.group_name,
13742 "NEW_NAME": self.op.new_name,
13743 }
13745 def BuildHooksNodes(self):
13746 """Build hooks nodes.
13748 """
13749 mn = self.cfg.GetMasterNode()
13751 all_nodes = self.cfg.GetAllNodesInfo()
13752 all_nodes.pop(mn, None)
13754 run_nodes = [mn]
13755 run_nodes.extend(node.name for node in all_nodes.values()
13756 if node.group == self.group_uuid)
13758 return (run_nodes, run_nodes)
13760 def Exec(self, feedback_fn):
13761 """Rename the node group.
13763 """
13764 group = self.cfg.GetNodeGroup(self.group_uuid)
13766 if group is None:
13767 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13768 (self.op.group_name, self.group_uuid))
13770 group.name = self.op.new_name
13771 self.cfg.Update(group, feedback_fn)
13773 return self.op.new_name
13776 class LUGroupEvacuate(LogicalUnit):
13777 HPATH = "group-evacuate"
13778 HTYPE = constants.HTYPE_GROUP
13779 REQ_BGL = False
13781 def ExpandNames(self):
13782 # This raises errors.OpPrereqError on its own:
13783 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13785 if self.op.target_groups:
13786 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13787 self.op.target_groups)
13788 else:
13789 self.req_target_uuids = []
13791 if self.group_uuid in self.req_target_uuids:
13792 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13793 " as a target group (targets are %s)" %
13794 (self.group_uuid,
13795 utils.CommaJoin(self.req_target_uuids)),
13796 errors.ECODE_INVAL)
13798 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13800 self.share_locks = _ShareAll()
13801 self.needed_locks = {
13802 locking.LEVEL_INSTANCE: [],
13803 locking.LEVEL_NODEGROUP: [],
13804 locking.LEVEL_NODE: [],
13805 }
13807 def DeclareLocks(self, level):
13808 if level == locking.LEVEL_INSTANCE:
13809 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13811 # Lock instances optimistically, needs verification once node and group
13812 # locks have been acquired
13813 self.needed_locks[locking.LEVEL_INSTANCE] = \
13814 self.cfg.GetNodeGroupInstances(self.group_uuid)
13816 elif level == locking.LEVEL_NODEGROUP:
13817 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13819 if self.req_target_uuids:
13820 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13821 else:
13822 # Lock all groups used by instances optimistically; this requires going
13823 # via the node before it's locked, requiring verification later on
13824 lock_groups.update(group_uuid
13825 for instance_name in
13826 self.owned_locks(locking.LEVEL_INSTANCE)
13827 for group_uuid in
13828 self.cfg.GetInstanceNodeGroups(instance_name))
13829 else:
13830 # No target groups, need to lock all of them
13831 lock_groups = locking.ALL_SET
13833 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13835 elif level == locking.LEVEL_NODE:
13836 # This will only lock the nodes in the group to be evacuated which
13837 # contain actual instances
13838 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13839 self._LockInstancesNodes()
13841 # Lock all nodes in group to be evacuated and target groups
13842 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13843 assert self.group_uuid in owned_groups
13844 member_nodes = [node_name
13845 for group in owned_groups
13846 for node_name in self.cfg.GetNodeGroup(group).members]
13847 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13849 def CheckPrereq(self):
13850 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13851 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13852 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13854 assert owned_groups.issuperset(self.req_target_uuids)
13855 assert self.group_uuid in owned_groups
13857 # Check if locked instances are still correct
13858 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13860 # Get instance information
13861 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13863 # Check if node groups for locked instances are still correct
13864 for instance_name in owned_instances:
13865 inst = self.instances[instance_name]
13866 assert owned_nodes.issuperset(inst.all_nodes), \
13867 "Instance %s's nodes changed while we kept the lock" % instance_name
13869 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13870 owned_groups)
13872 assert self.group_uuid in inst_groups, \
13873 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13875 if self.req_target_uuids:
13876 # User requested specific target groups
13877 self.target_uuids = self.req_target_uuids
13878 else:
13879 # All groups except the one to be evacuated are potential targets
13880 self.target_uuids = [group_uuid for group_uuid in owned_groups
13881 if group_uuid != self.group_uuid]
13883 if not self.target_uuids:
13884 raise errors.OpPrereqError("There are no possible target groups",
13885 errors.ECODE_INVAL)
13887 def BuildHooksEnv(self):
13888 """Build hooks env.
13890 """
13891 return {
13892 "GROUP_NAME": self.op.group_name,
13893 "TARGET_GROUPS": " ".join(self.target_uuids),
13894 }
13896 def BuildHooksNodes(self):
13897 """Build hooks nodes.
13899 """
13900 mn = self.cfg.GetMasterNode()
13902 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13904 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13906 return (run_nodes, run_nodes)
13908 def Exec(self, feedback_fn):
13909 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13911 assert self.group_uuid not in self.target_uuids
13913 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13914 instances=instances, target_groups=self.target_uuids)
13916 ial.Run(self.op.iallocator)
13918 if not ial.success:
13919 raise errors.OpPrereqError("Can't compute group evacuation using"
13920 " iallocator '%s': %s" %
13921 (self.op.iallocator, ial.info),
13922 errors.ECODE_NORES)
13924 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13926 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13927 len(jobs), self.op.group_name)
13929 return ResultWithJobs(jobs)
13932 class TagsLU(NoHooksLU): # pylint: disable=W0223
13933 """Generic tags LU.
13935 This is an abstract class which is the parent of all the other tags LUs.
13937 """
13938 def ExpandNames(self):
13939 self.group_uuid = None
13940 self.needed_locks = {}
13941 if self.op.kind == constants.TAG_NODE:
13942 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13943 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13944 elif self.op.kind == constants.TAG_INSTANCE:
13945 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13946 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13947 elif self.op.kind == constants.TAG_NODEGROUP:
13948 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13949 self.needed_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13950 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13951 # not possible to acquire the BGL based on opcode parameters)
13953 def CheckPrereq(self):
13954 """Check prerequisites.
13956 """
13957 if self.op.kind == constants.TAG_CLUSTER:
13958 self.target = self.cfg.GetClusterInfo()
13959 elif self.op.kind == constants.TAG_NODE:
13960 self.target = self.cfg.GetNodeInfo(self.op.name)
13961 elif self.op.kind == constants.TAG_INSTANCE:
13962 self.target = self.cfg.GetInstanceInfo(self.op.name)
13963 elif self.op.kind == constants.TAG_NODEGROUP:
13964 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13965 else:
13966 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13967 str(self.op.kind), errors.ECODE_INVAL)
13970 class LUTagsGet(TagsLU):
13971 """Returns the tags of a given object.
13973 """
13975 REQ_BGL = False
13976 def ExpandNames(self):
13977 TagsLU.ExpandNames(self)
13979 # Share locks as this is only a read operation
13980 self.share_locks = _ShareAll()
13982 def Exec(self, feedback_fn):
13983 """Returns the tag list.
13985 """
13986 return list(self.target.GetTags())
13989 class LUTagsSearch(NoHooksLU):
13990 """Searches the tags for a given pattern.
13992 """
13994 REQ_BGL = False
13995 def ExpandNames(self):
13996 self.needed_locks = {}
13998 def CheckPrereq(self):
13999 """Check prerequisites.
14001 This checks the pattern passed for validity by compiling it.
14003 """
14004 try:
14005 self.re = re.compile(self.op.pattern)
14006 except re.error, err:
14007 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14008 (self.op.pattern, err), errors.ECODE_INVAL)
14010 def Exec(self, feedback_fn):
14011 """Returns the tag list.
14013 """
14014 cfg = self.cfg
14015 tgts = [("/cluster", cfg.GetClusterInfo())]
14016 ilist = cfg.GetAllInstancesInfo().values()
14017 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14018 nlist = cfg.GetAllNodesInfo().values()
14019 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14020 tgts.extend(("/nodegroup/%s" % n.name, n)
14021 for n in cfg.GetAllNodeGroupsInfo().values())
14022 results = []
14023 for path, target in tgts:
14024 for tag in target.GetTags():
14025 if self.re.search(tag):
14026 results.append((path, tag))
14028 return results
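# Editor's example (hypothetical tags): a search for the pattern "^prod"
# would return (path, tag) pairs such as
#
#   [("/cluster", "production"),
#    ("/nodes/node1.example.com", "prod-rack1")]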
14030 class LUTagsSet(TagsLU):
14031 """Sets a tag on a given object.
14033 """
14035 REQ_BGL = False
14036 def CheckPrereq(self):
14037 """Check prerequisites.
14039 This checks the type and length of the tag name and value.
14041 """
14042 TagsLU.CheckPrereq(self)
14043 for tag in self.op.tags:
14044 objects.TaggableObject.ValidateTag(tag)
14046 def Exec(self, feedback_fn):
14047 """Sets the tag.
14049 """
14050 try:
14051 for tag in self.op.tags:
14052 self.target.AddTag(tag)
14053 except errors.TagError, err:
14054 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14055 self.cfg.Update(self.target, feedback_fn)
14058 class LUTagsDel(TagsLU):
14059 """Delete a list of tags from a given object.
14061 """
14063 REQ_BGL = False
14064 def CheckPrereq(self):
14065 """Check prerequisites.
14067 This checks that we have the given tag.
14069 """
14070 TagsLU.CheckPrereq(self)
14071 for tag in self.op.tags:
14072 objects.TaggableObject.ValidateTag(tag)
14073 del_tags = frozenset(self.op.tags)
14074 cur_tags = self.target.GetTags()
14076 diff_tags = del_tags - cur_tags
14077 if diff_tags:
14078 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14079 raise errors.OpPrereqError("Tag(s) %s not found" %
14080 (utils.CommaJoin(diff_names), ),
14081 errors.ECODE_NOENT)
14083 def Exec(self, feedback_fn):
14084 """Remove the tag from the object.
14086 """
14087 for tag in self.op.tags:
14088 self.target.RemoveTag(tag)
14089 self.cfg.Update(self.target, feedback_fn)
14092 class LUTestDelay(NoHooksLU):
14093 """Sleep for a specified amount of time.
14095 This LU sleeps on the master and/or nodes for a specified amount of
14096 time.
14098 """
14100 REQ_BGL = False
14101 def ExpandNames(self):
14102 """Expand names and set required locks.
14104 This expands the node list, if any.
14106 """
14107 self.needed_locks = {}
14108 if self.op.on_nodes:
14109 # _GetWantedNodes can be used here, but is not always appropriate to use
14110 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14111 # more information.
14112 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14113 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14115 def _TestDelay(self):
14116 """Do the actual sleep.
14118 """
14119 if self.op.on_master:
14120 if not utils.TestDelay(self.op.duration):
14121 raise errors.OpExecError("Error during master delay test")
14122 if self.op.on_nodes:
14123 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14124 for node, node_result in result.items():
14125 node_result.Raise("Failure during rpc call to node %s" % node)
14127 def Exec(self, feedback_fn):
14128 """Execute the test delay opcode, with the wanted repetitions.
14130 """
14131 if self.op.repeat == 0:
14132 self._TestDelay()
14133 else:
14134 top_value = self.op.repeat - 1
14135 for i in range(self.op.repeat):
14136 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14137 self._TestDelay()
14140 class LUTestJqueue(NoHooksLU):
14141 """Utility LU to test some aspects of the job queue.
14143 """
14145 REQ_BGL = False
14146 # Must be lower than default timeout for WaitForJobChange to see whether it
14147 # notices changed jobs
14148 _CLIENT_CONNECT_TIMEOUT = 20.0
14149 _CLIENT_CONFIRM_TIMEOUT = 60.0
14151 @classmethod
14152 def _NotifyUsingSocket(cls, cb, errcls):
14153 """Opens a Unix socket and waits for another program to connect.
14156 @param cb: Callback to send socket name to client
14157 @type errcls: class
14158 @param errcls: Exception class to use for errors
14160 """
14161 # Using a temporary directory as there's no easy way to create temporary
14162 # sockets without writing a custom loop around tempfile.mktemp and
14163 # socket.bind
14164 tmpdir = tempfile.mkdtemp()
14165 try:
14166 tmpsock = utils.PathJoin(tmpdir, "sock")
14168 logging.debug("Creating temporary socket at %s", tmpsock)
14169 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14170 try:
14171 sock.bind(tmpsock)
14172 sock.listen(1)
14174 # Send details to client
14175 cb(tmpsock)
14177 # Wait for client to connect before continuing
14178 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14179 try:
14180 (conn, _) = sock.accept()
14181 except socket.error, err:
14182 raise errcls("Client didn't connect in time (%s)" % err)
14183 finally:
14184 sock.close()
14185 finally:
14186 # Remove as soon as client is connected
14187 shutil.rmtree(tmpdir)
14189 # Wait for client to close
14190 try:
14191 try:
14192 # pylint: disable=E1101
14193 # Instance of '_socketobject' has no ... member
14194 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14195 conn.recv(1)
14196 except socket.error, err:
14197 raise errcls("Client failed to confirm notification (%s)" % err)
14198 finally:
14199 conn.close()
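# Editor's sketch (assumption, Python 2 style; not part of the original
# module): a client for the notification socket above connects to the path
# received through the callback and simply closes the connection to confirm
# receipt, which unblocks the conn.recv(1) call:
#
#   import socket
#   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   s.connect(sockname)
#   s.close()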
14201 def _SendNotification(self, test, arg, sockname):
14202 """Sends a notification to the client.
14205 @param test: Test name
14206 @param arg: Test argument (depends on test)
14207 @type sockname: string
14208 @param sockname: Socket path
14210 """
14211 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14213 def _Notify(self, prereq, test, arg):
14214 """Notifies the client of a test.
14216 @type prereq: bool
14217 @param prereq: Whether this is a prereq-phase test
14218 @type test: string
14219 @param test: Test name
14220 @param arg: Test argument (depends on test)
14222 """
14223 if prereq:
14224 errcls = errors.OpPrereqError
14225 else:
14226 errcls = errors.OpExecError
14228 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14229 test, arg),
14230 errcls)
14232 def CheckArguments(self):
14233 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14234 self.expandnames_calls = 0
14236 def ExpandNames(self):
14237 checkargs_calls = getattr(self, "checkargs_calls", 0)
14238 if checkargs_calls < 1:
14239 raise errors.ProgrammerError("CheckArguments was not called")
14241 self.expandnames_calls += 1
14243 if self.op.notify_waitlock:
14244 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14246 self.LogInfo("Expanding names")
14248 # Get lock on master node (just to get a lock, not for a particular reason)
14249 self.needed_locks = {
14250 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14251 }
14253 def Exec(self, feedback_fn):
14254 if self.expandnames_calls < 1:
14255 raise errors.ProgrammerError("ExpandNames was not called")
14257 if self.op.notify_exec:
14258 self._Notify(False, constants.JQT_EXEC, None)
14260 self.LogInfo("Executing")
14262 if self.op.log_messages:
14263 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14264 for idx, msg in enumerate(self.op.log_messages):
14265 self.LogInfo("Sending log message %s", idx + 1)
14266 feedback_fn(constants.JQT_MSGPREFIX + msg)
14267 # Report how many test messages have been sent
14268 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14270 if self.op.fail:
14271 raise errors.OpExecError("Opcode failure was requested")
14273 return True
14276 class IAllocator(object):
14277 """IAllocator framework.
14279 An IAllocator instance has three sets of attributes:
14280 - cfg that is needed to query the cluster
14281 - input data (all members of the _KEYS class attribute are required)
14282 - four buffer attributes (in|out_data|text), that represent the
14283 input (to the external script) in text and data structure format,
14284 and the output from it, again in two formats
14285 - the result variables from the script (success, info, nodes) for
14286 easy usage
14288 """
14289 # pylint: disable=R0902
14290 # lots of instance attributes
14292 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14293 self.cfg = cfg
14294 self.rpc = rpc_runner
14295 # init buffer variables
14296 self.in_text = self.out_text = self.in_data = self.out_data = None
14297 # init all input fields so that pylint is happy
14298 self.mode = mode
14299 self.memory = self.disks = self.disk_template = None
14300 self.os = self.tags = self.nics = self.vcpus = None
14301 self.hypervisor = None
14302 self.relocate_from = None
14303 self.name = None
14304 self.instances = None
14305 self.evac_mode = None
14306 self.target_groups = []
14308 self.required_nodes = None
14309 # init result fields
14310 self.success = self.info = self.result = None
14312 try:
14313 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14314 except KeyError:
14315 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14316 " IAllocator" % self.mode)
14318 keyset = [n for (n, _) in keydata]
14320 for key in kwargs:
14321 if key not in keyset:
14322 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14323 " IAllocator" % key)
14324 setattr(self, key, kwargs[key])
14326 for key in keyset:
14327 if key not in kwargs:
14328 raise errors.ProgrammerError("Missing input parameter '%s' to"
14329 " IAllocator" % key)
14330 self._BuildInputData(compat.partial(fn, self), keydata)
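# Editor's sketch (assumption; the keyword set must match the _MODE_DATA
# entry for the chosen mode): a relocation request is built roughly as
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name=instance.name,
#                    relocate_from=list(instance.secondary_nodes))
#   ial.Run(self.op.iallocator)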
14332 def _ComputeClusterData(self):
14333 """Compute the generic allocator input data.
14335 This is the data that is independent of the actual operation.
14337 """
14338 cfg = self.cfg
14339 cluster_info = cfg.GetClusterInfo()
14340 # cluster data
14341 data = {
14342 "version": constants.IALLOCATOR_VERSION,
14343 "cluster_name": cfg.GetClusterName(),
14344 "cluster_tags": list(cluster_info.GetTags()),
14345 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14346 "ipolicy": cluster_info.ipolicy,
14347 }
14348 ninfo = cfg.GetAllNodesInfo()
14349 iinfo = cfg.GetAllInstancesInfo().values()
14350 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14353 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14355 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14356 hypervisor_name = self.hypervisor
14357 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14358 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14359 else:
14360 hypervisor_name = cluster_info.primary_hypervisor
14362 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14363 [hypervisor_name])
14364 node_iinfo = \
14365 self.rpc.call_all_instances_info(node_list,
14366 cluster_info.enabled_hypervisors)
14368 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14370 config_ndata = self._ComputeBasicNodeData(ninfo)
14371 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14372 i_list, config_ndata)
14373 assert len(data["nodes"]) == len(ninfo), \
14374 "Incomplete node data computed"
14376 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14378 self.in_data = data
14380 @staticmethod
14381 def _ComputeNodeGroupData(cfg):
14382 """Compute node groups data.
14384 """
14385 cluster = cfg.GetClusterInfo()
14386 ng = dict((guuid, {
14387 "name": gdata.name,
14388 "alloc_policy": gdata.alloc_policy,
14389 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14390 })
14391 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14393 return ng
14395 @staticmethod
14396 def _ComputeBasicNodeData(node_cfg):
14397 """Compute global node data.
14400 @returns: a dict of name: (node dict, node config)
14402 """
14403 # fill in static (config-based) values
14404 node_results = dict((ninfo.name, {
14405 "tags": list(ninfo.GetTags()),
14406 "primary_ip": ninfo.primary_ip,
14407 "secondary_ip": ninfo.secondary_ip,
14408 "offline": ninfo.offline,
14409 "drained": ninfo.drained,
14410 "master_candidate": ninfo.master_candidate,
14411 "group": ninfo.group,
14412 "master_capable": ninfo.master_capable,
14413 "vm_capable": ninfo.vm_capable,
14414 })
14415 for ninfo in node_cfg.values())
14417 return node_results
14419 @staticmethod
14420 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14421 node_results):
14422 """Compute global node data.
14424 @param node_results: the basic node structures as filled from the config
14426 """
14427 #TODO(dynmem): compute the right data on MAX and MIN memory
14428 # make a copy of the current dict
14429 node_results = dict(node_results)
14430 for nname, nresult in node_data.items():
14431 assert nname in node_results, "Missing basic data for node %s" % nname
14432 ninfo = node_cfg[nname]
14434 if not (ninfo.offline or ninfo.drained):
14435 nresult.Raise("Can't get data for node %s" % nname)
14436 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14437 nname)
14438 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14440 for attr in ["memory_total", "memory_free", "memory_dom0",
14441 "vg_size", "vg_free", "cpu_total"]:
14442 if attr not in remote_info:
14443 raise errors.OpExecError("Node '%s' didn't return attribute"
14444 " '%s'" % (nname, attr))
14445 if not isinstance(remote_info[attr], int):
14446 raise errors.OpExecError("Node '%s' returned invalid value"
14447 " for '%s': %s" %
14448 (nname, attr, remote_info[attr]))
14449 # compute memory used by primary instances
14450 i_p_mem = i_p_up_mem = 0
14451 for iinfo, beinfo in i_list:
14452 if iinfo.primary_node == nname:
14453 i_p_mem += beinfo[constants.BE_MAXMEM]
14454 if iinfo.name not in node_iinfo[nname].payload:
14455 i_used_mem = 0
14456 else:
14457 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14458 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14459 remote_info["memory_free"] -= max(0, i_mem_diff)
14461 if iinfo.admin_state == constants.ADMINST_UP:
14462 i_p_up_mem += beinfo[constants.BE_MAXMEM]
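# Editor's note (illustrative arithmetic): if an instance has BE_MAXMEM of
# 1024 MiB but the hypervisor currently reports it using 768 MiB, then
# max(0, 1024 - 768) = 256 MiB was subtracted from memory_free above,
# keeping room for the instance to grow back to its configured maximum.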
14464 # compute memory used by instances
14465 pnr_dyn = {
14466 "total_memory": remote_info["memory_total"],
14467 "reserved_memory": remote_info["memory_dom0"],
14468 "free_memory": remote_info["memory_free"],
14469 "total_disk": remote_info["vg_size"],
14470 "free_disk": remote_info["vg_free"],
14471 "total_cpus": remote_info["cpu_total"],
14472 "i_pri_memory": i_p_mem,
14473 "i_pri_up_memory": i_p_up_mem,
14474 }
14475 pnr_dyn.update(node_results[nname])
14476 node_results[nname] = pnr_dyn
14478 return node_results
14480 @staticmethod
14481 def _ComputeInstanceData(cluster_info, i_list):
14482 """Compute global instance data.
14484 """
14485 instance_data = {}
14486 for iinfo, beinfo in i_list:
14487 nic_data = []
14488 for nic in iinfo.nics:
14489 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14490 nic_dict = {
14491 "mac": nic.mac,
14492 "ip": nic.ip,
14493 "mode": filled_params[constants.NIC_MODE],
14494 "link": filled_params[constants.NIC_LINK],
14495 }
14496 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14497 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14498 nic_data.append(nic_dict)
14499 pir = {
14500 "tags": list(iinfo.GetTags()),
14501 "admin_state": iinfo.admin_state,
14502 "vcpus": beinfo[constants.BE_VCPUS],
14503 "memory": beinfo[constants.BE_MAXMEM],
14504 "os": iinfo.os,
14505 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14506 "nics": nic_data,
14507 "disks": [{constants.IDISK_SIZE: dsk.size,
14508 constants.IDISK_MODE: dsk.mode}
14509 for dsk in iinfo.disks],
14510 "disk_template": iinfo.disk_template,
14511 "hypervisor": iinfo.hypervisor,
14512 }
14513 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14514 pir["disks"])
14515 instance_data[iinfo.name] = pir
14517 return instance_data
14519 def _AddNewInstance(self):
14520 """Add new instance data to allocator structure.
14522 This in combination with _AllocatorGetClusterData will create the
14523 correct structure needed as input for the allocator.
14525 The checks for the completeness of the opcode must have already been
14526 done.
14528 """
14529 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14531 if self.disk_template in constants.DTS_INT_MIRROR:
14532 self.required_nodes = 2
14533 else:
14534 self.required_nodes = 1
14536 request = {
14537 "name": self.name,
14538 "disk_template": self.disk_template,
14539 "tags": self.tags,
14540 "os": self.os,
14541 "vcpus": self.vcpus,
14542 "memory": self.memory,
14543 "disks": self.disks,
14544 "disk_space_total": disk_space,
14545 "nics": self.nics,
14546 "required_nodes": self.required_nodes,
14547 "hypervisor": self.hypervisor,
14548 }
14550 return request
14552 def _AddRelocateInstance(self):
14553 """Add relocate instance data to allocator structure.
14555 This in combination with _IAllocatorGetClusterData will create the
14556 correct structure needed as input for the allocator.
14558 The checks for the completeness of the opcode must have already been
14559 done.
14561 """
14562 instance = self.cfg.GetInstanceInfo(self.name)
14563 if instance is None:
14564 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14565 " IAllocator" % self.name)
14567 if instance.disk_template not in constants.DTS_MIRRORED:
14568 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14569 errors.ECODE_INVAL)
14571 if instance.disk_template in constants.DTS_INT_MIRROR and \
14572 len(instance.secondary_nodes) != 1:
14573 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14574 errors.ECODE_STATE)
14576 self.required_nodes = 1
14577 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14578 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14582 "disk_space_total": disk_space,
14583 "required_nodes": self.required_nodes,
14584 "relocate_from": self.relocate_from,
  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
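
  # Illustrative sketch, not upstream code: after _BuildInputData, the
  # serialized self.in_text handed to the allocator script follows the
  # iallocator protocol and looks roughly like (abridged):
  #   {"cluster_name": "...", "cluster_tags": [...],
  #    "nodegroups": {...}, "instances": {...}, "nodes": {...},
  #    "request": {"type": "allocate", "name": "...", ...}}
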
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
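
  # Illustrative sketch, not upstream code (names hypothetical): a
  # node-evacuation result matching _NEVAC_RESULT is a
  # (moved, failed, jobs) triple, e.g.:
  #   [[["inst1", "group1", ["node3"]]],           # moved
  #    [["inst2", "instance has no secondary"]],   # failed, with reason
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]]  # job sets to submit
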
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
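
  # Usage sketch, not upstream code: a typical caller elsewhere in this
  # module does roughly the following (opcode field names may differ):
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info,
  #                                errors.ECODE_NORES)
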
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
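
  # Illustrative sketch, not upstream code (values hypothetical): a
  # well-formed reply (as JSON text) accepted by _ValidateResult looks
  # like:
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  # where "result" must additionally satisfy self._result_check.
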
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue

      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find the group, so use the UUID as the name
        group_name = group_uuid
      else:
        group_name = group["name"]

      result.add(group_name)

    return sorted(result)
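
  # Worked example with hypothetical data:
  #   _NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
  #                  {"uuid-a": {"name": "default"}},
  #                  ["node1", "node2", "node9"])
  # returns ["default", "uuid-b"]: node2's group UUID has no entry in
  # groups, so the UUID itself is used, and unknown "node9" is skipped.
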

class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction
    and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
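
# Usage sketch, not upstream code: query opcodes are expected to
# resolve their implementation class through this helper, e.g.:
#   impl = _GetQueryImplementation(constants.QR_NODE)
#   # impl is now the _NodeQuery class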