4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:
- ExpandNames is left purely as a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
later methods no longer need to worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. No nodes should be returned as an
309 empty list (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
its result based on the hooks. By default the method does nothing and the
previous result is passed back unchanged, but any LU can override it if it
wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused-argument and
# "could be a function" warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
In the future it may grow parameters to lock only some of the instances'
nodes, or to lock only primary or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
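# Illustrative usage (a hypothetical LU, a minimal sketch, not part of this
# module): a common pattern is to defer node locking to DeclareLocks after the
# instance lock has been declared in ExpandNames:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()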
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
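# Illustrative usage (a minimal sketch): a read-only LU can combine this with
# ALL_SET to acquire every lock it needs in shared mode from ExpandNames:
#
#   self.share_locks = _ShareAll()
#   self.needed_locks = {
#     locking.LEVEL_NODE: locking.ALL_SET,
#   }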
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
@type use_default: boolean
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
values as 'to be deleted' values
@type use_none: boolean
@param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
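# Example of the merge semantics (illustrative values only): with the default
# use_default=True, VALUE_DEFAULT entries are removed from the result, other
# keys are added or overwritten, and untouched keys are kept:
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   -> {"b": 2, "c": 3}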
724 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
725 """Return the new version of a instance policy.
727 @param group_policy: whether this policy applies to a group and thus
728 we should support removal of policy entries
731 use_none = use_default = group_policy
732 ipolicy = copy.deepcopy(old_ipolicy)
733 for key, value in new_ipolicy.items():
734 if key not in constants.IPOLICY_ALL_KEYS:
735 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
737 if key in constants.IPOLICY_PARAMETERS:
738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
739 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
741 use_default=use_default)
743 # FIXME: we assume all others are lists; this should be redone
745 if not value or value == [constants.VALUE_DEFAULT]:
749 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
750 " on the cluster'" % key,
753 ipolicy[key] = list(value)
755 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
756 except errors.ConfigurationError, err:
757 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
762 def _UpdateAndVerifySubDict(base, updates, type_check):
763 """Updates and verifies a dict with sub dicts of the same type.
765 @param base: The dict with the old data
766 @param updates: The dict with the new data
767 @param type_check: Dict suitable to ForceDictType to verify correct types
768 @returns: A new dict with updated and verified values
772 new = _GetUpdatedParams(old, value)
773 utils.ForceDictType(new, type_check)
776 ret = copy.deepcopy(base)
777 ret.update(dict((key, fn(base.get(key, {}), value))
778 for key, value in updates.items()))
782 def _MergeAndVerifyHvState(op_input, obj_input):
783 """Combines the hv state from an opcode with the one of the object
785 @param op_input: The input dict from the opcode
786 @param obj_input: The input dict from the objects
787 @return: The verified and updated dict
791 invalid_hvs = set(op_input) - constants.HYPER_TYPES
793 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
794 " %s" % utils.CommaJoin(invalid_hvs),
796 if obj_input is None:
798 type_check = constants.HVSTS_PARAMETER_TYPES
799 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
804 def _MergeAndVerifyDiskState(op_input, obj_input):
805 """Combines the disk state from an opcode with the one of the object
807 @param op_input: The input dict from the opcode
808 @param obj_input: The input dict from the objects
809 @return: The verified and updated dict
812 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
814 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
815 utils.CommaJoin(invalid_dst),
817 type_check = constants.DSS_PARAMETER_TYPES
818 if obj_input is None:
820 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
822 for key, value in op_input.items())
827 def _ReleaseLocks(lu, level, names=None, keep=None):
828 """Releases locks owned by an LU.
830 @type lu: L{LogicalUnit}
831 @param level: Lock level
832 @type names: list or None
833 @param names: Names of locks to release
834 @type keep: list or None
835 @param keep: Names of locks to retain
838 assert not (keep is not None and names is not None), \
839 "Only one of the 'names' and the 'keep' parameters can be given"
841 if names is not None:
842 should_release = names.__contains__
844 should_release = lambda name: name not in keep
846 should_release = None
848 owned = lu.owned_locks(level)
850 # Not owning any lock at this level, do nothing
857 # Determine which locks to release
859 if should_release(name):
864 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
866 # Release just some locks
867 lu.glm.release(level, names=release)
869 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
872 lu.glm.release(level)
874 assert not lu.glm.is_owned(level), "No locks should be owned"
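# Illustrative calls (hypothetical variables): release every node lock except
# the instance's primary node, or drop the whole level at once:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[instance.primary_node])
#   _ReleaseLocks(self, locking.LEVEL_NODE)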
877 def _MapInstanceDisksToNodes(instances):
878 """Creates a map from (node, volume) to instance name.
880 @type instances: list of L{objects.Instance}
881 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
884 return dict(((node, vol), inst.name)
885 for inst in instances
886 for (node, vols) in inst.MapLVsByNode().items()
890 def _RunPostHook(lu, node_name):
891 """Runs the post-hook for an opcode on a single node.
894 hm = lu.proc.BuildHooksManager(lu)
896 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
898 # pylint: disable=W0702
899 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
902 def _CheckOutputFields(static, dynamic, selected):
903 """Checks whether all selected fields are valid.
905 @type static: L{utils.FieldSet}
906 @param static: static fields set
907 @type dynamic: L{utils.FieldSet}
908 @param dynamic: dynamic fields set
915 delta = f.NonMatching(selected)
917 raise errors.OpPrereqError("Unknown output fields selected: %s"
918 % ",".join(delta), errors.ECODE_INVAL)
921 def _CheckGlobalHvParams(params):
922 """Validates that given hypervisor params are not global ones.
924 This will ensure that instances don't get customised versions of
928 used_globals = constants.HVC_GLOBALS.intersection(params)
930 msg = ("The following hypervisor parameters are global and cannot"
931 " be customized at instance level, please modify them at"
932 " cluster level: %s" % utils.CommaJoin(used_globals))
933 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
936 def _CheckNodeOnline(lu, node, msg=None):
937 """Ensure that a given node is online.
939 @param lu: the LU on behalf of which we make the check
940 @param node: the node to check
941 @param msg: if passed, should be a message to replace the default one
942 @raise errors.OpPrereqError: if the node is offline
946 msg = "Can't use offline node"
947 if lu.cfg.GetNodeInfo(node).offline:
948 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
951 def _CheckNodeNotDrained(lu, node):
952 """Ensure that a given node is not drained.
954 @param lu: the LU on behalf of which we make the check
955 @param node: the node to check
956 @raise errors.OpPrereqError: if the node is drained
959 if lu.cfg.GetNodeInfo(node).drained:
960 raise errors.OpPrereqError("Can't use drained node %s" % node,
964 def _CheckNodeVmCapable(lu, node):
965 """Ensure that a given node is vm capable.
967 @param lu: the LU on behalf of which we make the check
968 @param node: the node to check
969 @raise errors.OpPrereqError: if the node is not vm capable
972 if not lu.cfg.GetNodeInfo(node).vm_capable:
973 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
977 def _CheckNodeHasOS(lu, node, os_name, force_variant):
978 """Ensure that a node supports a given OS.
980 @param lu: the LU on behalf of which we make the check
981 @param node: the node to check
982 @param os_name: the OS to query about
983 @param force_variant: whether to ignore variant errors
@raise errors.OpPrereqError: if the node does not support the OS
987 result = lu.rpc.call_os_get(node, os_name)
988 result.Raise("OS '%s' not in supported OS list for node %s" %
990 prereq=True, ecode=errors.ECODE_INVAL)
991 if not force_variant:
992 _CheckOSVariant(result.payload, os_name)
995 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
996 """Ensure that a node has the given secondary ip.
998 @type lu: L{LogicalUnit}
999 @param lu: the LU on behalf of which we make the check
1001 @param node: the node to check
1002 @type secondary_ip: string
1003 @param secondary_ip: the ip to check
1004 @type prereq: boolean
1005 @param prereq: whether to throw a prerequisite or an execute error
1006 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1007 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1010 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1011 result.Raise("Failure checking secondary ip on node %s" % node,
1012 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1013 if not result.payload:
1014 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1015 " please fix and re-run this command" % secondary_ip)
1017 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1019 raise errors.OpExecError(msg)
1022 def _GetClusterDomainSecret():
1023 """Reads the cluster domain secret.
1026 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1030 def _CheckInstanceState(lu, instance, req_states, msg=None):
1031 """Ensure that an instance is in one of the required states.
1033 @param lu: the LU on behalf of which we make the check
1034 @param instance: the instance to check
1035 @param msg: if passed, should be a message to replace the default one
1036 @raise errors.OpPrereqError: if the instance is not in the required state
1040 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1041 if instance.admin_state not in req_states:
1042 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1043 (instance, instance.admin_state, msg),
1046 if constants.ADMINST_UP not in req_states:
1047 pnode = instance.primary_node
1048 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1049 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1050 prereq=True, ecode=errors.ECODE_ENVIRON)
1052 if instance.name in ins_l.payload:
1053 raise errors.OpPrereqError("Instance %s is running, %s" %
1054 (instance.name, msg), errors.ECODE_STATE)
1057 def _ComputeMinMaxSpec(name, ipolicy, value):
1058 """Computes if value is in the desired range.
1060 @param name: name of the parameter for which we perform the check
1061 @param ipolicy: dictionary containing min, max and std values
1062 @param value: actual value that we want to use
1063 @return: None or element not meeting the criteria
1067 if value in [None, constants.VALUE_AUTO]:
1069 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1070 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1071 if value > max_v or min_v > value:
1072 return ("%s value %s is not in range [%s, %s]" %
1073 (name, value, min_v, max_v))
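# Worked example (hypothetical policy values): with a memory range of
# [128, 32768] MB in the ipolicy, a value of 64 yields a message of the form
# "<name> value 64 is not in range [128, 32768]", while 512, None or
# constants.VALUE_AUTO all yield None (no violation).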
1077 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1078 nic_count, disk_sizes,
1079 _compute_fn=_ComputeMinMaxSpec):
1080 """Verifies ipolicy against provided specs.
1083 @param ipolicy: The ipolicy
1085 @param mem_size: The memory size
1086 @type cpu_count: int
1087 @param cpu_count: Used cpu cores
1088 @type disk_count: int
1089 @param disk_count: Number of disks used
1090 @type nic_count: int
1091 @param nic_count: Number of nics used
1092 @type disk_sizes: list of ints
1093 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1094 @param _compute_fn: The compute function (unittest only)
@return: A list of violations, or an empty list if no violations are found
1098 assert disk_count == len(disk_sizes)
1101 (constants.ISPEC_MEM_SIZE, mem_size),
1102 (constants.ISPEC_CPU_COUNT, cpu_count),
1103 (constants.ISPEC_DISK_COUNT, disk_count),
1104 (constants.ISPEC_NIC_COUNT, nic_count),
1105 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1108 (_compute_fn(name, ipolicy, value)
1109 for (name, value) in test_settings))
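# Illustrative call (hypothetical values): checking a 1024 MB, 2-vCPU, 1-NIC
# instance spec with two disks against a policy:
#
#   _ComputeIPolicySpecViolation(ipolicy, 1024, 2, 2, 1, [10240, 20480])
#
# returns a (possibly empty) list of human-readable violation messages.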
1112 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1113 _compute_fn=_ComputeIPolicySpecViolation):
1114 """Compute if instance meets the specs of ipolicy.
1117 @param ipolicy: The ipolicy to verify against
1118 @type instance: L{objects.Instance}
1119 @param instance: The instance to verify
1120 @param _compute_fn: The function to verify ipolicy (unittest only)
1121 @see: L{_ComputeIPolicySpecViolation}
1124 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1125 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1126 disk_count = len(instance.disks)
1127 disk_sizes = [disk.size for disk in instance.disks]
1128 nic_count = len(instance.nics)
1130 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1134 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1135 _compute_fn=_ComputeIPolicySpecViolation):
1136 """Compute if instance specs meets the specs of ipolicy.
1139 @param ipolicy: The ipolicy to verify against
@type instance_spec: dict
1141 @param instance_spec: The instance spec to verify
1142 @param _compute_fn: The function to verify ipolicy (unittest only)
1143 @see: L{_ComputeIPolicySpecViolation}
1146 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1147 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1148 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1149 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1150 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1152 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1156 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1158 _compute_fn=_ComputeIPolicyInstanceViolation):
1159 """Compute if instance meets the specs of the new target group.
1161 @param ipolicy: The ipolicy to verify
1162 @param instance: The instance object to verify
1163 @param current_group: The current group of the instance
1164 @param target_group: The new group of the instance
1165 @param _compute_fn: The function to verify ipolicy (unittest only)
1166 @see: L{_ComputeIPolicySpecViolation}
1169 if current_group == target_group:
1172 return _compute_fn(ipolicy, instance)
1175 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1176 _compute_fn=_ComputeIPolicyNodeViolation):
1177 """Checks that the target node is correct in terms of instance policy.
1179 @param ipolicy: The ipolicy to verify
1180 @param instance: The instance object to verify
@param node: The new node the instance will be relocated to
1182 @param ignore: Ignore violations of the ipolicy
1183 @param _compute_fn: The function to verify ipolicy (unittest only)
1184 @see: L{_ComputeIPolicySpecViolation}
1187 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1188 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1191 msg = ("Instance does not meet target node group's (%s) instance"
1192 " policy: %s") % (node.group, utils.CommaJoin(res))
1196 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1199 def _ExpandItemName(fn, name, kind):
1200 """Expand an item name.
1202 @param fn: the function to use for expansion
1203 @param name: requested item name
1204 @param kind: text description ('Node' or 'Instance')
1205 @return: the resolved (full) name
1206 @raise errors.OpPrereqError: if the item is not found
1209 full_name = fn(name)
1210 if full_name is None:
1211 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1216 def _ExpandNodeName(cfg, name):
1217 """Wrapper over L{_ExpandItemName} for nodes."""
1218 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1221 def _ExpandInstanceName(cfg, name):
1222 """Wrapper over L{_ExpandItemName} for instance."""
1223 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
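# Illustrative use (hypothetical names): both wrappers resolve a short name to
# the fully-qualified one stored in the configuration, e.g.
#   _ExpandNodeName(self.cfg, "node1") -> "node1.example.com"
# and raise errors.OpPrereqError if the name is not known.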
1226 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1227 minmem, maxmem, vcpus, nics, disk_template, disks,
1228 bep, hvp, hypervisor_name, tags):
1229 """Builds instance related env variables for hooks
1231 This builds the hook environment from individual variables.
1234 @param name: the name of the instance
1235 @type primary_node: string
1236 @param primary_node: the name of the instance's primary node
1237 @type secondary_nodes: list
1238 @param secondary_nodes: list of secondary nodes as strings
1239 @type os_type: string
1240 @param os_type: the name of the instance's OS
1241 @type status: string
1242 @param status: the desired status of the instance
1243 @type minmem: string
1244 @param minmem: the minimum memory size of the instance
1245 @type maxmem: string
1246 @param maxmem: the maximum memory size of the instance
1248 @param vcpus: the count of VCPUs the instance has
1250 @param nics: list of tuples (ip, mac, mode, link) representing
1251 the NICs the instance has
1252 @type disk_template: string
1253 @param disk_template: the disk template of the instance
1255 @param disks: the list of (size, mode) pairs
1257 @param bep: the backend parameters for the instance
1259 @param hvp: the hypervisor parameters for the instance
1260 @type hypervisor_name: string
1261 @param hypervisor_name: the hypervisor for the instance
1263 @param tags: list of instance tags as strings
1265 @return: the hook environment for this instance
1270 "INSTANCE_NAME": name,
1271 "INSTANCE_PRIMARY": primary_node,
1272 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1273 "INSTANCE_OS_TYPE": os_type,
1274 "INSTANCE_STATUS": status,
1275 "INSTANCE_MINMEM": minmem,
1276 "INSTANCE_MAXMEM": maxmem,
1277 # TODO(2.7) remove deprecated "memory" value
1278 "INSTANCE_MEMORY": maxmem,
1279 "INSTANCE_VCPUS": vcpus,
1280 "INSTANCE_DISK_TEMPLATE": disk_template,
1281 "INSTANCE_HYPERVISOR": hypervisor_name,
1284 nic_count = len(nics)
1285 for idx, (ip, mac, mode, link) in enumerate(nics):
1288 env["INSTANCE_NIC%d_IP" % idx] = ip
1289 env["INSTANCE_NIC%d_MAC" % idx] = mac
1290 env["INSTANCE_NIC%d_MODE" % idx] = mode
1291 env["INSTANCE_NIC%d_LINK" % idx] = link
1292 if mode == constants.NIC_MODE_BRIDGED:
1293 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1297 env["INSTANCE_NIC_COUNT"] = nic_count
1300 disk_count = len(disks)
1301 for idx, (size, mode) in enumerate(disks):
1302 env["INSTANCE_DISK%d_SIZE" % idx] = size
1303 env["INSTANCE_DISK%d_MODE" % idx] = mode
1307 env["INSTANCE_DISK_COUNT"] = disk_count
1312 env["INSTANCE_TAGS"] = " ".join(tags)
1314 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1315 for key, value in source.items():
1316 env["INSTANCE_%s_%s" % (kind, key)] = value
1321 def _NICListToTuple(lu, nics):
1322 """Build a list of nic information tuples.
1324 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1325 value in LUInstanceQueryData.
1327 @type lu: L{LogicalUnit}
1328 @param lu: the logical unit on whose behalf we execute
1329 @type nics: list of L{objects.NIC}
1330 @param nics: list of nics to convert to hooks tuples
1334 cluster = lu.cfg.GetClusterInfo()
1338 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1339 mode = filled_params[constants.NIC_MODE]
1340 link = filled_params[constants.NIC_LINK]
1341 hooks_nics.append((ip, mac, mode, link))
1345 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1346 """Builds instance related env variables for hooks from an object.
1348 @type lu: L{LogicalUnit}
1349 @param lu: the logical unit on whose behalf we execute
1350 @type instance: L{objects.Instance}
1351 @param instance: the instance for which we should build the
1353 @type override: dict
1354 @param override: dictionary with key/values that will override
1357 @return: the hook environment dictionary
1360 cluster = lu.cfg.GetClusterInfo()
1361 bep = cluster.FillBE(instance)
1362 hvp = cluster.FillHV(instance)
1364 "name": instance.name,
1365 "primary_node": instance.primary_node,
1366 "secondary_nodes": instance.secondary_nodes,
1367 "os_type": instance.os,
1368 "status": instance.admin_state,
1369 "maxmem": bep[constants.BE_MAXMEM],
1370 "minmem": bep[constants.BE_MINMEM],
1371 "vcpus": bep[constants.BE_VCPUS],
1372 "nics": _NICListToTuple(lu, instance.nics),
1373 "disk_template": instance.disk_template,
1374 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1377 "hypervisor_name": instance.hypervisor,
1378 "tags": instance.tags,
1381 args.update(override)
1382 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
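# Illustrative call (hypothetical override, a minimal sketch): build the hook
# environment for an instance while overriding one of the derived values, e.g.
# the status seen by the hooks:
#
#   env = _BuildInstanceHookEnvByObject(self, instance,
#                                       override={"status": constants.ADMINST_DOWN})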
1385 def _AdjustCandidatePool(lu, exceptions):
1386 """Adjust the candidate pool after node operations.
1389 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1391 lu.LogInfo("Promoted nodes to master candidate role: %s",
1392 utils.CommaJoin(node.name for node in mod_list))
1393 for name in mod_list:
1394 lu.context.ReaddNode(name)
1395 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1397 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1401 def _DecideSelfPromotion(lu, exceptions=None):
1402 """Decide whether I should promote myself as a master candidate.
1405 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1406 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1407 # the new node will increase mc_max with one, so:
1408 mc_should = min(mc_should + 1, cp_size)
1409 return mc_now < mc_should
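# Worked example (illustrative numbers): with candidate_pool_size = 10,
# currently 3 candidates and a desired count of 4, mc_should becomes
# min(4 + 1, 10) = 5 and 3 < 5, so the new node decides to promote itself.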
1412 def _CalculateGroupIPolicy(cluster, group):
1413 """Calculate instance policy for group.
1416 return cluster.SimpleFillIPolicy(group.ipolicy)
1419 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1420 """Check that the brigdes needed by a list of nics exist.
1423 cluster = lu.cfg.GetClusterInfo()
1424 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1425 brlist = [params[constants.NIC_LINK] for params in paramslist
1426 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1428 result = lu.rpc.call_bridges_exist(target_node, brlist)
1429 result.Raise("Error checking bridges on destination node '%s'" %
1430 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1433 def _CheckInstanceBridgesExist(lu, instance, node=None):
1434 """Check that the brigdes needed by an instance exist.
1438 node = instance.primary_node
1439 _CheckNicsBridgesExist(lu, instance.nics, node)
1442 def _CheckOSVariant(os_obj, name):
1443 """Check whether an OS name conforms to the os variants specification.
1445 @type os_obj: L{objects.OS}
1446 @param os_obj: OS object to check
1448 @param name: OS name passed by the user, to check for validity
1451 variant = objects.OS.GetVariant(name)
1452 if not os_obj.supported_variants:
1454 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1455 " passed)" % (os_obj.name, variant),
1459 raise errors.OpPrereqError("OS name must include a variant",
1462 if variant not in os_obj.supported_variants:
1463 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1466 def _GetNodeInstancesInner(cfg, fn):
1467 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1470 def _GetNodeInstances(cfg, node_name):
1471 """Returns a list of all primary and secondary instances on a node.
1475 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1478 def _GetNodePrimaryInstances(cfg, node_name):
1479 """Returns primary instances on a node.
1482 return _GetNodeInstancesInner(cfg,
1483 lambda inst: node_name == inst.primary_node)
1486 def _GetNodeSecondaryInstances(cfg, node_name):
1487 """Returns secondary instances on a node.
1490 return _GetNodeInstancesInner(cfg,
1491 lambda inst: node_name in inst.secondary_nodes)
1494 def _GetStorageTypeArgs(cfg, storage_type):
1495 """Returns the arguments for a storage type.
1498 # Special case for file storage
1499 if storage_type == constants.ST_FILE:
1500 # storage.FileStorage wants a list of storage directories
1501 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1506 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1509 for dev in instance.disks:
1510 cfg.SetDiskID(dev, node_name)
1512 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1513 result.Raise("Failed to get disk status from node %s" % node_name,
1514 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1516 for idx, bdev_status in enumerate(result.payload):
1517 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1523 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1524 """Check the sanity of iallocator and node arguments and use the
1525 cluster-wide iallocator if appropriate.
1527 Check that at most one of (iallocator, node) is specified. If none is
1528 specified, then the LU's opcode's iallocator slot is filled with the
1529 cluster-wide default iallocator.
1531 @type iallocator_slot: string
1532 @param iallocator_slot: the name of the opcode iallocator slot
1533 @type node_slot: string
1534 @param node_slot: the name of the opcode target node slot
1537 node = getattr(lu.op, node_slot, None)
1538 iallocator = getattr(lu.op, iallocator_slot, None)
1540 if node is not None and iallocator is not None:
1541 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1543 elif node is None and iallocator is None:
1544 default_iallocator = lu.cfg.GetDefaultIAllocator()
1545 if default_iallocator:
1546 setattr(lu.op, iallocator_slot, default_iallocator)
1548 raise errors.OpPrereqError("No iallocator or node given and no"
1549 " cluster-wide default iallocator found;"
1550 " please specify either an iallocator or a"
1551 " node, or set a cluster-wide default"
1555 def _GetDefaultIAllocator(cfg, iallocator):
1556 """Decides on which iallocator to use.
1558 @type cfg: L{config.ConfigWriter}
1559 @param cfg: Cluster configuration object
1560 @type iallocator: string or None
1561 @param iallocator: Iallocator specified in opcode
1563 @return: Iallocator name
1567 # Use default iallocator
1568 iallocator = cfg.GetDefaultIAllocator()
1571 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1572 " opcode nor as a cluster-wide default",
1578 class LUClusterPostInit(LogicalUnit):
1579 """Logical unit for running hooks after cluster initialization.
1582 HPATH = "cluster-init"
1583 HTYPE = constants.HTYPE_CLUSTER
1585 def BuildHooksEnv(self):
1590 "OP_TARGET": self.cfg.GetClusterName(),
1593 def BuildHooksNodes(self):
1594 """Build hooks nodes.
1597 return ([], [self.cfg.GetMasterNode()])
1599 def Exec(self, feedback_fn):
1606 class LUClusterDestroy(LogicalUnit):
1607 """Logical unit for destroying the cluster.
1610 HPATH = "cluster-destroy"
1611 HTYPE = constants.HTYPE_CLUSTER
1613 def BuildHooksEnv(self):
1618 "OP_TARGET": self.cfg.GetClusterName(),
1621 def BuildHooksNodes(self):
1622 """Build hooks nodes.
1627 def CheckPrereq(self):
1628 """Check prerequisites.
1630 This checks whether the cluster is empty.
1632 Any errors are signaled by raising errors.OpPrereqError.
1635 master = self.cfg.GetMasterNode()
1637 nodelist = self.cfg.GetNodeList()
1638 if len(nodelist) != 1 or nodelist[0] != master:
1639 raise errors.OpPrereqError("There are still %d node(s) in"
1640 " this cluster." % (len(nodelist) - 1),
1642 instancelist = self.cfg.GetInstanceList()
1644 raise errors.OpPrereqError("There are still %d instance(s) in"
1645 " this cluster." % len(instancelist),
1648 def Exec(self, feedback_fn):
1649 """Destroys the cluster.
1652 master_params = self.cfg.GetMasterNetworkParameters()
1654 # Run post hooks on master node before it's removed
1655 _RunPostHook(self, master_params.name)
1657 ems = self.cfg.GetUseExternalMipScript()
1658 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1661 self.LogWarning("Error disabling the master IP address: %s",
1664 return master_params.name
1667 def _VerifyCertificate(filename):
1668 """Verifies a certificate for L{LUClusterVerifyConfig}.
1670 @type filename: string
1671 @param filename: Path to PEM file
1675 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1676 utils.ReadFile(filename))
1677 except Exception, err: # pylint: disable=W0703
1678 return (LUClusterVerifyConfig.ETYPE_ERROR,
1679 "Failed to load X509 certificate %s: %s" % (filename, err))
1682 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1683 constants.SSL_CERT_EXPIRATION_ERROR)
1686 fnamemsg = "While verifying %s: %s" % (filename, msg)
1691 return (None, fnamemsg)
1692 elif errcode == utils.CERT_WARNING:
1693 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1694 elif errcode == utils.CERT_ERROR:
1695 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1697 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1700 def _GetAllHypervisorParameters(cluster, instances):
1701 """Compute the set of all hypervisor parameters.
1703 @type cluster: L{objects.Cluster}
1704 @param cluster: the cluster object
@type instances: list of L{objects.Instance}
1706 @param instances: additional instances from which to obtain parameters
1707 @rtype: list of (origin, hypervisor, parameters)
1708 @return: a list with all parameters found, indicating the hypervisor they
1709 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1714 for hv_name in cluster.enabled_hypervisors:
1715 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1717 for os_name, os_hvp in cluster.os_hvp.items():
1718 for hv_name, hv_params in os_hvp.items():
1720 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1721 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1723 # TODO: collapse identical parameter values in a single one
1724 for instance in instances:
1725 if instance.hvparams:
1726 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1727 cluster.FillHV(instance)))
1732 class _VerifyErrors(object):
1733 """Mix-in for cluster/group verify LUs.
1735 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1736 self.op and self._feedback_fn to be available.)
1740 ETYPE_FIELD = "code"
1741 ETYPE_ERROR = "ERROR"
1742 ETYPE_WARNING = "WARNING"
1744 def _Error(self, ecode, item, msg, *args, **kwargs):
1745 """Format an error message.
1747 Based on the opcode's error_codes parameter, either format a
1748 parseable error code, or a simpler error string.
1750 This must be called only from Exec and functions called from Exec.
1753 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1754 itype, etxt, _ = ecode
1755 # first complete the msg
1758 # then format the whole message
1759 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1760 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1766 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1767 # and finally report it via the feedback_fn
1768 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1770 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1771 """Log an error message if the passed condition is True.
1775 or self.op.debug_simulate_errors) # pylint: disable=E1101
1777 # If the error code is in the list of ignored errors, demote the error to a
1779 (_, etxt, _) = ecode
1780 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1781 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1784 self._Error(ecode, *args, **kwargs)
1786 # do not mark the operation as failed for WARN cases only
1787 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1788 self.bad = self.bad or cond
1791 class LUClusterVerify(NoHooksLU):
1792 """Submits all jobs necessary to verify the cluster.
1797 def ExpandNames(self):
1798 self.needed_locks = {}
1800 def Exec(self, feedback_fn):
1803 if self.op.group_name:
1804 groups = [self.op.group_name]
1805 depends_fn = lambda: None
1807 groups = self.cfg.GetNodeGroupList()
1809 # Verify global configuration
1811 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1814 # Always depend on global verification
1815 depends_fn = lambda: [(-len(jobs), [])]
1817 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1818 ignore_errors=self.op.ignore_errors,
1819 depends=depends_fn())]
1820 for group in groups)
1822 # Fix up all parameters
1823 for op in itertools.chain(*jobs): # pylint: disable=W0142
1824 op.debug_simulate_errors = self.op.debug_simulate_errors
1825 op.verbose = self.op.verbose
1826 op.error_codes = self.op.error_codes
1828 op.skip_checks = self.op.skip_checks
1829 except AttributeError:
1830 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1832 return ResultWithJobs(jobs)
1835 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1836 """Verifies the cluster config.
1841 def _VerifyHVP(self, hvp_data):
1842 """Verifies locally the syntax of the hypervisor parameters.
1845 for item, hv_name, hv_params in hvp_data:
1846 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1849 hv_class = hypervisor.GetHypervisor(hv_name)
1850 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1851 hv_class.CheckParameterSyntax(hv_params)
1852 except errors.GenericError, err:
1853 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1855 def ExpandNames(self):
1856 # Information can be safely retrieved as the BGL is acquired in exclusive
1858 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1859 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1860 self.all_node_info = self.cfg.GetAllNodesInfo()
1861 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1862 self.needed_locks = {}
1864 def Exec(self, feedback_fn):
1865 """Verify integrity of cluster, performing various test on nodes.
1869 self._feedback_fn = feedback_fn
1871 feedback_fn("* Verifying cluster config")
1873 for msg in self.cfg.VerifyConfig():
1874 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1876 feedback_fn("* Verifying cluster certificate files")
1878 for cert_filename in constants.ALL_CERT_FILES:
1879 (errcode, msg) = _VerifyCertificate(cert_filename)
1880 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1882 feedback_fn("* Verifying hypervisor parameters")
1884 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1885 self.all_inst_info.values()))
1887 feedback_fn("* Verifying all nodes belong to an existing group")
1889 # We do this verification here because, should this bogus circumstance
1890 # occur, it would never be caught by VerifyGroup, which only acts on
1891 # nodes/instances reachable from existing node groups.
1893 dangling_nodes = set(node.name for node in self.all_node_info.values()
1894 if node.group not in self.all_group_info)
1896 dangling_instances = {}
1897 no_node_instances = []
1899 for inst in self.all_inst_info.values():
1900 if inst.primary_node in dangling_nodes:
1901 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1902 elif inst.primary_node not in self.all_node_info:
1903 no_node_instances.append(inst.name)
1908 utils.CommaJoin(dangling_instances.get(node.name,
1910 for node in dangling_nodes]
1912 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1914 "the following nodes (and their instances) belong to a non"
1915 " existing group: %s", utils.CommaJoin(pretty_dangling))
1917 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1919 "the following instances have a non-existing primary-node:"
1920 " %s", utils.CommaJoin(no_node_instances))
1925 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1926 """Verifies the status of a node group.
1929 HPATH = "cluster-verify"
1930 HTYPE = constants.HTYPE_CLUSTER
1933 _HOOKS_INDENT_RE = re.compile("^", re.M)
1935 class NodeImage(object):
1936 """A class representing the logical and physical status of a node.
1939 @ivar name: the node name to which this object refers
1940 @ivar volumes: a structure as returned from
1941 L{ganeti.backend.GetVolumeList} (runtime)
1942 @ivar instances: a list of running instances (runtime)
1943 @ivar pinst: list of configured primary instances (config)
1944 @ivar sinst: list of configured secondary instances (config)
1945 @ivar sbp: dictionary of {primary-node: list of instances} for all
1946 instances for which this node is secondary (config)
1947 @ivar mfree: free memory, as reported by hypervisor (runtime)
1948 @ivar dfree: free disk, as reported by the node (runtime)
1949 @ivar offline: the offline status (config)
1950 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call failed (overall,
1952 not whether the individual keys were correct) (runtime)
1953 @type lvm_fail: boolean
1954 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1955 @type hyp_fail: boolean
1956 @ivar hyp_fail: whether the RPC call didn't return the instance list
1957 @type ghost: boolean
1958 @ivar ghost: whether this is a known node or not (config)
1959 @type os_fail: boolean
1960 @ivar os_fail: whether the RPC call didn't return valid OS data
1962 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1963 @type vm_capable: boolean
1964 @ivar vm_capable: whether the node can host instances
1967 def __init__(self, offline=False, name=None, vm_capable=True):
1976 self.offline = offline
1977 self.vm_capable = vm_capable
1978 self.rpc_fail = False
1979 self.lvm_fail = False
1980 self.hyp_fail = False
1982 self.os_fail = False
1985 def ExpandNames(self):
1986 # This raises errors.OpPrereqError on its own:
1987 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1989 # Get instances in node group; this is unsafe and needs verification later
1990 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1992 self.needed_locks = {
1993 locking.LEVEL_INSTANCE: inst_names,
1994 locking.LEVEL_NODEGROUP: [self.group_uuid],
1995 locking.LEVEL_NODE: [],
1998 self.share_locks = _ShareAll()
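# All lock levels are requested in shared mode here (_ShareAll marks every
# level as shared), since verification only reads the configuration; the
# instance list gathered above is taken without locks held and is therefore
# re-checked in CheckPrereq.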
2000 def DeclareLocks(self, level):
2001 if level == locking.LEVEL_NODE:
2002 # Get members of node group; this is unsafe and needs verification later
2003 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2005 all_inst_info = self.cfg.GetAllInstancesInfo()
2007 # In Exec(), we warn about mirrored instances that have primary and
2008 # secondary living in separate node groups. To fully verify that
2009 # volumes for these instances are healthy, we will need to do an
2010 # extra call to their secondaries. We ensure here those nodes will
2012 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2013 # Important: access only the instances whose lock is owned
2014 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2015 nodes.update(all_inst_info[inst].secondary_nodes)
2017 self.needed_locks[locking.LEVEL_NODE] = nodes
2019 def CheckPrereq(self):
2020 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2021 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2023 group_nodes = set(self.group_info.members)
2024 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2027 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2029 unlocked_instances = \
2030 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2033 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2034 utils.CommaJoin(unlocked_nodes))
2036 if unlocked_instances:
2037 raise errors.OpPrereqError("Missing lock for instances: %s" %
2038 utils.CommaJoin(unlocked_instances))
2040 self.all_node_info = self.cfg.GetAllNodesInfo()
2041 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2043 self.my_node_names = utils.NiceSort(group_nodes)
2044 self.my_inst_names = utils.NiceSort(group_instances)
2046 self.my_node_info = dict((name, self.all_node_info[name])
2047 for name in self.my_node_names)
2049 self.my_inst_info = dict((name, self.all_inst_info[name])
2050 for name in self.my_inst_names)
2052 # We detect here the nodes that will need the extra RPC calls for verifying
2053 # split LV volumes; they should be locked.
2054 extra_lv_nodes = set()
2056 for inst in self.my_inst_info.values():
2057 if inst.disk_template in constants.DTS_INT_MIRROR:
2058 group = self.my_node_info[inst.primary_node].group
2059 for nname in inst.secondary_nodes:
2060 if self.all_node_info[nname].group != group:
2061 extra_lv_nodes.add(nname)
2063 unlocked_lv_nodes = \
2064 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2066 if unlocked_lv_nodes:
2067 raise errors.OpPrereqError("these nodes could be locked: %s" %
2068 utils.CommaJoin(unlocked_lv_nodes))
2069 self.extra_lv_nodes = list(extra_lv_nodes)
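# At this point both the group-local views (my_node_names, my_node_info,
# my_inst_names, my_inst_info) and the cluster-wide ones (all_node_info,
# all_inst_info) are available, together with extra_lv_nodes; Exec() below
# relies on these attributes.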
2071 def _VerifyNode(self, ninfo, nresult):
2072 """Perform some basic validation on data returned from a node.
2074 - check the result data structure is well formed and has all the
2076 - check ganeti version
2078 @type ninfo: L{objects.Node}
2079 @param ninfo: the node to check
2080 @param nresult: the results from the node
2082 @return: whether overall this call was successful (and we can expect
2083 reasonable values in the response)
2087 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2089 # main result, nresult should be a non-empty dict
2090 test = not nresult or not isinstance(nresult, dict)
2091 _ErrorIf(test, constants.CV_ENODERPC, node,
2092 "unable to verify node: no data returned")
2096 # compares ganeti version
2097 local_version = constants.PROTOCOL_VERSION
2098 remote_version = nresult.get("version", None)
2099 test = not (remote_version and
2100 isinstance(remote_version, (list, tuple)) and
2101 len(remote_version) == 2)
2102 _ErrorIf(test, constants.CV_ENODERPC, node,
2103 "connection to node returned invalid data")
2107 test = local_version != remote_version[0]
2108 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2109 "incompatible protocol versions: master %s,"
2110 " node %s", local_version, remote_version[0])
2114 # node seems compatible, we can actually try to look into its results
2116 # full package version
2117 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2118 constants.CV_ENODEVERSION, node,
2119 "software version mismatch: master %s, node %s",
2120 constants.RELEASE_VERSION, remote_version[1],
2121 code=self.ETYPE_WARNING)
2123 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2124 if ninfo.vm_capable and isinstance(hyp_result, dict):
2125 for hv_name, hv_result in hyp_result.iteritems():
2126 test = hv_result is not None
2127 _ErrorIf(test, constants.CV_ENODEHV, node,
2128 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2130 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2131 if ninfo.vm_capable and isinstance(hvp_result, list):
2132 for item, hv_name, hv_result in hvp_result:
2133 _ErrorIf(True, constants.CV_ENODEHV, node,
2134 "hypervisor %s parameter verify failure (source %s): %s",
2135 hv_name, item, hv_result)
2137 test = nresult.get(constants.NV_NODESETUP,
2138 ["Missing NODESETUP results"])
2139 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2144 def _VerifyNodeTime(self, ninfo, nresult,
2145 nvinfo_starttime, nvinfo_endtime):
2146 """Check the node time.
2148 @type ninfo: L{objects.Node}
2149 @param ninfo: the node to check
2150 @param nresult: the remote results for the node
2151 @param nvinfo_starttime: the start time of the RPC call
2152 @param nvinfo_endtime: the end time of the RPC call
2156 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2158 ntime = nresult.get(constants.NV_TIME, None)
2160 ntime_merged = utils.MergeTime(ntime)
2161 except (ValueError, TypeError):
2162 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2165 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2166 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2167 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2168 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2172 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2173 "Node time diverges by at least %s from master node time",
2176 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2177 """Check the node LVM results.
2179 @type ninfo: L{objects.Node}
2180 @param ninfo: the node to check
2181 @param nresult: the remote results for the node
2182 @param vg_name: the configured VG name
2189 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2191 # checks vg existence and size > 20G
2192 vglist = nresult.get(constants.NV_VGLIST, None)
2194 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2196 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2197 constants.MIN_VG_SIZE)
2198 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2201 pvlist = nresult.get(constants.NV_PVLIST, None)
2202 test = pvlist is None
2203 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2205 # check that ':' is not present in PV names, since it's a
2206 # special character for lvcreate (denotes the range of PEs to
2208 for _, pvname, owner_vg in pvlist:
2209 test = ":" in pvname
2210 _ErrorIf(test, constants.CV_ENODELVM, node,
2211 "Invalid character ':' in PV '%s' of VG '%s'",
2214 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2215 """Check the node bridges.
2217 @type ninfo: L{objects.Node}
2218 @param ninfo: the node to check
2219 @param nresult: the remote results for the node
2220 @param bridges: the expected list of bridges
2227 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 missing = nresult.get(constants.NV_BRIDGES, None)
2230 test = not isinstance(missing, list)
2231 _ErrorIf(test, constants.CV_ENODENET, node,
2232 "did not return valid bridge information")
2234 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2235 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2237 def _VerifyNodeUserScripts(self, ninfo, nresult):
2238 """Check the results of user scripts presence and executability on the node
2240 @type ninfo: L{objects.Node}
2241 @param ninfo: the node to check
2242 @param nresult: the remote results for the node
2247 test = constants.NV_USERSCRIPTS not in nresult
2248 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2249 "did not return user scripts information")
2251 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2253 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2254 "user scripts not present or not executable: %s" %
2255 utils.CommaJoin(sorted(broken_scripts)))
2257 def _VerifyNodeNetwork(self, ninfo, nresult):
2258 """Check the node network connectivity results.
2260 @type ninfo: L{objects.Node}
2261 @param ninfo: the node to check
2262 @param nresult: the remote results for the node
2266 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2268 test = constants.NV_NODELIST not in nresult
2269 _ErrorIf(test, constants.CV_ENODESSH, node,
2270 "node hasn't returned node ssh connectivity data")
2272 if nresult[constants.NV_NODELIST]:
2273 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2274 _ErrorIf(True, constants.CV_ENODESSH, node,
2275 "ssh communication with node '%s': %s", a_node, a_msg)
2277 test = constants.NV_NODENETTEST not in nresult
2278 _ErrorIf(test, constants.CV_ENODENET, node,
2279 "node hasn't returned node tcp connectivity data")
2281 if nresult[constants.NV_NODENETTEST]:
2282 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2284 _ErrorIf(True, constants.CV_ENODENET, node,
2285 "tcp communication with node '%s': %s",
2286 anode, nresult[constants.NV_NODENETTEST][anode])
2288 test = constants.NV_MASTERIP not in nresult
2289 _ErrorIf(test, constants.CV_ENODENET, node,
2290 "node hasn't returned node master IP reachability data")
2292 if not nresult[constants.NV_MASTERIP]:
2293 if node == self.master_node:
2294 msg = "the master node cannot reach the master IP (not configured?)"
2296 msg = "cannot reach the master IP"
2297 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2299 def _VerifyInstance(self, instance, instanceconfig, node_image,
2301 """Verify an instance.
2303 This function checks to see if the required block devices are
2304 available on the instance's node.
2307 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2308 node_current = instanceconfig.primary_node
2310 node_vol_should = {}
2311 instanceconfig.MapLVsByNode(node_vol_should)
2313 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2314 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2315 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2317 for node in node_vol_should:
2318 n_img = node_image[node]
2319 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2320 # ignore missing volumes on offline or broken nodes
2322 for volume in node_vol_should[node]:
2323 test = volume not in n_img.volumes
2324 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2325 "volume %s missing on node %s", volume, node)
2327 if instanceconfig.admin_state == constants.ADMINST_UP:
2328 pri_img = node_image[node_current]
2329 test = instance not in pri_img.instances and not pri_img.offline
2330 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2331 "instance not running on its primary node %s",
2334 diskdata = [(nname, success, status, idx)
2335 for (nname, disks) in diskstatus.items()
2336 for idx, (success, status) in enumerate(disks)]
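# diskdata is a flat list of (node_name, success, status, disk_index) tuples;
# e.g. a single-disk mirrored instance on nodes "n1"/"n2" (placeholder names)
# would yield roughly [("n1", True, <status>, 0), ("n2", True, <status>, 0)].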
2338 for nname, success, bdev_status, idx in diskdata:
2339 # the 'ghost node' construction in Exec() ensures that we have a
2341 snode = node_image[nname]
2342 bad_snode = snode.ghost or snode.offline
2343 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2344 not success and not bad_snode,
2345 constants.CV_EINSTANCEFAULTYDISK, instance,
2346 "couldn't retrieve status for disk/%s on %s: %s",
2347 idx, nname, bdev_status)
2348 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2349 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2350 constants.CV_EINSTANCEFAULTYDISK, instance,
2351 "disk/%s on %s is faulty", idx, nname)
2353 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2354 """Verify if there are any unknown volumes in the cluster.
2356 The .os, .swap and backup volumes are ignored. All other volumes are
2357 reported as unknown.
2359 @type reserved: L{ganeti.utils.FieldSet}
2360 @param reserved: a FieldSet of reserved volume names
2363 for node, n_img in node_image.items():
2364 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2365 # skip non-healthy nodes
2367 for volume in n_img.volumes:
2368 test = ((node not in node_vol_should or
2369 volume not in node_vol_should[node]) and
2370 not reserved.Matches(volume))
2371 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2372 "volume %s is unknown", volume)
2374 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2375 """Verify N+1 Memory Resilience.
2377 Check that if one single node dies we can still start all the
2378 instances it was primary for.
2381 cluster_info = self.cfg.GetClusterInfo()
2382 for node, n_img in node_image.items():
2383 # This code checks that every node which is now listed as
2384 # secondary has enough memory to host all the instances it would
2385 # have to take over, should a single other node in the cluster fail.
2386 # FIXME: not ready for failover to an arbitrary node
2387 # FIXME: does not support file-backed instances
2388 # WARNING: we currently take into account down instances as well
2389 # as up ones, considering that even if they're down someone
2390 # might want to start them even in the event of a node failure.
2392 # we're skipping offline nodes from the N+1 warning, since
2393 # most likely we don't have good memory information from them;
2394 # we already list instances living on such nodes, and that's
2397 #TODO(dynmem): use MINMEM for checking
2398 #TODO(dynmem): also consider ballooning out other instances
2399 for prinode, instances in n_img.sbp.items():
2401 for instance in instances:
2402 bep = cluster_info.FillBE(instance_cfg[instance])
2403 if bep[constants.BE_AUTO_BALANCE]:
2404 needed_mem += bep[constants.BE_MAXMEM]
2405 test = n_img.mfree < needed_mem
2406 self._ErrorIf(test, constants.CV_ENODEN1, node,
2407 "not enough memory to accomodate instance failovers"
2408 " should node %s fail (%dMiB needed, %dMiB available)",
2409 prinode, needed_mem, n_img.mfree)
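# In short: for every node and for each primary node it acts as secondary
# for (n_img.sbp), the check sums BE_MAXMEM of the auto-balanced instances
# that would have to fail over to it and compares the total with the node's
# reported free memory (mfree).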
2412 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2413 (files_all, files_opt, files_mc, files_vm)):
2414 """Verifies file checksums collected from all nodes.
2416 @param errorif: Callback for reporting errors
2417 @param nodeinfo: List of L{objects.Node} objects
2418 @param master_node: Name of master node
2419 @param all_nvinfo: RPC results
2422 # Define functions determining which nodes to consider for a file
2425 (files_mc, lambda node: (node.master_candidate or
2426 node.name == master_node)),
2427 (files_vm, lambda node: node.vm_capable),
2430 # Build mapping from filename to list of nodes which should have the file
2432 for (files, fn) in files2nodefn:
2434 filenodes = nodeinfo
2436 filenodes = filter(fn, nodeinfo)
2437 nodefiles.update((filename,
2438 frozenset(map(operator.attrgetter("name"), filenodes)))
2439 for filename in files)
2441 assert set(nodefiles) == (files_all | files_mc | files_vm)
2443 fileinfo = dict((filename, {}) for filename in nodefiles)
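# fileinfo will map each filename to {checksum: set of node names having that
# checksum}; on a healthy cluster every required file ends up with a single
# checksum entry, e.g. {"<filename>": {"<checksum>": {"node1", "node2"}}}.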
2444 ignore_nodes = set()
2446 for node in nodeinfo:
2448 ignore_nodes.add(node.name)
2451 nresult = all_nvinfo[node.name]
2453 if nresult.fail_msg or not nresult.payload:
2456 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2458 test = not (node_files and isinstance(node_files, dict))
2459 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2460 "Node did not return file checksum data")
2462 ignore_nodes.add(node.name)
2465 # Build per-checksum mapping from filename to nodes having it
2466 for (filename, checksum) in node_files.items():
2467 assert filename in nodefiles
2468 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2470 for (filename, checksums) in fileinfo.items():
2471 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2473 # Nodes having the file
2474 with_file = frozenset(node_name
2475 for nodes in fileinfo[filename].values()
2476 for node_name in nodes) - ignore_nodes
2478 expected_nodes = nodefiles[filename] - ignore_nodes
2480 # Nodes missing file
2481 missing_file = expected_nodes - with_file
2483 if filename in files_opt:
2485 errorif(missing_file and missing_file != expected_nodes,
2486 constants.CV_ECLUSTERFILECHECK, None,
2487 "File %s is optional, but it must exist on all or no"
2488 " nodes (not found on %s)",
2489 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2491 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2492 "File %s is missing from node(s) %s", filename,
2493 utils.CommaJoin(utils.NiceSort(missing_file)))
2495 # Warn if a node has a file it shouldn't
2496 unexpected = with_file - expected_nodes
2498 constants.CV_ECLUSTERFILECHECK, None,
2499 "File %s should not exist on node(s) %s",
2500 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2502 # See if there are multiple versions of the file
2503 test = len(checksums) > 1
2505 variants = ["variant %s on %s" %
2506 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2507 for (idx, (checksum, nodes)) in
2508 enumerate(sorted(checksums.items()))]
2512 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2513 "File %s found with %s different checksums (%s)",
2514 filename, len(checksums), "; ".join(variants))
2516 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2518 """Verifies and the node DRBD status.
2520 @type ninfo: L{objects.Node}
2521 @param ninfo: the node to check
2522 @param nresult: the remote results for the node
2523 @param instanceinfo: the dict of instances
2524 @param drbd_helper: the configured DRBD usermode helper
2525 @param drbd_map: the DRBD map as returned by
2526 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2530 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2533 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2534 test = (helper_result is None)
2535 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2536 "no drbd usermode helper returned")
2538 status, payload = helper_result
2540 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2541 "drbd usermode helper check unsuccessful: %s", payload)
2542 test = status and (payload != drbd_helper)
2543 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2544 "wrong drbd usermode helper: %s", payload)
2546 # compute the DRBD minors
2548 for minor, instance in drbd_map[node].items():
2549 test = instance not in instanceinfo
2550 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2551 "ghost instance '%s' in temporary DRBD map", instance)
2552 # ghost instance should not be running, but otherwise we
2553 # don't give double warnings (both ghost instance and
2554 # unallocated minor in use)
2556 node_drbd[minor] = (instance, False)
2558 instance = instanceinfo[instance]
2559 node_drbd[minor] = (instance.name,
2560 instance.admin_state == constants.ADMINST_UP)
2562 # and now check them
2563 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2564 test = not isinstance(used_minors, (tuple, list))
2565 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2566 "cannot parse drbd status file: %s", str(used_minors))
2568 # we cannot check drbd status
2571 for minor, (iname, must_exist) in node_drbd.items():
2572 test = minor not in used_minors and must_exist
2573 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2574 "drbd minor %d of instance %s is not active", minor, iname)
2575 for minor in used_minors:
2576 test = minor not in node_drbd
2577 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2578 "unallocated drbd minor %d is in use", minor)
2580 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2581 """Builds the node OS structures.
2583 @type ninfo: L{objects.Node}
2584 @param ninfo: the node to check
2585 @param nresult: the remote results for the node
2586 @param nimg: the node image object
2590 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2592 remote_os = nresult.get(constants.NV_OSLIST, None)
2593 test = (not isinstance(remote_os, list) or
2594 not compat.all(isinstance(v, list) and len(v) == 7
2595 for v in remote_os))
2597 _ErrorIf(test, constants.CV_ENODEOS, node,
2598 "node hasn't returned valid OS data")
2607 for (name, os_path, status, diagnose,
2608 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2610 if name not in os_dict:
2613 # parameters is a list of lists instead of list of tuples due to
2614 # JSON lacking a real tuple type, fix it:
2615 parameters = [tuple(v) for v in parameters]
2616 os_dict[name].append((os_path, status, diagnose,
2617 set(variants), set(parameters), set(api_ver)))
2619 nimg.oslist = os_dict
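# nimg.oslist now maps each OS name to a list of (path, status, diagnose,
# variants, parameters, api_versions) tuples, typically one entry per
# location in which the OS was found on this node; multiple entries are
# flagged later in _VerifyNodeOS.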
2621 def _VerifyNodeOS(self, ninfo, nimg, base):
2622 """Verifies the node OS list.
2624 @type ninfo: L{objects.Node}
2625 @param ninfo: the node to check
2626 @param nimg: the node image object
2627 @param base: the 'template' node we match against (e.g. from the master)
2631 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2633 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2635 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2636 for os_name, os_data in nimg.oslist.items():
2637 assert os_data, "Empty OS status for OS %s?!" % os_name
2638 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2639 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2640 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2641 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2642 "OS '%s' has multiple entries (first one shadows the rest): %s",
2643 os_name, utils.CommaJoin([v[0] for v in os_data]))
2644 # comparisons with the 'base' image
2645 test = os_name not in base.oslist
2646 _ErrorIf(test, constants.CV_ENODEOS, node,
2647 "Extra OS %s not present on reference node (%s)",
2651 assert base.oslist[os_name], "Base node has empty OS status?"
2652 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2654 # base OS is invalid, skipping
2656 for kind, a, b in [("API version", f_api, b_api),
2657 ("variants list", f_var, b_var),
2658 ("parameters", beautify_params(f_param),
2659 beautify_params(b_param))]:
2660 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2661 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2662 kind, os_name, base.name,
2663 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2665 # check any missing OSes
2666 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2667 _ErrorIf(missing, constants.CV_ENODEOS, node,
2668 "OSes present on reference node %s but missing on this node: %s",
2669 base.name, utils.CommaJoin(missing))
2671 def _VerifyOob(self, ninfo, nresult):
2672 """Verifies out of band functionality of a node.
2674 @type ninfo: L{objects.Node}
2675 @param ninfo: the node to check
2676 @param nresult: the remote results for the node
2680 # We just have to verify the paths on master and/or master candidates
2681 # as the oob helper is invoked on the master
2682 if ((ninfo.master_candidate or ninfo.master_capable) and
2683 constants.NV_OOB_PATHS in nresult):
2684 for path_result in nresult[constants.NV_OOB_PATHS]:
2685 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2687 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2688 """Verifies and updates the node volume data.
2690 This function will update a L{NodeImage}'s internal structures
2691 with data from the remote call.
2693 @type ninfo: L{objects.Node}
2694 @param ninfo: the node to check
2695 @param nresult: the remote results for the node
2696 @param nimg: the node image object
2697 @param vg_name: the configured VG name
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 nimg.lvm_fail = True
2704 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2707 elif isinstance(lvdata, basestring):
2708 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2709 utils.SafeEncode(lvdata))
2710 elif not isinstance(lvdata, dict):
2711 _ErrorIf(True, constants.CV_ENODELVM, node,
2712 "rpc call to node failed (lvlist)")
2714 nimg.volumes = lvdata
2715 nimg.lvm_fail = False
2717 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2718 """Verifies and updates the node instance list.
2720 If the listing was successful, then updates this node's instance
2721 list. Otherwise, it marks the RPC call as failed for the instance
2724 @type ninfo: L{objects.Node}
2725 @param ninfo: the node to check
2726 @param nresult: the remote results for the node
2727 @param nimg: the node image object
2730 idata = nresult.get(constants.NV_INSTANCELIST, None)
2731 test = not isinstance(idata, list)
2732 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2733 "rpc call to node failed (instancelist): %s",
2734 utils.SafeEncode(str(idata)))
2736 nimg.hyp_fail = True
2738 nimg.instances = idata
2740 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2741 """Verifies and computes a node information map
2743 @type ninfo: L{objects.Node}
2744 @param ninfo: the node to check
2745 @param nresult: the remote results for the node
2746 @param nimg: the node image object
2747 @param vg_name: the configured VG name
2751 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2753 # try to read free memory (from the hypervisor)
2754 hv_info = nresult.get(constants.NV_HVINFO, None)
2755 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2756 _ErrorIf(test, constants.CV_ENODEHV, node,
2757 "rpc call to node failed (hvinfo)")
2760 nimg.mfree = int(hv_info["memory_free"])
2761 except (ValueError, TypeError):
2762 _ErrorIf(True, constants.CV_ENODERPC, node,
2763 "node returned invalid nodeinfo, check hypervisor")
2765 # FIXME: devise a free space model for file based instances as well
2766 if vg_name is not None:
2767 test = (constants.NV_VGLIST not in nresult or
2768 vg_name not in nresult[constants.NV_VGLIST])
2769 _ErrorIf(test, constants.CV_ENODELVM, node,
2770 "node didn't return data for the volume group '%s'"
2771 " - it is either missing or broken", vg_name)
2774 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2775 except (ValueError, TypeError):
2776 _ErrorIf(True, constants.CV_ENODERPC, node,
2777 "node returned invalid LVM info, check LVM status")
2779 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2780 """Gets per-disk status information for all instances.
2782 @type nodelist: list of strings
2783 @param nodelist: Node names
2784 @type node_image: dict of (name, L{objects.Node})
2785 @param node_image: Node objects
2786 @type instanceinfo: dict of (name, L{objects.Instance})
2787 @param instanceinfo: Instance objects
2788 @rtype: {instance: {node: [(success, payload)]}}
2789 @return: a dictionary of per-instance dictionaries with nodes as
2790 keys and disk information as values; the disk information is a
2791 list of tuples (success, payload)
2794 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2797 node_disks_devonly = {}
2798 diskless_instances = set()
2799 diskless = constants.DT_DISKLESS
2801 for nname in nodelist:
2802 node_instances = list(itertools.chain(node_image[nname].pinst,
2803 node_image[nname].sinst))
2804 diskless_instances.update(inst for inst in node_instances
2805 if instanceinfo[inst].disk_template == diskless)
2806 disks = [(inst, disk)
2807 for inst in node_instances
2808 for disk in instanceinfo[inst].disks]
2811 # No need to collect data
2814 node_disks[nname] = disks
2816 # Creating copies as SetDiskID below will modify the objects and that can
2817 # lead to incorrect data returned from nodes
2818 devonly = [dev.Copy() for (_, dev) in disks]
2821 self.cfg.SetDiskID(dev, nname)
2823 node_disks_devonly[nname] = devonly
2825 assert len(node_disks) == len(node_disks_devonly)
2827 # Collect data from all nodes with disks
2828 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2831 assert len(result) == len(node_disks)
2835 for (nname, nres) in result.items():
2836 disks = node_disks[nname]
2839 # No data from this node
2840 data = len(disks) * [(False, "node offline")]
2843 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2844 "while getting disk information: %s", msg)
2846 # No data from this node
2847 data = len(disks) * [(False, msg)]
2850 for idx, i in enumerate(nres.payload):
2851 if isinstance(i, (tuple, list)) and len(i) == 2:
2854 logging.warning("Invalid result from node %s, entry %d: %s",
2856 data.append((False, "Invalid result from the remote node"))
2858 for ((inst, _), status) in zip(disks, data):
2859 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2861 # Add empty entries for diskless instances.
2862 for inst in diskless_instances:
2863 assert inst not in instdisk
2866 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2867 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2868 compat.all(isinstance(s, (tuple, list)) and
2869 len(s) == 2 for s in statuses)
2870 for inst, nnames in instdisk.items()
2871 for nname, statuses in nnames.items())
2872 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
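# For illustration (placeholder names), instdisk for a two-node mirrored
# instance might look like:
#   {"inst1": {"n1": [(True, <payload>)], "n2": [(True, <payload>)]}}
# while diskless instances get an empty inner dictionary.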
2877 def _SshNodeSelector(group_uuid, all_nodes):
2878 """Create endless iterators for all potential SSH check hosts.
2881 nodes = [node for node in all_nodes
2882 if (node.group != group_uuid and
2884 keyfunc = operator.attrgetter("group")
2886 return map(itertools.cycle,
2887 [sorted(map(operator.attrgetter("name"), names))
2888 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2892 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2893 """Choose which nodes should talk to which other nodes.
2895 We will make nodes contact all nodes in their group, and one node from every other group.
2898 @warning: This algorithm has a known issue if one node group is much
2899 smaller than others (e.g. just one node). In such a case all other
2900 nodes will talk to the single node.
2903 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2904 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2906 return (online_nodes,
2907 dict((name, sorted([i.next() for i in sel]))
2908 for name in online_nodes))
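# Illustration (placeholder names): with groups A = {a1, a2} and B = {b1},
# verifying group A makes each of a1 and a2 additionally check SSH
# connectivity to b1; the per-group cycle spreads the checks over a foreign
# group's members when it has more than one node.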
2910 def BuildHooksEnv(self):
2913 Cluster-Verify hooks are only run in the post phase; if they fail, their
2914 output is logged in the verify output and the verification fails.
2918 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2921 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2922 for node in self.my_node_info.values())
2926 def BuildHooksNodes(self):
2927 """Build hooks nodes.
2930 return ([], self.my_node_names)
2932 def Exec(self, feedback_fn):
2933 """Verify integrity of the node group, performing various test on nodes.
2936 # This method has too many local variables. pylint: disable=R0914
2937 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2939 if not self.my_node_names:
2941 feedback_fn("* Empty node group, skipping verification")
2945 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2946 verbose = self.op.verbose
2947 self._feedback_fn = feedback_fn
2949 vg_name = self.cfg.GetVGName()
2950 drbd_helper = self.cfg.GetDRBDHelper()
2951 cluster = self.cfg.GetClusterInfo()
2952 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2953 hypervisors = cluster.enabled_hypervisors
2954 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2956 i_non_redundant = [] # Non redundant instances
2957 i_non_a_balanced = [] # Non auto-balanced instances
2958 i_offline = 0 # Count of offline instances
2959 n_offline = 0 # Count of offline nodes
2960 n_drained = 0 # Count of nodes being drained
2961 node_vol_should = {}
2963 # FIXME: verify OS list
2966 filemap = _ComputeAncillaryFiles(cluster, False)
2968 # do local checksums
2969 master_node = self.master_node = self.cfg.GetMasterNode()
2970 master_ip = self.cfg.GetMasterIP()
2972 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2975 if self.cfg.GetUseExternalMipScript():
2976 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2978 node_verify_param = {
2979 constants.NV_FILELIST:
2980 utils.UniqueSequence(filename
2981 for files in filemap
2982 for filename in files),
2983 constants.NV_NODELIST:
2984 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2985 self.all_node_info.values()),
2986 constants.NV_HYPERVISOR: hypervisors,
2987 constants.NV_HVPARAMS:
2988 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2989 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2990 for node in node_data_list
2991 if not node.offline],
2992 constants.NV_INSTANCELIST: hypervisors,
2993 constants.NV_VERSION: None,
2994 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2995 constants.NV_NODESETUP: None,
2996 constants.NV_TIME: None,
2997 constants.NV_MASTERIP: (master_node, master_ip),
2998 constants.NV_OSLIST: None,
2999 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3000 constants.NV_USERSCRIPTS: user_scripts,
3003 if vg_name is not None:
3004 node_verify_param[constants.NV_VGLIST] = None
3005 node_verify_param[constants.NV_LVLIST] = vg_name
3006 node_verify_param[constants.NV_PVLIST] = [vg_name]
3007 node_verify_param[constants.NV_DRBDLIST] = None
3010 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3013 # FIXME: this needs to be changed per node-group, not cluster-wide
3015 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3016 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3017 bridges.add(default_nicpp[constants.NIC_LINK])
3018 for instance in self.my_inst_info.values():
3019 for nic in instance.nics:
3020 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3021 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3022 bridges.add(full_nic[constants.NIC_LINK])
3025 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3027 # Build our expected cluster state
3028 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3030 vm_capable=node.vm_capable))
3031 for node in node_data_list)
3035 for node in self.all_node_info.values():
3036 path = _SupportsOob(self.cfg, node)
3037 if path and path not in oob_paths:
3038 oob_paths.append(path)
3041 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3043 for instance in self.my_inst_names:
3044 inst_config = self.my_inst_info[instance]
3046 for nname in inst_config.all_nodes:
3047 if nname not in node_image:
3048 gnode = self.NodeImage(name=nname)
3049 gnode.ghost = (nname not in self.all_node_info)
3050 node_image[nname] = gnode
3052 inst_config.MapLVsByNode(node_vol_should)
3054 pnode = inst_config.primary_node
3055 node_image[pnode].pinst.append(instance)
3057 for snode in inst_config.secondary_nodes:
3058 nimg = node_image[snode]
3059 nimg.sinst.append(instance)
3060 if pnode not in nimg.sbp:
3061 nimg.sbp[pnode] = []
3062 nimg.sbp[pnode].append(instance)
3064 # At this point, we have the in-memory data structures complete,
3065 # except for the runtime information, which we'll gather next
3067 # Due to the way our RPC system works, exact response times cannot be
3068 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3069 # time before and after executing the request, we can at least have a time
3071 nvinfo_starttime = time.time()
3072 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3074 self.cfg.GetClusterName())
3075 nvinfo_endtime = time.time()
3077 if self.extra_lv_nodes and vg_name is not None:
3079 self.rpc.call_node_verify(self.extra_lv_nodes,
3080 {constants.NV_LVLIST: vg_name},
3081 self.cfg.GetClusterName())
3083 extra_lv_nvinfo = {}
3085 all_drbd_map = self.cfg.ComputeDRBDMap()
3087 feedback_fn("* Gathering disk information (%s nodes)" %
3088 len(self.my_node_names))
3089 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3092 feedback_fn("* Verifying configuration file consistency")
3094 # If not all nodes are being checked, we need to make sure the master node
3095 # and a non-checked vm_capable node are in the list.
3096 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3098 vf_nvinfo = all_nvinfo.copy()
3099 vf_node_info = list(self.my_node_info.values())
3100 additional_nodes = []
3101 if master_node not in self.my_node_info:
3102 additional_nodes.append(master_node)
3103 vf_node_info.append(self.all_node_info[master_node])
3104 # Add the first vm_capable node we find which is not included
3105 for node in absent_nodes:
3106 nodeinfo = self.all_node_info[node]
3107 if nodeinfo.vm_capable and not nodeinfo.offline:
3108 additional_nodes.append(node)
3109 vf_node_info.append(self.all_node_info[node])
3111 key = constants.NV_FILELIST
3112 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3113 {key: node_verify_param[key]},
3114 self.cfg.GetClusterName()))
3116 vf_nvinfo = all_nvinfo
3117 vf_node_info = self.my_node_info.values()
3119 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3121 feedback_fn("* Verifying node status")
3125 for node_i in node_data_list:
3127 nimg = node_image[node]
3131 feedback_fn("* Skipping offline node %s" % (node,))
3135 if node == master_node:
3137 elif node_i.master_candidate:
3138 ntype = "master candidate"
3139 elif node_i.drained:
3145 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3147 msg = all_nvinfo[node].fail_msg
3148 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3151 nimg.rpc_fail = True
3154 nresult = all_nvinfo[node].payload
3156 nimg.call_ok = self._VerifyNode(node_i, nresult)
3157 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3158 self._VerifyNodeNetwork(node_i, nresult)
3159 self._VerifyNodeUserScripts(node_i, nresult)
3160 self._VerifyOob(node_i, nresult)
3163 self._VerifyNodeLVM(node_i, nresult, vg_name)
3164 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3167 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3168 self._UpdateNodeInstances(node_i, nresult, nimg)
3169 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3170 self._UpdateNodeOS(node_i, nresult, nimg)
3172 if not nimg.os_fail:
3173 if refos_img is None:
3175 self._VerifyNodeOS(node_i, nimg, refos_img)
3176 self._VerifyNodeBridges(node_i, nresult, bridges)
3178 # Check whether all running instances are primary for the node. (This
3179 # can no longer be done from _VerifyInstance below, since some of the
3180 # wrong instances could be from other node groups.)
3181 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3183 for inst in non_primary_inst:
3184 # FIXME: investigate best way to handle offline insts
3185 if inst.admin_state == constants.ADMINST_OFFLINE:
3187 feedback_fn("* Skipping offline instance %s" % inst.name)
3190 test = inst in self.all_inst_info
3191 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3192 "instance should not run on node %s", node_i.name)
3193 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3194 "node is running unknown instance %s", inst)
3196 for node, result in extra_lv_nvinfo.items():
3197 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3198 node_image[node], vg_name)
3200 feedback_fn("* Verifying instance status")
3201 for instance in self.my_inst_names:
3203 feedback_fn("* Verifying instance %s" % instance)
3204 inst_config = self.my_inst_info[instance]
3205 self._VerifyInstance(instance, inst_config, node_image,
3207 inst_nodes_offline = []
3209 pnode = inst_config.primary_node
3210 pnode_img = node_image[pnode]
3211 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3212 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3213 " primary node failed", instance)
3215 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3217 constants.CV_EINSTANCEBADNODE, instance,
3218 "instance is marked as running and lives on offline node %s",
3219 inst_config.primary_node)
3221 # If the instance is non-redundant we cannot survive losing its primary
3222 # node, so we are not N+1 compliant. On the other hand we have no disk
3223 # templates with more than one secondary so that situation is not well
3225 # FIXME: does not support file-backed instances
3226 if not inst_config.secondary_nodes:
3227 i_non_redundant.append(instance)
3229 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3230 constants.CV_EINSTANCELAYOUT,
3231 instance, "instance has multiple secondary nodes: %s",
3232 utils.CommaJoin(inst_config.secondary_nodes),
3233 code=self.ETYPE_WARNING)
3235 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3236 pnode = inst_config.primary_node
3237 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3238 instance_groups = {}
3240 for node in instance_nodes:
3241 instance_groups.setdefault(self.all_node_info[node].group,
3245 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3246 # Sort so that we always list the primary node first.
3247 for group, nodes in sorted(instance_groups.items(),
3248 key=lambda (_, nodes): pnode in nodes,
3251 self._ErrorIf(len(instance_groups) > 1,
3252 constants.CV_EINSTANCESPLITGROUPS,
3253 instance, "instance has primary and secondary nodes in"
3254 " different groups: %s", utils.CommaJoin(pretty_list),
3255 code=self.ETYPE_WARNING)
3257 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3258 i_non_a_balanced.append(instance)
3260 for snode in inst_config.secondary_nodes:
3261 s_img = node_image[snode]
3262 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3263 snode, "instance %s, connection to secondary node failed",
3267 inst_nodes_offline.append(snode)
3269 # warn that the instance lives on offline nodes
3270 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3271 "instance has offline secondary node(s) %s",
3272 utils.CommaJoin(inst_nodes_offline))
3273 # ... or ghost/non-vm_capable nodes
3274 for node in inst_config.all_nodes:
3275 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3276 instance, "instance lives on ghost node %s", node)
3277 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3278 instance, "instance lives on non-vm_capable node %s", node)
3280 feedback_fn("* Verifying orphan volumes")
3281 reserved = utils.FieldSet(*cluster.reserved_lvs)
3283 # We will get spurious "unknown volume" warnings if any node of this group
3284 # is secondary for an instance whose primary is in another group. To avoid
3285 # them, we find these instances and add their volumes to node_vol_should.
3286 for inst in self.all_inst_info.values():
3287 for secondary in inst.secondary_nodes:
3288 if (secondary in self.my_node_info
3289 and inst.name not in self.my_inst_info):
3290 inst.MapLVsByNode(node_vol_should)
3293 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3295 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3296 feedback_fn("* Verifying N+1 Memory redundancy")
3297 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3299 feedback_fn("* Other Notes")
3301 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3302 % len(i_non_redundant))
3304 if i_non_a_balanced:
3305 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3306 % len(i_non_a_balanced))
3309 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3312 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3315 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3319 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3320 """Analyze the post-hooks' result
3322 This method analyses the hook result, handles it, and sends some
3323 nicely-formatted feedback back to the user.
3325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3327 @param hooks_results: the results of the multi-node hooks rpc call
3328 @param feedback_fn: function used to send feedback back to the caller
3329 @param lu_result: previous Exec result
3330 @return: the new Exec result, based on the previous result
3334 # We only really run POST phase hooks, only for non-empty groups,
3335 # and are only interested in their results
3336 if not self.my_node_names:
3339 elif phase == constants.HOOKS_PHASE_POST:
3340 # Used to change hooks' output to proper indentation
3341 feedback_fn("* Hooks Results")
3342 assert hooks_results, "invalid result from hooks"
3344 for node_name in hooks_results:
3345 res = hooks_results[node_name]
3347 test = msg and not res.offline
3348 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3349 "Communication failure in hooks execution: %s", msg)
3350 if res.offline or msg:
3351 # No need to investigate payload if node is offline or gave
3354 for script, hkr, output in res.payload:
3355 test = hkr == constants.HKR_FAIL
3356 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3357 "Script %s failed, output:", script)
3359 output = self._HOOKS_INDENT_RE.sub(" ", output)
3360 feedback_fn("%s" % output)
3366 class LUClusterVerifyDisks(NoHooksLU):
3367 """Verifies the cluster disks status.
3372 def ExpandNames(self):
3373 self.share_locks = _ShareAll()
3374 self.needed_locks = {
3375 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3378 def Exec(self, feedback_fn):
3379 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3381 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3382 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3383 for group in group_names])
3386 class LUGroupVerifyDisks(NoHooksLU):
3387 """Verifies the status of all disks in a node group.
3392 def ExpandNames(self):
3393 # Raises errors.OpPrereqError on its own if group can't be found
3394 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3396 self.share_locks = _ShareAll()
3397 self.needed_locks = {
3398 locking.LEVEL_INSTANCE: [],
3399 locking.LEVEL_NODEGROUP: [],
3400 locking.LEVEL_NODE: [],
3403 def DeclareLocks(self, level):
3404 if level == locking.LEVEL_INSTANCE:
3405 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3407 # Lock instances optimistically, needs verification once node and group
3408 # locks have been acquired
3409 self.needed_locks[locking.LEVEL_INSTANCE] = \
3410 self.cfg.GetNodeGroupInstances(self.group_uuid)
3412 elif level == locking.LEVEL_NODEGROUP:
3413 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3415 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3416 set([self.group_uuid] +
3417 # Lock all groups used by instances optimistically; this requires
3418 # going via the node before it's locked, requiring verification
3421 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3422 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3424 elif level == locking.LEVEL_NODE:
3425 # This will only lock the nodes in the group to be verified which contain
3427 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3428 self._LockInstancesNodes()
3430 # Lock all nodes in group to be verified
3431 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3432 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3433 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3435 def CheckPrereq(self):
3436 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3437 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3438 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3440 assert self.group_uuid in owned_groups
3442 # Check if locked instances are still correct
3443 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3445 # Get instance information
3446 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3448 # Check if node groups for locked instances are still correct
3449 for (instance_name, inst) in self.instances.items():
3450 assert owned_nodes.issuperset(inst.all_nodes), \
3451 "Instance %s's nodes changed while we kept the lock" % instance_name
3453 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3456 assert self.group_uuid in inst_groups, \
3457 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3459 def Exec(self, feedback_fn):
3460 """Verify integrity of cluster disks.
3462 @rtype: tuple of three items
3463 @return: a tuple of (dict of node-to-node_error, list of instances
3464 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3469 res_instances = set()
3472 nv_dict = _MapInstanceDisksToNodes([inst
3473 for inst in self.instances.values()
3474 if inst.admin_state == constants.ADMINST_UP])
3477 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3478 set(self.cfg.GetVmCapableNodeList()))
3480 node_lvs = self.rpc.call_lv_list(nodes, [])
3482 for (node, node_res) in node_lvs.items():
3483 if node_res.offline:
3486 msg = node_res.fail_msg
3488 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3489 res_nodes[node] = msg
3492 for lv_name, (_, _, lv_online) in node_res.payload.items():
3493 inst = nv_dict.pop((node, lv_name), None)
3494 if not (lv_online or inst is None):
3495 res_instances.add(inst)
3497 # any leftover items in nv_dict are missing LVs, let's arrange the data
3499 for key, inst in nv_dict.iteritems():
3500 res_missing.setdefault(inst, []).append(list(key))
3502 return (res_nodes, list(res_instances), res_missing)
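# For illustration (placeholder names), the tuple returned above might look
# like:
#   ({"node3": "<rpc error message>"},          # nodes that failed the LV query
#    ["inst2"],                                 # instances needing activate-disks
#    {"inst4": [["node5", "xenvg/lv_data"]]})   # instances with missing volumes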
3505 class LUClusterRepairDiskSizes(NoHooksLU):
3506 """Verifies the cluster disks sizes.
3511 def ExpandNames(self):
3512 if self.op.instances:
3513 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3514 self.needed_locks = {
3515 locking.LEVEL_NODE_RES: [],
3516 locking.LEVEL_INSTANCE: self.wanted_names,
3518 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3520 self.wanted_names = None
3521 self.needed_locks = {
3522 locking.LEVEL_NODE_RES: locking.ALL_SET,
3523 locking.LEVEL_INSTANCE: locking.ALL_SET,
3525 self.share_locks = {
3526 locking.LEVEL_NODE_RES: 1,
3527 locking.LEVEL_INSTANCE: 0,
3530 def DeclareLocks(self, level):
3531 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3532 self._LockInstancesNodes(primary_only=True, level=level)
3534 def CheckPrereq(self):
3535 """Check prerequisites.
3537 This only checks the optional instance list against the existing names.
3540 if self.wanted_names is None:
3541 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3543 self.wanted_instances = \
3544 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3546 def _EnsureChildSizes(self, disk):
3547 """Ensure children of the disk have the needed disk size.
3549 This is valid mainly for DRBD8 and fixes an issue where the
3550 children have a smaller disk size.
3552 @param disk: an L{ganeti.objects.Disk} object
3555 if disk.dev_type == constants.LD_DRBD8:
3556 assert disk.children, "Empty children for DRBD8?"
3557 fchild = disk.children[0]
3558 mismatch = fchild.size < disk.size
3560 self.LogInfo("Child disk has size %d, parent %d, fixing",
3561 fchild.size, disk.size)
3562 fchild.size = disk.size
3564 # and we recurse on this child only, not on the metadev
3565 return self._EnsureChildSizes(fchild) or mismatch
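# Illustration: for a DRBD8 disk recorded with size 1024 whose data child
# (children[0]) is recorded with size 1000, the child is grown to 1024 in the
# configuration object and True is returned, so the caller knows the instance
# needs to be written back via cfg.Update.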
3569 def Exec(self, feedback_fn):
3570 """Verify the size of cluster disks.
3573 # TODO: check child disks too
3574 # TODO: check differences in size between primary/secondary nodes
3576 for instance in self.wanted_instances:
3577 pnode = instance.primary_node
3578 if pnode not in per_node_disks:
3579 per_node_disks[pnode] = []
3580 for idx, disk in enumerate(instance.disks):
3581 per_node_disks[pnode].append((instance, idx, disk))
3583 assert not (frozenset(per_node_disks.keys()) -
3584 self.owned_locks(locking.LEVEL_NODE_RES)), \
3585 "Not owning correct locks"
3586 assert not self.owned_locks(locking.LEVEL_NODE)
3589 for node, dskl in per_node_disks.items():
3590 newl = [v[2].Copy() for v in dskl]
3592 self.cfg.SetDiskID(dsk, node)
3593 result = self.rpc.call_blockdev_getsize(node, newl)
3595 self.LogWarning("Failure in blockdev_getsize call to node"
3596 " %s, ignoring", node)
3598 if len(result.payload) != len(dskl):
3599 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3600 " result.payload=%s", node, len(dskl), result.payload)
3601 self.LogWarning("Invalid result from node %s, ignoring node results",
3604 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3606 self.LogWarning("Disk %d of instance %s did not return size"
3607 " information, ignoring", idx, instance.name)
3609 if not isinstance(size, (int, long)):
3610 self.LogWarning("Disk %d of instance %s did not return valid"
3611 " size information, ignoring", idx, instance.name)
3614 if size != disk.size:
3615 self.LogInfo("Disk %d of instance %s has mismatched size,"
3616 " correcting: recorded %d, actual %d", idx,
3617 instance.name, disk.size, size)
3619 self.cfg.Update(instance, feedback_fn)
3620 changed.append((instance.name, idx, size))
3621 if self._EnsureChildSizes(disk):
3622 self.cfg.Update(instance, feedback_fn)
3623 changed.append((instance.name, idx, disk.size))
3627 class LUClusterRename(LogicalUnit):
3628 """Rename the cluster.
3631 HPATH = "cluster-rename"
3632 HTYPE = constants.HTYPE_CLUSTER
3634 def BuildHooksEnv(self):
3639 "OP_TARGET": self.cfg.GetClusterName(),
3640 "NEW_NAME": self.op.name,
3643 def BuildHooksNodes(self):
3644 """Build hooks nodes.
3647 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3649 def CheckPrereq(self):
3650 """Verify that the passed name is a valid one.
3653 hostname = netutils.GetHostname(name=self.op.name,
3654 family=self.cfg.GetPrimaryIPFamily())
3656 new_name = hostname.name
3657 self.ip = new_ip = hostname.ip
3658 old_name = self.cfg.GetClusterName()
3659 old_ip = self.cfg.GetMasterIP()
3660 if new_name == old_name and new_ip == old_ip:
3661 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3662 " cluster has changed",
3664 if new_ip != old_ip:
3665 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3666 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3667 " reachable on the network" %
3668 new_ip, errors.ECODE_NOTUNIQUE)
3670 self.op.name = new_name
3672 def Exec(self, feedback_fn):
3673 """Rename the cluster.
3676 clustername = self.op.name
3679 # shutdown the master IP
3680 master_params = self.cfg.GetMasterNetworkParameters()
3681 ems = self.cfg.GetUseExternalMipScript()
3682 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3684 result.Raise("Could not disable the master role")
3687 cluster = self.cfg.GetClusterInfo()
3688 cluster.cluster_name = clustername
3689 cluster.master_ip = new_ip
3690 self.cfg.Update(cluster, feedback_fn)
3692 # update the known hosts file
3693 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3694 node_list = self.cfg.GetOnlineNodeList()
3696 node_list.remove(master_params.name)
3699 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3701 master_params.ip = new_ip
3702 result = self.rpc.call_node_activate_master_ip(master_params.name,
3704 msg = result.fail_msg
3706 self.LogWarning("Could not re-enable the master role on"
3707 " the master, please restart manually: %s", msg)
3712 def _ValidateNetmask(cfg, netmask):
3713 """Checks if a netmask is valid.
3715 @type cfg: L{config.ConfigWriter}
3716 @param cfg: The cluster configuration
3718 @param netmask: the netmask to be verified
3719 @raise errors.OpPrereqError: if the validation fails
3722 ip_family = cfg.GetPrimaryIPFamily()
3724 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3725 except errors.ProgrammerError:
3726 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3728 if not ipcls.ValidateNetmask(netmask):
3729 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3733 class LUClusterSetParams(LogicalUnit):
3734 """Change the parameters of the cluster.
3737 HPATH = "cluster-modify"
3738 HTYPE = constants.HTYPE_CLUSTER
3741 def CheckArguments(self):
3745 if self.op.uid_pool:
3746 uidpool.CheckUidPool(self.op.uid_pool)
3748 if self.op.add_uids:
3749 uidpool.CheckUidPool(self.op.add_uids)
3751 if self.op.remove_uids:
3752 uidpool.CheckUidPool(self.op.remove_uids)
3754 if self.op.master_netmask is not None:
3755 _ValidateNetmask(self.cfg, self.op.master_netmask)
3757 if self.op.diskparams:
3758 for dt_params in self.op.diskparams.values():
3759 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3761 def ExpandNames(self):
3762 # FIXME: in the future maybe other cluster params won't require checking on
3763 # all nodes to be modified.
3764 self.needed_locks = {
3765 locking.LEVEL_NODE: locking.ALL_SET,
3767 self.share_locks[locking.LEVEL_NODE] = 1
3769 def BuildHooksEnv(self):
3774 "OP_TARGET": self.cfg.GetClusterName(),
3775 "NEW_VG_NAME": self.op.vg_name,
3778 def BuildHooksNodes(self):
3779 """Build hooks nodes.
3782 mn = self.cfg.GetMasterNode()
3785 def CheckPrereq(self):
3786 """Check prerequisites.
3788 This checks whether the given parameters are consistent and
3789 whether the given volume group is valid.
3792 if self.op.vg_name is not None and not self.op.vg_name:
3793 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3794 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3795 " instances exist", errors.ECODE_INVAL)
3797 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3798 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3799 raise errors.OpPrereqError("Cannot disable drbd helper while"
3800 " drbd-based instances exist",
3803 node_list = self.owned_locks(locking.LEVEL_NODE)
3805 # if vg_name is not None, check the given volume group on all nodes
3807 vglist = self.rpc.call_vg_list(node_list)
3808 for node in node_list:
3809 msg = vglist[node].fail_msg
3811 # ignoring down node
3812 self.LogWarning("Error while gathering data on node %s"
3813 " (ignoring node): %s", node, msg)
3815 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3817 constants.MIN_VG_SIZE)
3819 raise errors.OpPrereqError("Error on node '%s': %s" %
3820 (node, vgstatus), errors.ECODE_ENVIRON)
3822 if self.op.drbd_helper:
3823 # check the given drbd helper on all nodes
3824 helpers = self.rpc.call_drbd_helper(node_list)
3825 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3827 self.LogInfo("Not checking drbd helper on offline node %s", node)
3829 msg = helpers[node].fail_msg
3831 raise errors.OpPrereqError("Error checking drbd helper on node"
3832 " '%s': %s" % (node, msg),
3833 errors.ECODE_ENVIRON)
3834 node_helper = helpers[node].payload
3835 if node_helper != self.op.drbd_helper:
3836 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3837 (node, node_helper), errors.ECODE_ENVIRON)
3839 self.cluster = cluster = self.cfg.GetClusterInfo()
3840 # validate params changes
3841 if self.op.beparams:
3842 objects.UpgradeBeParams(self.op.beparams)
3843 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3844 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3846 if self.op.ndparams:
3847 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3848 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3850 # TODO: we need a more general way to handle resetting
3851 # cluster-level parameters to default values
3852 if self.new_ndparams["oob_program"] == "":
3853 self.new_ndparams["oob_program"] = \
3854 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3856 if self.op.hv_state:
3857 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3858 self.cluster.hv_state_static)
3859 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3860 for hv, values in new_hv_state.items())
3862 if self.op.disk_state:
3863 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3864 self.cluster.disk_state_static)
3865 self.new_disk_state = \
3866 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3867 for name, values in svalues.items()))
3868 for storage, svalues in new_disk_state.items())
3871 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3874 if self.op.nicparams:
3875 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3876 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3877 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3880 # check all instances for consistency
3881 for instance in self.cfg.GetAllInstancesInfo().values():
3882 for nic_idx, nic in enumerate(instance.nics):
3883 params_copy = copy.deepcopy(nic.nicparams)
3884 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3886 # check parameter syntax
3888 objects.NIC.CheckParameterSyntax(params_filled)
3889 except errors.ConfigurationError, err:
3890 nic_errors.append("Instance %s, nic/%d: %s" %
3891 (instance.name, nic_idx, err))
3893 # if we're moving instances to routed, check that they have an ip
3894 target_mode = params_filled[constants.NIC_MODE]
3895 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3896 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3897 " address" % (instance.name, nic_idx))
3899 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3900 "\n".join(nic_errors))
3902 # hypervisor list/parameters
3903 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3904 if self.op.hvparams:
3905 for hv_name, hv_dict in self.op.hvparams.items():
3906 if hv_name not in self.new_hvparams:
3907 self.new_hvparams[hv_name] = hv_dict
3909 self.new_hvparams[hv_name].update(hv_dict)
3911 # disk template parameters
3912 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3913 if self.op.diskparams:
3914 for dt_name, dt_params in self.op.diskparams.items():
3915 if dt_name not in self.new_diskparams:
3916 self.new_diskparams[dt_name] = dt_params
3918 self.new_diskparams[dt_name].update(dt_params)
3920 # os hypervisor parameters
3921 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3923 for os_name, hvs in self.op.os_hvp.items():
3924 if os_name not in self.new_os_hvp:
3925 self.new_os_hvp[os_name] = hvs
3927 for hv_name, hv_dict in hvs.items():
3928 if hv_name not in self.new_os_hvp[os_name]:
3929 self.new_os_hvp[os_name][hv_name] = hv_dict
3931 self.new_os_hvp[os_name][hv_name].update(hv_dict)
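# Merge sketch for the loop above, with illustrative values: given
# cluster.os_hvp = {"debian": {"kvm": {"acpi": True}}} and an opcode passing
# {"debian": {"kvm": {"kernel_path": "/boot/vmlinuz"}}}, new_os_hvp keeps
# "acpi" and gains "kernel_path" for the kvm entry of that OS.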
3934 self.new_osp = objects.FillDict(cluster.osparams, {})
3935 if self.op.osparams:
3936 for os_name, osp in self.op.osparams.items():
3937 if os_name not in self.new_osp:
3938 self.new_osp[os_name] = {}
3940 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3943 if not self.new_osp[os_name]:
3944 # we removed all parameters
3945 del self.new_osp[os_name]
3947 # check the parameter validity (remote check)
3948 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3949 os_name, self.new_osp[os_name])
3951 # changes to the hypervisor list
3952 if self.op.enabled_hypervisors is not None:
3953 self.hv_list = self.op.enabled_hypervisors
3954 for hv in self.hv_list:
3955 # if the hypervisor doesn't already exist in the cluster
3956 # hvparams, we initialize it to empty, and then (in both
3957 # cases) we make sure to fill the defaults, as we might not
3958 # have a complete defaults list if the hypervisor wasn't enabled before
3960 if hv not in new_hvp:
new_hvp[hv] = {}
3962 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3963 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3965 self.hv_list = cluster.enabled_hypervisors
3967 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3968 # either the enabled list has changed, or the parameters have, validate
3969 for hv_name, hv_params in self.new_hvparams.items():
3970 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3971 (self.op.enabled_hypervisors and
3972 hv_name in self.op.enabled_hypervisors)):
3973 # either this is a new hypervisor, or its parameters have changed
3974 hv_class = hypervisor.GetHypervisor(hv_name)
3975 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3976 hv_class.CheckParameterSyntax(hv_params)
3977 _CheckHVParams(self, node_list, hv_name, hv_params)
3980 # no need to check any newly-enabled hypervisors, since the
3981 # defaults have already been checked in the above code-block
3982 for os_name, os_hvp in self.new_os_hvp.items():
3983 for hv_name, hv_params in os_hvp.items():
3984 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3985 # we need to fill in the new os_hvp on top of the actual hv_p
3986 cluster_defaults = self.new_hvparams.get(hv_name, {})
3987 new_osp = objects.FillDict(cluster_defaults, hv_params)
3988 hv_class = hypervisor.GetHypervisor(hv_name)
3989 hv_class.CheckParameterSyntax(new_osp)
3990 _CheckHVParams(self, node_list, hv_name, new_osp)
3992 if self.op.default_iallocator:
3993 alloc_script = utils.FindFile(self.op.default_iallocator,
3994 constants.IALLOCATOR_SEARCH_PATH,
3996 if alloc_script is None:
3997 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3998 " specified" % self.op.default_iallocator,
4001 def Exec(self, feedback_fn):
4002 """Change the parameters of the cluster.
4005 if self.op.vg_name is not None:
4006 new_volume = self.op.vg_name
4009 if new_volume != self.cfg.GetVGName():
4010 self.cfg.SetVGName(new_volume)
4012 feedback_fn("Cluster LVM configuration already in desired"
4013 " state, not changing")
4014 if self.op.drbd_helper is not None:
4015 new_helper = self.op.drbd_helper
4018 if new_helper != self.cfg.GetDRBDHelper():
4019 self.cfg.SetDRBDHelper(new_helper)
4021 feedback_fn("Cluster DRBD helper already in desired state,"
4023 if self.op.hvparams:
4024 self.cluster.hvparams = self.new_hvparams
4026 self.cluster.os_hvp = self.new_os_hvp
4027 if self.op.enabled_hypervisors is not None:
4028 self.cluster.hvparams = self.new_hvparams
4029 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4030 if self.op.beparams:
4031 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4032 if self.op.nicparams:
4033 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4035 self.cluster.ipolicy = self.new_ipolicy
4036 if self.op.osparams:
4037 self.cluster.osparams = self.new_osp
4038 if self.op.ndparams:
4039 self.cluster.ndparams = self.new_ndparams
4040 if self.op.diskparams:
4041 self.cluster.diskparams = self.new_diskparams
4042 if self.op.hv_state:
4043 self.cluster.hv_state_static = self.new_hv_state
4044 if self.op.disk_state:
4045 self.cluster.disk_state_static = self.new_disk_state
4047 if self.op.candidate_pool_size is not None:
4048 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4049 # we need to update the pool size here, otherwise the save will fail
4050 _AdjustCandidatePool(self, [])
4052 if self.op.maintain_node_health is not None:
4053 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4054 feedback_fn("Note: CONFD was disabled at build time, node health"
4055 " maintenance is not useful (still enabling it)")
4056 self.cluster.maintain_node_health = self.op.maintain_node_health
4058 if self.op.prealloc_wipe_disks is not None:
4059 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4061 if self.op.add_uids is not None:
4062 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4064 if self.op.remove_uids is not None:
4065 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4067 if self.op.uid_pool is not None:
4068 self.cluster.uid_pool = self.op.uid_pool
4070 if self.op.default_iallocator is not None:
4071 self.cluster.default_iallocator = self.op.default_iallocator
4073 if self.op.reserved_lvs is not None:
4074 self.cluster.reserved_lvs = self.op.reserved_lvs
4076 if self.op.use_external_mip_script is not None:
4077 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4079 def helper_os(aname, mods, desc):
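# Applies the DDM_ADD/DDM_REMOVE modifications in 'mods' to the cluster
# attribute named by 'aname' (e.g. hidden_os or blacklisted_os); 'desc' is
# only used in the feedback messages below.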
4081 lst = getattr(self.cluster, aname)
4082 for key, val in mods:
4083 if key == constants.DDM_ADD:
4085 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4088 elif key == constants.DDM_REMOVE:
4092 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4094 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4096 if self.op.hidden_os:
4097 helper_os("hidden_os", self.op.hidden_os, "hidden")
4099 if self.op.blacklisted_os:
4100 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4102 if self.op.master_netdev:
4103 master_params = self.cfg.GetMasterNetworkParameters()
4104 ems = self.cfg.GetUseExternalMipScript()
4105 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4106 self.cluster.master_netdev)
4107 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4109 result.Raise("Could not disable the master ip")
4110 feedback_fn("Changing master_netdev from %s to %s" %
4111 (master_params.netdev, self.op.master_netdev))
4112 self.cluster.master_netdev = self.op.master_netdev
4114 if self.op.master_netmask:
4115 master_params = self.cfg.GetMasterNetworkParameters()
4116 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4117 result = self.rpc.call_node_change_master_netmask(master_params.name,
4118 master_params.netmask,
4119 self.op.master_netmask,
4121 master_params.netdev)
4123 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4126 self.cluster.master_netmask = self.op.master_netmask
4128 self.cfg.Update(self.cluster, feedback_fn)
4130 if self.op.master_netdev:
4131 master_params = self.cfg.GetMasterNetworkParameters()
4132 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4133 self.op.master_netdev)
4134 ems = self.cfg.GetUseExternalMipScript()
4135 result = self.rpc.call_node_activate_master_ip(master_params.name,
4138 self.LogWarning("Could not re-enable the master ip on"
4139 " the master, please restart manually: %s",
4143 def _UploadHelper(lu, nodes, fname):
4144 """Helper for uploading a file and showing warnings.
4147 if os.path.exists(fname):
4148 result = lu.rpc.call_upload_file(nodes, fname)
4149 for to_node, to_result in result.items():
4150 msg = to_result.fail_msg
4152 msg = ("Copy of file %s to node %s failed: %s" %
4153 (fname, to_node, msg))
4154 lu.proc.LogWarning(msg)
4157 def _ComputeAncillaryFiles(cluster, redist):
4158 """Compute files external to Ganeti which need to be consistent.
4160 @type redist: boolean
4161 @param redist: Whether to include files which need to be redistributed
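@return: a tuple (files_all, files_opt, files_mc, files_vm) of sets of file
    names: files for all nodes, optional files, files for master candidates
    and files for vm-capable nodes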
4164 # Compute files for all nodes
4166 constants.SSH_KNOWN_HOSTS_FILE,
4167 constants.CONFD_HMAC_KEY,
4168 constants.CLUSTER_DOMAIN_SECRET_FILE,
4169 constants.SPICE_CERT_FILE,
4170 constants.SPICE_CACERT_FILE,
4171 constants.RAPI_USERS_FILE,
4175 files_all.update(constants.ALL_CERT_FILES)
4176 files_all.update(ssconf.SimpleStore().GetFileList())
4178 # we need to ship at least the RAPI certificate
4179 files_all.add(constants.RAPI_CERT_FILE)
4181 if cluster.modify_etc_hosts:
4182 files_all.add(constants.ETC_HOSTS)
4184 # Files which are optional; these must:
4185 # - be present in one other category as well
4186 # - either exist or not exist on all nodes of that category (mc, vm all)
4188 constants.RAPI_USERS_FILE,
4191 # Files which should only be on master candidates
4195 files_mc.add(constants.CLUSTER_CONF_FILE)
4197 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4199 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4201 # Files which should only be on VM-capable nodes
4202 files_vm = set(filename
4203 for hv_name in cluster.enabled_hypervisors
4204 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4206 files_opt |= set(filename
4207 for hv_name in cluster.enabled_hypervisors
4208 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4210 # Filenames in each category must be unique
4211 all_files_set = files_all | files_mc | files_vm
4212 assert (len(all_files_set) ==
4213 sum(map(len, [files_all, files_mc, files_vm]))), \
4214 "Found file listed in more than one file list"
4216 # Optional files must be present in one other category
4217 assert all_files_set.issuperset(files_opt), \
4218 "Optional file not in a different required list"
4220 return (files_all, files_opt, files_mc, files_vm)
4223 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4224 """Distribute additional files which are part of the cluster configuration.
4226 ConfigWriter takes care of distributing the config and ssconf files, but
4227 there are more files which should be distributed to all nodes. This function
4228 makes sure those are copied.
4230 @param lu: calling logical unit
4231 @param additional_nodes: list of nodes not in the config to distribute to
4232 @type additional_vm: boolean
4233 @param additional_vm: whether the additional nodes are vm-capable or not
4236 # Gather target nodes
4237 cluster = lu.cfg.GetClusterInfo()
4238 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4240 online_nodes = lu.cfg.GetOnlineNodeList()
4241 vm_nodes = lu.cfg.GetVmCapableNodeList()
4243 if additional_nodes is not None:
4244 online_nodes.extend(additional_nodes)
4246 vm_nodes.extend(additional_nodes)
4248 # Never distribute to master node
4249 for nodelist in [online_nodes, vm_nodes]:
4250 if master_info.name in nodelist:
4251 nodelist.remove(master_info.name)
4254 (files_all, _, files_mc, files_vm) = \
4255 _ComputeAncillaryFiles(cluster, True)
4257 # Never re-distribute configuration file from here
4258 assert not (constants.CLUSTER_CONF_FILE in files_all or
4259 constants.CLUSTER_CONF_FILE in files_vm)
4260 assert not files_mc, "Master candidates not handled in this function"
4263 (online_nodes, files_all),
4264 (vm_nodes, files_vm),
4268 for (node_list, files) in filemap:
4270 _UploadHelper(lu, node_list, fname)
4273 class LUClusterRedistConf(NoHooksLU):
4274 """Force the redistribution of cluster configuration.
4276 This is a very simple LU.
4281 def ExpandNames(self):
4282 self.needed_locks = {
4283 locking.LEVEL_NODE: locking.ALL_SET,
4285 self.share_locks[locking.LEVEL_NODE] = 1
4287 def Exec(self, feedback_fn):
4288 """Redistribute the configuration.
4291 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
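# The Update call above makes ConfigWriter write out and distribute the
# config and ssconf data again; the remaining ancillary files are then
# copied explicitly by the helper below.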
4292 _RedistributeAncillaryFiles(self)
4295 class LUClusterActivateMasterIp(NoHooksLU):
4296 """Activate the master IP on the master node.
4299 def Exec(self, feedback_fn):
4300 """Activate the master IP.
4303 master_params = self.cfg.GetMasterNetworkParameters()
4304 ems = self.cfg.GetUseExternalMipScript()
4305 result = self.rpc.call_node_activate_master_ip(master_params.name,
4307 result.Raise("Could not activate the master IP")
4310 class LUClusterDeactivateMasterIp(NoHooksLU):
4311 """Deactivate the master IP on the master node.
4314 def Exec(self, feedback_fn):
4315 """Deactivate the master IP.
4318 master_params = self.cfg.GetMasterNetworkParameters()
4319 ems = self.cfg.GetUseExternalMipScript()
4320 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4322 result.Raise("Could not deactivate the master IP")
4325 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4326 """Sleep and poll for an instance's disk to sync.
4329 if not instance.disks or disks is not None and not disks:
4332 disks = _ExpandCheckDisks(instance, disks)
4335 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4337 node = instance.primary_node
4340 lu.cfg.SetDiskID(dev, node)
4342 # TODO: Convert to utils.Retry
4345 degr_retries = 10 # in seconds, as we sleep 1 second each time
4349 cumul_degraded = False
4350 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4351 msg = rstats.fail_msg
4353 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4356 raise errors.RemoteError("Can't contact node %s for mirror data,"
4357 " aborting." % node)
4360 rstats = rstats.payload
4362 for i, mstat in enumerate(rstats):
4364 lu.LogWarning("Can't compute data for node %s/%s",
4365 node, disks[i].iv_name)
4368 cumul_degraded = (cumul_degraded or
4369 (mstat.is_degraded and mstat.sync_percent is None))
4370 if mstat.sync_percent is not None:
4372 if mstat.estimated_time is not None:
4373 rem_time = ("%s remaining (estimated)" %
4374 utils.FormatSeconds(mstat.estimated_time))
4375 max_time = mstat.estimated_time
4377 rem_time = "no time estimate"
4378 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4379 (disks[i].iv_name, mstat.sync_percent, rem_time))
4381 # if we're done but degraded, let's do a few small retries, to
4382 # make sure we see a stable and not transient situation; therefore
4383 # we force restart of the loop
4384 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4385 logging.info("Degraded disks found, %d retries left", degr_retries)
4393 time.sleep(min(60, max_time))
4396 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
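# The return value is True only if every polled disk ended up fully synced;
# any device still degraded after the retries above makes this False.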
4397 return not cumul_degraded
4400 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4401 """Check that mirrors are not degraded.
4403 The ldisk parameter, if True, will change the test from the
4404 is_degraded attribute (which represents overall non-ok status for
4405 the device(s)) to the ldisk (representing the local storage status).
4408 lu.cfg.SetDiskID(dev, node)
4412 if on_primary or dev.AssembleOnSecondary():
4413 rstats = lu.rpc.call_blockdev_find(node, dev)
4414 msg = rstats.fail_msg
4416 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4418 elif not rstats.payload:
4419 lu.LogWarning("Can't find disk on node %s", node)
4423 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4425 result = result and not rstats.payload.is_degraded
4428 for child in dev.children:
4429 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4434 class LUOobCommand(NoHooksLU):
4435 """Logical unit for OOB handling.
4439 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
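# The OOB commands listed above must not target the master node; CheckPrereq
# below rejects such requests and, where the master supports OOB, points the
# administrator at the master's own OOB handler for manual use.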
4441 def ExpandNames(self):
4442 """Gather locks we need.
4445 if self.op.node_names:
4446 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4447 lock_names = self.op.node_names
4449 lock_names = locking.ALL_SET
4451 self.needed_locks = {
4452 locking.LEVEL_NODE: lock_names,
4455 def CheckPrereq(self):
4456 """Check prerequisites.
4459 - the node exists in the configuration
4462 Any errors are signaled by raising errors.OpPrereqError.
4466 self.master_node = self.cfg.GetMasterNode()
4468 assert self.op.power_delay >= 0.0
4470 if self.op.node_names:
4471 if (self.op.command in self._SKIP_MASTER and
4472 self.master_node in self.op.node_names):
4473 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4474 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4476 if master_oob_handler:
4477 additional_text = ("run '%s %s %s' if you want to operate on the"
4478 " master regardless") % (master_oob_handler,
4482 additional_text = "it does not support out-of-band operations"
4484 raise errors.OpPrereqError(("Operating on the master node %s is not"
4485 " allowed for %s; %s") %
4486 (self.master_node, self.op.command,
4487 additional_text), errors.ECODE_INVAL)
4489 self.op.node_names = self.cfg.GetNodeList()
4490 if self.op.command in self._SKIP_MASTER:
4491 self.op.node_names.remove(self.master_node)
4493 if self.op.command in self._SKIP_MASTER:
4494 assert self.master_node not in self.op.node_names
4496 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4498 raise errors.OpPrereqError("Node %s not found" % node_name,
4501 self.nodes.append(node)
4503 if (not self.op.ignore_status and
4504 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4505 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4506 " not marked offline") % node_name,
4509 def Exec(self, feedback_fn):
4510 """Execute OOB and return result if we expect any.
4513 master_node = self.master_node
4516 for idx, node in enumerate(utils.NiceSort(self.nodes,
4517 key=lambda node: node.name)):
4518 node_entry = [(constants.RS_NORMAL, node.name)]
4519 ret.append(node_entry)
4521 oob_program = _SupportsOob(self.cfg, node)
4524 node_entry.append((constants.RS_UNAVAIL, None))
4527 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4528 self.op.command, oob_program, node.name)
4529 result = self.rpc.call_run_oob(master_node, oob_program,
4530 self.op.command, node.name,
4534 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4535 node.name, result.fail_msg)
4536 node_entry.append((constants.RS_NODATA, None))
4539 self._CheckPayload(result)
4540 except errors.OpExecError, err:
4541 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4543 node_entry.append((constants.RS_NODATA, None))
4545 if self.op.command == constants.OOB_HEALTH:
4546 # For health we should log important events
4547 for item, status in result.payload:
4548 if status in [constants.OOB_STATUS_WARNING,
4549 constants.OOB_STATUS_CRITICAL]:
4550 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4551 item, node.name, status)
4553 if self.op.command == constants.OOB_POWER_ON:
4555 elif self.op.command == constants.OOB_POWER_OFF:
4556 node.powered = False
4557 elif self.op.command == constants.OOB_POWER_STATUS:
4558 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4559 if powered != node.powered:
4560 logging.warning(("Recorded power state (%s) of node '%s' does not"
4561 " match actual power state (%s)"), node.powered,
4564 # For configuration changing commands we should update the node
4565 if self.op.command in (constants.OOB_POWER_ON,
4566 constants.OOB_POWER_OFF):
4567 self.cfg.Update(node, feedback_fn)
4569 node_entry.append((constants.RS_NORMAL, result.payload))
4571 if (self.op.command == constants.OOB_POWER_ON and
4572 idx < len(self.nodes) - 1):
4573 time.sleep(self.op.power_delay)
4577 def _CheckPayload(self, result):
4578 """Checks if the payload is valid.
4580 @param result: RPC result
4581 @raises errors.OpExecError: If payload is not valid
4585 if self.op.command == constants.OOB_HEALTH:
4586 if not isinstance(result.payload, list):
4587 errs.append("command 'health' is expected to return a list but got %s" %
4588 type(result.payload))
4590 for item, status in result.payload:
4591 if status not in constants.OOB_STATUSES:
4592 errs.append("health item '%s' has invalid status '%s'" %
4595 if self.op.command == constants.OOB_POWER_STATUS:
4596 if not isinstance(result.payload, dict):
4597 errs.append("power-status is expected to return a dict but got %s" %
4598 type(result.payload))
4600 if self.op.command in [
4601 constants.OOB_POWER_ON,
4602 constants.OOB_POWER_OFF,
4603 constants.OOB_POWER_CYCLE,
4605 if result.payload is not None:
4606 errs.append("%s is expected to not return payload but got '%s'" %
4607 (self.op.command, result.payload))
4610 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4611 utils.CommaJoin(errs))
4614 class _OsQuery(_QueryBase):
4615 FIELDS = query.OS_FIELDS
4617 def ExpandNames(self, lu):
4618 # Lock all nodes in shared mode
4619 # Temporary removal of locks, should be reverted later
4620 # TODO: reintroduce locks when they are lighter-weight
4621 lu.needed_locks = {}
4622 #self.share_locks[locking.LEVEL_NODE] = 1
4623 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4625 # The following variables interact with _QueryBase._GetNames
4627 self.wanted = self.names
4629 self.wanted = locking.ALL_SET
4631 self.do_locking = self.use_locking
4633 def DeclareLocks(self, lu, level):
4637 def _DiagnoseByOS(rlist):
4638 """Remaps a per-node return list into an a per-os per-node dictionary
4640 @param rlist: a map with node names as keys and OS objects as values
4643 @return: a dictionary with osnames as keys and as value another
4644 map, with nodes as keys and tuples of (path, status, diagnose,
4645 variants, parameters, api_versions) as values, eg::
4647 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4648 (/srv/..., False, "invalid api")],
4649 "node2": [(/srv/..., True, "", [], [])]}
4654 # we build here the list of nodes that didn't fail the RPC (at RPC
4655 # level), so that nodes with a non-responding node daemon don't
4656 # make all OSes invalid
4657 good_nodes = [node_name for node_name in rlist
4658 if not rlist[node_name].fail_msg]
4659 for node_name, nr in rlist.items():
4660 if nr.fail_msg or not nr.payload:
4662 for (name, path, status, diagnose, variants,
4663 params, api_versions) in nr.payload:
4664 if name not in all_os:
4665 # build a list of nodes for this os containing empty lists
4666 # for each node in node_list
4668 for nname in good_nodes:
4669 all_os[name][nname] = []
4670 # convert params from [name, help] to (name, help)
4671 params = [tuple(v) for v in params]
4672 all_os[name][node_name].append((path, status, diagnose,
4673 variants, params, api_versions))
4676 def _GetQueryData(self, lu):
4677 """Computes the list of nodes and their attributes.
4680 # Locking is not used
4681 assert not (compat.any(lu.glm.is_owned(level)
4682 for level in locking.LEVELS
4683 if level != locking.LEVEL_CLUSTER) or
4684 self.do_locking or self.use_locking)
4686 valid_nodes = [node.name
4687 for node in lu.cfg.GetAllNodesInfo().values()
4688 if not node.offline and node.vm_capable]
4689 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4690 cluster = lu.cfg.GetClusterInfo()
4694 for (os_name, os_data) in pol.items():
4695 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4696 hidden=(os_name in cluster.hidden_os),
4697 blacklisted=(os_name in cluster.blacklisted_os))
4701 api_versions = set()
4703 for idx, osl in enumerate(os_data.values()):
4704 info.valid = bool(info.valid and osl and osl[0][1])
4708 (node_variants, node_params, node_api) = osl[0][3:6]
4711 variants.update(node_variants)
4712 parameters.update(node_params)
4713 api_versions.update(node_api)
4715 # Filter out inconsistent values
4716 variants.intersection_update(node_variants)
4717 parameters.intersection_update(node_params)
4718 api_versions.intersection_update(node_api)
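# Example with illustrative values: if one node reports variants
# {"wheezy", "jessie"} and another only {"wheezy"}, the OS ends up
# advertising just the common variant "wheezy".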
4720 info.variants = list(variants)
4721 info.parameters = list(parameters)
4722 info.api_versions = list(api_versions)
4724 data[os_name] = info
4726 # Prepare data in requested order
4727 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4731 class LUOsDiagnose(NoHooksLU):
4732 """Logical unit for OS diagnose/query.
4738 def _BuildFilter(fields, names):
4739 """Builds a filter for querying OSes.
4742 name_filter = qlang.MakeSimpleFilter("name", names)
4744 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4745 # respective field is not requested
4746 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4747 for fname in ["hidden", "blacklisted"]
4748 if fname not in fields]
4749 if "valid" not in fields:
4750 status_filter.append([qlang.OP_TRUE, "valid"])
4753 status_filter.insert(0, qlang.OP_AND)
4755 status_filter = None
4757 if name_filter and status_filter:
4758 return [qlang.OP_AND, name_filter, status_filter]
4762 return status_filter
4764 def CheckArguments(self):
4765 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4766 self.op.output_fields, False)
4768 def ExpandNames(self):
4769 self.oq.ExpandNames(self)
4771 def Exec(self, feedback_fn):
4772 return self.oq.OldStyleQuery(self)
4775 class LUNodeRemove(LogicalUnit):
4776 """Logical unit for removing a node.
4779 HPATH = "node-remove"
4780 HTYPE = constants.HTYPE_NODE
4782 def BuildHooksEnv(self):
4785 This doesn't run on the target node in the pre phase as a failed
4786 node would then be impossible to remove.
4790 "OP_TARGET": self.op.node_name,
4791 "NODE_NAME": self.op.node_name,
4794 def BuildHooksNodes(self):
4795 """Build hooks nodes.
4798 all_nodes = self.cfg.GetNodeList()
4800 all_nodes.remove(self.op.node_name)
4802 logging.warning("Node '%s', which is about to be removed, was not found"
4803 " in the list of all nodes", self.op.node_name)
4804 return (all_nodes, all_nodes)
4806 def CheckPrereq(self):
4807 """Check prerequisites.
4810 - the node exists in the configuration
4811 - it does not have primary or secondary instances
4812 - it's not the master
4814 Any errors are signaled by raising errors.OpPrereqError.
4817 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4818 node = self.cfg.GetNodeInfo(self.op.node_name)
4819 assert node is not None
4821 masternode = self.cfg.GetMasterNode()
4822 if node.name == masternode:
4823 raise errors.OpPrereqError("Node is the master node, failover to another"
4824 " node is required", errors.ECODE_INVAL)
4826 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4827 if node.name in instance.all_nodes:
4828 raise errors.OpPrereqError("Instance %s is still running on the node,"
4829 " please remove first" % instance_name,
4831 self.op.node_name = node.name
4834 def Exec(self, feedback_fn):
4835 """Removes the node from the cluster.
4839 logging.info("Stopping the node daemon and removing configs from node %s",
4842 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4844 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4847 # Promote nodes to master candidate as needed
4848 _AdjustCandidatePool(self, exceptions=[node.name])
4849 self.context.RemoveNode(node.name)
4851 # Run post hooks on the node before it's removed
4852 _RunPostHook(self, node.name)
4854 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4855 msg = result.fail_msg
4857 self.LogWarning("Errors encountered on the remote node while leaving"
4858 " the cluster: %s", msg)
4860 # Remove node from our /etc/hosts
4861 if self.cfg.GetClusterInfo().modify_etc_hosts:
4862 master_node = self.cfg.GetMasterNode()
4863 result = self.rpc.call_etc_hosts_modify(master_node,
4864 constants.ETC_HOSTS_REMOVE,
4866 result.Raise("Can't update hosts file with new host data")
4867 _RedistributeAncillaryFiles(self)
4870 class _NodeQuery(_QueryBase):
4871 FIELDS = query.NODE_FIELDS
4873 def ExpandNames(self, lu):
4874 lu.needed_locks = {}
4875 lu.share_locks = _ShareAll()
4878 self.wanted = _GetWantedNodes(lu, self.names)
4880 self.wanted = locking.ALL_SET
4882 self.do_locking = (self.use_locking and
4883 query.NQ_LIVE in self.requested_data)
4886 # If any non-static field is requested we need to lock the nodes
4887 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4889 def DeclareLocks(self, lu, level):
4892 def _GetQueryData(self, lu):
4893 """Computes the list of nodes and their attributes.
4896 all_info = lu.cfg.GetAllNodesInfo()
4898 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4900 # Gather data as requested
4901 if query.NQ_LIVE in self.requested_data:
4902 # filter out non-vm_capable nodes
4903 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4905 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4906 [lu.cfg.GetHypervisorType()])
4907 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4908 for (name, nresult) in node_data.items()
4909 if not nresult.fail_msg and nresult.payload)
4913 if query.NQ_INST in self.requested_data:
4914 node_to_primary = dict([(name, set()) for name in nodenames])
4915 node_to_secondary = dict([(name, set()) for name in nodenames])
4917 inst_data = lu.cfg.GetAllInstancesInfo()
4919 for inst in inst_data.values():
4920 if inst.primary_node in node_to_primary:
4921 node_to_primary[inst.primary_node].add(inst.name)
4922 for secnode in inst.secondary_nodes:
4923 if secnode in node_to_secondary:
4924 node_to_secondary[secnode].add(inst.name)
4926 node_to_primary = None
4927 node_to_secondary = None
4929 if query.NQ_OOB in self.requested_data:
4930 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4931 for name, node in all_info.iteritems())
4935 if query.NQ_GROUP in self.requested_data:
4936 groups = lu.cfg.GetAllNodeGroupsInfo()
4940 return query.NodeQueryData([all_info[name] for name in nodenames],
4941 live_data, lu.cfg.GetMasterNode(),
4942 node_to_primary, node_to_secondary, groups,
4943 oob_support, lu.cfg.GetClusterInfo())
4946 class LUNodeQuery(NoHooksLU):
4947 """Logical unit for querying nodes.
4950 # pylint: disable=W0142
4953 def CheckArguments(self):
4954 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4955 self.op.output_fields, self.op.use_locking)
4957 def ExpandNames(self):
4958 self.nq.ExpandNames(self)
4960 def DeclareLocks(self, level):
4961 self.nq.DeclareLocks(self, level)
4963 def Exec(self, feedback_fn):
4964 return self.nq.OldStyleQuery(self)
4967 class LUNodeQueryvols(NoHooksLU):
4968 """Logical unit for getting volumes on node(s).
4972 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4973 _FIELDS_STATIC = utils.FieldSet("node")
4975 def CheckArguments(self):
4976 _CheckOutputFields(static=self._FIELDS_STATIC,
4977 dynamic=self._FIELDS_DYNAMIC,
4978 selected=self.op.output_fields)
4980 def ExpandNames(self):
4981 self.share_locks = _ShareAll()
4982 self.needed_locks = {}
4984 if not self.op.nodes:
4985 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4987 self.needed_locks[locking.LEVEL_NODE] = \
4988 _GetWantedNodes(self, self.op.nodes)
4990 def Exec(self, feedback_fn):
4991 """Computes the list of nodes and their attributes.
4994 nodenames = self.owned_locks(locking.LEVEL_NODE)
4995 volumes = self.rpc.call_node_volumes(nodenames)
4997 ilist = self.cfg.GetAllInstancesInfo()
4998 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5001 for node in nodenames:
5002 nresult = volumes[node]
5005 msg = nresult.fail_msg
5007 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5010 node_vols = sorted(nresult.payload,
5011 key=operator.itemgetter("dev"))
5013 for vol in node_vols:
5015 for field in self.op.output_fields:
5018 elif field == "phys":
5022 elif field == "name":
5024 elif field == "size":
5025 val = int(float(vol["size"]))
5026 elif field == "instance":
5027 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5029 raise errors.ParameterError(field)
5030 node_output.append(str(val))
5032 output.append(node_output)
5037 class LUNodeQueryStorage(NoHooksLU):
5038 """Logical unit for getting information on storage units on node(s).
5041 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5044 def CheckArguments(self):
5045 _CheckOutputFields(static=self._FIELDS_STATIC,
5046 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5047 selected=self.op.output_fields)
5049 def ExpandNames(self):
5050 self.share_locks = _ShareAll()
5051 self.needed_locks = {}
5054 self.needed_locks[locking.LEVEL_NODE] = \
5055 _GetWantedNodes(self, self.op.nodes)
5057 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5059 def Exec(self, feedback_fn):
5060 """Computes the list of nodes and their attributes.
5063 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5065 # Always get name to sort by
5066 if constants.SF_NAME in self.op.output_fields:
5067 fields = self.op.output_fields[:]
5069 fields = [constants.SF_NAME] + self.op.output_fields
5071 # Never ask for node or type as it's only known to the LU
5072 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5073 while extra in fields:
5074 fields.remove(extra)
5076 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5077 name_idx = field_idx[constants.SF_NAME]
5079 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5080 data = self.rpc.call_storage_list(self.nodes,
5081 self.op.storage_type, st_args,
5082 self.op.name, fields)
5086 for node in utils.NiceSort(self.nodes):
5087 nresult = data[node]
5091 msg = nresult.fail_msg
5093 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5096 rows = dict([(row[name_idx], row) for row in nresult.payload])
5098 for name in utils.NiceSort(rows.keys()):
5103 for field in self.op.output_fields:
5104 if field == constants.SF_NODE:
5106 elif field == constants.SF_TYPE:
5107 val = self.op.storage_type
5108 elif field in field_idx:
5109 val = row[field_idx[field]]
5111 raise errors.ParameterError(field)
5120 class _InstanceQuery(_QueryBase):
5121 FIELDS = query.INSTANCE_FIELDS
5123 def ExpandNames(self, lu):
5124 lu.needed_locks = {}
5125 lu.share_locks = _ShareAll()
5128 self.wanted = _GetWantedInstances(lu, self.names)
5130 self.wanted = locking.ALL_SET
5132 self.do_locking = (self.use_locking and
5133 query.IQ_LIVE in self.requested_data)
5135 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5136 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5137 lu.needed_locks[locking.LEVEL_NODE] = []
5138 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5140 self.do_grouplocks = (self.do_locking and
5141 query.IQ_NODES in self.requested_data)
5143 def DeclareLocks(self, lu, level):
5145 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5146 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5148 # Lock all groups used by instances optimistically; this requires going
5149 # via the node before it's locked, requiring verification later on
5150 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5152 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5153 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5154 elif level == locking.LEVEL_NODE:
5155 lu._LockInstancesNodes() # pylint: disable=W0212
5158 def _CheckGroupLocks(lu):
5159 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5160 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5162 # Check if node groups for locked instances are still correct
5163 for instance_name in owned_instances:
5164 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5166 def _GetQueryData(self, lu):
5167 """Computes the list of instances and their attributes.
5170 if self.do_grouplocks:
5171 self._CheckGroupLocks(lu)
5173 cluster = lu.cfg.GetClusterInfo()
5174 all_info = lu.cfg.GetAllInstancesInfo()
5176 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5178 instance_list = [all_info[name] for name in instance_names]
5179 nodes = frozenset(itertools.chain(*(inst.all_nodes
5180 for inst in instance_list)))
5181 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5184 wrongnode_inst = set()
5186 # Gather data as requested
5187 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5189 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5191 result = node_data[name]
5193 # offline nodes will be in both lists
5194 assert result.fail_msg
5195 offline_nodes.append(name)
5197 bad_nodes.append(name)
5198 elif result.payload:
5199 for inst in result.payload:
5200 if inst in all_info:
5201 if all_info[inst].primary_node == name:
5202 live_data.update(result.payload)
5204 wrongnode_inst.add(inst)
5206 # orphan instance; we don't list it here as we don't
5207 # handle this case yet in the output of instance listing
5208 logging.warning("Orphan instance '%s' found on node %s",
5210 # else no instance is alive
5214 if query.IQ_DISKUSAGE in self.requested_data:
5215 disk_usage = dict((inst.name,
5216 _ComputeDiskSize(inst.disk_template,
5217 [{constants.IDISK_SIZE: disk.size}
5218 for disk in inst.disks]))
5219 for inst in instance_list)
5223 if query.IQ_CONSOLE in self.requested_data:
5225 for inst in instance_list:
5226 if inst.name in live_data:
5227 # Instance is running
5228 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5230 consinfo[inst.name] = None
5231 assert set(consinfo.keys()) == set(instance_names)
5235 if query.IQ_NODES in self.requested_data:
5236 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5238 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5239 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5240 for uuid in set(map(operator.attrgetter("group"),
5246 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5247 disk_usage, offline_nodes, bad_nodes,
5248 live_data, wrongnode_inst, consinfo,
5252 class LUQuery(NoHooksLU):
5253 """Query for resources/items of a certain kind.
5256 # pylint: disable=W0142
5259 def CheckArguments(self):
5260 qcls = _GetQueryImplementation(self.op.what)
5262 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5264 def ExpandNames(self):
5265 self.impl.ExpandNames(self)
5267 def DeclareLocks(self, level):
5268 self.impl.DeclareLocks(self, level)
5270 def Exec(self, feedback_fn):
5271 return self.impl.NewStyleQuery(self)
5274 class LUQueryFields(NoHooksLU):
5275 """Query for resources/items of a certain kind.
5278 # pylint: disable=W0142
5281 def CheckArguments(self):
5282 self.qcls = _GetQueryImplementation(self.op.what)
5284 def ExpandNames(self):
5285 self.needed_locks = {}
5287 def Exec(self, feedback_fn):
5288 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5291 class LUNodeModifyStorage(NoHooksLU):
5292 """Logical unit for modifying a storage volume on a node.
5297 def CheckArguments(self):
5298 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5300 storage_type = self.op.storage_type
5303 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5305 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5306 " modified" % storage_type,
5309 diff = set(self.op.changes.keys()) - modifiable
5311 raise errors.OpPrereqError("The following fields can not be modified for"
5312 " storage units of type '%s': %r" %
5313 (storage_type, list(diff)),
5316 def ExpandNames(self):
5317 self.needed_locks = {
5318 locking.LEVEL_NODE: self.op.node_name,
5321 def Exec(self, feedback_fn):
5322 """Computes the list of nodes and their attributes.
5325 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5326 result = self.rpc.call_storage_modify(self.op.node_name,
5327 self.op.storage_type, st_args,
5328 self.op.name, self.op.changes)
5329 result.Raise("Failed to modify storage unit '%s' on %s" %
5330 (self.op.name, self.op.node_name))
5333 class LUNodeAdd(LogicalUnit):
5334 """Logical unit for adding node to the cluster.
5338 HTYPE = constants.HTYPE_NODE
5339 _NFLAGS = ["master_capable", "vm_capable"]
5341 def CheckArguments(self):
5342 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5343 # validate/normalize the node name
5344 self.hostname = netutils.GetHostname(name=self.op.node_name,
5345 family=self.primary_ip_family)
5346 self.op.node_name = self.hostname.name
5348 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5349 raise errors.OpPrereqError("Cannot readd the master node",
5352 if self.op.readd and self.op.group:
5353 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5354 " being readded", errors.ECODE_INVAL)
5356 def BuildHooksEnv(self):
5359 This will run on all nodes before, and on all nodes + the new node after.
5363 "OP_TARGET": self.op.node_name,
5364 "NODE_NAME": self.op.node_name,
5365 "NODE_PIP": self.op.primary_ip,
5366 "NODE_SIP": self.op.secondary_ip,
5367 "MASTER_CAPABLE": str(self.op.master_capable),
5368 "VM_CAPABLE": str(self.op.vm_capable),
5371 def BuildHooksNodes(self):
5372 """Build hooks nodes.
5375 # Exclude added node
5376 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5377 post_nodes = pre_nodes + [self.op.node_name, ]
5379 return (pre_nodes, post_nodes)
5381 def CheckPrereq(self):
5382 """Check prerequisites.
5385 - the new node is not already in the config
5387 - its parameters (single/dual homed) match the cluster
5389 Any errors are signaled by raising errors.OpPrereqError.
5393 hostname = self.hostname
5394 node = hostname.name
5395 primary_ip = self.op.primary_ip = hostname.ip
5396 if self.op.secondary_ip is None:
5397 if self.primary_ip_family == netutils.IP6Address.family:
5398 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5399 " IPv4 address must be given as secondary",
5401 self.op.secondary_ip = primary_ip
5403 secondary_ip = self.op.secondary_ip
5404 if not netutils.IP4Address.IsValid(secondary_ip):
5405 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5406 " address" % secondary_ip, errors.ECODE_INVAL)
5408 node_list = cfg.GetNodeList()
5409 if not self.op.readd and node in node_list:
5410 raise errors.OpPrereqError("Node %s is already in the configuration" %
5411 node, errors.ECODE_EXISTS)
5412 elif self.op.readd and node not in node_list:
5413 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5416 self.changed_primary_ip = False
5418 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5419 if self.op.readd and node == existing_node_name:
5420 if existing_node.secondary_ip != secondary_ip:
5421 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5422 " address configuration as before",
5424 if existing_node.primary_ip != primary_ip:
5425 self.changed_primary_ip = True
5429 if (existing_node.primary_ip == primary_ip or
5430 existing_node.secondary_ip == primary_ip or
5431 existing_node.primary_ip == secondary_ip or
5432 existing_node.secondary_ip == secondary_ip):
5433 raise errors.OpPrereqError("New node ip address(es) conflict with"
5434 " existing node %s" % existing_node.name,
5435 errors.ECODE_NOTUNIQUE)
5437 # After this 'if' block, None is no longer a valid value for the
5438 # _capable op attributes
5440 old_node = self.cfg.GetNodeInfo(node)
5441 assert old_node is not None, "Can't retrieve locked node %s" % node
5442 for attr in self._NFLAGS:
5443 if getattr(self.op, attr) is None:
5444 setattr(self.op, attr, getattr(old_node, attr))
5446 for attr in self._NFLAGS:
5447 if getattr(self.op, attr) is None:
5448 setattr(self.op, attr, True)
5450 if self.op.readd and not self.op.vm_capable:
5451 pri, sec = cfg.GetNodeInstances(node)
5453 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5454 " flag set to false, but it already holds"
5455 " instances" % node,
5458 # check that the type of the node (single versus dual homed) is the
5459 # same as for the master
5460 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5461 master_singlehomed = myself.secondary_ip == myself.primary_ip
5462 newbie_singlehomed = secondary_ip == primary_ip
5463 if master_singlehomed != newbie_singlehomed:
5464 if master_singlehomed:
5465 raise errors.OpPrereqError("The master has no secondary ip but the"
5466 " new node has one",
5469 raise errors.OpPrereqError("The master has a secondary ip but the"
5470 " new node doesn't have one",
5473 # checks reachability
5474 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5475 raise errors.OpPrereqError("Node not reachable by ping",
5476 errors.ECODE_ENVIRON)
5478 if not newbie_singlehomed:
5479 # check reachability from my secondary ip to newbie's secondary ip
5480 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5481 source=myself.secondary_ip):
5482 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5483 " based ping to node daemon port",
5484 errors.ECODE_ENVIRON)
5491 if self.op.master_capable:
5492 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5494 self.master_candidate = False
5497 self.new_node = old_node
5499 node_group = cfg.LookupNodeGroup(self.op.group)
5500 self.new_node = objects.Node(name=node,
5501 primary_ip=primary_ip,
5502 secondary_ip=secondary_ip,
5503 master_candidate=self.master_candidate,
5504 offline=False, drained=False,
5507 if self.op.ndparams:
5508 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5510 if self.op.hv_state:
5511 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5513 if self.op.disk_state:
5514 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5516 def Exec(self, feedback_fn):
5517 """Adds the new node to the cluster.
5520 new_node = self.new_node
5521 node = new_node.name
5523 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5526 # We are adding a new node, so we assume it's powered
5527 new_node.powered = True
5529 # for re-adds, reset the offline/drained/master-candidate flags;
5530 # we need to reset here, otherwise offline would prevent RPC calls
5531 # later in the procedure; this also means that if the re-add
5532 # fails, we are left with a non-offlined, broken node
5534 new_node.drained = new_node.offline = False # pylint: disable=W0201
5535 self.LogInfo("Readding a node, the offline/drained flags were reset")
5536 # if we demote the node, we do cleanup later in the procedure
5537 new_node.master_candidate = self.master_candidate
5538 if self.changed_primary_ip:
5539 new_node.primary_ip = self.op.primary_ip
5541 # copy the master/vm_capable flags
5542 for attr in self._NFLAGS:
5543 setattr(new_node, attr, getattr(self.op, attr))
5545 # notify the user about any possible mc promotion
5546 if new_node.master_candidate:
5547 self.LogInfo("Node will be a master candidate")
5549 if self.op.ndparams:
5550 new_node.ndparams = self.op.ndparams
5552 new_node.ndparams = {}
5554 if self.op.hv_state:
5555 new_node.hv_state_static = self.new_hv_state
5557 if self.op.disk_state:
5558 new_node.disk_state_static = self.new_disk_state
5560 # check connectivity
5561 result = self.rpc.call_version([node])[node]
5562 result.Raise("Can't get version information from node %s" % node)
5563 if constants.PROTOCOL_VERSION == result.payload:
5564 logging.info("Communication to node %s fine, sw version %s match",
5565 node, result.payload)
5567 raise errors.OpExecError("Version mismatch master version %s,"
5568 " node version %s" %
5569 (constants.PROTOCOL_VERSION, result.payload))
5571 # Add node to our /etc/hosts, and add key to known_hosts
5572 if self.cfg.GetClusterInfo().modify_etc_hosts:
5573 master_node = self.cfg.GetMasterNode()
5574 result = self.rpc.call_etc_hosts_modify(master_node,
5575 constants.ETC_HOSTS_ADD,
5578 result.Raise("Can't update hosts file with new host data")
5580 if new_node.secondary_ip != new_node.primary_ip:
5581 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5584 node_verify_list = [self.cfg.GetMasterNode()]
5585 node_verify_param = {
5586 constants.NV_NODELIST: ([node], {}),
5587 # TODO: do a node-net-test as well?
5590 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5591 self.cfg.GetClusterName())
5592 for verifier in node_verify_list:
5593 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5594 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5596 for failed in nl_payload:
5597 feedback_fn("ssh/hostname verification failed"
5598 " (checking from %s): %s" %
5599 (verifier, nl_payload[failed]))
5600 raise errors.OpExecError("ssh/hostname verification failed")
5603 _RedistributeAncillaryFiles(self)
5604 self.context.ReaddNode(new_node)
5605 # make sure we redistribute the config
5606 self.cfg.Update(new_node, feedback_fn)
5607 # and make sure the new node will not have old files around
5608 if not new_node.master_candidate:
5609 result = self.rpc.call_node_demote_from_mc(new_node.name)
5610 msg = result.fail_msg
5612 self.LogWarning("Node failed to demote itself from master"
5613 " candidate status: %s" % msg)
5615 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5616 additional_vm=self.op.vm_capable)
5617 self.context.AddNode(new_node, self.proc.GetECId())
5620 class LUNodeSetParams(LogicalUnit):
5621 """Modifies the parameters of a node.
5623 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5624 to the node role (as _ROLE_*)
5625 @cvar _R2F: a dictionary from node role to tuples of flags
5626 @cvar _FLAGS: a list of attribute names corresponding to the flags
5629 HPATH = "node-modify"
5630 HTYPE = constants.HTYPE_NODE
5632 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5634 (True, False, False): _ROLE_CANDIDATE,
5635 (False, True, False): _ROLE_DRAINED,
5636 (False, False, True): _ROLE_OFFLINE,
5637 (False, False, False): _ROLE_REGULAR,
5639 _R2F = dict((v, k) for k, v in _F2R.items())
5640 _FLAGS = ["master_candidate", "drained", "offline"]
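# Example of the mapping above: flags (master_candidate=True, drained=False,
# offline=False) translate to _ROLE_CANDIDATE, while all-False flags mean
# _ROLE_REGULAR; _R2F provides the reverse lookup from a role back to the
# flag tuple.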
5642 def CheckArguments(self):
5643 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5644 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5645 self.op.master_capable, self.op.vm_capable,
5646 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5648 if all_mods.count(None) == len(all_mods):
5649 raise errors.OpPrereqError("Please pass at least one modification",
5651 if all_mods.count(True) > 1:
5652 raise errors.OpPrereqError("Can't set the node into more than one"
5653 " state at the same time",
5656 # Boolean value that tells us whether we might be demoting from MC
5657 self.might_demote = (self.op.master_candidate == False or
5658 self.op.offline == True or
5659 self.op.drained == True or
5660 self.op.master_capable == False)
5662 if self.op.secondary_ip:
5663 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5664 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5665 " address" % self.op.secondary_ip,
5668 self.lock_all = self.op.auto_promote and self.might_demote
5669 self.lock_instances = self.op.secondary_ip is not None
5671 def _InstanceFilter(self, instance):
5672 """Filter for getting affected instances.
5675 return (instance.disk_template in constants.DTS_INT_MIRROR and
5676 self.op.node_name in instance.all_nodes)
5678 def ExpandNames(self):
5679 if self.lock_all:
5680 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5681 else:
5682 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5684 # Since modifying a node can have severe effects on currently running
5685 # operations, the resource lock is at least acquired in shared mode
5686 self.needed_locks[locking.LEVEL_NODE_RES] = \
5687 self.needed_locks[locking.LEVEL_NODE]
5689 # Get node resource and instance locks in shared mode; they are not used
5690 # for anything but read-only access
5691 self.share_locks[locking.LEVEL_NODE_RES] = 1
5692 self.share_locks[locking.LEVEL_INSTANCE] = 1
5694 if self.lock_instances:
5695 self.needed_locks[locking.LEVEL_INSTANCE] = \
5696 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5698 def BuildHooksEnv(self):
5701 This runs on the master node.
5705 "OP_TARGET": self.op.node_name,
5706 "MASTER_CANDIDATE": str(self.op.master_candidate),
5707 "OFFLINE": str(self.op.offline),
5708 "DRAINED": str(self.op.drained),
5709 "MASTER_CAPABLE": str(self.op.master_capable),
5710 "VM_CAPABLE": str(self.op.vm_capable),
5713 def BuildHooksNodes(self):
5714 """Build hooks nodes.
5717 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5720 def CheckPrereq(self):
5721 """Check prerequisites.
5723 This only checks the instance list against the existing names.
5726 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5728 if self.lock_instances:
5729 affected_instances = \
5730 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5732 # Verify instance locks
5733 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5734 wanted_instances = frozenset(affected_instances.keys())
5735 if wanted_instances - owned_instances:
5736 raise errors.OpPrereqError("Instances affected by changing node %s's"
5737 " secondary IP address have changed since"
5738 " locks were acquired, wanted '%s', have"
5739 " '%s'; retry the operation" %
5741 utils.CommaJoin(wanted_instances),
5742 utils.CommaJoin(owned_instances)),
5745 affected_instances = None
5747 if (self.op.master_candidate is not None or
5748 self.op.drained is not None or
5749 self.op.offline is not None):
5750 # we can't change the master's node flags
5751 if self.op.node_name == self.cfg.GetMasterNode():
5752 raise errors.OpPrereqError("The master role can be changed"
5753 " only via master-failover",
5756 if self.op.master_candidate and not node.master_capable:
5757 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5758 " it a master candidate" % node.name,
5761 if self.op.vm_capable == False:
5762 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5763 if ipri or isec:
5764 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5765 " the vm_capable flag" % node.name,
5768 if node.master_candidate and self.might_demote and not self.lock_all:
5769 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5770 # check if after removing the current node, we're missing master
5771 # candidates
5772 (mc_remaining, mc_should, _) = \
5773 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5774 if mc_remaining < mc_should:
5775 raise errors.OpPrereqError("Not enough master candidates, please"
5776 " pass auto promote option to allow"
5777 " promotion", errors.ECODE_STATE)
5779 self.old_flags = old_flags = (node.master_candidate,
5780 node.drained, node.offline)
5781 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5782 self.old_role = old_role = self._F2R[old_flags]
5784 # Check for ineffective changes
5785 for attr in self._FLAGS:
5786 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5787 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5788 setattr(self.op, attr, None)
5790 # Past this point, any flag change to False means a transition
5791 # away from the respective state, as only real changes are kept
5793 # TODO: We might query the real power state if it supports OOB
5794 if _SupportsOob(self.cfg, node):
5795 if self.op.offline is False and not (node.powered or
5796 self.op.powered == True):
5797 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5798 " offline status can be reset") %
5800 elif self.op.powered is not None:
5801 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5802 " as it does not support out-of-band"
5803 " handling") % self.op.node_name)
5805 # If we're being de-offlined/un-drained (or made master capable), we'll promote ourselves to MC if needed
5806 if (self.op.drained == False or self.op.offline == False or
5807 (self.op.master_capable and not node.master_capable)):
5808 if _DecideSelfPromotion(self):
5809 self.op.master_candidate = True
5810 self.LogInfo("Auto-promoting node to master candidate")
5812 # If we're no longer master capable, we'll demote ourselves from MC
5813 if self.op.master_capable == False and node.master_candidate:
5814 self.LogInfo("Demoting from master candidate")
5815 self.op.master_candidate = False
5818 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5819 if self.op.master_candidate:
5820 new_role = self._ROLE_CANDIDATE
5821 elif self.op.drained:
5822 new_role = self._ROLE_DRAINED
5823 elif self.op.offline:
5824 new_role = self._ROLE_OFFLINE
5825 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5826 # False is still in new flags, which means we're un-setting (the
5827 # current) flags
5828 new_role = self._ROLE_REGULAR
5829 else: # no new flags, nothing, keep old role
5830 new_role = old_role
5832 self.new_role = new_role
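# Informal example of the decision above: requesting drained=True on a node
# that is currently a master candidate yields old_role == _ROLE_CANDIDATE and
# new_role == _ROLE_DRAINED, which makes Exec ask the node to demote itself
# from master candidate status.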
5834 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5835 # Trying to transition out of offline status
5836 # TODO: Use standard RPC runner, but make sure it works when the node is
5837 # still marked offline
5838 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5840 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5841 " to report its version: %s" %
5842 (node.name, result.fail_msg),
5845 self.LogWarning("Transitioning node from offline to online state"
5846 " without using re-add. Please make sure the node"
5849 if self.op.secondary_ip:
5850 # Ok even without locking, because this can't be changed by any LU
5851 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5852 master_singlehomed = master.secondary_ip == master.primary_ip
5853 if master_singlehomed and self.op.secondary_ip:
5854 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5855 " homed cluster", errors.ECODE_INVAL)
5857 assert not (frozenset(affected_instances) -
5858 self.owned_locks(locking.LEVEL_INSTANCE))
5861 if affected_instances:
5862 raise errors.OpPrereqError("Cannot change secondary IP address:"
5863 " offline node has instances (%s)"
5864 " configured to use it" %
5865 utils.CommaJoin(affected_instances.keys()))
5867 # On online nodes, check that no instances are running, and that
5868 # the node has the new ip and we can reach it.
5869 for instance in affected_instances.values():
5870 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5871 msg="cannot change secondary ip")
5873 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5874 if master.name != node.name:
5875 # check reachability from master secondary ip to new secondary ip
5876 if not netutils.TcpPing(self.op.secondary_ip,
5877 constants.DEFAULT_NODED_PORT,
5878 source=master.secondary_ip):
5879 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5880 " based ping to node daemon port",
5881 errors.ECODE_ENVIRON)
5883 if self.op.ndparams:
5884 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5885 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5886 self.new_ndparams = new_ndparams
5888 if self.op.hv_state:
5889 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5890 self.node.hv_state_static)
5892 if self.op.disk_state:
5893 self.new_disk_state = \
5894 _MergeAndVerifyDiskState(self.op.disk_state,
5895 self.node.disk_state_static)
5897 def Exec(self, feedback_fn):
5902 old_role = self.old_role
5903 new_role = self.new_role
5907 if self.op.ndparams:
5908 node.ndparams = self.new_ndparams
5910 if self.op.powered is not None:
5911 node.powered = self.op.powered
5913 if self.op.hv_state:
5914 node.hv_state_static = self.new_hv_state
5916 if self.op.disk_state:
5917 node.disk_state_static = self.new_disk_state
5919 for attr in ["master_capable", "vm_capable"]:
5920 val = getattr(self.op, attr)
5922 setattr(node, attr, val)
5923 result.append((attr, str(val)))
5925 if new_role != old_role:
5926 # Tell the node to demote itself, if no longer MC and not offline
5927 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5928 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5930 self.LogWarning("Node failed to demote itself: %s", msg)
5932 new_flags = self._R2F[new_role]
5933 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5935 result.append((desc, str(nf)))
5936 (node.master_candidate, node.drained, node.offline) = new_flags
5938 # we locked all nodes, we adjust the CP before updating this node
5939 if self.lock_all:
5940 _AdjustCandidatePool(self, [node.name])
5942 if self.op.secondary_ip:
5943 node.secondary_ip = self.op.secondary_ip
5944 result.append(("secondary_ip", self.op.secondary_ip))
5946 # this will trigger configuration file update, if needed
5947 self.cfg.Update(node, feedback_fn)
5949 # this will trigger job queue propagation or cleanup if the mc
5950 # flag changed
5951 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5952 self.context.ReaddNode(node)
5957 class LUNodePowercycle(NoHooksLU):
5958 """Powercycles a node.
5963 def CheckArguments(self):
5964 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5965 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5966 raise errors.OpPrereqError("The node is the master and the force"
5967 " parameter was not set",
5970 def ExpandNames(self):
5971 """Locking for PowercycleNode.
5973 This is a last-resort option and shouldn't block on other
5974 jobs. Therefore, we grab no locks.
5977 self.needed_locks = {}
5979 def Exec(self, feedback_fn):
5983 result = self.rpc.call_node_powercycle(self.op.node_name,
5984 self.cfg.GetHypervisorType())
5985 result.Raise("Failed to schedule the reboot")
5986 return result.payload
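# Hypothetical invocation sketch; the opcode name follows the usual LU<->Op
# naming convention and is an assumption here, not something defined in this
# module:
#   op = opcodes.OpNodePowercycle(node_name="node1.example.com", force=False)
# The force flag is only required when powercycling the master node itself,
# as enforced in CheckArguments above.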
5989 class LUClusterQuery(NoHooksLU):
5990 """Query cluster configuration.
5995 def ExpandNames(self):
5996 self.needed_locks = {}
5998 def Exec(self, feedback_fn):
5999 """Return cluster config.
6002 cluster = self.cfg.GetClusterInfo()
6005 # Filter just for enabled hypervisors
6006 for os_name, hv_dict in cluster.os_hvp.items():
6007 os_hvp[os_name] = {}
6008 for hv_name, hv_params in hv_dict.items():
6009 if hv_name in cluster.enabled_hypervisors:
6010 os_hvp[os_name][hv_name] = hv_params
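# Shape sketch of the structure being filtered above (OS, hypervisor and
# parameter names are hypothetical): os_hvp maps an OS name to per-hypervisor
# parameter overrides, e.g.
#   {"debian-image": {"xen-pvm": {"kernel_path": "/boot/vmlinuz"}}}
# and only hypervisors listed in cluster.enabled_hypervisors are kept.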
6012 # Convert ip_family to ip_version
6013 primary_ip_version = constants.IP4_VERSION
6014 if cluster.primary_ip_family == netutils.IP6Address.family:
6015 primary_ip_version = constants.IP6_VERSION
6018 "software_version": constants.RELEASE_VERSION,
6019 "protocol_version": constants.PROTOCOL_VERSION,
6020 "config_version": constants.CONFIG_VERSION,
6021 "os_api_version": max(constants.OS_API_VERSIONS),
6022 "export_version": constants.EXPORT_VERSION,
6023 "architecture": (platform.architecture()[0], platform.machine()),
6024 "name": cluster.cluster_name,
6025 "master": cluster.master_node,
6026 "default_hypervisor": cluster.primary_hypervisor,
6027 "enabled_hypervisors": cluster.enabled_hypervisors,
6028 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6029 for hypervisor_name in cluster.enabled_hypervisors]),
6031 "beparams": cluster.beparams,
6032 "osparams": cluster.osparams,
6033 "ipolicy": cluster.ipolicy,
6034 "nicparams": cluster.nicparams,
6035 "ndparams": cluster.ndparams,
6036 "candidate_pool_size": cluster.candidate_pool_size,
6037 "master_netdev": cluster.master_netdev,
6038 "master_netmask": cluster.master_netmask,
6039 "use_external_mip_script": cluster.use_external_mip_script,
6040 "volume_group_name": cluster.volume_group_name,
6041 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6042 "file_storage_dir": cluster.file_storage_dir,
6043 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6044 "maintain_node_health": cluster.maintain_node_health,
6045 "ctime": cluster.ctime,
6046 "mtime": cluster.mtime,
6047 "uuid": cluster.uuid,
6048 "tags": list(cluster.GetTags()),
6049 "uid_pool": cluster.uid_pool,
6050 "default_iallocator": cluster.default_iallocator,
6051 "reserved_lvs": cluster.reserved_lvs,
6052 "primary_ip_version": primary_ip_version,
6053 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6054 "hidden_os": cluster.hidden_os,
6055 "blacklisted_os": cluster.blacklisted_os,
6061 class LUClusterConfigQuery(NoHooksLU):
6062 """Return configuration values.
6066 _FIELDS_DYNAMIC = utils.FieldSet()
6067 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6068 "watcher_pause", "volume_group_name")
6070 def CheckArguments(self):
6071 _CheckOutputFields(static=self._FIELDS_STATIC,
6072 dynamic=self._FIELDS_DYNAMIC,
6073 selected=self.op.output_fields)
6075 def ExpandNames(self):
6076 self.needed_locks = {}
6078 def Exec(self, feedback_fn):
6079 """Dump a representation of the cluster config to the standard output.
6083 for field in self.op.output_fields:
6084 if field == "cluster_name":
6085 entry = self.cfg.GetClusterName()
6086 elif field == "master_node":
6087 entry = self.cfg.GetMasterNode()
6088 elif field == "drain_flag":
6089 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6090 elif field == "watcher_pause":
6091 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6092 elif field == "volume_group_name":
6093 entry = self.cfg.GetVGName()
6095 raise errors.ParameterError(field)
6096 values.append(entry)
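# Hypothetical query sketch (field values are made up): with
# output_fields == ["cluster_name", "volume_group_name"] this method would
# return something like ["cluster.example.com", "xenvg"], one entry per
# requested field, in the same order as requested.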
6100 class LUInstanceActivateDisks(NoHooksLU):
6101 """Bring up an instance's disks.
6106 def ExpandNames(self):
6107 self._ExpandAndLockInstance()
6108 self.needed_locks[locking.LEVEL_NODE] = []
6109 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6111 def DeclareLocks(self, level):
6112 if level == locking.LEVEL_NODE:
6113 self._LockInstancesNodes()
6115 def CheckPrereq(self):
6116 """Check prerequisites.
6118 This checks that the instance is in the cluster.
6121 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6122 assert self.instance is not None, \
6123 "Cannot retrieve locked instance %s" % self.op.instance_name
6124 _CheckNodeOnline(self, self.instance.primary_node)
6126 def Exec(self, feedback_fn):
6127 """Activate the disks.
6130 disks_ok, disks_info = \
6131 _AssembleInstanceDisks(self, self.instance,
6132 ignore_size=self.op.ignore_size)
6133 if not disks_ok:
6134 raise errors.OpExecError("Cannot activate block devices")
6136 return disks_info
6139 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6141 """Prepare the block devices for an instance.
6143 This sets up the block devices on all nodes.
6145 @type lu: L{LogicalUnit}
6146 @param lu: the logical unit on whose behalf we execute
6147 @type instance: L{objects.Instance}
6148 @param instance: the instance for whose disks we assemble
6149 @type disks: list of L{objects.Disk} or None
6150 @param disks: which disks to assemble (or all, if None)
6151 @type ignore_secondaries: boolean
6152 @param ignore_secondaries: if true, errors on secondary nodes
6153 won't result in an error return from the function
6154 @type ignore_size: boolean
6155 @param ignore_size: if true, the current known size of the disk
6156 will not be used during the disk activation, useful for cases
6157 when the size is wrong
6158 @return: False if the operation failed, otherwise a list of
6159 (host, instance_visible_name, node_visible_name)
6160 with the mapping from node devices to instance devices
6165 iname = instance.name
6166 disks = _ExpandCheckDisks(instance, disks)
6168 # With the two-pass mechanism we try to reduce the window of
6169 # opportunity for the race condition of switching DRBD to primary
6170 # before handshaking occurred, but we do not eliminate it
6172 # The proper fix would be to wait (with some limits) until the
6173 # connection has been made and drbd transitions from WFConnection
6174 # into any other network-connected state (Connected, SyncTarget,
6177 # 1st pass, assemble on all nodes in secondary mode
6178 for idx, inst_disk in enumerate(disks):
6179 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6180 if ignore_size:
6181 node_disk = node_disk.Copy()
6182 node_disk.UnsetSize()
6183 lu.cfg.SetDiskID(node_disk, node)
6184 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6185 msg = result.fail_msg
6187 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6188 " (is_primary=False, pass=1): %s",
6189 inst_disk.iv_name, node, msg)
6190 if not ignore_secondaries:
6193 # FIXME: race condition on drbd migration to primary
6195 # 2nd pass, do only the primary node
6196 for idx, inst_disk in enumerate(disks):
6199 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6200 if node != instance.primary_node:
6201 continue
6202 if ignore_size:
6203 node_disk = node_disk.Copy()
6204 node_disk.UnsetSize()
6205 lu.cfg.SetDiskID(node_disk, node)
6206 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6207 msg = result.fail_msg
6209 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6210 " (is_primary=True, pass=2): %s",
6211 inst_disk.iv_name, node, msg)
6214 dev_path = result.payload
6216 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6218 # leave the disks configured for the primary node
6219 # this is a workaround that would be fixed better by
6220 # improving the logical/physical id handling
6222 lu.cfg.SetDiskID(disk, instance.primary_node)
6224 return disks_ok, device_info
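# Usage sketch, mirroring how _StartInstanceDisks below and
# LUInstanceActivateDisks above call this helper:
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance,
#                                                  ignore_secondaries=force)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
# device_info contains (primary node, disk iv_name, device path) tuples for
# the primary node only.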
6227 def _StartInstanceDisks(lu, instance, force):
6228 """Start the disks of an instance.
6231 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6232 ignore_secondaries=force)
6234 _ShutdownInstanceDisks(lu, instance)
6235 if force is not None and not force:
6236 lu.proc.LogWarning("", hint="If the message above refers to a"
6237 " secondary node,"
6238 " you can retry the operation using '--force'.")
6239 raise errors.OpExecError("Disk consistency error")
6242 class LUInstanceDeactivateDisks(NoHooksLU):
6243 """Shutdown an instance's disks.
6248 def ExpandNames(self):
6249 self._ExpandAndLockInstance()
6250 self.needed_locks[locking.LEVEL_NODE] = []
6251 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6253 def DeclareLocks(self, level):
6254 if level == locking.LEVEL_NODE:
6255 self._LockInstancesNodes()
6257 def CheckPrereq(self):
6258 """Check prerequisites.
6260 This checks that the instance is in the cluster.
6263 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6264 assert self.instance is not None, \
6265 "Cannot retrieve locked instance %s" % self.op.instance_name
6267 def Exec(self, feedback_fn):
6268 """Deactivate the disks
6271 instance = self.instance
6272 if self.op.force:
6273 _ShutdownInstanceDisks(self, instance)
6274 else:
6275 _SafeShutdownInstanceDisks(self, instance)
6278 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6279 """Shutdown block devices of an instance.
6281 This function checks if an instance is running, before calling
6282 _ShutdownInstanceDisks.
6285 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6286 _ShutdownInstanceDisks(lu, instance, disks=disks)
6289 def _ExpandCheckDisks(instance, disks):
6290 """Return the instance disks selected by the disks list
6292 @type disks: list of L{objects.Disk} or None
6293 @param disks: selected disks
6294 @rtype: list of L{objects.Disk}
6295 @return: selected instance disks to act on
6299 return instance.disks
6301 if not set(disks).issubset(instance.disks):
6302 raise errors.ProgrammerError("Can only act on disks belonging to the"
6307 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6308 """Shutdown block devices of an instance.
6310 This does the shutdown on all nodes of the instance.
6312 If ignore_primary is false, errors on the primary node are
6313 ignored.
6317 disks = _ExpandCheckDisks(instance, disks)
6320 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6321 lu.cfg.SetDiskID(top_disk, node)
6322 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6323 msg = result.fail_msg
6325 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6326 disk.iv_name, node, msg)
6327 if ((node == instance.primary_node and not ignore_primary) or
6328 (node != instance.primary_node and not result.offline)):
6333 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6334 """Checks if a node has enough free memory.
6336 This function checks if a given node has the needed amount of free
6337 memory. In case the node has less memory or we cannot get the
6338 information from the node, this function raises an OpPrereqError
6341 @type lu: C{LogicalUnit}
6342 @param lu: a logical unit from which we get configuration data
6344 @param node: the node to check
6345 @type reason: C{str}
6346 @param reason: string to use in the error message
6347 @type requested: C{int}
6348 @param requested: the amount of memory in MiB to check for
6349 @type hypervisor_name: C{str}
6350 @param hypervisor_name: the hypervisor to ask for memory stats
6351 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6352 we cannot check the node
6355 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6356 nodeinfo[node].Raise("Can't get data from node %s" % node,
6357 prereq=True, ecode=errors.ECODE_ENVIRON)
6358 (_, _, (hv_info, )) = nodeinfo[node].payload
6360 free_mem = hv_info.get("memory_free", None)
6361 if not isinstance(free_mem, int):
6362 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6363 " was '%s'" % (node, free_mem),
6364 errors.ECODE_ENVIRON)
6365 if requested > free_mem:
6366 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6367 " needed %s MiB, available %s MiB" %
6368 (node, reason, requested, free_mem),
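# Usage sketch, mirroring the call made from LUInstanceStartup.CheckPrereq
# further below:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MAXMEM], instance.hypervisor)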
6372 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6373 """Checks if nodes have enough free disk space in the all VGs.
6375 This function checks if all given nodes have the needed amount of
6376 free disk. In case any node has less disk or we cannot get the
6377 information from the node, this function raises an OpPrereqError
6380 @type lu: C{LogicalUnit}
6381 @param lu: a logical unit from which we get configuration data
6382 @type nodenames: C{list}
6383 @param nodenames: the list of node names to check
6384 @type req_sizes: C{dict}
6385 @param req_sizes: the hash of vg and corresponding amount of disk in
6387 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6388 or we cannot check the node
6391 for vg, req_size in req_sizes.items():
6392 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
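# Example of the expected req_sizes argument (volume group name and size are
# hypothetical): {"xenvg": 10240} asks every node in nodenames for at least
# 10 GiB of free space in VG "xenvg"; each entry is delegated to
# _CheckNodesFreeDiskOnVG below.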
6395 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6396 """Checks if nodes have enough free disk space in the specified VG.
6398 This function checks if all given nodes have the needed amount of
6399 free disk. In case any node has less disk or we cannot get the
6400 information from the node, this function raises an OpPrereqError
6403 @type lu: C{LogicalUnit}
6404 @param lu: a logical unit from which we get configuration data
6405 @type nodenames: C{list}
6406 @param nodenames: the list of node names to check
6408 @param vg: the volume group to check
6409 @type requested: C{int}
6410 @param requested: the amount of disk in MiB to check for
6411 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6412 or we cannot check the node
6415 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6416 for node in nodenames:
6417 info = nodeinfo[node]
6418 info.Raise("Cannot get current information from node %s" % node,
6419 prereq=True, ecode=errors.ECODE_ENVIRON)
6420 (_, (vg_info, ), _) = info.payload
6421 vg_free = vg_info.get("vg_free", None)
6422 if not isinstance(vg_free, int):
6423 raise errors.OpPrereqError("Can't compute free disk space on node"
6424 " %s for vg %s, result was '%s'" %
6425 (node, vg, vg_free), errors.ECODE_ENVIRON)
6426 if requested > vg_free:
6427 raise errors.OpPrereqError("Not enough disk space on target node %s"
6428 " vg %s: required %d MiB, available %d MiB" %
6429 (node, vg, requested, vg_free),
6433 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6434 """Checks if nodes have enough physical CPUs
6436 This function checks if all given nodes have the needed number of
6437 physical CPUs. In case any node has less CPUs or we cannot get the
6438 information from the node, this function raises an OpPrereqError
6441 @type lu: C{LogicalUnit}
6442 @param lu: a logical unit from which we get configuration data
6443 @type nodenames: C{list}
6444 @param nodenames: the list of node names to check
6445 @type requested: C{int}
6446 @param requested: the minimum acceptable number of physical CPUs
6447 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6448 or we cannot check the node
6451 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6452 for node in nodenames:
6453 info = nodeinfo[node]
6454 info.Raise("Cannot get current information from node %s" % node,
6455 prereq=True, ecode=errors.ECODE_ENVIRON)
6456 (_, _, (hv_info, )) = info.payload
6457 num_cpus = hv_info.get("cpu_total", None)
6458 if not isinstance(num_cpus, int):
6459 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6460 " on node %s, result was '%s'" %
6461 (node, num_cpus), errors.ECODE_ENVIRON)
6462 if requested > num_cpus:
6463 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6464 "required" % (node, num_cpus, requested),
6468 class LUInstanceStartup(LogicalUnit):
6469 """Starts an instance.
6472 HPATH = "instance-start"
6473 HTYPE = constants.HTYPE_INSTANCE
6476 def CheckArguments(self):
6478 if self.op.beparams:
6479 # fill the beparams dict
6480 objects.UpgradeBeParams(self.op.beparams)
6481 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6483 def ExpandNames(self):
6484 self._ExpandAndLockInstance()
6485 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6487 def DeclareLocks(self, level):
6488 if level == locking.LEVEL_NODE_RES:
6489 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6491 def BuildHooksEnv(self):
6494 This runs on master, primary and secondary nodes of the instance.
6498 "FORCE": self.op.force,
6501 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6505 def BuildHooksNodes(self):
6506 """Build hooks nodes.
6509 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6512 def CheckPrereq(self):
6513 """Check prerequisites.
6515 This checks that the instance is in the cluster.
6518 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6519 assert self.instance is not None, \
6520 "Cannot retrieve locked instance %s" % self.op.instance_name
6523 if self.op.hvparams:
6524 # check hypervisor parameter syntax (locally)
6525 cluster = self.cfg.GetClusterInfo()
6526 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6527 filled_hvp = cluster.FillHV(instance)
6528 filled_hvp.update(self.op.hvparams)
6529 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6530 hv_type.CheckParameterSyntax(filled_hvp)
6531 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6533 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6535 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6537 if self.primary_offline and self.op.ignore_offline_nodes:
6538 self.proc.LogWarning("Ignoring offline primary node")
6540 if self.op.hvparams or self.op.beparams:
6541 self.proc.LogWarning("Overridden parameters are ignored")
6542 else:
6543 _CheckNodeOnline(self, instance.primary_node)
6545 bep = self.cfg.GetClusterInfo().FillBE(instance)
6546 bep.update(self.op.beparams)
6548 # check bridges existence
6549 _CheckInstanceBridgesExist(self, instance)
6551 remote_info = self.rpc.call_instance_info(instance.primary_node,
6553 instance.hypervisor)
6554 remote_info.Raise("Error checking node %s" % instance.primary_node,
6555 prereq=True, ecode=errors.ECODE_ENVIRON)
6556 if not remote_info.payload: # not running already
6557 _CheckNodeFreeMemory(self, instance.primary_node,
6558 "starting instance %s" % instance.name,
6559 bep[constants.BE_MAXMEM], instance.hypervisor)
6561 def Exec(self, feedback_fn):
6562 """Start the instance.
6565 instance = self.instance
6566 force = self.op.force
6568 if not self.op.no_remember:
6569 self.cfg.MarkInstanceUp(instance.name)
6571 if self.primary_offline:
6572 assert self.op.ignore_offline_nodes
6573 self.proc.LogInfo("Primary node offline, marked instance as started")
6574 else:
6575 node_current = instance.primary_node
6577 _StartInstanceDisks(self, instance, force)
6580 self.rpc.call_instance_start(node_current,
6581 (instance, self.op.hvparams,
6583 self.op.startup_paused)
6584 msg = result.fail_msg
6586 _ShutdownInstanceDisks(self, instance)
6587 raise errors.OpExecError("Could not start instance: %s" % msg)
6590 class LUInstanceReboot(LogicalUnit):
6591 """Reboot an instance.
6594 HPATH = "instance-reboot"
6595 HTYPE = constants.HTYPE_INSTANCE
6598 def ExpandNames(self):
6599 self._ExpandAndLockInstance()
6601 def BuildHooksEnv(self):
6604 This runs on master, primary and secondary nodes of the instance.
6608 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6609 "REBOOT_TYPE": self.op.reboot_type,
6610 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6613 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6617 def BuildHooksNodes(self):
6618 """Build hooks nodes.
6621 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6624 def CheckPrereq(self):
6625 """Check prerequisites.
6627 This checks that the instance is in the cluster.
6630 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6631 assert self.instance is not None, \
6632 "Cannot retrieve locked instance %s" % self.op.instance_name
6633 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6634 _CheckNodeOnline(self, instance.primary_node)
6636 # check bridges existence
6637 _CheckInstanceBridgesExist(self, instance)
6639 def Exec(self, feedback_fn):
6640 """Reboot the instance.
6643 instance = self.instance
6644 ignore_secondaries = self.op.ignore_secondaries
6645 reboot_type = self.op.reboot_type
6647 remote_info = self.rpc.call_instance_info(instance.primary_node,
6649 instance.hypervisor)
6650 remote_info.Raise("Error checking node %s" % instance.primary_node)
6651 instance_running = bool(remote_info.payload)
6653 node_current = instance.primary_node
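# Two paths follow: a soft or hard reboot of a running instance is delegated
# to the node daemon via call_instance_reboot, while any other case (a full
# reboot, or an instance that is not actually running) is handled here by
# shutting the instance and its disks down, reassembling the disks and
# starting the instance again.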
6655 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6656 constants.INSTANCE_REBOOT_HARD]:
6657 for disk in instance.disks:
6658 self.cfg.SetDiskID(disk, node_current)
6659 result = self.rpc.call_instance_reboot(node_current, instance,
6661 self.op.shutdown_timeout)
6662 result.Raise("Could not reboot instance")
6663 else:
6664 if instance_running:
6665 result = self.rpc.call_instance_shutdown(node_current, instance,
6666 self.op.shutdown_timeout)
6667 result.Raise("Could not shutdown instance for full reboot")
6668 _ShutdownInstanceDisks(self, instance)
6669 else:
6670 self.LogInfo("Instance %s was already stopped, starting now",
6671 instance.name)
6672 _StartInstanceDisks(self, instance, ignore_secondaries)
6673 result = self.rpc.call_instance_start(node_current,
6674 (instance, None, None), False)
6675 msg = result.fail_msg
6677 _ShutdownInstanceDisks(self, instance)
6678 raise errors.OpExecError("Could not start instance for"
6679 " full reboot: %s" % msg)
6681 self.cfg.MarkInstanceUp(instance.name)
6684 class LUInstanceShutdown(LogicalUnit):
6685 """Shutdown an instance.
6688 HPATH = "instance-stop"
6689 HTYPE = constants.HTYPE_INSTANCE
6692 def ExpandNames(self):
6693 self._ExpandAndLockInstance()
6695 def BuildHooksEnv(self):
6698 This runs on master, primary and secondary nodes of the instance.
6701 env = _BuildInstanceHookEnvByObject(self, self.instance)
6702 env["TIMEOUT"] = self.op.timeout
6705 def BuildHooksNodes(self):
6706 """Build hooks nodes.
6709 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6712 def CheckPrereq(self):
6713 """Check prerequisites.
6715 This checks that the instance is in the cluster.
6718 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6719 assert self.instance is not None, \
6720 "Cannot retrieve locked instance %s" % self.op.instance_name
6722 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6724 self.primary_offline = \
6725 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6727 if self.primary_offline and self.op.ignore_offline_nodes:
6728 self.proc.LogWarning("Ignoring offline primary node")
6729 else:
6730 _CheckNodeOnline(self, self.instance.primary_node)
6732 def Exec(self, feedback_fn):
6733 """Shutdown the instance.
6736 instance = self.instance
6737 node_current = instance.primary_node
6738 timeout = self.op.timeout
6740 if not self.op.no_remember:
6741 self.cfg.MarkInstanceDown(instance.name)
6743 if self.primary_offline:
6744 assert self.op.ignore_offline_nodes
6745 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6746 else:
6747 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6748 msg = result.fail_msg
6750 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6752 _ShutdownInstanceDisks(self, instance)
6755 class LUInstanceReinstall(LogicalUnit):
6756 """Reinstall an instance.
6759 HPATH = "instance-reinstall"
6760 HTYPE = constants.HTYPE_INSTANCE
6763 def ExpandNames(self):
6764 self._ExpandAndLockInstance()
6766 def BuildHooksEnv(self):
6769 This runs on master, primary and secondary nodes of the instance.
6772 return _BuildInstanceHookEnvByObject(self, self.instance)
6774 def BuildHooksNodes(self):
6775 """Build hooks nodes.
6778 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6781 def CheckPrereq(self):
6782 """Check prerequisites.
6784 This checks that the instance is in the cluster and is not running.
6787 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6788 assert instance is not None, \
6789 "Cannot retrieve locked instance %s" % self.op.instance_name
6790 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6791 " offline, cannot reinstall")
6792 for node in instance.secondary_nodes:
6793 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6794 " cannot reinstall")
6796 if instance.disk_template == constants.DT_DISKLESS:
6797 raise errors.OpPrereqError("Instance '%s' has no disks" %
6798 self.op.instance_name,
6800 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6802 if self.op.os_type is not None:
6804 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6805 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6806 instance_os = self.op.os_type
6808 instance_os = instance.os
6810 nodelist = list(instance.all_nodes)
6812 if self.op.osparams:
6813 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6814 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6815 self.os_inst = i_osdict # the new dict (without defaults)
6819 self.instance = instance
6821 def Exec(self, feedback_fn):
6822 """Reinstall the instance.
6825 inst = self.instance
6827 if self.op.os_type is not None:
6828 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6829 inst.os = self.op.os_type
6830 # Write to configuration
6831 self.cfg.Update(inst, feedback_fn)
6833 _StartInstanceDisks(self, inst, None)
6835 feedback_fn("Running the instance OS create scripts...")
6836 # FIXME: pass debug option from opcode to backend
6837 result = self.rpc.call_instance_os_add(inst.primary_node,
6838 (inst, self.os_inst), True,
6839 self.op.debug_level)
6840 result.Raise("Could not install OS for instance %s on node %s" %
6841 (inst.name, inst.primary_node))
6843 _ShutdownInstanceDisks(self, inst)
6846 class LUInstanceRecreateDisks(LogicalUnit):
6847 """Recreate an instance's missing disks.
6850 HPATH = "instance-recreate-disks"
6851 HTYPE = constants.HTYPE_INSTANCE
6854 def CheckArguments(self):
6855 # normalise the disk list
6856 self.op.disks = sorted(frozenset(self.op.disks))
6858 def ExpandNames(self):
6859 self._ExpandAndLockInstance()
6860 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6862 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6863 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6865 self.needed_locks[locking.LEVEL_NODE] = []
6867 def DeclareLocks(self, level):
6868 if level == locking.LEVEL_NODE:
6869 # if we replace the nodes, we only need to lock the old primary,
6870 # otherwise we need to lock all nodes for disk re-creation
6871 primary_only = bool(self.op.nodes)
6872 self._LockInstancesNodes(primary_only=primary_only)
6873 elif level == locking.LEVEL_NODE_RES:
6875 self.needed_locks[locking.LEVEL_NODE_RES] = \
6876 self.needed_locks[locking.LEVEL_NODE][:]
6878 def BuildHooksEnv(self):
6881 This runs on master, primary and secondary nodes of the instance.
6884 return _BuildInstanceHookEnvByObject(self, self.instance)
6886 def BuildHooksNodes(self):
6887 """Build hooks nodes.
6890 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6893 def CheckPrereq(self):
6894 """Check prerequisites.
6896 This checks that the instance is in the cluster and is not running.
6899 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6900 assert instance is not None, \
6901 "Cannot retrieve locked instance %s" % self.op.instance_name
6903 if len(self.op.nodes) != len(instance.all_nodes):
6904 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6905 " %d replacement nodes were specified" %
6906 (instance.name, len(instance.all_nodes),
6907 len(self.op.nodes)),
6909 assert instance.disk_template != constants.DT_DRBD8 or \
6910 len(self.op.nodes) == 2
6911 assert instance.disk_template != constants.DT_PLAIN or \
6912 len(self.op.nodes) == 1
6913 primary_node = self.op.nodes[0]
6915 primary_node = instance.primary_node
6916 _CheckNodeOnline(self, primary_node)
6918 if instance.disk_template == constants.DT_DISKLESS:
6919 raise errors.OpPrereqError("Instance '%s' has no disks" %
6920 self.op.instance_name, errors.ECODE_INVAL)
6921 # if we replace nodes *and* the old primary is offline, we don't
6923 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6924 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6925 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6926 if not (self.op.nodes and old_pnode.offline):
6927 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6928 msg="cannot recreate disks")
6930 if not self.op.disks:
6931 self.op.disks = range(len(instance.disks))
6933 for idx in self.op.disks:
6934 if idx >= len(instance.disks):
6935 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6937 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6938 raise errors.OpPrereqError("Can't recreate disks partially and"
6939 " change the nodes at the same time",
6941 self.instance = instance
6943 def Exec(self, feedback_fn):
6944 """Recreate the disks.
6947 instance = self.instance
6949 assert (self.owned_locks(locking.LEVEL_NODE) ==
6950 self.owned_locks(locking.LEVEL_NODE_RES))
6953 mods = [] # keeps track of needed logical_id changes
6955 for idx, disk in enumerate(instance.disks):
6956 if idx not in self.op.disks: # disk idx has not been passed in
6959 # update secondaries for disks, if needed
6961 if disk.dev_type == constants.LD_DRBD8:
6962 # need to update the nodes and minors
6963 assert len(self.op.nodes) == 2
6964 assert len(disk.logical_id) == 6 # otherwise disk internals
6966 (_, _, old_port, _, _, old_secret) = disk.logical_id
6967 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6968 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6969 new_minors[0], new_minors[1], old_secret)
6970 assert len(disk.logical_id) == len(new_id)
6971 mods.append((idx, new_id))
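# Layout sketch of the DRBD logical_id rewritten above (a 6-tuple); only the
# nodes and minors change, port and secret are preserved:
#   (node_a, node_b, port, minor_a, minor_b, secret)
#     -> (self.op.nodes[0], self.op.nodes[1], old_port,
#         new_minors[0], new_minors[1], old_secret)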
6973 # now that we have passed all asserts above, we can apply the mods
6974 # in a single run (to avoid partial changes)
6975 for idx, new_id in mods:
6976 instance.disks[idx].logical_id = new_id
6978 # change primary node, if needed
6980 instance.primary_node = self.op.nodes[0]
6981 self.LogWarning("Changing the instance's nodes, you will have to"
6982 " remove any disks left on the older nodes manually")
6985 self.cfg.Update(instance, feedback_fn)
6987 _CreateDisks(self, instance, to_skip=to_skip)
6990 class LUInstanceRename(LogicalUnit):
6991 """Rename an instance.
6994 HPATH = "instance-rename"
6995 HTYPE = constants.HTYPE_INSTANCE
6997 def CheckArguments(self):
7001 if self.op.ip_check and not self.op.name_check:
7002 # TODO: make the ip check more flexible and not depend on the name check
7003 raise errors.OpPrereqError("IP address check requires a name check",
7006 def BuildHooksEnv(self):
7009 This runs on master, primary and secondary nodes of the instance.
7012 env = _BuildInstanceHookEnvByObject(self, self.instance)
7013 env["INSTANCE_NEW_NAME"] = self.op.new_name
7016 def BuildHooksNodes(self):
7017 """Build hooks nodes.
7020 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7023 def CheckPrereq(self):
7024 """Check prerequisites.
7026 This checks that the instance is in the cluster and is not running.
7029 self.op.instance_name = _ExpandInstanceName(self.cfg,
7030 self.op.instance_name)
7031 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7032 assert instance is not None
7033 _CheckNodeOnline(self, instance.primary_node)
7034 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7035 msg="cannot rename")
7036 self.instance = instance
7038 new_name = self.op.new_name
7039 if self.op.name_check:
7040 hostname = netutils.GetHostname(name=new_name)
7041 if hostname.name != new_name:
7042 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7044 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7045 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7046 " same as given hostname '%s'") %
7047 (hostname.name, self.op.new_name),
7049 new_name = self.op.new_name = hostname.name
7050 if (self.op.ip_check and
7051 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7052 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7053 (hostname.ip, new_name),
7054 errors.ECODE_NOTUNIQUE)
7056 instance_list = self.cfg.GetInstanceList()
7057 if new_name in instance_list and new_name != instance.name:
7058 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7059 new_name, errors.ECODE_EXISTS)
7061 def Exec(self, feedback_fn):
7062 """Rename the instance.
7065 inst = self.instance
7066 old_name = inst.name
7068 rename_file_storage = False
7069 if (inst.disk_template in constants.DTS_FILEBASED and
7070 self.op.new_name != inst.name):
7071 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7072 rename_file_storage = True
7074 self.cfg.RenameInstance(inst.name, self.op.new_name)
7075 # Change the instance lock. This is definitely safe while we hold the BGL.
7076 # Otherwise the new lock would have to be added in acquired mode.
7078 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7079 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7081 # re-read the instance from the configuration after rename
7082 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7084 if rename_file_storage:
7085 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7086 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7087 old_file_storage_dir,
7088 new_file_storage_dir)
7089 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7090 " (but the instance has been renamed in Ganeti)" %
7091 (inst.primary_node, old_file_storage_dir,
7092 new_file_storage_dir))
7094 _StartInstanceDisks(self, inst, None)
7096 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7097 old_name, self.op.debug_level)
7098 msg = result.fail_msg
7100 msg = ("Could not run OS rename script for instance %s on node %s"
7101 " (but the instance has been renamed in Ganeti): %s" %
7102 (inst.name, inst.primary_node, msg))
7103 self.proc.LogWarning(msg)
7105 _ShutdownInstanceDisks(self, inst)
7110 class LUInstanceRemove(LogicalUnit):
7111 """Remove an instance.
7114 HPATH = "instance-remove"
7115 HTYPE = constants.HTYPE_INSTANCE
7118 def ExpandNames(self):
7119 self._ExpandAndLockInstance()
7120 self.needed_locks[locking.LEVEL_NODE] = []
7121 self.needed_locks[locking.LEVEL_NODE_RES] = []
7122 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7124 def DeclareLocks(self, level):
7125 if level == locking.LEVEL_NODE:
7126 self._LockInstancesNodes()
7127 elif level == locking.LEVEL_NODE_RES:
7129 self.needed_locks[locking.LEVEL_NODE_RES] = \
7130 self.needed_locks[locking.LEVEL_NODE][:]
7132 def BuildHooksEnv(self):
7135 This runs on master, primary and secondary nodes of the instance.
7138 env = _BuildInstanceHookEnvByObject(self, self.instance)
7139 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7142 def BuildHooksNodes(self):
7143 """Build hooks nodes.
7146 nl = [self.cfg.GetMasterNode()]
7147 nl_post = list(self.instance.all_nodes) + nl
7148 return (nl, nl_post)
7150 def CheckPrereq(self):
7151 """Check prerequisites.
7153 This checks that the instance is in the cluster.
7156 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7157 assert self.instance is not None, \
7158 "Cannot retrieve locked instance %s" % self.op.instance_name
7160 def Exec(self, feedback_fn):
7161 """Remove the instance.
7164 instance = self.instance
7165 logging.info("Shutting down instance %s on node %s",
7166 instance.name, instance.primary_node)
7168 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7169 self.op.shutdown_timeout)
7170 msg = result.fail_msg
7172 if self.op.ignore_failures:
7173 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7175 raise errors.OpExecError("Could not shutdown instance %s on"
7177 (instance.name, instance.primary_node, msg))
7179 assert (self.owned_locks(locking.LEVEL_NODE) ==
7180 self.owned_locks(locking.LEVEL_NODE_RES))
7181 assert not (set(instance.all_nodes) -
7182 self.owned_locks(locking.LEVEL_NODE)), \
7183 "Not owning correct locks"
7185 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7188 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7189 """Utility function to remove an instance.
7192 logging.info("Removing block devices for instance %s", instance.name)
7194 if not _RemoveDisks(lu, instance):
7195 if not ignore_failures:
7196 raise errors.OpExecError("Can't remove instance's disks")
7197 feedback_fn("Warning: can't remove instance's disks")
7199 logging.info("Removing instance %s out of cluster config", instance.name)
7201 lu.cfg.RemoveInstance(instance.name)
7203 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7204 "Instance lock removal conflict"
7206 # Remove lock for the instance
7207 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7210 class LUInstanceQuery(NoHooksLU):
7211 """Logical unit for querying instances.
7214 # pylint: disable=W0142
7217 def CheckArguments(self):
7218 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7219 self.op.output_fields, self.op.use_locking)
7221 def ExpandNames(self):
7222 self.iq.ExpandNames(self)
7224 def DeclareLocks(self, level):
7225 self.iq.DeclareLocks(self, level)
7227 def Exec(self, feedback_fn):
7228 return self.iq.OldStyleQuery(self)
7231 class LUInstanceFailover(LogicalUnit):
7232 """Failover an instance.
7235 HPATH = "instance-failover"
7236 HTYPE = constants.HTYPE_INSTANCE
7239 def CheckArguments(self):
7240 """Check the arguments.
7243 self.iallocator = getattr(self.op, "iallocator", None)
7244 self.target_node = getattr(self.op, "target_node", None)
7246 def ExpandNames(self):
7247 self._ExpandAndLockInstance()
7249 if self.op.target_node is not None:
7250 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7252 self.needed_locks[locking.LEVEL_NODE] = []
7253 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7255 ignore_consistency = self.op.ignore_consistency
7256 shutdown_timeout = self.op.shutdown_timeout
7257 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7260 ignore_consistency=ignore_consistency,
7261 shutdown_timeout=shutdown_timeout,
7262 ignore_ipolicy=self.op.ignore_ipolicy)
7263 self.tasklets = [self._migrater]
7265 def DeclareLocks(self, level):
7266 if level == locking.LEVEL_NODE:
7267 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7268 if instance.disk_template in constants.DTS_EXT_MIRROR:
7269 if self.op.target_node is None:
7270 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7272 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7273 self.op.target_node]
7274 del self.recalculate_locks[locking.LEVEL_NODE]
7276 self._LockInstancesNodes()
7278 def BuildHooksEnv(self):
7281 This runs on master, primary and secondary nodes of the instance.
7284 instance = self._migrater.instance
7285 source_node = instance.primary_node
7286 target_node = self.op.target_node
7288 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7289 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7290 "OLD_PRIMARY": source_node,
7291 "NEW_PRIMARY": target_node,
7294 if instance.disk_template in constants.DTS_INT_MIRROR:
7295 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7296 env["NEW_SECONDARY"] = source_node
7298 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7300 env.update(_BuildInstanceHookEnvByObject(self, instance))
7304 def BuildHooksNodes(self):
7305 """Build hooks nodes.
7308 instance = self._migrater.instance
7309 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7310 return (nl, nl + [instance.primary_node])
7313 class LUInstanceMigrate(LogicalUnit):
7314 """Migrate an instance.
7316 This is migration without shutting down, compared to the failover,
7317 which is done with shutdown.
7320 HPATH = "instance-migrate"
7321 HTYPE = constants.HTYPE_INSTANCE
7324 def ExpandNames(self):
7325 self._ExpandAndLockInstance()
7327 if self.op.target_node is not None:
7328 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7330 self.needed_locks[locking.LEVEL_NODE] = []
7331 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7333 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7334 cleanup=self.op.cleanup,
7336 fallback=self.op.allow_failover,
7337 ignore_ipolicy=self.op.ignore_ipolicy)
7338 self.tasklets = [self._migrater]
7340 def DeclareLocks(self, level):
7341 if level == locking.LEVEL_NODE:
7342 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7343 if instance.disk_template in constants.DTS_EXT_MIRROR:
7344 if self.op.target_node is None:
7345 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7347 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7348 self.op.target_node]
7349 del self.recalculate_locks[locking.LEVEL_NODE]
7351 self._LockInstancesNodes()
7353 def BuildHooksEnv(self):
7356 This runs on master, primary and secondary nodes of the instance.
7359 instance = self._migrater.instance
7360 source_node = instance.primary_node
7361 target_node = self.op.target_node
7362 env = _BuildInstanceHookEnvByObject(self, instance)
7364 "MIGRATE_LIVE": self._migrater.live,
7365 "MIGRATE_CLEANUP": self.op.cleanup,
7366 "OLD_PRIMARY": source_node,
7367 "NEW_PRIMARY": target_node,
7370 if instance.disk_template in constants.DTS_INT_MIRROR:
7371 env["OLD_SECONDARY"] = target_node
7372 env["NEW_SECONDARY"] = source_node
7374 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7378 def BuildHooksNodes(self):
7379 """Build hooks nodes.
7382 instance = self._migrater.instance
7383 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7384 return (nl, nl + [instance.primary_node])
7387 class LUInstanceMove(LogicalUnit):
7388 """Move an instance by data-copying.
7391 HPATH = "instance-move"
7392 HTYPE = constants.HTYPE_INSTANCE
7395 def ExpandNames(self):
7396 self._ExpandAndLockInstance()
7397 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7398 self.op.target_node = target_node
7399 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7400 self.needed_locks[locking.LEVEL_NODE_RES] = []
7401 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7403 def DeclareLocks(self, level):
7404 if level == locking.LEVEL_NODE:
7405 self._LockInstancesNodes(primary_only=True)
7406 elif level == locking.LEVEL_NODE_RES:
7408 self.needed_locks[locking.LEVEL_NODE_RES] = \
7409 self.needed_locks[locking.LEVEL_NODE][:]
7411 def BuildHooksEnv(self):
7414 This runs on master, primary and secondary nodes of the instance.
7418 "TARGET_NODE": self.op.target_node,
7419 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7421 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7424 def BuildHooksNodes(self):
7425 """Build hooks nodes.
7429 self.cfg.GetMasterNode(),
7430 self.instance.primary_node,
7431 self.op.target_node,
7435 def CheckPrereq(self):
7436 """Check prerequisites.
7438 This checks that the instance is in the cluster.
7441 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7442 assert self.instance is not None, \
7443 "Cannot retrieve locked instance %s" % self.op.instance_name
7445 node = self.cfg.GetNodeInfo(self.op.target_node)
7446 assert node is not None, \
7447 "Cannot retrieve locked node %s" % self.op.target_node
7449 self.target_node = target_node = node.name
7451 if target_node == instance.primary_node:
7452 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7453 (instance.name, target_node),
7456 bep = self.cfg.GetClusterInfo().FillBE(instance)
7458 for idx, dsk in enumerate(instance.disks):
7459 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7460 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7461 " cannot copy" % idx, errors.ECODE_STATE)
7463 _CheckNodeOnline(self, target_node)
7464 _CheckNodeNotDrained(self, target_node)
7465 _CheckNodeVmCapable(self, target_node)
7466 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7467 self.cfg.GetNodeGroup(node.group))
7468 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7469 ignore=self.op.ignore_ipolicy)
7471 if instance.admin_state == constants.ADMINST_UP:
7472 # check memory requirements on the secondary node
7473 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7474 instance.name, bep[constants.BE_MAXMEM],
7475 instance.hypervisor)
7477 self.LogInfo("Not checking memory on the secondary node as"
7478 " instance will not be started")
7480 # check bridge existence
7481 _CheckInstanceBridgesExist(self, instance, node=target_node)
7483 def Exec(self, feedback_fn):
7484 """Move an instance.
7486 The move is done by shutting it down on its present node, copying
7487 the data over (slow) and starting it on the new node.
7490 instance = self.instance
7492 source_node = instance.primary_node
7493 target_node = self.target_node
7495 self.LogInfo("Shutting down instance %s on source node %s",
7496 instance.name, source_node)
7498 assert (self.owned_locks(locking.LEVEL_NODE) ==
7499 self.owned_locks(locking.LEVEL_NODE_RES))
7501 result = self.rpc.call_instance_shutdown(source_node, instance,
7502 self.op.shutdown_timeout)
7503 msg = result.fail_msg
7505 if self.op.ignore_consistency:
7506 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7507 " Proceeding anyway. Please make sure node"
7508 " %s is down. Error details: %s",
7509 instance.name, source_node, source_node, msg)
7511 raise errors.OpExecError("Could not shutdown instance %s on"
7513 (instance.name, source_node, msg))
7515 # create the target disks
7517 _CreateDisks(self, instance, target_node=target_node)
7518 except errors.OpExecError:
7519 self.LogWarning("Device creation failed, reverting...")
7521 _RemoveDisks(self, instance, target_node=target_node)
7523 self.cfg.ReleaseDRBDMinors(instance.name)
7526 cluster_name = self.cfg.GetClusterInfo().cluster_name
7529 # activate, get path, copy the data over
7530 for idx, disk in enumerate(instance.disks):
7531 self.LogInfo("Copying data for disk %d", idx)
7532 result = self.rpc.call_blockdev_assemble(target_node, disk,
7533 instance.name, True, idx)
7535 self.LogWarning("Can't assemble newly created disk %d: %s",
7536 idx, result.fail_msg)
7537 errs.append(result.fail_msg)
7539 dev_path = result.payload
7540 result = self.rpc.call_blockdev_export(source_node, disk,
7541 target_node, dev_path,
7544 self.LogWarning("Can't copy data over for disk %d: %s",
7545 idx, result.fail_msg)
7546 errs.append(result.fail_msg)
7550 self.LogWarning("Some disks failed to copy, aborting")
7552 _RemoveDisks(self, instance, target_node=target_node)
7554 self.cfg.ReleaseDRBDMinors(instance.name)
7555 raise errors.OpExecError("Errors during disk copy: %s" %
7558 instance.primary_node = target_node
7559 self.cfg.Update(instance, feedback_fn)
7561 self.LogInfo("Removing the disks on the original node")
7562 _RemoveDisks(self, instance, target_node=source_node)
7564 # Only start the instance if it's marked as up
7565 if instance.admin_state == constants.ADMINST_UP:
7566 self.LogInfo("Starting instance %s on node %s",
7567 instance.name, target_node)
7569 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7570 ignore_secondaries=True)
7572 _ShutdownInstanceDisks(self, instance)
7573 raise errors.OpExecError("Can't activate the instance's disks")
7575 result = self.rpc.call_instance_start(target_node,
7576 (instance, None, None), False)
7577 msg = result.fail_msg
7579 _ShutdownInstanceDisks(self, instance)
7580 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7581 (instance.name, target_node, msg))
7584 class LUNodeMigrate(LogicalUnit):
7585 """Migrate all instances from a node.
7588 HPATH = "node-migrate"
7589 HTYPE = constants.HTYPE_NODE
7592 def CheckArguments(self):
7595 def ExpandNames(self):
7596 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7598 self.share_locks = _ShareAll()
7599 self.needed_locks = {
7600 locking.LEVEL_NODE: [self.op.node_name],
7603 def BuildHooksEnv(self):
7606 This runs on the master, the primary and all the secondaries.
7610 "NODE_NAME": self.op.node_name,
7613 def BuildHooksNodes(self):
7614 """Build hooks nodes.
7617 nl = [self.cfg.GetMasterNode()]
7620 def CheckPrereq(self):
7623 def Exec(self, feedback_fn):
7624     # Prepare jobs for migrating instances
7626 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7629 iallocator=self.op.iallocator,
7630 target_node=self.op.target_node,
7631 ignore_ipolicy=self.op.ignore_ipolicy)]
7632 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7635 # TODO: Run iallocator in this opcode and pass correct placement options to
7636 # OpInstanceMigrate. Since other jobs can modify the cluster between
7637 # running the iallocator and the actual migration, a good consistency model
7638 # will have to be found.
7640 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7641 frozenset([self.op.node_name]))
7643 return ResultWithJobs(jobs)
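# Illustrative sketch (not part of the original module): the "jobs" value
# built above is a list of single-opcode jobs, one per primary instance on
# the evacuated node, roughly of the form
#   [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
# Each inner list is submitted as its own job, so the per-instance
# migrations are scheduled and can fail independently of each other.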
7646 class TLMigrateInstance(Tasklet):
7647 """Tasklet class for instance migration.
7650 @ivar live: whether the migration will be done live or non-live;
7651       this variable is initialized only after CheckPrereq has run
7652 @type cleanup: boolean
7653   @ivar cleanup: Whether we clean up from a failed migration
7654 @type iallocator: string
7655 @ivar iallocator: The iallocator used to determine target_node
7656 @type target_node: string
7657 @ivar target_node: If given, the target_node to reallocate the instance to
7658 @type failover: boolean
7659 @ivar failover: Whether operation results in failover or migration
7660 @type fallback: boolean
7661 @ivar fallback: Whether fallback to failover is allowed if migration not
7663 @type ignore_consistency: boolean
7664   @ivar ignore_consistency: Whether we should ignore consistency between source
7666 @type shutdown_timeout: int
7667   @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
7668 @type ignore_ipolicy: bool
7669 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7674 _MIGRATION_POLL_INTERVAL = 1 # seconds
7675 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7677 def __init__(self, lu, instance_name, cleanup=False,
7678 failover=False, fallback=False,
7679 ignore_consistency=False,
7680 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7681 ignore_ipolicy=False):
7682 """Initializes this class.
7685 Tasklet.__init__(self, lu)
7688 self.instance_name = instance_name
7689 self.cleanup = cleanup
7690 self.live = False # will be overridden later
7691 self.failover = failover
7692 self.fallback = fallback
7693 self.ignore_consistency = ignore_consistency
7694 self.shutdown_timeout = shutdown_timeout
7695 self.ignore_ipolicy = ignore_ipolicy
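# Minimal usage sketch (hypothetical wiring, not taken from this file): an
# owning LU would typically instantiate the tasklet in ExpandNames and
# register it in self.tasklets so that CheckPrereq/Exec are driven for it
# automatically, e.g.
#   self._migrater = TLMigrateInstance(self, self.op.instance_name,
#                                      cleanup=self.op.cleanup)
#   self.tasklets = [self._migrater]
# (self.op.cleanup is assumed to exist on the calling opcode.)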
7697 def CheckPrereq(self):
7698 """Check prerequisites.
7700 This checks that the instance is in the cluster.
7703 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7704 instance = self.cfg.GetInstanceInfo(instance_name)
7705 assert instance is not None
7706 self.instance = instance
7707 cluster = self.cfg.GetClusterInfo()
7709 if (not self.cleanup and
7710 not instance.admin_state == constants.ADMINST_UP and
7711 not self.failover and self.fallback):
7712 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7713 " switching to failover")
7714 self.failover = True
7716 if instance.disk_template not in constants.DTS_MIRRORED:
7721 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7722 " %s" % (instance.disk_template, text),
7725 if instance.disk_template in constants.DTS_EXT_MIRROR:
7726 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7728 if self.lu.op.iallocator:
7729 self._RunAllocator()
7731       # We set self.target_node as it is required by
7733 self.target_node = self.lu.op.target_node
7735 # Check that the target node is correct in terms of instance policy
7736 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7737 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7738 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7739 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7740 ignore=self.ignore_ipolicy)
7742 # self.target_node is already populated, either directly or by the
7744 target_node = self.target_node
7745 if self.target_node == instance.primary_node:
7746 raise errors.OpPrereqError("Cannot migrate instance %s"
7747 " to its primary (%s)" %
7748 (instance.name, instance.primary_node))
7750 if len(self.lu.tasklets) == 1:
7751 # It is safe to release locks only when we're the only tasklet
7753 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7754 keep=[instance.primary_node, self.target_node])
7757 secondary_nodes = instance.secondary_nodes
7758 if not secondary_nodes:
7759 raise errors.ConfigurationError("No secondary node but using"
7760 " %s disk template" %
7761 instance.disk_template)
7762 target_node = secondary_nodes[0]
7763 if self.lu.op.iallocator or (self.lu.op.target_node and
7764 self.lu.op.target_node != target_node):
7766 text = "failed over"
7769 raise errors.OpPrereqError("Instances with disk template %s cannot"
7770 " be %s to arbitrary nodes"
7771 " (neither an iallocator nor a target"
7772 " node can be passed)" %
7773 (instance.disk_template, text),
7775 nodeinfo = self.cfg.GetNodeInfo(target_node)
7776 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7777 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7778 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7779 ignore=self.ignore_ipolicy)
7781 i_be = cluster.FillBE(instance)
7783 # check memory requirements on the secondary node
7784 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7785 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7786 instance.name, i_be[constants.BE_MAXMEM],
7787 instance.hypervisor)
7789 self.lu.LogInfo("Not checking memory on the secondary node as"
7790 " instance will not be started")
7792 # check if failover must be forced instead of migration
7793 if (not self.cleanup and not self.failover and
7794 i_be[constants.BE_ALWAYS_FAILOVER]):
7796 self.lu.LogInfo("Instance configured to always failover; fallback"
7798 self.failover = True
7800 raise errors.OpPrereqError("This instance has been configured to"
7801 " always failover, please allow failover",
7804     # check bridge existence
7805 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7807 if not self.cleanup:
7808 _CheckNodeNotDrained(self.lu, target_node)
7809 if not self.failover:
7810 result = self.rpc.call_instance_migratable(instance.primary_node,
7812 if result.fail_msg and self.fallback:
7813 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7815 self.failover = True
7817 result.Raise("Can't migrate, please use failover",
7818 prereq=True, ecode=errors.ECODE_STATE)
7820 assert not (self.failover and self.cleanup)
7822 if not self.failover:
7823 if self.lu.op.live is not None and self.lu.op.mode is not None:
7824 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7825 " parameters are accepted",
7827 if self.lu.op.live is not None:
7829 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7831 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7832 # reset the 'live' parameter to None so that repeated
7833 # invocations of CheckPrereq do not raise an exception
7834 self.lu.op.live = None
7835 elif self.lu.op.mode is None:
7836 # read the default value from the hypervisor
7837 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7838 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7840 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7842 # Failover is never live
7845 def _RunAllocator(self):
7846 """Run the allocator based on input opcode.
7849 # FIXME: add a self.ignore_ipolicy option
7850 ial = IAllocator(self.cfg, self.rpc,
7851 mode=constants.IALLOCATOR_MODE_RELOC,
7852 name=self.instance_name,
7853 # TODO See why hail breaks with a single node below
7854 relocate_from=[self.instance.primary_node,
7855 self.instance.primary_node],
7858 ial.Run(self.lu.op.iallocator)
7861 raise errors.OpPrereqError("Can't compute nodes using"
7862 " iallocator '%s': %s" %
7863 (self.lu.op.iallocator, ial.info),
7865 if len(ial.result) != ial.required_nodes:
7866 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7867 " of nodes (%s), required %s" %
7868 (self.lu.op.iallocator, len(ial.result),
7869 ial.required_nodes), errors.ECODE_FAULT)
7870 self.target_node = ial.result[0]
7871 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7872 self.instance_name, self.lu.op.iallocator,
7873 utils.CommaJoin(ial.result))
7875 def _WaitUntilSync(self):
7876 """Poll with custom rpc for disk sync.
7878 This uses our own step-based rpc call.
7881 self.feedback_fn("* wait until resync is done")
7885 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7887 self.instance.disks)
7889 for node, nres in result.items():
7890 nres.Raise("Cannot resync disks on node %s" % node)
7891 node_done, node_percent = nres.payload
7892 all_done = all_done and node_done
7893 if node_percent is not None:
7894 min_percent = min(min_percent, node_percent)
7896 if min_percent < 100:
7897 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7900 def _EnsureSecondary(self, node):
7901 """Demote a node to secondary.
7904 self.feedback_fn("* switching node %s to secondary mode" % node)
7906 for dev in self.instance.disks:
7907 self.cfg.SetDiskID(dev, node)
7909 result = self.rpc.call_blockdev_close(node, self.instance.name,
7910 self.instance.disks)
7911 result.Raise("Cannot change disk to secondary on node %s" % node)
7913 def _GoStandalone(self):
7914 """Disconnect from the network.
7917 self.feedback_fn("* changing into standalone mode")
7918 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7919 self.instance.disks)
7920 for node, nres in result.items():
7921 nres.Raise("Cannot disconnect disks node %s" % node)
7923 def _GoReconnect(self, multimaster):
7924 """Reconnect to the network.
7930 msg = "single-master"
7931 self.feedback_fn("* changing disks into %s mode" % msg)
7932 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7933 self.instance.disks,
7934 self.instance.name, multimaster)
7935 for node, nres in result.items():
7936 nres.Raise("Cannot change disks config on node %s" % node)
7938 def _ExecCleanup(self):
7939 """Try to cleanup after a failed migration.
7941 The cleanup is done by:
7942 - check that the instance is running only on one node
7943 (and update the config if needed)
7944 - change disks on its secondary node to secondary
7945 - wait until disks are fully synchronized
7946 - disconnect from the network
7947 - change disks into single-master mode
7948 - wait again until disks are fully synchronized
7951 instance = self.instance
7952 target_node = self.target_node
7953 source_node = self.source_node
7955 # check running on only one node
7956 self.feedback_fn("* checking where the instance actually runs"
7957 " (if this hangs, the hypervisor might be in"
7959 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7960 for node, result in ins_l.items():
7961 result.Raise("Can't contact node %s" % node)
7963 runningon_source = instance.name in ins_l[source_node].payload
7964 runningon_target = instance.name in ins_l[target_node].payload
7966 if runningon_source and runningon_target:
7967 raise errors.OpExecError("Instance seems to be running on two nodes,"
7968 " or the hypervisor is confused; you will have"
7969 " to ensure manually that it runs only on one"
7970 " and restart this operation")
7972 if not (runningon_source or runningon_target):
7973 raise errors.OpExecError("Instance does not seem to be running at all;"
7974 " in this case it's safer to repair by"
7975 " running 'gnt-instance stop' to ensure disk"
7976 " shutdown, and then restarting it")
7978 if runningon_target:
7979 # the migration has actually succeeded, we need to update the config
7980 self.feedback_fn("* instance running on secondary node (%s),"
7981 " updating config" % target_node)
7982 instance.primary_node = target_node
7983 self.cfg.Update(instance, self.feedback_fn)
7984 demoted_node = source_node
7986 self.feedback_fn("* instance confirmed to be running on its"
7987 " primary node (%s)" % source_node)
7988 demoted_node = target_node
7990 if instance.disk_template in constants.DTS_INT_MIRROR:
7991 self._EnsureSecondary(demoted_node)
7993 self._WaitUntilSync()
7994 except errors.OpExecError:
7995       # we ignore errors here, since if the device is standalone, it
7996 # won't be able to sync
7998 self._GoStandalone()
7999 self._GoReconnect(False)
8000 self._WaitUntilSync()
8002 self.feedback_fn("* done")
8004 def _RevertDiskStatus(self):
8005 """Try to revert the disk status after a failed migration.
8008 target_node = self.target_node
8009 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8013 self._EnsureSecondary(target_node)
8014 self._GoStandalone()
8015 self._GoReconnect(False)
8016 self._WaitUntilSync()
8017 except errors.OpExecError, err:
8018 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8019 " please try to recover the instance manually;"
8020 " error '%s'" % str(err))
8022 def _AbortMigration(self):
8023 """Call the hypervisor code to abort a started migration.
8026 instance = self.instance
8027 target_node = self.target_node
8028 source_node = self.source_node
8029 migration_info = self.migration_info
8031 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8035 abort_msg = abort_result.fail_msg
8037 logging.error("Aborting migration failed on target node %s: %s",
8038 target_node, abort_msg)
8039       # Don't raise an exception here, as we still have to try to revert the
8040 # disk status, even if this step failed.
8042 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8043 instance, False, self.live)
8044 abort_msg = abort_result.fail_msg
8046 logging.error("Aborting migration failed on source node %s: %s",
8047 source_node, abort_msg)
8049 def _ExecMigration(self):
8050 """Migrate an instance.
8052 The migrate is done by:
8053 - change the disks into dual-master mode
8054 - wait until disks are fully synchronized again
8055 - migrate the instance
8056 - change disks on the new secondary node (the old primary) to secondary
8057 - wait until disks are fully synchronized
8058 - change disks into single-master mode
8061 instance = self.instance
8062 target_node = self.target_node
8063 source_node = self.source_node
8065 # Check for hypervisor version mismatch and warn the user.
8066 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8067 None, [self.instance.hypervisor])
8068 for ninfo in nodeinfo.values():
8069 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8071 (_, _, (src_info, )) = nodeinfo[source_node].payload
8072 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8074 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8075 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8076 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8077 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8078 if src_version != dst_version:
8079 self.feedback_fn("* warning: hypervisor version mismatch between"
8080 " source (%s) and target (%s) node" %
8081 (src_version, dst_version))
8083 self.feedback_fn("* checking disk consistency between source and target")
8084 for dev in instance.disks:
8085 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8086 raise errors.OpExecError("Disk %s is degraded or not fully"
8087 " synchronized on target node,"
8088 " aborting migration" % dev.iv_name)
8090 # First get the migration information from the remote node
8091 result = self.rpc.call_migration_info(source_node, instance)
8092 msg = result.fail_msg
8094 log_err = ("Failed fetching source migration information from %s: %s" %
8096 logging.error(log_err)
8097 raise errors.OpExecError(log_err)
8099 self.migration_info = migration_info = result.payload
8101 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8102 # Then switch the disks to master/master mode
8103 self._EnsureSecondary(target_node)
8104 self._GoStandalone()
8105 self._GoReconnect(True)
8106 self._WaitUntilSync()
8108 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8109 result = self.rpc.call_accept_instance(target_node,
8112 self.nodes_ip[target_node])
8114 msg = result.fail_msg
8116 logging.error("Instance pre-migration failed, trying to revert"
8117 " disk status: %s", msg)
8118 self.feedback_fn("Pre-migration failed, aborting")
8119 self._AbortMigration()
8120 self._RevertDiskStatus()
8121 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8122 (instance.name, msg))
8124 self.feedback_fn("* migrating instance to %s" % target_node)
8125 result = self.rpc.call_instance_migrate(source_node, instance,
8126 self.nodes_ip[target_node],
8128 msg = result.fail_msg
8130 logging.error("Instance migration failed, trying to revert"
8131 " disk status: %s", msg)
8132 self.feedback_fn("Migration failed, aborting")
8133 self._AbortMigration()
8134 self._RevertDiskStatus()
8135 raise errors.OpExecError("Could not migrate instance %s: %s" %
8136 (instance.name, msg))
8138 self.feedback_fn("* starting memory transfer")
8139 last_feedback = time.time()
8141 result = self.rpc.call_instance_get_migration_status(source_node,
8143 msg = result.fail_msg
8144 ms = result.payload # MigrationStatus instance
8145 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8146 logging.error("Instance migration failed, trying to revert"
8147 " disk status: %s", msg)
8148 self.feedback_fn("Migration failed, aborting")
8149 self._AbortMigration()
8150 self._RevertDiskStatus()
8151 raise errors.OpExecError("Could not migrate instance %s: %s" %
8152 (instance.name, msg))
8154 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8155 self.feedback_fn("* memory transfer complete")
8158 if (utils.TimeoutExpired(last_feedback,
8159 self._MIGRATION_FEEDBACK_INTERVAL) and
8160 ms.transferred_ram is not None):
8161 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8162 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8163 last_feedback = time.time()
8165 time.sleep(self._MIGRATION_POLL_INTERVAL)
8167 result = self.rpc.call_instance_finalize_migration_src(source_node,
8171 msg = result.fail_msg
8173 logging.error("Instance migration succeeded, but finalization failed"
8174 " on the source node: %s", msg)
8175 raise errors.OpExecError("Could not finalize instance migration: %s" %
8178 instance.primary_node = target_node
8180 # distribute new instance config to the other nodes
8181 self.cfg.Update(instance, self.feedback_fn)
8183 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8187 msg = result.fail_msg
8189 logging.error("Instance migration succeeded, but finalization failed"
8190 " on the target node: %s", msg)
8191 raise errors.OpExecError("Could not finalize instance migration: %s" %
8194 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8195 self._EnsureSecondary(source_node)
8196 self._WaitUntilSync()
8197 self._GoStandalone()
8198 self._GoReconnect(False)
8199 self._WaitUntilSync()
8201 self.feedback_fn("* done")
8203 def _ExecFailover(self):
8204 """Failover an instance.
8206 The failover is done by shutting it down on its present node and
8207 starting it on the secondary.
8210 instance = self.instance
8211 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8213 source_node = instance.primary_node
8214 target_node = self.target_node
8216 if instance.admin_state == constants.ADMINST_UP:
8217 self.feedback_fn("* checking disk consistency between source and target")
8218 for dev in instance.disks:
8219 # for drbd, these are drbd over lvm
8220 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8221 if primary_node.offline:
8222 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8224 (primary_node.name, dev.iv_name, target_node))
8225 elif not self.ignore_consistency:
8226 raise errors.OpExecError("Disk %s is degraded on target node,"
8227 " aborting failover" % dev.iv_name)
8229 self.feedback_fn("* not checking disk consistency as instance is not"
8232 self.feedback_fn("* shutting down instance on source node")
8233 logging.info("Shutting down instance %s on node %s",
8234 instance.name, source_node)
8236 result = self.rpc.call_instance_shutdown(source_node, instance,
8237 self.shutdown_timeout)
8238 msg = result.fail_msg
8240 if self.ignore_consistency or primary_node.offline:
8241 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8242 " proceeding anyway; please make sure node"
8243 " %s is down; error details: %s",
8244 instance.name, source_node, source_node, msg)
8246 raise errors.OpExecError("Could not shutdown instance %s on"
8248 (instance.name, source_node, msg))
8250 self.feedback_fn("* deactivating the instance's disks on source node")
8251 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8252 raise errors.OpExecError("Can't shut down the instance's disks")
8254 instance.primary_node = target_node
8255 # distribute new instance config to the other nodes
8256 self.cfg.Update(instance, self.feedback_fn)
8258 # Only start the instance if it's marked as up
8259 if instance.admin_state == constants.ADMINST_UP:
8260 self.feedback_fn("* activating the instance's disks on target node %s" %
8262 logging.info("Starting instance %s on node %s",
8263 instance.name, target_node)
8265 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8266 ignore_secondaries=True)
8268 _ShutdownInstanceDisks(self.lu, instance)
8269 raise errors.OpExecError("Can't activate the instance's disks")
8271 self.feedback_fn("* starting the instance on the target node %s" %
8273 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8275 msg = result.fail_msg
8277 _ShutdownInstanceDisks(self.lu, instance)
8278 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8279 (instance.name, target_node, msg))
8281 def Exec(self, feedback_fn):
8282 """Perform the migration.
8285 self.feedback_fn = feedback_fn
8286 self.source_node = self.instance.primary_node
8288 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8289 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8290 self.target_node = self.instance.secondary_nodes[0]
8291 # Otherwise self.target_node has been populated either
8292 # directly, or through an iallocator.
8294 self.all_nodes = [self.source_node, self.target_node]
8295 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8296 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8299 feedback_fn("Failover instance %s" % self.instance.name)
8300 self._ExecFailover()
8302 feedback_fn("Migrating instance %s" % self.instance.name)
8305 return self._ExecCleanup()
8307 return self._ExecMigration()
8310 def _CreateBlockDev(lu, node, instance, device, force_create,
8312 """Create a tree of block devices on a given node.
8314 If this device type has to be created on secondaries, create it and
8317 If not, just recurse to children keeping the same 'force' value.
8319 @param lu: the lu on whose behalf we execute
8320 @param node: the node on which to create the device
8321 @type instance: L{objects.Instance}
8322 @param instance: the instance which owns the device
8323 @type device: L{objects.Disk}
8324 @param device: the device to create
8325 @type force_create: boolean
8326 @param force_create: whether to force creation of this device; this
8327       will be changed to True whenever we find a device which has the
8328       CreateOnSecondary() attribute
8329 @param info: the extra 'metadata' we should attach to the device
8330       (this will be represented as an LVM tag)
8331 @type force_open: boolean
8332   @param force_open: this parameter will be passed to the
8333 L{backend.BlockdevCreate} function where it specifies
8334 whether we run on primary or not, and it affects both
8335       the child assembly and the device's own Open() execution
8338 if device.CreateOnSecondary():
8342 for child in device.children:
8343 _CreateBlockDev(lu, node, instance, child, force_create,
8346 if not force_create:
8349 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
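# Rough call-trace sketch (illustrative only; which device types answer True
# to CreateOnSecondary() is defined in objects.Disk, not here):
#   _CreateBlockDev(lu, node, inst, drbd_dev, force_create=False, ...)
#     -> drbd_dev.CreateOnSecondary() is True, so force_create becomes True
#     -> recurse into the two LV children with force_create=True
#     -> finally _CreateSingleBlockDev() for the DRBD device itself
# A device for which neither CreateOnSecondary() nor the caller's
# force_create is true is skipped entirely on this node.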
8352 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8353 """Create a single block device on a given node.
8355 This will not recurse over children of the device, so they must be
8358 @param lu: the lu on whose behalf we execute
8359 @param node: the node on which to create the device
8360 @type instance: L{objects.Instance}
8361 @param instance: the instance which owns the device
8362 @type device: L{objects.Disk}
8363 @param device: the device to create
8364 @param info: the extra 'metadata' we should attach to the device
8365       (this will be represented as an LVM tag)
8366 @type force_open: boolean
8367   @param force_open: this parameter will be passed to the
8368 L{backend.BlockdevCreate} function where it specifies
8369 whether we run on primary or not, and it affects both
8370       the child assembly and the device's own Open() execution
8373 lu.cfg.SetDiskID(device, node)
8374 result = lu.rpc.call_blockdev_create(node, device, device.size,
8375 instance.name, force_open, info)
8376 result.Raise("Can't create block device %s on"
8377 " node %s for instance %s" % (device, node, instance.name))
8378 if device.physical_id is None:
8379 device.physical_id = result.payload
8382 def _GenerateUniqueNames(lu, exts):
8383 """Generate a suitable LV name.
8385   This will generate logical volume names for the given instance.
8390 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8391 results.append("%s%s" % (new_id, val))
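# Example of the generated names (a sketch; the unique id comes from the
# cluster config and will differ in practice): for exts == [".disk0"] this
# yields something like ["d2f7a6c8-....disk0"], and the DRBD code further
# below appends "_data"/"_meta" to each such prefix.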
8395 def _ComputeLDParams(disk_template, disk_params):
8396 """Computes Logical Disk parameters from Disk Template parameters.
8398 @type disk_template: string
8399 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8400 @type disk_params: dict
8401   @param disk_params: disk template parameters; dict(template_name -> parameters)
8403 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8404 contains the LD parameters of the node. The tree is flattened in-order.
8407 if disk_template not in constants.DISK_TEMPLATES:
8408 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8411 dt_params = disk_params[disk_template]
8412 if disk_template == constants.DT_DRBD8:
8414 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8415 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8416 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8417 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8418 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8419 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8420 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8421 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8422 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8423 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8424 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8425 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8429 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8432 result.append(drbd_params)
8436 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8439 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8441 result.append(data_params)
8445 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8448 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8450 result.append(meta_params)
8452 elif (disk_template == constants.DT_FILE or
8453 disk_template == constants.DT_SHARED_FILE):
8454 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8456 elif disk_template == constants.DT_PLAIN:
8458 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8461 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8463 result.append(params)
8465 elif disk_template == constants.DT_BLOCK:
8466 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
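# Shape of the result (sketch): the list is flattened in-order over the disk
# hierarchy, so for DT_DRBD8 it is
#   [drbd_params, data_lv_params, meta_lv_params]
# while the other templates handled above contribute a single dict
# (e.g. [lv_params] for DT_PLAIN).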
8471 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8472 iv_name, p_minor, s_minor, drbd_params, data_params,
8474 """Generate a drbd8 device complete with its children.
8477 assert len(vgnames) == len(names) == 2
8478 port = lu.cfg.AllocatePort()
8479 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8481 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8482 logical_id=(vgnames[0], names[0]),
8484 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8485 logical_id=(vgnames[1], names[1]),
8487 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8488 logical_id=(primary, secondary, port,
8491 children=[dev_data, dev_meta],
8492 iv_name=iv_name, params=drbd_params)
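# Resulting device tree (sketch; sizes are in MiB):
#   LD_DRBD8, logical_id=(primary, secondary, port, p_minor, s_minor, secret)
#     +-- LD_LV names[0], size=size            (data volume, on vgnames[0])
#     +-- LD_LV names[1], size=DRBD_META_SIZE  (metadata volume, on vgnames[1])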
8496 def _GenerateDiskTemplate(lu, template_name,
8497 instance_name, primary_node,
8498 secondary_nodes, disk_info,
8499 file_storage_dir, file_driver,
8500 base_index, feedback_fn, disk_params):
8501 """Generate the entire disk layout for a given template type.
8504 #TODO: compute space requirements
8506 vgname = lu.cfg.GetVGName()
8507 disk_count = len(disk_info)
8509 ld_params = _ComputeLDParams(template_name, disk_params)
8510 if template_name == constants.DT_DISKLESS:
8512 elif template_name == constants.DT_PLAIN:
8513 if len(secondary_nodes) != 0:
8514 raise errors.ProgrammerError("Wrong template configuration")
8516 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8517 for i in range(disk_count)])
8518 for idx, disk in enumerate(disk_info):
8519 disk_index = idx + base_index
8520 vg = disk.get(constants.IDISK_VG, vgname)
8521 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8522 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8523 size=disk[constants.IDISK_SIZE],
8524 logical_id=(vg, names[idx]),
8525 iv_name="disk/%d" % disk_index,
8526 mode=disk[constants.IDISK_MODE],
8527 params=ld_params[0])
8528 disks.append(disk_dev)
8529 elif template_name == constants.DT_DRBD8:
8530 drbd_params, data_params, meta_params = ld_params
8531 if len(secondary_nodes) != 1:
8532 raise errors.ProgrammerError("Wrong template configuration")
8533 remote_node = secondary_nodes[0]
8534 minors = lu.cfg.AllocateDRBDMinor(
8535 [primary_node, remote_node] * len(disk_info), instance_name)
8538 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8539 for i in range(disk_count)]):
8540 names.append(lv_prefix + "_data")
8541 names.append(lv_prefix + "_meta")
8542 for idx, disk in enumerate(disk_info):
8543 disk_index = idx + base_index
8544 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8545 data_vg = disk.get(constants.IDISK_VG, vgname)
8546 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8547 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8548 disk[constants.IDISK_SIZE],
8550 names[idx * 2:idx * 2 + 2],
8551 "disk/%d" % disk_index,
8552 minors[idx * 2], minors[idx * 2 + 1],
8553 drbd_params, data_params, meta_params)
8554 disk_dev.mode = disk[constants.IDISK_MODE]
8555 disks.append(disk_dev)
8556 elif template_name == constants.DT_FILE:
8557 if len(secondary_nodes) != 0:
8558 raise errors.ProgrammerError("Wrong template configuration")
8560 opcodes.RequireFileStorage()
8562 for idx, disk in enumerate(disk_info):
8563 disk_index = idx + base_index
8564 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8565 size=disk[constants.IDISK_SIZE],
8566 iv_name="disk/%d" % disk_index,
8567 logical_id=(file_driver,
8568 "%s/disk%d" % (file_storage_dir,
8570 mode=disk[constants.IDISK_MODE],
8571 params=ld_params[0])
8572 disks.append(disk_dev)
8573 elif template_name == constants.DT_SHARED_FILE:
8574 if len(secondary_nodes) != 0:
8575 raise errors.ProgrammerError("Wrong template configuration")
8577 opcodes.RequireSharedFileStorage()
8579 for idx, disk in enumerate(disk_info):
8580 disk_index = idx + base_index
8581 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8582 size=disk[constants.IDISK_SIZE],
8583 iv_name="disk/%d" % disk_index,
8584 logical_id=(file_driver,
8585 "%s/disk%d" % (file_storage_dir,
8587 mode=disk[constants.IDISK_MODE],
8588 params=ld_params[0])
8589 disks.append(disk_dev)
8590 elif template_name == constants.DT_BLOCK:
8591 if len(secondary_nodes) != 0:
8592 raise errors.ProgrammerError("Wrong template configuration")
8594 for idx, disk in enumerate(disk_info):
8595 disk_index = idx + base_index
8596 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8597 size=disk[constants.IDISK_SIZE],
8598 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8599 disk[constants.IDISK_ADOPT]),
8600 iv_name="disk/%d" % disk_index,
8601 mode=disk[constants.IDISK_MODE],
8602 params=ld_params[0])
8603 disks.append(disk_dev)
8606 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8610 def _GetInstanceInfoText(instance):
8611 """Compute that text that should be added to the disk's metadata.
8614 return "originstname+%s" % instance.name
8617 def _CalcEta(time_taken, written, total_size):
8618 """Calculates the ETA based on size written and total size.
8620 @param time_taken: The time taken so far
8621 @param written: amount written so far
8622 @param total_size: The total size of data to be written
8623 @return: The remaining time in seconds
8626 avg_time = time_taken / float(written)
8627 return (total_size - written) * avg_time
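# Worked example (illustrative): if 2000 MiB out of 8000 MiB were written in
# 100 seconds, the average cost is 100 / 2000.0 == 0.05 s/MiB, so
#   _CalcEta(100, 2000, 8000) == (8000 - 2000) * 0.05 == 300.0
# i.e. roughly five more minutes of wiping are expected.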
8630 def _WipeDisks(lu, instance):
8631 """Wipes instance disks.
8633 @type lu: L{LogicalUnit}
8634 @param lu: the logical unit on whose behalf we execute
8635 @type instance: L{objects.Instance}
8636   @param instance: the instance whose disks we should wipe
8637 @return: the success of the wipe
8640 node = instance.primary_node
8642 for device in instance.disks:
8643 lu.cfg.SetDiskID(device, node)
8645 logging.info("Pause sync of instance %s disks", instance.name)
8646 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8648 for idx, success in enumerate(result.payload):
8650 logging.warn("pause-sync of instance %s for disks %d failed",
8654 for idx, device in enumerate(instance.disks):
8655     # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8656     # but at most MAX_WIPE_CHUNK
8657 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8658 constants.MIN_WIPE_CHUNK_PERCENT)
8659 # we _must_ make this an int, otherwise rounding errors will
8661 wipe_chunk_size = int(wipe_chunk_size)
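# Worked example of the chunk-size computation above (a sketch, assuming the
# usual defaults of MAX_WIPE_CHUNK = 1024 MiB and MIN_WIPE_CHUNK_PERCENT = 10):
#   a 5000 MiB disk  -> min(1024, 5000 / 100.0 * 10)   == 500 MiB chunks
#   a 200 GiB disk   -> min(1024, 204800 / 100.0 * 10) == 1024 MiB chunks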
8663 lu.LogInfo("* Wiping disk %d", idx)
8664 logging.info("Wiping disk %d for instance %s, node %s using"
8665 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8670 start_time = time.time()
8672 while offset < size:
8673 wipe_size = min(wipe_chunk_size, size - offset)
8674 logging.debug("Wiping disk %d, offset %s, chunk %s",
8675 idx, offset, wipe_size)
8676 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8677 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8678 (idx, offset, wipe_size))
8681 if now - last_output >= 60:
8682 eta = _CalcEta(now - start_time, offset, size)
8683 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8684 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8687 logging.info("Resume sync of instance %s disks", instance.name)
8689 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8691 for idx, success in enumerate(result.payload):
8693 lu.LogWarning("Resume sync of disk %d failed, please have a"
8694 " look at the status and troubleshoot the issue", idx)
8695 logging.warn("resume-sync of instance %s for disks %d failed",
8699 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8700 """Create all disks for an instance.
8702 This abstracts away some work from AddInstance.
8704 @type lu: L{LogicalUnit}
8705 @param lu: the logical unit on whose behalf we execute
8706 @type instance: L{objects.Instance}
8707 @param instance: the instance whose disks we should create
8709 @param to_skip: list of indices to skip
8710 @type target_node: string
8711 @param target_node: if passed, overrides the target node for creation
8713 @return: the success of the creation
8716 info = _GetInstanceInfoText(instance)
8717 if target_node is None:
8718 pnode = instance.primary_node
8719 all_nodes = instance.all_nodes
8724 if instance.disk_template in constants.DTS_FILEBASED:
8725 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8726 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8728 result.Raise("Failed to create directory '%s' on"
8729 " node %s" % (file_storage_dir, pnode))
8731 # Note: this needs to be kept in sync with adding of disks in
8732 # LUInstanceSetParams
8733 for idx, device in enumerate(instance.disks):
8734 if to_skip and idx in to_skip:
8736 logging.info("Creating volume %s for instance %s",
8737 device.iv_name, instance.name)
8739 for node in all_nodes:
8740 f_create = node == pnode
8741 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8744 def _RemoveDisks(lu, instance, target_node=None):
8745 """Remove all disks for an instance.
8747 This abstracts away some work from `AddInstance()` and
8748 `RemoveInstance()`. Note that in case some of the devices couldn't
8749 be removed, the removal will continue with the other ones (compare
8750 with `_CreateDisks()`).
8752 @type lu: L{LogicalUnit}
8753 @param lu: the logical unit on whose behalf we execute
8754 @type instance: L{objects.Instance}
8755 @param instance: the instance whose disks we should remove
8756 @type target_node: string
8757 @param target_node: used to override the node on which to remove the disks
8759 @return: the success of the removal
8762 logging.info("Removing block devices for instance %s", instance.name)
8765 for device in instance.disks:
8767 edata = [(target_node, device)]
8769 edata = device.ComputeNodeTree(instance.primary_node)
8770 for node, disk in edata:
8771 lu.cfg.SetDiskID(disk, node)
8772 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8774 lu.LogWarning("Could not remove block device %s on node %s,"
8775 " continuing anyway: %s", device.iv_name, node, msg)
8778 # if this is a DRBD disk, return its port to the pool
8779 if device.dev_type in constants.LDS_DRBD:
8780 tcp_port = device.logical_id[2]
8781 lu.cfg.AddTcpUdpPort(tcp_port)
8783 if instance.disk_template == constants.DT_FILE:
8784 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8788 tgt = instance.primary_node
8789 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8791 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8792 file_storage_dir, instance.primary_node, result.fail_msg)
8798 def _ComputeDiskSizePerVG(disk_template, disks):
8799 """Compute disk size requirements in the volume group
8802 def _compute(disks, payload):
8803 """Universal algorithm.
8808 vgs[disk[constants.IDISK_VG]] = \
8809         vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8813 # Required free disk space as a function of disk and swap space
8815 constants.DT_DISKLESS: {},
8816 constants.DT_PLAIN: _compute(disks, 0),
8817 # 128 MB are added for drbd metadata for each disk
8818 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8819 constants.DT_FILE: {},
8820 constants.DT_SHARED_FILE: {},
8823 if disk_template not in req_size_dict:
8824 raise errors.ProgrammerError("Disk template '%s' size requirement"
8825 " is unknown" % disk_template)
8827 return req_size_dict[disk_template]
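# Worked example (illustrative; DRBD_META_SIZE is the 128 MiB mentioned in
# the comment above): for
#   disks = [{IDISK_VG: "xenvg", IDISK_SIZE: 1024},
#            {IDISK_VG: "xenvg", IDISK_SIZE: 2048}]
# _ComputeDiskSizePerVG(DT_PLAIN, disks) == {"xenvg": 3072}
# _ComputeDiskSizePerVG(DT_DRBD8, disks) == {"xenvg": 3328}
# while the file-based and diskless templates need no VG space at all.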
8830 def _ComputeDiskSize(disk_template, disks):
8831 """Compute disk size requirements in the volume group
8834 # Required free disk space as a function of disk and swap space
8836 constants.DT_DISKLESS: None,
8837 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8838 # 128 MB are added for drbd metadata for each disk
8840 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8841 constants.DT_FILE: None,
8842 constants.DT_SHARED_FILE: 0,
8843 constants.DT_BLOCK: 0,
8846 if disk_template not in req_size_dict:
8847 raise errors.ProgrammerError("Disk template '%s' size requirement"
8848 " is unknown" % disk_template)
8850 return req_size_dict[disk_template]
8853 def _FilterVmNodes(lu, nodenames):
8854 """Filters out non-vm_capable nodes from a list.
8856 @type lu: L{LogicalUnit}
8857 @param lu: the logical unit for which we check
8858 @type nodenames: list
8859 @param nodenames: the list of nodes on which we should check
8861 @return: the list of vm-capable nodes
8864 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8865 return [name for name in nodenames if name not in vm_nodes]
8868 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8869 """Hypervisor parameter validation.
8871   This function abstracts the hypervisor parameter validation to be
8872 used in both instance create and instance modify.
8874 @type lu: L{LogicalUnit}
8875 @param lu: the logical unit for which we check
8876 @type nodenames: list
8877 @param nodenames: the list of nodes on which we should check
8878 @type hvname: string
8879 @param hvname: the name of the hypervisor we should use
8880 @type hvparams: dict
8881 @param hvparams: the parameters which we need to check
8882 @raise errors.OpPrereqError: if the parameters are not valid
8885 nodenames = _FilterVmNodes(lu, nodenames)
8887 cluster = lu.cfg.GetClusterInfo()
8888 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8890 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8891 for node in nodenames:
8895 info.Raise("Hypervisor parameter validation failed on node %s" % node)
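# Typical call sketch (assumed, mirroring how instance creation or
# modification would use this helper): validate the hypervisor parameters
# (the helper itself fills them against the cluster defaults) on every node
# that may host the instance, before committing any configuration change:
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)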
8898 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8899 """OS parameters validation.
8901 @type lu: L{LogicalUnit}
8902 @param lu: the logical unit for which we check
8903 @type required: boolean
8904 @param required: whether the validation should fail if the OS is not
8906 @type nodenames: list
8907 @param nodenames: the list of nodes on which we should check
8908 @type osname: string
8909   @param osname: the name of the OS we should use
8910 @type osparams: dict
8911 @param osparams: the parameters which we need to check
8912 @raise errors.OpPrereqError: if the parameters are not valid
8915 nodenames = _FilterVmNodes(lu, nodenames)
8916 result = lu.rpc.call_os_validate(nodenames, required, osname,
8917 [constants.OS_VALIDATE_PARAMETERS],
8919 for node, nres in result.items():
8920 # we don't check for offline cases since this should be run only
8921 # against the master node and/or an instance's nodes
8922 nres.Raise("OS Parameters validation failed on node %s" % node)
8923 if not nres.payload:
8924 lu.LogInfo("OS %s not found on node %s, validation skipped",
8928 class LUInstanceCreate(LogicalUnit):
8929 """Create an instance.
8932 HPATH = "instance-add"
8933 HTYPE = constants.HTYPE_INSTANCE
8936 def CheckArguments(self):
8940 # do not require name_check to ease forward/backward compatibility
8942 if self.op.no_install and self.op.start:
8943 self.LogInfo("No-installation mode selected, disabling startup")
8944 self.op.start = False
8945 # validate/normalize the instance name
8946 self.op.instance_name = \
8947 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8949 if self.op.ip_check and not self.op.name_check:
8950 # TODO: make the ip check more flexible and not depend on the name check
8951 raise errors.OpPrereqError("Cannot do IP address check without a name"
8952 " check", errors.ECODE_INVAL)
8954 # check nics' parameter names
8955 for nic in self.op.nics:
8956 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8958     # check disks' parameter names and consistent adopt/no-adopt strategy
8959 has_adopt = has_no_adopt = False
8960 for disk in self.op.disks:
8961 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8962 if constants.IDISK_ADOPT in disk:
8966 if has_adopt and has_no_adopt:
8967 raise errors.OpPrereqError("Either all disks are adopted or none is",
8970 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8971 raise errors.OpPrereqError("Disk adoption is not supported for the"
8972 " '%s' disk template" %
8973 self.op.disk_template,
8975 if self.op.iallocator is not None:
8976 raise errors.OpPrereqError("Disk adoption not allowed with an"
8977 " iallocator script", errors.ECODE_INVAL)
8978 if self.op.mode == constants.INSTANCE_IMPORT:
8979 raise errors.OpPrereqError("Disk adoption not allowed for"
8980 " instance import", errors.ECODE_INVAL)
8982 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8983 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8984 " but no 'adopt' parameter given" %
8985 self.op.disk_template,
8988 self.adopt_disks = has_adopt
8990 # instance name verification
8991 if self.op.name_check:
8992 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8993 self.op.instance_name = self.hostname1.name
8994 # used in CheckPrereq for ip ping check
8995 self.check_ip = self.hostname1.ip
8997 self.check_ip = None
8999 # file storage checks
9000 if (self.op.file_driver and
9001 not self.op.file_driver in constants.FILE_DRIVER):
9002 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9003 self.op.file_driver, errors.ECODE_INVAL)
9005 if self.op.disk_template == constants.DT_FILE:
9006 opcodes.RequireFileStorage()
9007 elif self.op.disk_template == constants.DT_SHARED_FILE:
9008 opcodes.RequireSharedFileStorage()
9010 ### Node/iallocator related checks
9011 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9013 if self.op.pnode is not None:
9014 if self.op.disk_template in constants.DTS_INT_MIRROR:
9015 if self.op.snode is None:
9016 raise errors.OpPrereqError("The networked disk templates need"
9017 " a mirror node", errors.ECODE_INVAL)
9019 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9021 self.op.snode = None
9023 self._cds = _GetClusterDomainSecret()
9025 if self.op.mode == constants.INSTANCE_IMPORT:
9026 # On import force_variant must be True, because if we forced it at
9027 # initial install, our only chance when importing it back is that it
9029 self.op.force_variant = True
9031 if self.op.no_install:
9032 self.LogInfo("No-installation mode has no effect during import")
9034 elif self.op.mode == constants.INSTANCE_CREATE:
9035 if self.op.os_type is None:
9036 raise errors.OpPrereqError("No guest OS specified",
9038 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9039 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9040 " installation" % self.op.os_type,
9042 if self.op.disk_template is None:
9043 raise errors.OpPrereqError("No disk template specified",
9046 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9047 # Check handshake to ensure both clusters have the same domain secret
9048 src_handshake = self.op.source_handshake
9049 if not src_handshake:
9050 raise errors.OpPrereqError("Missing source handshake",
9053 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9056 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9059 # Load and check source CA
9060 self.source_x509_ca_pem = self.op.source_x509_ca
9061 if not self.source_x509_ca_pem:
9062 raise errors.OpPrereqError("Missing source X509 CA",
9066 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9068 except OpenSSL.crypto.Error, err:
9069 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9070 (err, ), errors.ECODE_INVAL)
9072 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9073 if errcode is not None:
9074 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9077 self.source_x509_ca = cert
9079 src_instance_name = self.op.source_instance_name
9080 if not src_instance_name:
9081 raise errors.OpPrereqError("Missing source instance name",
9084 self.source_instance_name = \
9085 netutils.GetHostname(name=src_instance_name).name
9088 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9089 self.op.mode, errors.ECODE_INVAL)
9091 def ExpandNames(self):
9092 """ExpandNames for CreateInstance.
9094 Figure out the right locks for instance creation.
9097 self.needed_locks = {}
9099 instance_name = self.op.instance_name
9100 # this is just a preventive check, but someone might still add this
9101 # instance in the meantime, and creation will fail at lock-add time
9102 if instance_name in self.cfg.GetInstanceList():
9103 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9104 instance_name, errors.ECODE_EXISTS)
9106 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9108 if self.op.iallocator:
9109 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9110 # specifying a group on instance creation and then selecting nodes from
9112 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9113 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9115 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9116 nodelist = [self.op.pnode]
9117 if self.op.snode is not None:
9118 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9119 nodelist.append(self.op.snode)
9120 self.needed_locks[locking.LEVEL_NODE] = nodelist
9121 # Lock resources of instance's primary and secondary nodes (copy to
9122       # prevent accidental modification)
9123 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9125 # in case of import lock the source node too
9126 if self.op.mode == constants.INSTANCE_IMPORT:
9127 src_node = self.op.src_node
9128 src_path = self.op.src_path
9130 if src_path is None:
9131 self.op.src_path = src_path = self.op.instance_name
9133 if src_node is None:
9134 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9135 self.op.src_node = None
9136 if os.path.isabs(src_path):
9137 raise errors.OpPrereqError("Importing an instance from a path"
9138 " requires a source node option",
9141 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9142 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9143 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9144 if not os.path.isabs(src_path):
9145 self.op.src_path = src_path = \
9146 utils.PathJoin(constants.EXPORT_DIR, src_path)
9148 def _RunAllocator(self):
9149 """Run the allocator based on input opcode.
9152 nics = [n.ToDict() for n in self.nics]
9153 ial = IAllocator(self.cfg, self.rpc,
9154 mode=constants.IALLOCATOR_MODE_ALLOC,
9155 name=self.op.instance_name,
9156 disk_template=self.op.disk_template,
9159 vcpus=self.be_full[constants.BE_VCPUS],
9160 memory=self.be_full[constants.BE_MAXMEM],
9163 hypervisor=self.op.hypervisor,
9166 ial.Run(self.op.iallocator)
9169 raise errors.OpPrereqError("Can't compute nodes using"
9170 " iallocator '%s': %s" %
9171 (self.op.iallocator, ial.info),
9173 if len(ial.result) != ial.required_nodes:
9174 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9175 " of nodes (%s), required %s" %
9176 (self.op.iallocator, len(ial.result),
9177 ial.required_nodes), errors.ECODE_FAULT)
9178 self.op.pnode = ial.result[0]
9179 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9180 self.op.instance_name, self.op.iallocator,
9181 utils.CommaJoin(ial.result))
9182 if ial.required_nodes == 2:
9183 self.op.snode = ial.result[1]
9185 def BuildHooksEnv(self):
9188 This runs on master, primary and secondary nodes of the instance.
9192 "ADD_MODE": self.op.mode,
9194 if self.op.mode == constants.INSTANCE_IMPORT:
9195 env["SRC_NODE"] = self.op.src_node
9196 env["SRC_PATH"] = self.op.src_path
9197 env["SRC_IMAGES"] = self.src_images
9199 env.update(_BuildInstanceHookEnv(
9200 name=self.op.instance_name,
9201 primary_node=self.op.pnode,
9202 secondary_nodes=self.secondaries,
9203 status=self.op.start,
9204 os_type=self.op.os_type,
9205 minmem=self.be_full[constants.BE_MINMEM],
9206 maxmem=self.be_full[constants.BE_MAXMEM],
9207 vcpus=self.be_full[constants.BE_VCPUS],
9208 nics=_NICListToTuple(self, self.nics),
9209 disk_template=self.op.disk_template,
9210 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9211 for d in self.disks],
9214 hypervisor_name=self.op.hypervisor,
9220 def BuildHooksNodes(self):
9221 """Build hooks nodes.
9224 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9227 def _ReadExportInfo(self):
9228 """Reads the export information from disk.
9230 It will override the opcode source node and path with the actual
9231 information, if these two were not specified before.
9233 @return: the export information
9236 assert self.op.mode == constants.INSTANCE_IMPORT
9238 src_node = self.op.src_node
9239 src_path = self.op.src_path
9241 if src_node is None:
9242 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9243 exp_list = self.rpc.call_export_list(locked_nodes)
9245 for node in exp_list:
9246 if exp_list[node].fail_msg:
9248 if src_path in exp_list[node].payload:
9250 self.op.src_node = src_node = node
9251 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9255 raise errors.OpPrereqError("No export found for relative path %s" %
9256 src_path, errors.ECODE_INVAL)
9258 _CheckNodeOnline(self, src_node)
9259 result = self.rpc.call_export_info(src_node, src_path)
9260 result.Raise("No export or invalid export found in dir %s" % src_path)
9262 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9263 if not export_info.has_section(constants.INISECT_EXP):
9264 raise errors.ProgrammerError("Corrupted export config",
9265 errors.ECODE_ENVIRON)
9267 ei_version = export_info.get(constants.INISECT_EXP, "version")
9268 if (int(ei_version) != constants.EXPORT_VERSION):
9269 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9270 (ei_version, constants.EXPORT_VERSION),
9271 errors.ECODE_ENVIRON)
9274 def _ReadExportParams(self, einfo):
9275 """Use export parameters as defaults.
9277 In case the opcode doesn't specify (as in override) some instance
9278 parameters, then try to use them from the export information, if
9282 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9284 if self.op.disk_template is None:
9285 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9286 self.op.disk_template = einfo.get(constants.INISECT_INS,
9288 if self.op.disk_template not in constants.DISK_TEMPLATES:
9289 raise errors.OpPrereqError("Disk template specified in configuration"
9290 " file is not one of the allowed values:"
9291 " %s" % " ".join(constants.DISK_TEMPLATES))
9293 raise errors.OpPrereqError("No disk template specified and the export"
9294 " is missing the disk_template information",
9297 if not self.op.disks:
9299 # TODO: import the disk iv_name too
9300 for idx in range(constants.MAX_DISKS):
9301 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9302 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9303 disks.append({constants.IDISK_SIZE: disk_sz})
9304 self.op.disks = disks
9305 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9306 raise errors.OpPrereqError("No disk info specified and the export"
9307 " is missing the disk information",
9310 if not self.op.nics:
9312 for idx in range(constants.MAX_NICS):
9313 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9315 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9316 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9323 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9324 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9326 if (self.op.hypervisor is None and
9327 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9328 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9330 if einfo.has_section(constants.INISECT_HYP):
9331 # use the export parameters but do not override the ones
9332 # specified by the user
9333 for name, value in einfo.items(constants.INISECT_HYP):
9334 if name not in self.op.hvparams:
9335 self.op.hvparams[name] = value
9337 if einfo.has_section(constants.INISECT_BEP):
9338 # use the parameters, without overriding
9339 for name, value in einfo.items(constants.INISECT_BEP):
9340 if name not in self.op.beparams:
9341 self.op.beparams[name] = value
9342 # Compatibility for the old "memory" be param
9343 if name == constants.BE_MEMORY:
9344 if constants.BE_MAXMEM not in self.op.beparams:
9345 self.op.beparams[constants.BE_MAXMEM] = value
9346 if constants.BE_MINMEM not in self.op.beparams:
9347 self.op.beparams[constants.BE_MINMEM] = value
9349 # try to read the parameters old style, from the main section
9350 for name in constants.BES_PARAMETERS:
9351 if (name not in self.op.beparams and
9352 einfo.has_option(constants.INISECT_INS, name)):
9353 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9355 if einfo.has_section(constants.INISECT_OSP):
9356 # use the parameters, without overriding
9357 for name, value in einfo.items(constants.INISECT_OSP):
9358 if name not in self.op.osparams:
9359 self.op.osparams[name] = value
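# In short, parameter precedence for imports ends up being: values given in
# the opcode win, then values recorded in the export, and anything still
# unset is filled from the cluster defaults later in CheckPrereq via the
# SimpleFill* helpers.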
9361 def _RevertToDefaults(self, cluster):
9362 """Revert the instance parameters to the default values.
9366 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9367 for name in self.op.hvparams.keys():
9368 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9369 del self.op.hvparams[name]
9371 be_defs = cluster.SimpleFillBE({})
9372 for name in self.op.beparams.keys():
9373 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9374 del self.op.beparams[name]
9376 nic_defs = cluster.SimpleFillNIC({})
9377 for nic in self.op.nics:
9378 for name in constants.NICS_PARAMETERS:
9379 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9382 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9383 for name in self.op.osparams.keys():
9384 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9385 del self.op.osparams[name]
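# Small worked example of the identify_defaults behaviour implemented above
# (hypothetical values): if the cluster default is BE_VCPUS == 1 and the
# opcode also asked for vcpus=1, the parameter is dropped from op.beparams,
# so the new instance keeps tracking the cluster default instead of pinning
# the value; a request for vcpus=2 would be kept as an explicit override.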
9387 def _CalculateFileStorageDir(self):
9388 """Calculate final instance file storage dir.
9391 # file storage dir calculation/check
9392 self.instance_file_storage_dir = None
9393 if self.op.disk_template in constants.DTS_FILEBASED:
9394 # build the full file storage dir path
9397 if self.op.disk_template == constants.DT_SHARED_FILE:
9398 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9400 get_fsd_fn = self.cfg.GetFileStorageDir
9402 cfg_storagedir = get_fsd_fn()
9403 if not cfg_storagedir:
9404 raise errors.OpPrereqError("Cluster file storage dir not defined")
9405 joinargs.append(cfg_storagedir)
9407 if self.op.file_storage_dir is not None:
9408 joinargs.append(self.op.file_storage_dir)
9410 joinargs.append(self.op.instance_name)
9412 # pylint: disable=W0142
9413 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
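# Example of the resulting path (all values hypothetical): with a cluster
# file storage dir of /srv/ganeti/file-storage, op.file_storage_dir set to
# "webfarm" and an instance named inst1.example.com, the computed directory
# would be /srv/ganeti/file-storage/webfarm/inst1.example.com.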
9415 def CheckPrereq(self): # pylint: disable=R0914
9416 """Check prerequisites.
9419 self._CalculateFileStorageDir()
9421 if self.op.mode == constants.INSTANCE_IMPORT:
9422 export_info = self._ReadExportInfo()
9423 self._ReadExportParams(export_info)
9425 if (not self.cfg.GetVGName() and
9426 self.op.disk_template not in constants.DTS_NOT_LVM):
9427 raise errors.OpPrereqError("Cluster does not support lvm-based"
9428 " instances", errors.ECODE_STATE)
9430 if (self.op.hypervisor is None or
9431 self.op.hypervisor == constants.VALUE_AUTO):
9432 self.op.hypervisor = self.cfg.GetHypervisorType()
9434 cluster = self.cfg.GetClusterInfo()
9435 enabled_hvs = cluster.enabled_hypervisors
9436 if self.op.hypervisor not in enabled_hvs:
9437 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9438 " cluster (%s)" % (self.op.hypervisor,
9439 ",".join(enabled_hvs)),
9442 # Check tag validity
9443 for tag in self.op.tags:
9444 objects.TaggableObject.ValidateTag(tag)
9446 # check hypervisor parameter syntax (locally)
9447 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9448 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9450 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9451 hv_type.CheckParameterSyntax(filled_hvp)
9452 self.hv_full = filled_hvp
9453 # check that we don't specify global parameters on an instance
9454 _CheckGlobalHvParams(self.op.hvparams)
9456 # fill and remember the beparams dict
9457 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9458 for param, value in self.op.beparams.iteritems():
9459 if value == constants.VALUE_AUTO:
9460 self.op.beparams[param] = default_beparams[param]
9461 objects.UpgradeBeParams(self.op.beparams)
9462 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9463 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9465 # build os parameters
9466 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9468 # now that hvp/bep are in final format, let's reset to defaults, if requested
9470 if self.op.identify_defaults:
9471 self._RevertToDefaults(cluster)
9475 for idx, nic in enumerate(self.op.nics):
9476 nic_mode_req = nic.get(constants.INIC_MODE, None)
9477 nic_mode = nic_mode_req
9478 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9479 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9481 # in routed mode, for the first nic, the default ip is 'auto'
9482 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9483 default_ip_mode = constants.VALUE_AUTO
9485 default_ip_mode = constants.VALUE_NONE
9487 # ip validity checks
9488 ip = nic.get(constants.INIC_IP, default_ip_mode)
9489 if ip is None or ip.lower() == constants.VALUE_NONE:
9491 elif ip.lower() == constants.VALUE_AUTO:
9492 if not self.op.name_check:
9493 raise errors.OpPrereqError("IP address set to auto but name checks"
9494 " have been skipped",
9496 nic_ip = self.hostname1.ip
9498 if not netutils.IPAddress.IsValid(ip):
9499 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9503 # TODO: check the ip address for uniqueness
9504 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9505 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9508 # MAC address verification
9509 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9510 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9511 mac = utils.NormalizeAndValidateMac(mac)
9514 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9515 except errors.ReservationError:
9516 raise errors.OpPrereqError("MAC address %s already in use"
9517 " in cluster" % mac,
9518 errors.ECODE_NOTUNIQUE)
9520 # Build nic parameters
9521 link = nic.get(constants.INIC_LINK, None)
9522 if link == constants.VALUE_AUTO:
9523 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9526 nicparams[constants.NIC_MODE] = nic_mode
9528 nicparams[constants.NIC_LINK] = link
9530 check_params = cluster.SimpleFillNIC(nicparams)
9531 objects.NIC.CheckParameterSyntax(check_params)
9532 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9534 # disk checks/pre-build
9535 default_vg = self.cfg.GetVGName()
9537 for disk in self.op.disks:
9538 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9539 if mode not in constants.DISK_ACCESS_SET:
9540 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9541 mode, errors.ECODE_INVAL)
9542 size = disk.get(constants.IDISK_SIZE, None)
9544 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9547 except (TypeError, ValueError):
9548 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9551 data_vg = disk.get(constants.IDISK_VG, default_vg)
9553 constants.IDISK_SIZE: size,
9554 constants.IDISK_MODE: mode,
9555 constants.IDISK_VG: data_vg,
9557 if constants.IDISK_METAVG in disk:
9558 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9559 if constants.IDISK_ADOPT in disk:
9560 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9561 self.disks.append(new_disk)
9563 if self.op.mode == constants.INSTANCE_IMPORT:
9565 for idx in range(len(self.disks)):
9566 option = "disk%d_dump" % idx
9567 if export_info.has_option(constants.INISECT_INS, option):
9568 # FIXME: are the old os-es, disk sizes, etc. useful?
9569 export_name = export_info.get(constants.INISECT_INS, option)
9570 image = utils.PathJoin(self.op.src_path, export_name)
9571 disk_images.append(image)
9573 disk_images.append(False)
9575 self.src_images = disk_images
9577 old_name = export_info.get(constants.INISECT_INS, "name")
9578 if self.op.instance_name == old_name:
9579 for idx, nic in enumerate(self.nics):
9580 if nic.mac == constants.VALUE_AUTO:
9581 nic_mac_ini = "nic%d_mac" % idx
9582 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9584 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9586 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9587 if self.op.ip_check:
9588 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9589 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9590 (self.check_ip, self.op.instance_name),
9591 errors.ECODE_NOTUNIQUE)
9593 #### mac address generation
9594 # By generating the MAC address here, both the allocator and the hooks get
9595 # the real final mac address rather than the 'auto' or 'generate' value.
9596 # There is a race condition between the generation and the instance object
9597 # creation, which means that we know the mac is valid now, but we're not
9598 # sure it will be when we actually add the instance. If things go bad
9599 # adding the instance will abort because of a duplicate mac, and the
9600 # creation job will fail.
9601 for nic in self.nics:
9602 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9603 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9607 if self.op.iallocator is not None:
9608 self._RunAllocator()
9610 # Release all unneeded node locks
9611 _ReleaseLocks(self, locking.LEVEL_NODE,
9612 keep=filter(None, [self.op.pnode, self.op.snode,
9614 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9615 keep=filter(None, [self.op.pnode, self.op.snode,
9618 #### node related checks
9620 # check primary node
9621 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9622 assert self.pnode is not None, \
9623 "Cannot retrieve locked node %s" % self.op.pnode
9625 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9626 pnode.name, errors.ECODE_STATE)
9628 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9629 pnode.name, errors.ECODE_STATE)
9630 if not pnode.vm_capable:
9631 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9632 " '%s'" % pnode.name, errors.ECODE_STATE)
9634 self.secondaries = []
9636 # mirror node verification
9637 if self.op.disk_template in constants.DTS_INT_MIRROR:
9638 if self.op.snode == pnode.name:
9639 raise errors.OpPrereqError("The secondary node cannot be the"
9640 " primary node", errors.ECODE_INVAL)
9641 _CheckNodeOnline(self, self.op.snode)
9642 _CheckNodeNotDrained(self, self.op.snode)
9643 _CheckNodeVmCapable(self, self.op.snode)
9644 self.secondaries.append(self.op.snode)
9646 snode = self.cfg.GetNodeInfo(self.op.snode)
9647 if pnode.group != snode.group:
9648 self.LogWarning("The primary and secondary nodes are in two"
9649 " different node groups; the disk parameters"
9650 " from the first disk's node group will be"
9653 nodenames = [pnode.name] + self.secondaries
9655 # Verify instance specs
9657 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9658 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9659 constants.ISPEC_DISK_COUNT: len(self.disks),
9660 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9661 constants.ISPEC_NIC_COUNT: len(self.nics),
9664 group_info = self.cfg.GetNodeGroup(pnode.group)
9665 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9666 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9667 if not self.op.ignore_ipolicy and res:
9668 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9669 " policy: %s") % (pnode.group,
9670 utils.CommaJoin(res)),
9673 # disk parameters (not customizable at instance or node level)
9674 # just use the primary node parameters, ignoring the secondary.
9675 self.diskparams = group_info.diskparams
9677 if not self.adopt_disks:
9678 # Check lv size requirements, if not adopting
9679 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9680 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9682 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9683 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9684 disk[constants.IDISK_ADOPT])
9685 for disk in self.disks])
9686 if len(all_lvs) != len(self.disks):
9687 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9689 for lv_name in all_lvs:
9691 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9692 # to ReserveLV use the same syntax
9693 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9694 except errors.ReservationError:
9695 raise errors.OpPrereqError("LV named %s used by another instance" %
9696 lv_name, errors.ECODE_NOTUNIQUE)
9698 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9699 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9701 node_lvs = self.rpc.call_lv_list([pnode.name],
9702 vg_names.payload.keys())[pnode.name]
9703 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9704 node_lvs = node_lvs.payload
9706 delta = all_lvs.difference(node_lvs.keys())
9708 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9709 utils.CommaJoin(delta),
9711 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9713 raise errors.OpPrereqError("Online logical volumes found, cannot"
9714 " adopt: %s" % utils.CommaJoin(online_lvs),
9716 # update the size of disk based on what is found
9717 for dsk in self.disks:
9718 dsk[constants.IDISK_SIZE] = \
9719 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9720 dsk[constants.IDISK_ADOPT])][0]))
9722 elif self.op.disk_template == constants.DT_BLOCK:
9723 # Normalize and de-duplicate device paths
9724 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9725 for disk in self.disks])
9726 if len(all_disks) != len(self.disks):
9727 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9729 baddisks = [d for d in all_disks
9730 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9732 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9733 " cannot be adopted" %
9734 (", ".join(baddisks),
9735 constants.ADOPTABLE_BLOCKDEV_ROOT),
9738 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9739 list(all_disks))[pnode.name]
9740 node_disks.Raise("Cannot get block device information from node %s" %
9742 node_disks = node_disks.payload
9743 delta = all_disks.difference(node_disks.keys())
9745 raise errors.OpPrereqError("Missing block device(s): %s" %
9746 utils.CommaJoin(delta),
9748 for dsk in self.disks:
9749 dsk[constants.IDISK_SIZE] = \
9750 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
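# Note on both adoption paths above: the size stored in the configuration is
# taken from what is actually found on the node (LV size for DT_PLAIN, block
# device size for DT_BLOCK), overriding whatever size the opcode or the
# export information may have suggested.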
9752 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9754 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9755 # check OS parameters (remotely)
9756 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9758 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9760 # memory check on primary node
9761 #TODO(dynmem): use MINMEM for checking
9763 _CheckNodeFreeMemory(self, self.pnode.name,
9764 "creating instance %s" % self.op.instance_name,
9765 self.be_full[constants.BE_MAXMEM],
9768 self.dry_run_result = list(nodenames)
9770 def Exec(self, feedback_fn):
9771 """Create and add the instance to the cluster.
9774 instance = self.op.instance_name
9775 pnode_name = self.pnode.name
9777 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9778 self.owned_locks(locking.LEVEL_NODE)), \
9779 "Node locks differ from node resource locks"
9781 ht_kind = self.op.hypervisor
9782 if ht_kind in constants.HTS_REQ_PORT:
9783 network_port = self.cfg.AllocatePort()
9787 disks = _GenerateDiskTemplate(self,
9788 self.op.disk_template,
9789 instance, pnode_name,
9792 self.instance_file_storage_dir,
9793 self.op.file_driver,
9798 iobj = objects.Instance(name=instance, os=self.op.os_type,
9799 primary_node=pnode_name,
9800 nics=self.nics, disks=disks,
9801 disk_template=self.op.disk_template,
9802 admin_state=constants.ADMINST_DOWN,
9803 network_port=network_port,
9804 beparams=self.op.beparams,
9805 hvparams=self.op.hvparams,
9806 hypervisor=self.op.hypervisor,
9807 osparams=self.op.osparams,
9811 for tag in self.op.tags:
9814 if self.adopt_disks:
9815 if self.op.disk_template == constants.DT_PLAIN:
9816 # rename LVs to the newly-generated names; we need to construct
9817 # 'fake' LV disks with the old data, plus the new unique_id
9818 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9820 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9821 rename_to.append(t_dsk.logical_id)
9822 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9823 self.cfg.SetDiskID(t_dsk, pnode_name)
9824 result = self.rpc.call_blockdev_rename(pnode_name,
9825 zip(tmp_disks, rename_to))
9826 result.Raise("Failed to rename adoped LVs")
9828 feedback_fn("* creating instance disks...")
9830 _CreateDisks(self, iobj)
9831 except errors.OpExecError:
9832 self.LogWarning("Device creation failed, reverting...")
9834 _RemoveDisks(self, iobj)
9836 self.cfg.ReleaseDRBDMinors(instance)
9839 feedback_fn("adding instance %s to cluster config" % instance)
9841 self.cfg.AddInstance(iobj, self.proc.GetECId())
9843 # Declare that we don't want to remove the instance lock anymore, as we've
9844 # added the instance to the config
9845 del self.remove_locks[locking.LEVEL_INSTANCE]
9847 if self.op.mode == constants.INSTANCE_IMPORT:
9848 # Release unused nodes
9849 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9852 _ReleaseLocks(self, locking.LEVEL_NODE)
9855 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9856 feedback_fn("* wiping instance disks...")
9858 _WipeDisks(self, iobj)
9859 except errors.OpExecError, err:
9860 logging.exception("Wiping disks failed")
9861 self.LogWarning("Wiping instance disks failed (%s)", err)
9865 # Something is already wrong with the disks, don't do anything else
9867 elif self.op.wait_for_sync:
9868 disk_abort = not _WaitForSync(self, iobj)
9869 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9870 # make sure the disks are not degraded (still sync-ing is ok)
9871 feedback_fn("* checking mirrors status")
9872 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9877 _RemoveDisks(self, iobj)
9878 self.cfg.RemoveInstance(iobj.name)
9879 # Make sure the instance lock gets removed
9880 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9881 raise errors.OpExecError("There are some degraded disks for"
9884 # Release all node resource locks
9885 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9887 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9888 if self.op.mode == constants.INSTANCE_CREATE:
9889 if not self.op.no_install:
9890 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9891 not self.op.wait_for_sync)
9893 feedback_fn("* pausing disk sync to install instance OS")
9894 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9896 for idx, success in enumerate(result.payload):
9898 logging.warn("pause-sync of instance %s for disk %d failed",
9901 feedback_fn("* running the instance OS create scripts...")
9902 # FIXME: pass debug option from opcode to backend
9904 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9905 self.op.debug_level)
9907 feedback_fn("* resuming disk sync")
9908 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9910 for idx, success in enumerate(result.payload):
9912 logging.warn("resume-sync of instance %s for disk %d failed",
9915 os_add_result.Raise("Could not add os for instance %s"
9916 " on node %s" % (instance, pnode_name))
9918 elif self.op.mode == constants.INSTANCE_IMPORT:
9919 feedback_fn("* running the instance OS import scripts...")
9923 for idx, image in enumerate(self.src_images):
9927 # FIXME: pass debug option from opcode to backend
9928 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9929 constants.IEIO_FILE, (image, ),
9930 constants.IEIO_SCRIPT,
9931 (iobj.disks[idx], idx),
9933 transfers.append(dt)
9936 masterd.instance.TransferInstanceData(self, feedback_fn,
9937 self.op.src_node, pnode_name,
9938 self.pnode.secondary_ip,
9940 if not compat.all(import_result):
9941 self.LogWarning("Some disks for instance %s on node %s were not"
9942 " imported successfully" % (instance, pnode_name))
9944 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9945 feedback_fn("* preparing remote import...")
9946 # The source cluster will stop the instance before attempting to make a
9947 # connection. In some cases stopping an instance can take a long time,
9948 # hence the shutdown timeout is added to the connection timeout.
9949 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9950 self.op.source_shutdown_timeout)
9951 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9953 assert iobj.primary_node == self.pnode.name
9955 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9956 self.source_x509_ca,
9957 self._cds, timeouts)
9958 if not compat.all(disk_results):
9959 # TODO: Should the instance still be started, even if some disks
9960 # failed to import (valid for local imports, too)?
9961 self.LogWarning("Some disks for instance %s on node %s were not"
9962 " imported successfully" % (instance, pnode_name))
9964 # Run rename script on newly imported instance
9965 assert iobj.name == instance
9966 feedback_fn("Running rename script for %s" % instance)
9967 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9968 self.source_instance_name,
9969 self.op.debug_level)
9971 self.LogWarning("Failed to run rename script for %s on node"
9972 " %s: %s" % (instance, pnode_name, result.fail_msg))
9975 # also checked in the prereq part
9976 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9979 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9982 iobj.admin_state = constants.ADMINST_UP
9983 self.cfg.Update(iobj, feedback_fn)
9984 logging.info("Starting instance %s on node %s", instance, pnode_name)
9985 feedback_fn("* starting instance...")
9986 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9988 result.Raise("Could not start instance")
9990 return list(iobj.all_nodes)
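# A minimal usage sketch for the LU above (hypothetical values, shown only
# for orientation): instance creation is normally requested by submitting an
# opcode to the job queue, e.g. from gnt-instance add or RAPI, along the
# lines of
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}], os_type="debian-image",
#                                 pnode="node1", snode="node2")
# mcpu.Processor then drives ExpandNames, CheckPrereq, the hooks and Exec in
# that order.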
9993 class LUInstanceConsole(NoHooksLU):
9994 """Connect to an instance's console.
9996 This is somewhat special in that it returns the command line that
9997 you need to run on the master node in order to connect to the console.
10003 def ExpandNames(self):
10004 self.share_locks = _ShareAll()
10005 self._ExpandAndLockInstance()
10007 def CheckPrereq(self):
10008 """Check prerequisites.
10010 This checks that the instance is in the cluster.
10013 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10014 assert self.instance is not None, \
10015 "Cannot retrieve locked instance %s" % self.op.instance_name
10016 _CheckNodeOnline(self, self.instance.primary_node)
10018 def Exec(self, feedback_fn):
10019 """Connect to the console of an instance
10022 instance = self.instance
10023 node = instance.primary_node
10025 node_insts = self.rpc.call_instance_list([node],
10026 [instance.hypervisor])[node]
10027 node_insts.Raise("Can't get node information from %s" % node)
10029 if instance.name not in node_insts.payload:
10030 if instance.admin_state == constants.ADMINST_UP:
10031 state = constants.INSTST_ERRORDOWN
10032 elif instance.admin_state == constants.ADMINST_DOWN:
10033 state = constants.INSTST_ADMINDOWN
10035 state = constants.INSTST_ADMINOFFLINE
10036 raise errors.OpExecError("Instance %s is not running (state %s)" %
10037 (instance.name, state))
10039 logging.debug("Connecting to console of %s on %s", instance.name, node)
10041 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10044 def _GetInstanceConsole(cluster, instance):
10045 """Returns console information for an instance.
10047 @type cluster: L{objects.Cluster}
10048 @type instance: L{objects.Instance}
10052 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10053 # beparams and hvparams are passed separately, to avoid editing the
10054 # instance and then saving the defaults in the instance itself.
10055 hvparams = cluster.FillHV(instance)
10056 beparams = cluster.FillBE(instance)
10057 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10059 assert console.instance == instance.name
10060 assert console.Validate()
10062 return console.ToDict()
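# The dictionary returned above comes from objects.InstanceConsole; depending
# on the hypervisor it typically describes either a command to run (e.g. an
# SSH invocation for Xen PVM consoles) or a network endpoint (host/port for
# VNC-style consoles), plus the console kind and the instance name. The exact
# fields are hypervisor-dependent.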
10065 class LUInstanceReplaceDisks(LogicalUnit):
10066 """Replace the disks of an instance.
10069 HPATH = "mirrors-replace"
10070 HTYPE = constants.HTYPE_INSTANCE
10073 def CheckArguments(self):
10074 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10075 self.op.iallocator)
10077 def ExpandNames(self):
10078 self._ExpandAndLockInstance()
10080 assert locking.LEVEL_NODE not in self.needed_locks
10081 assert locking.LEVEL_NODE_RES not in self.needed_locks
10082 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10084 assert self.op.iallocator is None or self.op.remote_node is None, \
10085 "Conflicting options"
10087 if self.op.remote_node is not None:
10088 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10090 # Warning: do not remove the locking of the new secondary here
10091 # unless DRBD8.AddChildren is changed to work in parallel;
10092 # currently it doesn't since parallel invocations of
10093 # FindUnusedMinor will conflict
10094 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10095 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10097 self.needed_locks[locking.LEVEL_NODE] = []
10098 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10100 if self.op.iallocator is not None:
10101 # iallocator will select a new node in the same group
10102 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10104 self.needed_locks[locking.LEVEL_NODE_RES] = []
10106 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10107 self.op.iallocator, self.op.remote_node,
10108 self.op.disks, False, self.op.early_release,
10109 self.op.ignore_ipolicy)
10111 self.tasklets = [self.replacer]
10113 def DeclareLocks(self, level):
10114 if level == locking.LEVEL_NODEGROUP:
10115 assert self.op.remote_node is None
10116 assert self.op.iallocator is not None
10117 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10119 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10120 # Lock all groups used by instance optimistically; this requires going
10121 # via the node before it's locked, requiring verification later on
10122 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10123 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10125 elif level == locking.LEVEL_NODE:
10126 if self.op.iallocator is not None:
10127 assert self.op.remote_node is None
10128 assert not self.needed_locks[locking.LEVEL_NODE]
10130 # Lock member nodes of all locked groups
10131 self.needed_locks[locking.LEVEL_NODE] = [node_name
10132 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10133 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10135 self._LockInstancesNodes()
10136 elif level == locking.LEVEL_NODE_RES:
10138 self.needed_locks[locking.LEVEL_NODE_RES] = \
10139 self.needed_locks[locking.LEVEL_NODE]
10141 def BuildHooksEnv(self):
10142 """Build hooks env.
10144 This runs on the master, the primary and all the secondaries.
10147 instance = self.replacer.instance
10149 "MODE": self.op.mode,
10150 "NEW_SECONDARY": self.op.remote_node,
10151 "OLD_SECONDARY": instance.secondary_nodes[0],
10153 env.update(_BuildInstanceHookEnvByObject(self, instance))
10156 def BuildHooksNodes(self):
10157 """Build hooks nodes.
10160 instance = self.replacer.instance
10162 self.cfg.GetMasterNode(),
10163 instance.primary_node,
10165 if self.op.remote_node is not None:
10166 nl.append(self.op.remote_node)
10169 def CheckPrereq(self):
10170 """Check prerequisites.
10173 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10174 self.op.iallocator is None)
10176 # Verify if node group locks are still correct
10177 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10179 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10181 return LogicalUnit.CheckPrereq(self)
10184 class TLReplaceDisks(Tasklet):
10185 """Replaces disks for an instance.
10187 Note: Locking is not within the scope of this class.
10190 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10191 disks, delay_iallocator, early_release, ignore_ipolicy):
10192 """Initializes this class.
10195 Tasklet.__init__(self, lu)
10198 self.instance_name = instance_name
10200 self.iallocator_name = iallocator_name
10201 self.remote_node = remote_node
10203 self.delay_iallocator = delay_iallocator
10204 self.early_release = early_release
10205 self.ignore_ipolicy = ignore_ipolicy
10208 self.instance = None
10209 self.new_node = None
10210 self.target_node = None
10211 self.other_node = None
10212 self.remote_node_info = None
10213 self.node_secondary_ip = None
10216 def CheckArguments(mode, remote_node, iallocator):
10217 """Helper function for users of this class.
10220 # check for valid parameter combination
10221 if mode == constants.REPLACE_DISK_CHG:
10222 if remote_node is None and iallocator is None:
10223 raise errors.OpPrereqError("When changing the secondary either an"
10224 " iallocator script must be used or the"
10225 " new node given", errors.ECODE_INVAL)
10227 if remote_node is not None and iallocator is not None:
10228 raise errors.OpPrereqError("Give either the iallocator or the new"
10229 " secondary, not both", errors.ECODE_INVAL)
10231 elif remote_node is not None or iallocator is not None:
10232 # Not replacing the secondary
10233 raise errors.OpPrereqError("The iallocator and new node options can"
10234 " only be used when changing the"
10235 " secondary node", errors.ECODE_INVAL)
10238 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10239 """Compute a new secondary node using an IAllocator.
10242 ial = IAllocator(lu.cfg, lu.rpc,
10243 mode=constants.IALLOCATOR_MODE_RELOC,
10244 name=instance_name,
10245 relocate_from=list(relocate_from))
10247 ial.Run(iallocator_name)
10249 if not ial.success:
10250 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10251 " %s" % (iallocator_name, ial.info),
10252 errors.ECODE_NORES)
10254 if len(ial.result) != ial.required_nodes:
10255 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10256 " of nodes (%s), required %s" %
10258 len(ial.result), ial.required_nodes),
10259 errors.ECODE_FAULT)
10261 remote_node_name = ial.result[0]
10263 lu.LogInfo("Selected new secondary for instance '%s': %s",
10264 instance_name, remote_node_name)
10266 return remote_node_name
10268 def _FindFaultyDisks(self, node_name):
10269 """Wrapper for L{_FindFaultyInstanceDisks}.
10272 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10275 def _CheckDisksActivated(self, instance):
10276 """Checks if the instance disks are activated.
10278 @param instance: The instance to check disks
10279 @return: True if they are activated, False otherwise
10282 nodes = instance.all_nodes
10284 for idx, dev in enumerate(instance.disks):
10286 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10287 self.cfg.SetDiskID(dev, node)
10289 result = self.rpc.call_blockdev_find(node, dev)
10293 elif result.fail_msg or not result.payload:
10298 def CheckPrereq(self):
10299 """Check prerequisites.
10301 This checks that the instance is in the cluster.
10304 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10305 assert instance is not None, \
10306 "Cannot retrieve locked instance %s" % self.instance_name
10308 if instance.disk_template != constants.DT_DRBD8:
10309 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10310 " instances", errors.ECODE_INVAL)
10312 if len(instance.secondary_nodes) != 1:
10313 raise errors.OpPrereqError("The instance has a strange layout,"
10314 " expected one secondary but found %d" %
10315 len(instance.secondary_nodes),
10316 errors.ECODE_FAULT)
10318 if not self.delay_iallocator:
10319 self._CheckPrereq2()
10321 def _CheckPrereq2(self):
10322 """Check prerequisites, second part.
10324 This function should always be part of CheckPrereq. It was separated and is
10325 now called from Exec because during node evacuation iallocator was only
10326 called with an unmodified cluster model, not taking planned changes into account.
10330 instance = self.instance
10331 secondary_node = instance.secondary_nodes[0]
10333 if self.iallocator_name is None:
10334 remote_node = self.remote_node
10336 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10337 instance.name, instance.secondary_nodes)
10339 if remote_node is None:
10340 self.remote_node_info = None
10342 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10343 "Remote node '%s' is not locked" % remote_node
10345 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10346 assert self.remote_node_info is not None, \
10347 "Cannot retrieve locked node %s" % remote_node
10349 if remote_node == self.instance.primary_node:
10350 raise errors.OpPrereqError("The specified node is the primary node of"
10351 " the instance", errors.ECODE_INVAL)
10353 if remote_node == secondary_node:
10354 raise errors.OpPrereqError("The specified node is already the"
10355 " secondary node of the instance",
10356 errors.ECODE_INVAL)
10358 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10359 constants.REPLACE_DISK_CHG):
10360 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10361 errors.ECODE_INVAL)
10363 if self.mode == constants.REPLACE_DISK_AUTO:
10364 if not self._CheckDisksActivated(instance):
10365 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10366 " first" % self.instance_name,
10367 errors.ECODE_STATE)
10368 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10369 faulty_secondary = self._FindFaultyDisks(secondary_node)
10371 if faulty_primary and faulty_secondary:
10372 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10373 " one node and can not be repaired"
10374 " automatically" % self.instance_name,
10375 errors.ECODE_STATE)
10378 self.disks = faulty_primary
10379 self.target_node = instance.primary_node
10380 self.other_node = secondary_node
10381 check_nodes = [self.target_node, self.other_node]
10382 elif faulty_secondary:
10383 self.disks = faulty_secondary
10384 self.target_node = secondary_node
10385 self.other_node = instance.primary_node
10386 check_nodes = [self.target_node, self.other_node]
10392 # Non-automatic modes
10393 if self.mode == constants.REPLACE_DISK_PRI:
10394 self.target_node = instance.primary_node
10395 self.other_node = secondary_node
10396 check_nodes = [self.target_node, self.other_node]
10398 elif self.mode == constants.REPLACE_DISK_SEC:
10399 self.target_node = secondary_node
10400 self.other_node = instance.primary_node
10401 check_nodes = [self.target_node, self.other_node]
10403 elif self.mode == constants.REPLACE_DISK_CHG:
10404 self.new_node = remote_node
10405 self.other_node = instance.primary_node
10406 self.target_node = secondary_node
10407 check_nodes = [self.new_node, self.other_node]
10409 _CheckNodeNotDrained(self.lu, remote_node)
10410 _CheckNodeVmCapable(self.lu, remote_node)
10412 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10413 assert old_node_info is not None
10414 if old_node_info.offline and not self.early_release:
10415 # doesn't make sense to delay the release
10416 self.early_release = True
10417 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10418 " early-release mode", secondary_node)
10421 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10424 # If not specified all disks should be replaced
10426 self.disks = range(len(self.instance.disks))
10428 # TODO: This is ugly, but right now we can't distinguish between an
10429 # internally submitted opcode and an external one. We should fix that.
10430 if self.remote_node_info:
10431 # We change the node, lets verify it still meets instance policy
10432 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10433 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10435 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10436 ignore=self.ignore_ipolicy)
10438 # TODO: compute disk parameters
10439 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10440 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10441 if primary_node_info.group != secondary_node_info.group:
10442 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10443 " different node groups; the disk parameters of the"
10444 " primary node's group will be applied.")
10446 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10448 for node in check_nodes:
10449 _CheckNodeOnline(self.lu, node)
10451 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10454 if node_name is not None)
10456 # Release unneeded node and node resource locks
10457 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10458 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10460 # Release any owned node group
10461 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10462 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10464 # Check whether disks are valid
10465 for disk_idx in self.disks:
10466 instance.FindDisk(disk_idx)
10468 # Get secondary node IP addresses
10469 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10470 in self.cfg.GetMultiNodeInfo(touched_nodes))
10472 def Exec(self, feedback_fn):
10473 """Execute disk replacement.
10475 This dispatches the disk replacement to the appropriate handler.
10478 if self.delay_iallocator:
10479 self._CheckPrereq2()
10482 # Verify owned locks before starting operation
10483 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10484 assert set(owned_nodes) == set(self.node_secondary_ip), \
10485 ("Incorrect node locks, owning %s, expected %s" %
10486 (owned_nodes, self.node_secondary_ip.keys()))
10487 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10488 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10490 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10491 assert list(owned_instances) == [self.instance_name], \
10492 "Instance '%s' not locked" % self.instance_name
10494 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10495 "Should not own any node group lock at this point"
10498 feedback_fn("No disks need replacement")
10501 feedback_fn("Replacing disk(s) %s for %s" %
10502 (utils.CommaJoin(self.disks), self.instance.name))
10504 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10506 # Activate the instance disks if we're replacing them on a down instance
10508 _StartInstanceDisks(self.lu, self.instance, True)
10511 # Should we replace the secondary node?
10512 if self.new_node is not None:
10513 fn = self._ExecDrbd8Secondary
10515 fn = self._ExecDrbd8DiskOnly
10517 result = fn(feedback_fn)
10519 # Deactivate the instance disks if we're replacing them on a
10522 _SafeShutdownInstanceDisks(self.lu, self.instance)
10524 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10527 # Verify owned locks
10528 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10529 nodes = frozenset(self.node_secondary_ip)
10530 assert ((self.early_release and not owned_nodes) or
10531 (not self.early_release and not (set(owned_nodes) - nodes))), \
10532 ("Not owning the correct locks, early_release=%s, owned=%r,"
10533 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10537 def _CheckVolumeGroup(self, nodes):
10538 self.lu.LogInfo("Checking volume groups")
10540 vgname = self.cfg.GetVGName()
10542 # Make sure volume group exists on all involved nodes
10543 results = self.rpc.call_vg_list(nodes)
10545 raise errors.OpExecError("Can't list volume groups on the nodes")
10548 res = results[node]
10549 res.Raise("Error checking node %s" % node)
10550 if vgname not in res.payload:
10551 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10554 def _CheckDisksExistence(self, nodes):
10555 # Check disk existence
10556 for idx, dev in enumerate(self.instance.disks):
10557 if idx not in self.disks:
10561 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10562 self.cfg.SetDiskID(dev, node)
10564 result = self.rpc.call_blockdev_find(node, dev)
10566 msg = result.fail_msg
10567 if msg or not result.payload:
10569 msg = "disk not found"
10570 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10573 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10574 for idx, dev in enumerate(self.instance.disks):
10575 if idx not in self.disks:
10578 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10581 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10583 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10584 " replace disks for instance %s" %
10585 (node_name, self.instance.name))
10587 def _CreateNewStorage(self, node_name):
10588 """Create new storage on the primary or secondary node.
10590 This is only used for same-node replaces, not for changing the
10591 secondary node, hence we don't want to modify the existing disk.
10596 for idx, dev in enumerate(self.instance.disks):
10597 if idx not in self.disks:
10600 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10602 self.cfg.SetDiskID(dev, node_name)
10604 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10605 names = _GenerateUniqueNames(self.lu, lv_names)
10607 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10609 vg_data = dev.children[0].logical_id[0]
10610 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10611 logical_id=(vg_data, names[0]), params=data_p)
10612 vg_meta = dev.children[1].logical_id[0]
10613 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10614 logical_id=(vg_meta, names[1]), params=meta_p)
10616 new_lvs = [lv_data, lv_meta]
10617 old_lvs = [child.Copy() for child in dev.children]
10618 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10620 # we pass force_create=True to force the LVM creation
10621 for new_lv in new_lvs:
10622 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10623 _GetInstanceInfoText(self.instance), False)
10627 def _CheckDevices(self, node_name, iv_names):
10628 for name, (dev, _, _) in iv_names.iteritems():
10629 self.cfg.SetDiskID(dev, node_name)
10631 result = self.rpc.call_blockdev_find(node_name, dev)
10633 msg = result.fail_msg
10634 if msg or not result.payload:
10636 msg = "disk not found"
10637 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10640 if result.payload.is_degraded:
10641 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10643 def _RemoveOldStorage(self, node_name, iv_names):
10644 for name, (_, old_lvs, _) in iv_names.iteritems():
10645 self.lu.LogInfo("Remove logical volumes for %s" % name)
10648 self.cfg.SetDiskID(lv, node_name)
10650 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10652 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10653 hint="remove unused LVs manually")
10655 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10656 """Replace a disk on the primary or secondary for DRBD 8.
10658 The algorithm for replace is quite complicated:
10660 1. for each disk to be replaced:
10662 1. create new LVs on the target node with unique names
10663 1. detach old LVs from the drbd device
10664 1. rename old LVs to name_replaced.<time_t>
10665 1. rename new LVs to old LVs
10666 1. attach the new LVs (with the old names now) to the drbd device
10668 1. wait for sync across all devices
10670 1. for each modified disk:
10672 1. remove old LVs (which have the name name_replaced.<time_t>)
10674 Failures are not very well handled.
10679 # Step: check device activation
10680 self.lu.LogStep(1, steps_total, "Check device existence")
10681 self._CheckDisksExistence([self.other_node, self.target_node])
10682 self._CheckVolumeGroup([self.target_node, self.other_node])
10684 # Step: check other node consistency
10685 self.lu.LogStep(2, steps_total, "Check peer consistency")
10686 self._CheckDisksConsistency(self.other_node,
10687 self.other_node == self.instance.primary_node,
10690 # Step: create new storage
10691 self.lu.LogStep(3, steps_total, "Allocate new storage")
10692 iv_names = self._CreateNewStorage(self.target_node)
10694 # Step: for each lv, detach+rename*2+attach
10695 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10696 for dev, old_lvs, new_lvs in iv_names.itervalues():
10697 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10699 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10701 result.Raise("Can't detach drbd from local storage on node"
10702 " %s for device %s" % (self.target_node, dev.iv_name))
10704 #cfg.Update(instance)
10706 # ok, we created the new LVs, so now we know we have the needed
10707 # storage; as such, we proceed on the target node to rename
10708 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10709 # using the assumption that logical_id == physical_id (which in
10710 # turn is the unique_id on that node)
10712 # FIXME(iustin): use a better name for the replaced LVs
10713 temp_suffix = int(time.time())
10714 ren_fn = lambda d, suff: (d.physical_id[0],
10715 d.physical_id[1] + "_replaced-%s" % suff)
10717 # Build the rename list based on what LVs exist on the node
10718 rename_old_to_new = []
10719 for to_ren in old_lvs:
10720 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10721 if not result.fail_msg and result.payload:
10723 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10725 self.lu.LogInfo("Renaming the old LVs on the target node")
10726 result = self.rpc.call_blockdev_rename(self.target_node,
10728 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10730 # Now we rename the new LVs to the old LVs
10731 self.lu.LogInfo("Renaming the new LVs on the target node")
10732 rename_new_to_old = [(new, old.physical_id)
10733 for old, new in zip(old_lvs, new_lvs)]
10734 result = self.rpc.call_blockdev_rename(self.target_node,
10736 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10738 # Intermediate steps of in memory modifications
10739 for old, new in zip(old_lvs, new_lvs):
10740 new.logical_id = old.logical_id
10741 self.cfg.SetDiskID(new, self.target_node)
10743 # We need to modify old_lvs so that removal later removes the
10744 # right LVs, not the newly added ones; note that old_lvs is a copy here.
10746 for disk in old_lvs:
10747 disk.logical_id = ren_fn(disk, temp_suffix)
10748 self.cfg.SetDiskID(disk, self.target_node)
10750 # Now that the new lvs have the old name, we can add them to the device
10751 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10752 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10754 msg = result.fail_msg
10756 for new_lv in new_lvs:
10757 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10760 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10761 hint=("cleanup manually the unused logical"
10763 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10765 cstep = itertools.count(5)
10767 if self.early_release:
10768 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10769 self._RemoveOldStorage(self.target_node, iv_names)
10770 # TODO: Check if releasing locks early still makes sense
10771 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10773 # Release all resource locks except those used by the instance
10774 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10775 keep=self.node_secondary_ip.keys())
10777 # Release all node locks while waiting for sync
10778 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10780 # TODO: Can the instance lock be downgraded here? Take the optional disk
10781 # shutdown in the caller into consideration.
10784 # This can fail as the old devices are degraded and _WaitForSync
10785 # does a combined result over all disks, so we don't check its return value
10786 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10787 _WaitForSync(self.lu, self.instance)
10789 # Check all devices manually
10790 self._CheckDevices(self.instance.primary_node, iv_names)
10792 # Step: remove old storage
10793 if not self.early_release:
10794 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10795 self._RemoveOldStorage(self.target_node, iv_names)
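# Illustration of the rename dance performed in _ExecDrbd8DiskOnly above
# (hypothetical LV names): an existing child such as
#   xenvg/0c123456.disk0_data
# is first renamed to xenvg/0c123456.disk0_data_replaced-<timestamp>, after
# which the freshly created LV is renamed to the original name and attached
# back to the DRBD device, so the DRBD children keep their configured names
# while the data ends up on new volumes.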
10797 def _ExecDrbd8Secondary(self, feedback_fn):
10798 """Replace the secondary node for DRBD 8.
10800 The algorithm for replace is quite complicated:
10801 - for all disks of the instance:
10802 - create new LVs on the new node with same names
10803 - shutdown the drbd device on the old secondary
10804 - disconnect the drbd network on the primary
10805 - create the drbd device on the new secondary
10806 - network attach the drbd on the primary, using an artifice:
10807 the drbd code for Attach() will connect to the network if it
10808 finds a device which is connected to the good local disks but
10809 not network enabled
10810 - wait for sync across all devices
10811 - remove all disks from the old secondary
10813 Failures are not very well handled.
10818 pnode = self.instance.primary_node
10820 # Step: check device activation
10821 self.lu.LogStep(1, steps_total, "Check device existence")
10822 self._CheckDisksExistence([self.instance.primary_node])
10823 self._CheckVolumeGroup([self.instance.primary_node])
10825 # Step: check other node consistency
10826 self.lu.LogStep(2, steps_total, "Check peer consistency")
10827 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10829 # Step: create new storage
10830 self.lu.LogStep(3, steps_total, "Allocate new storage")
10831 for idx, dev in enumerate(self.instance.disks):
10832 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10833 (self.new_node, idx))
10834 # we pass force_create=True to force LVM creation
10835 for new_lv in dev.children:
10836 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10837 _GetInstanceInfoText(self.instance), False)
10839 # Step 4: drbd minors and drbd setup changes
10840 # after this, we must manually remove the drbd minors on both the
10841 # error and the success paths
10842 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10843 minors = self.cfg.AllocateDRBDMinor([self.new_node
10844 for dev in self.instance.disks],
10845 self.instance.name)
10846 logging.debug("Allocated minors %r", minors)
10849 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10850 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10851 (self.new_node, idx))
10852 # create new devices on new_node; note that we create two IDs:
10853 # one without port, so the drbd will be activated without
10854 # networking information on the new node at this stage, and one
10855 # with network, for the latter activation in step 4
10856 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10857 if self.instance.primary_node == o_node1:
10860 assert self.instance.primary_node == o_node2, "Three-node instance?"
10863 new_alone_id = (self.instance.primary_node, self.new_node, None,
10864 p_minor, new_minor, o_secret)
10865 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10866 p_minor, new_minor, o_secret)
10868 iv_names[idx] = (dev, dev.children, new_net_id)
10869 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10871 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10872 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10873 logical_id=new_alone_id,
10874 children=dev.children,
10876 params=drbd_params)
10878 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10879 _GetInstanceInfoText(self.instance), False)
10880 except errors.GenericError:
10881 self.cfg.ReleaseDRBDMinors(self.instance.name)
10884 # We have new devices, shutdown the drbd on the old secondary
10885 for idx, dev in enumerate(self.instance.disks):
10886 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10887 self.cfg.SetDiskID(dev, self.target_node)
10888 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10890 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10891 "node: %s" % (idx, msg),
10892 hint=("Please cleanup this device manually as"
10893 " soon as possible"))
10895 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10896 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10897 self.instance.disks)[pnode]
10899 msg = result.fail_msg
10901 # detaches didn't succeed (unlikely)
10902 self.cfg.ReleaseDRBDMinors(self.instance.name)
10903 raise errors.OpExecError("Can't detach the disks from the network on"
10904 " old node: %s" % (msg,))
10906 # if we managed to detach at least one, we update all the disks of
10907 # the instance to point to the new secondary
10908 self.lu.LogInfo("Updating instance configuration")
10909 for dev, _, new_logical_id in iv_names.itervalues():
10910 dev.logical_id = new_logical_id
10911 self.cfg.SetDiskID(dev, self.instance.primary_node)
10913 self.cfg.Update(self.instance, feedback_fn)
10915 # Release all node locks (the configuration has been updated)
10916 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10918 # and now perform the drbd attach
10919 self.lu.LogInfo("Attaching primary drbds to new secondary"
10920 " (standalone => connected)")
10921 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10923 self.node_secondary_ip,
10924 self.instance.disks,
10925 self.instance.name,
10927 for to_node, to_result in result.items():
10928 msg = to_result.fail_msg
10930 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10932 hint=("please do a gnt-instance info to see the"
10933 " status of disks"))
10935 cstep = itertools.count(5)
10937 if self.early_release:
10938 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10939 self._RemoveOldStorage(self.target_node, iv_names)
10940 # TODO: Check if releasing locks early still makes sense
10941 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10943 # Release all resource locks except those used by the instance
10944 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10945 keep=self.node_secondary_ip.keys())
10947 # TODO: Can the instance lock be downgraded here? Take the optional disk
10948 # shutdown in the caller into consideration.
10951 # This can fail as the old devices are degraded and _WaitForSync
10952 # does a combined result over all disks, so we don't check its return value
10953 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10954 _WaitForSync(self.lu, self.instance)
10956 # Check all devices manually
10957 self._CheckDevices(self.instance.primary_node, iv_names)
10959 # Step: remove old storage
10960 if not self.early_release:
10961 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10962 self._RemoveOldStorage(self.target_node, iv_names)
10965 class LURepairNodeStorage(NoHooksLU):
10966 """Repairs the volume group on a node.
10971 def CheckArguments(self):
10972 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10974 storage_type = self.op.storage_type
10976 if (constants.SO_FIX_CONSISTENCY not in
10977 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10978 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10979 " repaired" % storage_type,
10980 errors.ECODE_INVAL)
10982 def ExpandNames(self):
10983 self.needed_locks = {
10984 locking.LEVEL_NODE: [self.op.node_name],
10987 def _CheckFaultyDisks(self, instance, node_name):
10988 """Ensure faulty disks abort the opcode or at least warn."""
10990 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10992 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10993 " node '%s'" % (instance.name, node_name),
10994 errors.ECODE_STATE)
10995 except errors.OpPrereqError, err:
10996 if self.op.ignore_consistency:
10997 self.proc.LogWarning(str(err.args[0]))
11001 def CheckPrereq(self):
11002 """Check prerequisites.
11005 # Check whether any instance on this node has faulty disks
11006 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11007 if inst.admin_state != constants.ADMINST_UP:
11009 check_nodes = set(inst.all_nodes)
11010 check_nodes.discard(self.op.node_name)
11011 for inst_node_name in check_nodes:
11012 self._CheckFaultyDisks(inst, inst_node_name)
11014 def Exec(self, feedback_fn):
11015 feedback_fn("Repairing storage unit '%s' on %s ..." %
11016 (self.op.name, self.op.node_name))
11018 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11019 result = self.rpc.call_storage_execute(self.op.node_name,
11020 self.op.storage_type, st_args,
11022 constants.SO_FIX_CONSISTENCY)
11023 result.Raise("Failed to repair storage unit '%s' on %s" %
11024 (self.op.name, self.op.node_name))
11027 class LUNodeEvacuate(NoHooksLU):
11028 """Evacuates instances off a list of nodes.
11033 _MODE2IALLOCATOR = {
11034 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11035 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11036 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11038 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11039 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11040 constants.IALLOCATOR_NEVAC_MODES)
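# _MODE2IALLOCATOR translates the opcode-level evacuation mode into the
# corresponding iallocator node-evacuate request mode; the asserts above
# keep the table in sync with both sets of constants.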
11042 def CheckArguments(self):
11043 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11045 def ExpandNames(self):
11046 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11048 if self.op.remote_node is not None:
11049 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11050 assert self.op.remote_node
11052 if self.op.remote_node == self.op.node_name:
11053 raise errors.OpPrereqError("Can not use evacuated node as a new"
11054 " secondary node", errors.ECODE_INVAL)
11056 if self.op.mode != constants.NODE_EVAC_SEC:
11057 raise errors.OpPrereqError("Without the use of an iallocator only"
11058 " secondary instances can be evacuated",
11059 errors.ECODE_INVAL)
11062 self.share_locks = _ShareAll()
11063 self.needed_locks = {
11064 locking.LEVEL_INSTANCE: [],
11065 locking.LEVEL_NODEGROUP: [],
11066 locking.LEVEL_NODE: [],
11069 # Determine nodes (via group) optimistically, needs verification once locks
11070 # have been acquired
11071 self.lock_nodes = self._DetermineNodes()
11073 def _DetermineNodes(self):
11074 """Gets the list of nodes to operate on.
11077 if self.op.remote_node is None:
11078 # Iallocator will choose any node(s) in the same group
11079 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11081 group_nodes = frozenset([self.op.remote_node])
11083 # Determine nodes to be locked
11084 return set([self.op.node_name]) | group_nodes
11086 def _DetermineInstances(self):
11087 """Builds list of instances to operate on.
11090 assert self.op.mode in constants.NODE_EVAC_MODES
11092 if self.op.mode == constants.NODE_EVAC_PRI:
11093 # Primary instances only
11094 inst_fn = _GetNodePrimaryInstances
11095 assert self.op.remote_node is None, \
11096 "Evacuating primary instances requires iallocator"
11097 elif self.op.mode == constants.NODE_EVAC_SEC:
11098 # Secondary instances only
11099 inst_fn = _GetNodeSecondaryInstances
11102 assert self.op.mode == constants.NODE_EVAC_ALL
11103 inst_fn = _GetNodeInstances
11104 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11106 raise errors.OpPrereqError("Due to an issue with the iallocator"
11107 " interface it is not possible to evacuate"
11108 " all instances at once; specify explicitly"
11109 " whether to evacuate primary or secondary"
11111 errors.ECODE_INVAL)
11113 return inst_fn(self.cfg, self.op.node_name)
11115 def DeclareLocks(self, level):
11116 if level == locking.LEVEL_INSTANCE:
11117 # Lock instances optimistically, needs verification once node and group
11118 # locks have been acquired
11119 self.needed_locks[locking.LEVEL_INSTANCE] = \
11120 set(i.name for i in self._DetermineInstances())
11122 elif level == locking.LEVEL_NODEGROUP:
11123 # Lock node groups for all potential target nodes optimistically, needs
11124 # verification once nodes have been acquired
11125 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11126 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11128 elif level == locking.LEVEL_NODE:
11129 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11131 def CheckPrereq(self):
11133 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11134 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11135 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11137 need_nodes = self._DetermineNodes()
11139 if not owned_nodes.issuperset(need_nodes):
11140 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11141 " locks were acquired, current nodes are"
11142 " are '%s', used to be '%s'; retry the"
11144 (self.op.node_name,
11145 utils.CommaJoin(need_nodes),
11146 utils.CommaJoin(owned_nodes)),
11147 errors.ECODE_STATE)
11149 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11150 if owned_groups != wanted_groups:
11151 raise errors.OpExecError("Node groups changed since locks were acquired,"
11152 " current groups are '%s', used to be '%s';"
11153 " retry the operation" %
11154 (utils.CommaJoin(wanted_groups),
11155 utils.CommaJoin(owned_groups)))
11157 # Determine affected instances
11158 self.instances = self._DetermineInstances()
11159 self.instance_names = [i.name for i in self.instances]
11161 if set(self.instance_names) != owned_instances:
11162 raise errors.OpExecError("Instances on node '%s' changed since locks"
11163 " were acquired, current instances are '%s',"
11164 " used to be '%s'; retry the operation" %
11165 (self.op.node_name,
11166 utils.CommaJoin(self.instance_names),
11167 utils.CommaJoin(owned_instances)))
11169 if self.instance_names:
11170 self.LogInfo("Evacuating instances from node '%s': %s",
11172 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11174 self.LogInfo("No instances to evacuate from node '%s'",
11177 if self.op.remote_node is not None:
11178 for i in self.instances:
11179 if i.primary_node == self.op.remote_node:
11180 raise errors.OpPrereqError("Node %s is the primary node of"
11181 " instance %s, cannot use it as"
11183 (self.op.remote_node, i.name),
11184 errors.ECODE_INVAL)
11186 def Exec(self, feedback_fn):
11187 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11189 if not self.instance_names:
11190 # No instances to evacuate
11193 elif self.op.iallocator is not None:
11194 # TODO: Implement relocation to other group
11195 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11196 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11197 instances=list(self.instance_names))
11199 ial.Run(self.op.iallocator)
11201 if not ial.success:
11202 raise errors.OpPrereqError("Can't compute node evacuation using"
11203 " iallocator '%s': %s" %
11204 (self.op.iallocator, ial.info),
11205 errors.ECODE_NORES)
11207 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
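# _LoadNodeEvacResult deserializes the iallocator's per-instance opcode
# lists; each element of "jobs" is the set of opcodes making up one job.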
11209 elif self.op.remote_node is not None:
11210 assert self.op.mode == constants.NODE_EVAC_SEC
11212 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11213 remote_node=self.op.remote_node,
11215 mode=constants.REPLACE_DISK_CHG,
11216 early_release=self.op.early_release)]
11217 for instance_name in self.instance_names
11221 raise errors.ProgrammerError("No iallocator or remote node")
11223 return ResultWithJobs(jobs)
11226 def _SetOpEarlyRelease(early_release, op):
11227 """Sets C{early_release} flag on opcodes if available.
11231 op.early_release = early_release
11232 except AttributeError:
11233 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
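# OpInstanceReplaceDisks is known to support early_release, so reaching the
# AttributeError path with such an opcode would indicate a bug.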
11238 def _NodeEvacDest(use_nodes, group, nodes):
11239 """Returns group or nodes depending on caller's choice.
11243 return utils.CommaJoin(nodes)
11248 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11249 """Unpacks the result of change-group and node-evacuate iallocator requests.
11251 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11252 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11254 @type lu: L{LogicalUnit}
11255 @param lu: Logical unit instance
11256 @type alloc_result: tuple/list
11257 @param alloc_result: Result from iallocator
11258 @type early_release: bool
11259 @param early_release: Whether to release locks early if possible
11260 @type use_nodes: bool
11261 @param use_nodes: Whether to display node names instead of groups
11264 (moved, failed, jobs) = alloc_result
11267 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11268 for (name, reason) in failed)
11269 lu.LogWarning("Unable to evacuate instances %s", failreason)
11270 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11273 lu.LogInfo("Instances to be moved: %s",
11274 utils.CommaJoin("%s (to %s)" %
11275 (name, _NodeEvacDest(use_nodes, group, nodes))
11276 for (name, group, nodes) in moved))
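# The "jobs" element is a list of job definitions, each being a list of
# serialized opcodes, e.g. (purely illustrative):
#   [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}], ...]
# Each opcode is deserialized with LoadOpCode and, where the opcode supports
# it, the early_release flag is applied before the jobs are returned.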
11278 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11279 map(opcodes.OpCode.LoadOpCode, ops))
11283 class LUInstanceGrowDisk(LogicalUnit):
11284 """Grow a disk of an instance.
11287 HPATH = "disk-grow"
11288 HTYPE = constants.HTYPE_INSTANCE
11291 def ExpandNames(self):
11292 self._ExpandAndLockInstance()
11293 self.needed_locks[locking.LEVEL_NODE] = []
11294 self.needed_locks[locking.LEVEL_NODE_RES] = []
11295 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11296 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11298 def DeclareLocks(self, level):
11299 if level == locking.LEVEL_NODE:
11300 self._LockInstancesNodes()
11301 elif level == locking.LEVEL_NODE_RES:
11303 self.needed_locks[locking.LEVEL_NODE_RES] = \
11304 self.needed_locks[locking.LEVEL_NODE][:]
11306 def BuildHooksEnv(self):
11307 """Build hooks env.
11309 This runs on the master, the primary and all the secondaries.
11313 "DISK": self.op.disk,
11314 "AMOUNT": self.op.amount,
11316 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11319 def BuildHooksNodes(self):
11320 """Build hooks nodes.
11323 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11326 def CheckPrereq(self):
11327 """Check prerequisites.
11329 This checks that the instance is in the cluster.
11332 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11333 assert instance is not None, \
11334 "Cannot retrieve locked instance %s" % self.op.instance_name
11335 nodenames = list(instance.all_nodes)
11336 for node in nodenames:
11337 _CheckNodeOnline(self, node)
11339 self.instance = instance
11341 if instance.disk_template not in constants.DTS_GROWABLE:
11342 raise errors.OpPrereqError("Instance's disk layout does not support"
11343 " growing", errors.ECODE_INVAL)
11345 self.disk = instance.FindDisk(self.op.disk)
11347 if instance.disk_template not in (constants.DT_FILE,
11348 constants.DT_SHARED_FILE):
11349 # TODO: check the free disk space for file, when that feature will be
11351 _CheckNodesFreeDiskPerVG(self, nodenames,
11352 self.disk.ComputeGrowth(self.op.amount))
11354 def Exec(self, feedback_fn):
11355 """Execute disk grow.
11358 instance = self.instance
11361 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11362 assert (self.owned_locks(locking.LEVEL_NODE) ==
11363 self.owned_locks(locking.LEVEL_NODE_RES))
11365 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11367 raise errors.OpExecError("Cannot activate block device to grow")
11369 feedback_fn("Growing disk %s of instance '%s' by %s" %
11370 (self.op.disk, instance.name,
11371 utils.FormatUnit(self.op.amount, "h")))
11373 # First run all grow ops in dry-run mode
11374 for node in instance.all_nodes:
11375 self.cfg.SetDiskID(disk, node)
11376 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11377 result.Raise("Grow request failed to node %s" % node)
11379 # We know that (as far as we can test) operations across different
11380 # nodes will succeed, time to run it for real
11381 for node in instance.all_nodes:
11382 self.cfg.SetDiskID(disk, node)
11383 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11384 result.Raise("Grow request failed to node %s" % node)
11386 # TODO: Rewrite code to work properly
11387 # DRBD goes into sync mode for a short amount of time after executing the
11388 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11389 # calling "resize" in sync mode fails. Sleeping for a short amount of
11390 # time is a work-around.
11393 disk.RecordGrow(self.op.amount)
11394 self.cfg.Update(instance, feedback_fn)
11396 # Changes have been recorded, release node lock
11397 _ReleaseLocks(self, locking.LEVEL_NODE)
11399 # Downgrade lock while waiting for sync
11400 self.glm.downgrade(locking.LEVEL_INSTANCE)
11402 if self.op.wait_for_sync:
11403 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11405 self.proc.LogWarning("Disk sync-ing has not returned a good"
11406 " status; please check the instance")
11407 if instance.admin_state != constants.ADMINST_UP:
11408 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11409 elif instance.admin_state != constants.ADMINST_UP:
11410 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11411 " not supposed to be running because no wait for"
11412 " sync mode was requested")
11414 assert self.owned_locks(locking.LEVEL_NODE_RES)
11415 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11418 class LUInstanceQueryData(NoHooksLU):
11419 """Query runtime instance data.
11424 def ExpandNames(self):
11425 self.needed_locks = {}
11427 # Use locking if requested or when non-static information is wanted
11428 if not (self.op.static or self.op.use_locking):
11429 self.LogWarning("Non-static data requested, locks need to be acquired")
11430 self.op.use_locking = True
11432 if self.op.instances or not self.op.use_locking:
11433 # Expand instance names right here
11434 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11436 # Will use acquired locks
11437 self.wanted_names = None
11439 if self.op.use_locking:
11440 self.share_locks = _ShareAll()
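# Instance queries are read-only, so all locks can be acquired in shared mode.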
11442 if self.wanted_names is None:
11443 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11445 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11447 self.needed_locks[locking.LEVEL_NODE] = []
11448 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11450 def DeclareLocks(self, level):
11451 if self.op.use_locking and level == locking.LEVEL_NODE:
11452 self._LockInstancesNodes()
11454 def CheckPrereq(self):
11455 """Check prerequisites.
11457 This only checks the optional instance list against the existing names.
11460 if self.wanted_names is None:
11461 assert self.op.use_locking, "Locking was not used"
11462 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11464 self.wanted_instances = \
11465 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11467 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11468 """Returns the status of a block device
11471 if self.op.static or not node:
11474 self.cfg.SetDiskID(dev, node)
11476 result = self.rpc.call_blockdev_find(node, dev)
11480 result.Raise("Can't compute disk status for %s" % instance_name)
11482 status = result.payload
11486 return (status.dev_path, status.major, status.minor,
11487 status.sync_percent, status.estimated_time,
11488 status.is_degraded, status.ldisk_status)
11490 def _ComputeDiskStatus(self, instance, snode, dev):
11491 """Compute block device status.
11494 if dev.dev_type in constants.LDS_DRBD:
11495 # we change the snode then (otherwise we use the one passed in)
11496 if dev.logical_id[0] == instance.primary_node:
11497 snode = dev.logical_id[1]
11499 snode = dev.logical_id[0]
11501 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11502 instance.name, dev)
11503 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11506 dev_children = map(compat.partial(self._ComputeDiskStatus,
11513 "iv_name": dev.iv_name,
11514 "dev_type": dev.dev_type,
11515 "logical_id": dev.logical_id,
11516 "physical_id": dev.physical_id,
11517 "pstatus": dev_pstatus,
11518 "sstatus": dev_sstatus,
11519 "children": dev_children,
11524 def Exec(self, feedback_fn):
11525 """Gather and return data"""
11528 cluster = self.cfg.GetClusterInfo()
11530 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11531 for i in self.wanted_instances)
11532 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11533 if self.op.static or pnode.offline:
11534 remote_state = None
11536 self.LogWarning("Primary node %s is marked offline, returning static"
11537 " information only for instance %s" %
11538 (pnode.name, instance.name))
11540 remote_info = self.rpc.call_instance_info(instance.primary_node,
11542 instance.hypervisor)
11543 remote_info.Raise("Error checking node %s" % instance.primary_node)
11544 remote_info = remote_info.payload
11545 if remote_info and "state" in remote_info:
11546 remote_state = "up"
11548 if instance.admin_state == constants.ADMINST_UP:
11549 remote_state = "down"
11551 remote_state = instance.admin_state
11553 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11556 result[instance.name] = {
11557 "name": instance.name,
11558 "config_state": instance.admin_state,
11559 "run_state": remote_state,
11560 "pnode": instance.primary_node,
11561 "snodes": instance.secondary_nodes,
11563 # this happens to be the same format used for hooks
11564 "nics": _NICListToTuple(self, instance.nics),
11565 "disk_template": instance.disk_template,
11567 "hypervisor": instance.hypervisor,
11568 "network_port": instance.network_port,
11569 "hv_instance": instance.hvparams,
11570 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11571 "be_instance": instance.beparams,
11572 "be_actual": cluster.FillBE(instance),
11573 "os_instance": instance.osparams,
11574 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11575 "serial_no": instance.serial_no,
11576 "mtime": instance.mtime,
11577 "ctime": instance.ctime,
11578 "uuid": instance.uuid,
11584 class LUInstanceSetParams(LogicalUnit):
11585 """Modifies an instances's parameters.
11588 HPATH = "instance-modify"
11589 HTYPE = constants.HTYPE_INSTANCE
11592 def CheckArguments(self):
11593 if not (self.op.nics or self.op.disks or self.op.disk_template or
11594 self.op.hvparams or self.op.beparams or self.op.os_name or
11595 self.op.online_inst or self.op.offline_inst or
11596 self.op.runtime_mem):
11597 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11599 if self.op.hvparams:
11600 _CheckGlobalHvParams(self.op.hvparams)
11604 for disk_op, disk_dict in self.op.disks:
11605 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11606 if disk_op == constants.DDM_REMOVE:
11607 disk_addremove += 1
11609 elif disk_op == constants.DDM_ADD:
11610 disk_addremove += 1
11612 if not isinstance(disk_op, int):
11613 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11614 if not isinstance(disk_dict, dict):
11615 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11616 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11618 if disk_op == constants.DDM_ADD:
11619 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11620 if mode not in constants.DISK_ACCESS_SET:
11621 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11622 errors.ECODE_INVAL)
11623 size = disk_dict.get(constants.IDISK_SIZE, None)
11625 raise errors.OpPrereqError("Required disk parameter size missing",
11626 errors.ECODE_INVAL)
11629 except (TypeError, ValueError), err:
11630 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11631 str(err), errors.ECODE_INVAL)
11632 disk_dict[constants.IDISK_SIZE] = size
11634 # modification of disk
11635 if constants.IDISK_SIZE in disk_dict:
11636 raise errors.OpPrereqError("Disk size change not possible, use"
11637 " grow-disk", errors.ECODE_INVAL)
11639 if disk_addremove > 1:
11640 raise errors.OpPrereqError("Only one disk add or remove operation"
11641 " supported at a time", errors.ECODE_INVAL)
11643 if self.op.disks and self.op.disk_template is not None:
11644 raise errors.OpPrereqError("Disk template conversion and other disk"
11645 " changes not supported at the same time",
11646 errors.ECODE_INVAL)
11648 if (self.op.disk_template and
11649 self.op.disk_template in constants.DTS_INT_MIRROR and
11650 self.op.remote_node is None):
11651 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11652 " one requires specifying a secondary node",
11653 errors.ECODE_INVAL)
11657 for nic_op, nic_dict in self.op.nics:
11658 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11659 if nic_op == constants.DDM_REMOVE:
11662 elif nic_op == constants.DDM_ADD:
11665 if not isinstance(nic_op, int):
11666 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11667 if not isinstance(nic_dict, dict):
11668 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11669 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11671 # nic_dict should be a dict
11672 nic_ip = nic_dict.get(constants.INIC_IP, None)
11673 if nic_ip is not None:
11674 if nic_ip.lower() == constants.VALUE_NONE:
11675 nic_dict[constants.INIC_IP] = None
11677 if not netutils.IPAddress.IsValid(nic_ip):
11678 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11679 errors.ECODE_INVAL)
11681 nic_bridge = nic_dict.get("bridge", None)
11682 nic_link = nic_dict.get(constants.INIC_LINK, None)
11683 if nic_bridge and nic_link:
11684 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11685 " at the same time", errors.ECODE_INVAL)
11686 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11687 nic_dict["bridge"] = None
11688 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11689 nic_dict[constants.INIC_LINK] = None
11691 if nic_op == constants.DDM_ADD:
11692 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11693 if nic_mac is None:
11694 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11696 if constants.INIC_MAC in nic_dict:
11697 nic_mac = nic_dict[constants.INIC_MAC]
11698 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11699 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11701 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11702 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11703 " modifying an existing nic",
11704 errors.ECODE_INVAL)
11706 if nic_addremove > 1:
11707 raise errors.OpPrereqError("Only one NIC add or remove operation"
11708 " supported at a time", errors.ECODE_INVAL)
11710 def ExpandNames(self):
11711 self._ExpandAndLockInstance()
11712 # Can't even acquire node locks in shared mode as upcoming changes in
11713 # Ganeti 2.6 will start to modify the node object on disk conversion
11714 self.needed_locks[locking.LEVEL_NODE] = []
11715 self.needed_locks[locking.LEVEL_NODE_RES] = []
11716 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11718 def DeclareLocks(self, level):
11719 if level == locking.LEVEL_NODE:
11720 self._LockInstancesNodes()
11721 if self.op.disk_template and self.op.remote_node:
11722 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11723 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11724 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11726 self.needed_locks[locking.LEVEL_NODE_RES] = \
11727 self.needed_locks[locking.LEVEL_NODE][:]
11729 def BuildHooksEnv(self):
11730 """Build hooks env.
11732 This runs on the master, primary and secondaries.
11736 if constants.BE_MINMEM in self.be_new:
11737 args["minmem"] = self.be_new[constants.BE_MINMEM]
11738 if constants.BE_MAXMEM in self.be_new:
11739 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11740 if constants.BE_VCPUS in self.be_new:
11741 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11742 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11743 # information at all.
11746 nic_override = dict(self.op.nics)
11747 for idx, nic in enumerate(self.instance.nics):
11748 if idx in nic_override:
11749 this_nic_override = nic_override[idx]
11751 this_nic_override = {}
11752 if constants.INIC_IP in this_nic_override:
11753 ip = this_nic_override[constants.INIC_IP]
11756 if constants.INIC_MAC in this_nic_override:
11757 mac = this_nic_override[constants.INIC_MAC]
11760 if idx in self.nic_pnew:
11761 nicparams = self.nic_pnew[idx]
11763 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11764 mode = nicparams[constants.NIC_MODE]
11765 link = nicparams[constants.NIC_LINK]
11766 args["nics"].append((ip, mac, mode, link))
11767 if constants.DDM_ADD in nic_override:
11768 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11769 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11770 nicparams = self.nic_pnew[constants.DDM_ADD]
11771 mode = nicparams[constants.NIC_MODE]
11772 link = nicparams[constants.NIC_LINK]
11773 args["nics"].append((ip, mac, mode, link))
11774 elif constants.DDM_REMOVE in nic_override:
11775 del args["nics"][-1]
11777 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11778 if self.op.disk_template:
11779 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11780 if self.op.runtime_mem:
11781 env["RUNTIME_MEMORY"] = self.op.runtime_mem
11785 def BuildHooksNodes(self):
11786 """Build hooks nodes.
11789 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11792 def CheckPrereq(self):
11793 """Check prerequisites.
11795 This only checks the instance list against the existing names.
11798 # checking the new params on the primary/secondary nodes
11800 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11801 cluster = self.cluster = self.cfg.GetClusterInfo()
11802 assert self.instance is not None, \
11803 "Cannot retrieve locked instance %s" % self.op.instance_name
11804 pnode = instance.primary_node
11805 nodelist = list(instance.all_nodes)
11806 pnode_info = self.cfg.GetNodeInfo(pnode)
11807 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11810 if self.op.os_name and not self.op.force:
11811 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11812 self.op.force_variant)
11813 instance_os = self.op.os_name
11815 instance_os = instance.os
11817 if self.op.disk_template:
11818 if instance.disk_template == self.op.disk_template:
11819 raise errors.OpPrereqError("Instance already has disk template %s" %
11820 instance.disk_template, errors.ECODE_INVAL)
11822 if (instance.disk_template,
11823 self.op.disk_template) not in self._DISK_CONVERSIONS:
11824 raise errors.OpPrereqError("Unsupported disk template conversion from"
11825 " %s to %s" % (instance.disk_template,
11826 self.op.disk_template),
11827 errors.ECODE_INVAL)
11828 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11829 msg="cannot change disk template")
11830 if self.op.disk_template in constants.DTS_INT_MIRROR:
11831 if self.op.remote_node == pnode:
11832 raise errors.OpPrereqError("Given new secondary node %s is the same"
11833 " as the primary node of the instance" %
11834 self.op.remote_node, errors.ECODE_STATE)
11835 _CheckNodeOnline(self, self.op.remote_node)
11836 _CheckNodeNotDrained(self, self.op.remote_node)
11837 # FIXME: here we assume that the old instance type is DT_PLAIN
11838 assert instance.disk_template == constants.DT_PLAIN
11839 disks = [{constants.IDISK_SIZE: d.size,
11840 constants.IDISK_VG: d.logical_id[0]}
11841 for d in instance.disks]
11842 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11843 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11845 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11846 snode_group = self.cfg.GetNodeGroup(snode_info.group)
11847 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
11848 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
11849 ignore=self.op.ignore_ipolicy)
11850 if pnode_info.group != snode_info.group:
11851 self.LogWarning("The primary and secondary nodes are in two"
11852 " different node groups; the disk parameters"
11853 " from the first disk's node group will be"
11856 # hvparams processing
11857 if self.op.hvparams:
11858 hv_type = instance.hypervisor
11859 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11860 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11861 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11864 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11865 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11866 self.hv_proposed = self.hv_new = hv_new # the new actual values
11867 self.hv_inst = i_hvdict # the new dict (without defaults)
11869 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11871 self.hv_new = self.hv_inst = {}
11873 # beparams processing
11874 if self.op.beparams:
11875 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11877 objects.UpgradeBeParams(i_bedict)
11878 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11879 be_new = cluster.SimpleFillBE(i_bedict)
11880 self.be_proposed = self.be_new = be_new # the new actual values
11881 self.be_inst = i_bedict # the new dict (without defaults)
11883 self.be_new = self.be_inst = {}
11884 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11885 be_old = cluster.FillBE(instance)
11887 # CPU param validation -- checking every time a parameter is
11888 # changed to cover all cases where either CPU mask or vcpus have
11890 if (constants.BE_VCPUS in self.be_proposed and
11891 constants.HV_CPU_MASK in self.hv_proposed):
11893 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11894 # Verify mask is consistent with number of vCPUs. Can skip this
11895 # test if only 1 entry in the CPU mask, which means same mask
11896 # is applied to all vCPUs.
11897 if (len(cpu_list) > 1 and
11898 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11899 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11901 (self.be_proposed[constants.BE_VCPUS],
11902 self.hv_proposed[constants.HV_CPU_MASK]),
11903 errors.ECODE_INVAL)
11905 # Only perform this test if a new CPU mask is given
11906 if constants.HV_CPU_MASK in self.hv_new:
11907 # Calculate the largest CPU number requested
11908 max_requested_cpu = max(map(max, cpu_list))
11909 # Check that all of the instance's nodes have enough physical CPUs to
11910 # satisfy the requested CPU mask
11911 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11912 max_requested_cpu + 1, instance.hypervisor)
11914 # osparams processing
11915 if self.op.osparams:
11916 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11917 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11918 self.os_inst = i_osdict # the new dict (without defaults)
11924 #TODO(dynmem): do the appropriate check involving MINMEM
11925 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11926 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11927 mem_check_list = [pnode]
11928 if be_new[constants.BE_AUTO_BALANCE]:
11929 # either we changed auto_balance to yes or it was from before
11930 mem_check_list.extend(instance.secondary_nodes)
11931 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11932 instance.hypervisor)
11933 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11934 [instance.hypervisor])
11935 pninfo = nodeinfo[pnode]
11936 msg = pninfo.fail_msg
11938 # Assume the primary node is unreachable and go ahead
11939 self.warn.append("Can't get info from primary node %s: %s" %
11942 (_, _, (pnhvinfo, )) = pninfo.payload
11943 if not isinstance(pnhvinfo.get("memory_free", None), int):
11944 self.warn.append("Node data from primary node %s doesn't contain"
11945 " free memory information" % pnode)
11946 elif instance_info.fail_msg:
11947 self.warn.append("Can't get instance runtime information: %s" %
11948 instance_info.fail_msg)
11950 if instance_info.payload:
11951 current_mem = int(instance_info.payload["memory"])
11953 # Assume instance not running
11954 # (there is a slight race condition here, but it's not very
11955 # probable, and we have no other way to check)
11956 # TODO: Describe race condition
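# (The instance could be started between the instance_info RPC above and the
# memory check below, in which case the node's reported free memory would
# already be outdated; the window is short, so this is accepted.)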
11958 #TODO(dynmem): do the appropriate check involving MINMEM
11959 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11960 pnhvinfo["memory_free"])
11962 raise errors.OpPrereqError("This change will prevent the instance"
11963 " from starting, due to %d MB of memory"
11964 " missing on its primary node" %
11966 errors.ECODE_NORES)
11968 if be_new[constants.BE_AUTO_BALANCE]:
11969 for node, nres in nodeinfo.items():
11970 if node not in instance.secondary_nodes:
11972 nres.Raise("Can't get info from secondary node %s" % node,
11973 prereq=True, ecode=errors.ECODE_STATE)
11974 (_, _, (nhvinfo, )) = nres.payload
11975 if not isinstance(nhvinfo.get("memory_free", None), int):
11976 raise errors.OpPrereqError("Secondary node %s didn't return free"
11977 " memory information" % node,
11978 errors.ECODE_STATE)
11979 #TODO(dynmem): do the appropriate check involving MINMEM
11980 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11981 raise errors.OpPrereqError("This change will prevent the instance"
11982 " from failover to its secondary node"
11983 " %s, due to not enough memory" % node,
11984 errors.ECODE_STATE)
11986 if self.op.runtime_mem:
11987 remote_info = self.rpc.call_instance_info(instance.primary_node,
11989 instance.hypervisor)
11990 remote_info.Raise("Error checking node %s" % instance.primary_node)
11991 if not remote_info.payload: # not running already
11992 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
11993 errors.ECODE_STATE)
11995 current_memory = remote_info.payload["memory"]
11996 if (not self.op.force and
11997 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
11998 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
11999 raise errors.OpPrereqError("Instance %s must have memory between %d"
12000 " and %d MB of memory unless --force is"
12001 " given" % (instance.name,
12002 self.be_proposed[constants.BE_MINMEM],
12003 self.be_proposed[constants.BE_MAXMEM]),
12004 errors.ECODE_INVAL)
12006 if self.op.runtime_mem > current_memory:
12007 _CheckNodeFreeMemory(self, instance.primary_node,
12008 "ballooning memory for instance %s" %
12010 self.op.runtime_mem - current_memory,
12011 instance.hypervisor)
12015 self.nic_pinst = {}
12016 for nic_op, nic_dict in self.op.nics:
12017 if nic_op == constants.DDM_REMOVE:
12018 if not instance.nics:
12019 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
12020 errors.ECODE_INVAL)
12022 if nic_op != constants.DDM_ADD:
12024 if not instance.nics:
12025 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
12026 " no NICs" % nic_op,
12027 errors.ECODE_INVAL)
12028 if nic_op < 0 or nic_op >= len(instance.nics):
12029 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
12031 (nic_op, len(instance.nics) - 1),
12032 errors.ECODE_INVAL)
12033 old_nic_params = instance.nics[nic_op].nicparams
12034 old_nic_ip = instance.nics[nic_op].ip
12036 old_nic_params = {}
12039 update_params_dict = dict([(key, nic_dict[key])
12040 for key in constants.NICS_PARAMETERS
12041 if key in nic_dict])
12043 if "bridge" in nic_dict:
12044 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12046 new_nic_params = _GetUpdatedParams(old_nic_params,
12047 update_params_dict)
12048 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12049 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12050 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12051 self.nic_pinst[nic_op] = new_nic_params
12052 self.nic_pnew[nic_op] = new_filled_nic_params
12053 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12055 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12056 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12057 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12059 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12061 self.warn.append(msg)
12063 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12064 if new_nic_mode == constants.NIC_MODE_ROUTED:
12065 if constants.INIC_IP in nic_dict:
12066 nic_ip = nic_dict[constants.INIC_IP]
12068 nic_ip = old_nic_ip
12070 raise errors.OpPrereqError("Cannot set the nic ip to None"
12071 " on a routed nic", errors.ECODE_INVAL)
12072 if constants.INIC_MAC in nic_dict:
12073 nic_mac = nic_dict[constants.INIC_MAC]
12074 if nic_mac is None:
12075 raise errors.OpPrereqError("Cannot set the nic mac to None",
12076 errors.ECODE_INVAL)
12077 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12078 # otherwise generate the mac
12079 nic_dict[constants.INIC_MAC] = \
12080 self.cfg.GenerateMAC(self.proc.GetECId())
12082 # or validate/reserve the current one
12084 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12085 except errors.ReservationError:
12086 raise errors.OpPrereqError("MAC address %s already in use"
12087 " in cluster" % nic_mac,
12088 errors.ECODE_NOTUNIQUE)
12091 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12092 raise errors.OpPrereqError("Disk operations not supported for"
12093 " diskless instances",
12094 errors.ECODE_INVAL)
12095 for disk_op, _ in self.op.disks:
12096 if disk_op == constants.DDM_REMOVE:
12097 if len(instance.disks) == 1:
12098 raise errors.OpPrereqError("Cannot remove the last disk of"
12099 " an instance", errors.ECODE_INVAL)
12100 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12101 msg="cannot remove disks")
12103 if (disk_op == constants.DDM_ADD and
12104 len(instance.disks) >= constants.MAX_DISKS):
12105 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12106 " add more" % constants.MAX_DISKS,
12107 errors.ECODE_STATE)
12108 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12110 if disk_op < 0 or disk_op >= len(instance.disks):
12111 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12113 (disk_op, len(instance.disks)),
12114 errors.ECODE_INVAL)
12116 # disabling the instance
12117 if self.op.offline_inst:
12118 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12119 msg="cannot change instance state to offline")
12121 # enabling the instance
12122 if self.op.online_inst:
12123 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12124 msg="cannot make instance go online")
12126 def _ConvertPlainToDrbd(self, feedback_fn):
12127 """Converts an instance from plain to drbd.
12130 feedback_fn("Converting template to drbd")
12131 instance = self.instance
12132 pnode = instance.primary_node
12133 snode = self.op.remote_node
12135 assert instance.disk_template == constants.DT_PLAIN
12137 # create a fake disk info for _GenerateDiskTemplate
12138 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12139 constants.IDISK_VG: d.logical_id[0]}
12140 for d in instance.disks]
12141 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12142 instance.name, pnode, [snode],
12143 disk_info, None, None, 0, feedback_fn,
12145 info = _GetInstanceInfoText(instance)
12146 feedback_fn("Creating aditional volumes...")
12147 # first, create the missing data and meta devices
12148 for disk in new_disks:
12149 # unfortunately this is... not too nice
12150 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12152 for child in disk.children:
12153 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12154 # at this stage, all new LVs have been created, we can rename the
12156 feedback_fn("Renaming original volumes...")
12157 rename_list = [(o, n.children[0].logical_id)
12158 for (o, n) in zip(instance.disks, new_disks)]
12159 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12160 result.Raise("Failed to rename original LVs")
12162 feedback_fn("Initializing DRBD devices...")
12163 # all child devices are in place, we can now create the DRBD devices
12164 for disk in new_disks:
12165 for node in [pnode, snode]:
12166 f_create = node == pnode
12167 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12169 # at this point, the instance has been modified
12170 instance.disk_template = constants.DT_DRBD8
12171 instance.disks = new_disks
12172 self.cfg.Update(instance, feedback_fn)
12174 # Release node locks while waiting for sync
12175 _ReleaseLocks(self, locking.LEVEL_NODE)
12177 # disks are created, waiting for sync
12178 disk_abort = not _WaitForSync(self, instance,
12179 oneshot=not self.op.wait_for_sync)
12181 raise errors.OpExecError("There are some degraded disks for"
12182 " this instance, please cleanup manually")
12184 # Node resource locks will be released by caller
12186 def _ConvertDrbdToPlain(self, feedback_fn):
12187 """Converts an instance from drbd to plain.
12190 instance = self.instance
12192 assert len(instance.secondary_nodes) == 1
12193 assert instance.disk_template == constants.DT_DRBD8
12195 pnode = instance.primary_node
12196 snode = instance.secondary_nodes[0]
12197 feedback_fn("Converting template to plain")
12199 old_disks = instance.disks
12200 new_disks = [d.children[0] for d in old_disks]
12202 # copy over size and mode
12203 for parent, child in zip(old_disks, new_disks):
12204 child.size = parent.size
12205 child.mode = parent.mode
12207 # update instance structure
12208 instance.disks = new_disks
12209 instance.disk_template = constants.DT_PLAIN
12210 self.cfg.Update(instance, feedback_fn)
12212 # Release locks in case removing disks takes a while
12213 _ReleaseLocks(self, locking.LEVEL_NODE)
12215 feedback_fn("Removing volumes on the secondary node...")
12216 for disk in old_disks:
12217 self.cfg.SetDiskID(disk, snode)
12218 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12220 self.LogWarning("Could not remove block device %s on node %s,"
12221 " continuing anyway: %s", disk.iv_name, snode, msg)
12223 feedback_fn("Removing unneeded volumes on the primary node...")
12224 for idx, disk in enumerate(old_disks):
12225 meta = disk.children[1]
12226 self.cfg.SetDiskID(meta, pnode)
12227 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12229 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12230 " continuing anyway: %s", idx, pnode, msg)
12232 # this is a DRBD disk, return its port to the pool
12233 for disk in old_disks:
12234 tcp_port = disk.logical_id[2]
12235 self.cfg.AddTcpUdpPort(tcp_port)
12237 # Node resource locks will be released by caller
12239 def Exec(self, feedback_fn):
12240 """Modifies an instance.
12242 All parameters take effect only at the next restart of the instance.
12245 # Process here the warnings from CheckPrereq, as we don't have a
12246 # feedback_fn there.
12247 for warn in self.warn:
12248 feedback_fn("WARNING: %s" % warn)
12250 assert ((self.op.disk_template is None) ^
12251 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12252 "Not owning any node resource locks"
12255 instance = self.instance
12258 if self.op.runtime_mem:
12259 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12261 self.op.runtime_mem)
12262 rpcres.Raise("Cannot modify instance runtime memory")
12263 result.append(("runtime_memory", self.op.runtime_mem))
12266 for disk_op, disk_dict in self.op.disks:
12267 if disk_op == constants.DDM_REMOVE:
12268 # remove the last disk
12269 device = instance.disks.pop()
12270 device_idx = len(instance.disks)
12271 for node, disk in device.ComputeNodeTree(instance.primary_node):
12272 self.cfg.SetDiskID(disk, node)
12273 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12275 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12276 " continuing anyway", device_idx, node, msg)
12277 result.append(("disk/%d" % device_idx, "remove"))
12279 # if this is a DRBD disk, return its port to the pool
12280 if device.dev_type in constants.LDS_DRBD:
12281 tcp_port = device.logical_id[2]
12282 self.cfg.AddTcpUdpPort(tcp_port)
12283 elif disk_op == constants.DDM_ADD:
12285 if instance.disk_template in (constants.DT_FILE,
12286 constants.DT_SHARED_FILE):
12287 file_driver, file_path = instance.disks[0].logical_id
12288 file_path = os.path.dirname(file_path)
12290 file_driver = file_path = None
12291 disk_idx_base = len(instance.disks)
12292 new_disk = _GenerateDiskTemplate(self,
12293 instance.disk_template,
12294 instance.name, instance.primary_node,
12295 instance.secondary_nodes,
12301 self.diskparams)[0]
12302 instance.disks.append(new_disk)
12303 info = _GetInstanceInfoText(instance)
12305 logging.info("Creating volume %s for instance %s",
12306 new_disk.iv_name, instance.name)
12307 # Note: this needs to be kept in sync with _CreateDisks
12309 for node in instance.all_nodes:
12310 f_create = node == instance.primary_node
12312 _CreateBlockDev(self, node, instance, new_disk,
12313 f_create, info, f_create)
12314 except errors.OpExecError, err:
12315 self.LogWarning("Failed to create volume %s (%s) on"
12317 new_disk.iv_name, new_disk, node, err)
12318 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12319 (new_disk.size, new_disk.mode)))
12321 # change a given disk
12322 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12323 result.append(("disk.mode/%d" % disk_op,
12324 disk_dict[constants.IDISK_MODE]))
12326 if self.op.disk_template:
12328 check_nodes = set(instance.all_nodes)
12329 if self.op.remote_node:
12330 check_nodes.add(self.op.remote_node)
12331 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12332 owned = self.owned_locks(level)
12333 assert not (check_nodes - owned), \
12334 ("Not owning the correct locks, owning %r, expected at least %r" %
12335 (owned, check_nodes))
12337 r_shut = _ShutdownInstanceDisks(self, instance)
12339 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12340 " proceed with disk template conversion")
12341 mode = (instance.disk_template, self.op.disk_template)
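# The (current template, requested template) pair selects the conversion
# helper from _DISK_CONVERSIONS; CheckPrereq already rejected unsupported
# combinations.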
12343 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12345 self.cfg.ReleaseDRBDMinors(instance.name)
12347 result.append(("disk_template", self.op.disk_template))
12349 assert instance.disk_template == self.op.disk_template, \
12350 ("Expected disk template '%s', found '%s'" %
12351 (self.op.disk_template, instance.disk_template))
12353 # Release node and resource locks if there are any (they might already have
12354 # been released during disk conversion)
12355 _ReleaseLocks(self, locking.LEVEL_NODE)
12356 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12359 for nic_op, nic_dict in self.op.nics:
12360 if nic_op == constants.DDM_REMOVE:
12361 # remove the last nic
12362 del instance.nics[-1]
12363 result.append(("nic.%d" % len(instance.nics), "remove"))
12364 elif nic_op == constants.DDM_ADD:
12365 # mac and bridge should be set by now
12366 mac = nic_dict[constants.INIC_MAC]
12367 ip = nic_dict.get(constants.INIC_IP, None)
12368 nicparams = self.nic_pinst[constants.DDM_ADD]
12369 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12370 instance.nics.append(new_nic)
12371 result.append(("nic.%d" % (len(instance.nics) - 1),
12372 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12373 (new_nic.mac, new_nic.ip,
12374 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12375 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12378 for key in (constants.INIC_MAC, constants.INIC_IP):
12379 if key in nic_dict:
12380 setattr(instance.nics[nic_op], key, nic_dict[key])
12381 if nic_op in self.nic_pinst:
12382 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12383 for key, val in nic_dict.iteritems():
12384 result.append(("nic.%s/%d" % (key, nic_op), val))
12387 if self.op.hvparams:
12388 instance.hvparams = self.hv_inst
12389 for key, val in self.op.hvparams.iteritems():
12390 result.append(("hv/%s" % key, val))
12393 if self.op.beparams:
12394 instance.beparams = self.be_inst
12395 for key, val in self.op.beparams.iteritems():
12396 result.append(("be/%s" % key, val))
12399 if self.op.os_name:
12400 instance.os = self.op.os_name
12403 if self.op.osparams:
12404 instance.osparams = self.os_inst
12405 for key, val in self.op.osparams.iteritems():
12406 result.append(("os/%s" % key, val))
12408 # online/offline instance
12409 if self.op.online_inst:
12410 self.cfg.MarkInstanceDown(instance.name)
12411 result.append(("admin_state", constants.ADMINST_DOWN))
12412 if self.op.offline_inst:
12413 self.cfg.MarkInstanceOffline(instance.name)
12414 result.append(("admin_state", constants.ADMINST_OFFLINE))
12416 self.cfg.Update(instance, feedback_fn)
12418 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12419 self.owned_locks(locking.LEVEL_NODE)), \
12420 "All node locks should have been released by now"
12424 _DISK_CONVERSIONS = {
12425 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12426 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12430 class LUInstanceChangeGroup(LogicalUnit):
12431 HPATH = "instance-change-group"
12432 HTYPE = constants.HTYPE_INSTANCE
12435 def ExpandNames(self):
12436 self.share_locks = _ShareAll()
12437 self.needed_locks = {
12438 locking.LEVEL_NODEGROUP: [],
12439 locking.LEVEL_NODE: [],
12442 self._ExpandAndLockInstance()
12444 if self.op.target_groups:
12445 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12446 self.op.target_groups)
12448 self.req_target_uuids = None
12450 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12452 def DeclareLocks(self, level):
12453 if level == locking.LEVEL_NODEGROUP:
12454 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12456 if self.req_target_uuids:
12457 lock_groups = set(self.req_target_uuids)
12459 # Lock all groups used by instance optimistically; this requires going
12460 # via the node before it's locked, requiring verification later on
12461 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12462 lock_groups.update(instance_groups)
12464 # No target groups, need to lock all of them
12465 lock_groups = locking.ALL_SET
12467 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12469 elif level == locking.LEVEL_NODE:
12470 if self.req_target_uuids:
12471 # Lock all nodes used by instances
12472 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12473 self._LockInstancesNodes()
12475 # Lock all nodes in all potential target groups
12476 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12477 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12478 member_nodes = [node_name
12479 for group in lock_groups
12480 for node_name in self.cfg.GetNodeGroup(group).members]
12481 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12483 # Lock all nodes as all groups are potential targets
12484 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12486 def CheckPrereq(self):
12487 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12488 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12489 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12491 assert (self.req_target_uuids is None or
12492 owned_groups.issuperset(self.req_target_uuids))
12493 assert owned_instances == set([self.op.instance_name])
12495 # Get instance information
12496 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12498 # Check if node groups for locked instance are still correct
12499 assert owned_nodes.issuperset(self.instance.all_nodes), \
12500 ("Instance %s's nodes changed while we kept the lock" %
12501 self.op.instance_name)
12503 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12506 if self.req_target_uuids:
12507 # User requested specific target groups
12508 self.target_uuids = self.req_target_uuids
12510 # All groups except those used by the instance are potential targets
12511 self.target_uuids = owned_groups - inst_groups
12513 conflicting_groups = self.target_uuids & inst_groups
12514 if conflicting_groups:
12515 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12516 " used by the instance '%s'" %
12517 (utils.CommaJoin(conflicting_groups),
12518 self.op.instance_name),
12519 errors.ECODE_INVAL)
12521 if not self.target_uuids:
12522 raise errors.OpPrereqError("There are no possible target groups",
12523 errors.ECODE_INVAL)
12525 def BuildHooksEnv(self):
12526 """Build hooks env.
12529 assert self.target_uuids
12532 "TARGET_GROUPS": " ".join(self.target_uuids),
12535 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12539 def BuildHooksNodes(self):
12540 """Build hooks nodes.
12543 mn = self.cfg.GetMasterNode()
12544 return ([mn], [mn])
12546 def Exec(self, feedback_fn):
12547 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12549 assert instances == [self.op.instance_name], "Instance not locked"
12551 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12552 instances=instances, target_groups=list(self.target_uuids))
12554 ial.Run(self.op.iallocator)
12556 if not ial.success:
12557 raise errors.OpPrereqError("Can't compute solution for changing group of"
12558 " instance '%s' using iallocator '%s': %s" %
12559 (self.op.instance_name, self.op.iallocator,
12561 errors.ECODE_NORES)
12563 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12565 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12566 " instance '%s'", len(jobs), self.op.instance_name)
12568 return ResultWithJobs(jobs)
12571 class LUBackupQuery(NoHooksLU):
12572 """Query the exports list
12577 def ExpandNames(self):
12578 self.needed_locks = {}
12579 self.share_locks[locking.LEVEL_NODE] = 1
12580 if not self.op.nodes:
12581 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12583 self.needed_locks[locking.LEVEL_NODE] = \
12584 _GetWantedNodes(self, self.op.nodes)
12586 def Exec(self, feedback_fn):
12587 """Compute the list of all the exported system images.
12590 @return: a dictionary with the structure node->(export-list)
12591 where export-list is a list of the instances exported on
12595 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12596 rpcresult = self.rpc.call_export_list(self.nodes)
12598 for node in rpcresult:
12599 if rpcresult[node].fail_msg:
12600 result[node] = False
12602 result[node] = rpcresult[node].payload
12607 class LUBackupPrepare(NoHooksLU):
12608 """Prepares an instance for an export and returns useful information.
12613 def ExpandNames(self):
12614 self._ExpandAndLockInstance()
12616 def CheckPrereq(self):
12617 """Check prerequisites.
12620 instance_name = self.op.instance_name
12622 self.instance = self.cfg.GetInstanceInfo(instance_name)
12623 assert self.instance is not None, \
12624 "Cannot retrieve locked instance %s" % self.op.instance_name
12625 _CheckNodeOnline(self, self.instance.primary_node)
12627 self._cds = _GetClusterDomainSecret()
12629 def Exec(self, feedback_fn):
12630 """Prepares an instance for an export.
12633 instance = self.instance
12635 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12636 salt = utils.GenerateSecret(8)
12638 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12639 result = self.rpc.call_x509_cert_create(instance.primary_node,
12640 constants.RIE_CERT_VALIDITY)
12641 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12643 (name, cert_pem) = result.payload
12645 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12649 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12650 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12652 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
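# Illustrative only: for a remote export the prepared data is roughly
#
#   {
#     "handshake": <handshake computed from the cluster domain secret>,
#     "x509_key_name": (name, hmac_digest, hmac_salt),
#     "x509_ca": "<signed X509 CA, PEM encoded>",
#   }
#
# i.e. everything the destination side needs to verify and accept the
# import (the values shown here are placeholders, not real payloads).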
12658 class LUBackupExport(LogicalUnit):
12659 """Export an instance to an image in the cluster.
12662 HPATH = "instance-export"
12663 HTYPE = constants.HTYPE_INSTANCE
12666 def CheckArguments(self):
12667 """Check the arguments.
12670 self.x509_key_name = self.op.x509_key_name
12671 self.dest_x509_ca_pem = self.op.destination_x509_ca
12673 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12674 if not self.x509_key_name:
12675 raise errors.OpPrereqError("Missing X509 key name for encryption",
12676 errors.ECODE_INVAL)
12678 if not self.dest_x509_ca_pem:
12679 raise errors.OpPrereqError("Missing destination X509 CA",
12680 errors.ECODE_INVAL)
12682 def ExpandNames(self):
12683 self._ExpandAndLockInstance()
12685 # Lock all nodes for local exports
12686 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12687 # FIXME: lock only instance primary and destination node
12689 # Sad but true, for now we have to lock all nodes, as we don't know where
12690 # the previous export might be, and in this LU we search for it and
12691 # remove it from its current node. In the future we could fix this by:
12692 # - making a tasklet to search (share-lock all), then create the
12693 # new one, then one to remove, after
12694 # - removing the removal operation altogether
12695 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12697 def DeclareLocks(self, level):
12698 """Last minute lock declaration."""
12699 # All nodes are locked anyway, so nothing to do here.
12701 def BuildHooksEnv(self):
12702 """Build hooks env.
12704 This will run on the master, primary node and target node.
12708 "EXPORT_MODE": self.op.mode,
12709 "EXPORT_NODE": self.op.target_node,
12710 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12711 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12712 # TODO: Generic function for boolean env variables
12713 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12716 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12720 def BuildHooksNodes(self):
12721 """Build hooks nodes.
12724 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12726 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12727 nl.append(self.op.target_node)
12731 def CheckPrereq(self):
12732 """Check prerequisites.
12734 This checks that the instance and node names are valid.
12737 instance_name = self.op.instance_name
12739 self.instance = self.cfg.GetInstanceInfo(instance_name)
12740 assert self.instance is not None, \
12741 "Cannot retrieve locked instance %s" % self.op.instance_name
12742 _CheckNodeOnline(self, self.instance.primary_node)
12744 if (self.op.remove_instance and
12745 self.instance.admin_state == constants.ADMINST_UP and
12746 not self.op.shutdown):
12747 raise errors.OpPrereqError("Can not remove instance without shutting it"
12750 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12751 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12752 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12753 assert self.dst_node is not None
12755 _CheckNodeOnline(self, self.dst_node.name)
12756 _CheckNodeNotDrained(self, self.dst_node.name)
12759 self.dest_disk_info = None
12760 self.dest_x509_ca = None
12762 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12763 self.dst_node = None
12765 if len(self.op.target_node) != len(self.instance.disks):
12766 raise errors.OpPrereqError(("Received destination information for %s"
12767 " disks, but instance %s has %s disks") %
12768 (len(self.op.target_node), instance_name,
12769 len(self.instance.disks)),
12770 errors.ECODE_INVAL)
12772 cds = _GetClusterDomainSecret()
12774 # Check X509 key name
12776 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12777 except (TypeError, ValueError), err:
12778 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12780 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12781 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12782 errors.ECODE_INVAL)
12784 # Load and verify CA
12786 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12787 except OpenSSL.crypto.Error, err:
12788 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12789 (err, ), errors.ECODE_INVAL)
12791 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12792 if errcode is not None:
12793 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12794 (msg, ), errors.ECODE_INVAL)
12796 self.dest_x509_ca = cert
12798 # Verify target information
12800 for idx, disk_data in enumerate(self.op.target_node):
12802 (host, port, magic) = \
12803 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12804 except errors.GenericError, err:
12805 raise errors.OpPrereqError("Target info for disk %s: %s" %
12806 (idx, err), errors.ECODE_INVAL)
12808 disk_info.append((host, port, magic))
12810 assert len(disk_info) == len(self.op.target_node)
12811 self.dest_disk_info = disk_info
12814 raise errors.ProgrammerError("Unhandled export mode %r" %
12817 # instance disk type verification
12818 # TODO: Implement export support for file-based disks
12819 for disk in self.instance.disks:
12820 if disk.dev_type == constants.LD_FILE:
12821 raise errors.OpPrereqError("Export not supported for instances with"
12822 " file-based disks", errors.ECODE_INVAL)
12824 def _CleanupExports(self, feedback_fn):
12825 """Removes exports of current instance from all other nodes.
12827 If an instance in a cluster with nodes A..D was exported to node C, its
12828 exports will be removed from the nodes A, B and D.
12831 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12833 nodelist = self.cfg.GetNodeList()
12834 nodelist.remove(self.dst_node.name)
12836 # on one-node clusters nodelist will be empty after the removal
12837 # if we proceed the backup would be removed because OpBackupQuery
12838 # substitutes an empty list with the full cluster node list.
12839 iname = self.instance.name
12841 feedback_fn("Removing old exports for instance %s" % iname)
12842 exportlist = self.rpc.call_export_list(nodelist)
12843 for node in exportlist:
12844 if exportlist[node].fail_msg:
12846 if iname in exportlist[node].payload:
12847 msg = self.rpc.call_export_remove(node, iname).fail_msg
12849 self.LogWarning("Could not remove older export for instance %s"
12850 " on node %s: %s", iname, node, msg)
12852 def Exec(self, feedback_fn):
12853 """Export an instance to an image in the cluster.
12856 assert self.op.mode in constants.EXPORT_MODES
12858 instance = self.instance
12859 src_node = instance.primary_node
12861 if self.op.shutdown:
12862 # shutdown the instance, but not the disks
12863 feedback_fn("Shutting down instance %s" % instance.name)
12864 result = self.rpc.call_instance_shutdown(src_node, instance,
12865 self.op.shutdown_timeout)
12866 # TODO: Maybe ignore failures if ignore_remove_failures is set
12867 result.Raise("Could not shutdown instance %s on"
12868 " node %s" % (instance.name, src_node))
12870 # set the disks ID correctly since call_instance_start needs the
12871 # correct drbd minor to create the symlinks
12872 for disk in instance.disks:
12873 self.cfg.SetDiskID(disk, src_node)
12875 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12878 # Activate the instance disks if we're exporting a stopped instance
12879 feedback_fn("Activating disks for %s" % instance.name)
12880 _StartInstanceDisks(self, instance, None)
12883 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12886 helper.CreateSnapshots()
12888 if (self.op.shutdown and
12889 instance.admin_state == constants.ADMINST_UP and
12890 not self.op.remove_instance):
12891 assert not activate_disks
12892 feedback_fn("Starting instance %s" % instance.name)
12893 result = self.rpc.call_instance_start(src_node,
12894 (instance, None, None), False)
12895 msg = result.fail_msg
12897 feedback_fn("Failed to start instance: %s" % msg)
12898 _ShutdownInstanceDisks(self, instance)
12899 raise errors.OpExecError("Could not start instance: %s" % msg)
12901 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12902 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12903 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12904 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12905 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12907 (key_name, _, _) = self.x509_key_name
12910 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12913 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12914 key_name, dest_ca_pem,
12919 # Check for backwards compatibility
12920 assert len(dresults) == len(instance.disks)
12921 assert compat.all(isinstance(i, bool) for i in dresults), \
12922 "Not all results are boolean: %r" % dresults
12926 feedback_fn("Deactivating disks for %s" % instance.name)
12927 _ShutdownInstanceDisks(self, instance)
12929 if not (compat.all(dresults) and fin_resu):
12932 failures.append("export finalization")
12933 if not compat.all(dresults):
12934 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12936 failures.append("disk export: disk(s) %s" % fdsk)
12938 raise errors.OpExecError("Export failed, errors in %s" %
12939 utils.CommaJoin(failures))
12941 # At this point, the export was successful, we can cleanup/finish
12943 # Remove instance if requested
12944 if self.op.remove_instance:
12945 feedback_fn("Removing instance %s" % instance.name)
12946 _RemoveInstance(self, feedback_fn, instance,
12947 self.op.ignore_remove_failures)
12949 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12950 self._CleanupExports(feedback_fn)
12952 return fin_resu, dresults
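# Illustrative only: the returned pair combines the export finalization
# status with one boolean per disk, e.g. (True, [True, True]) for a
# successful two-disk export.  Any False value would already have raised
# OpExecError above, so callers only ever see all-True results here.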
12955 class LUBackupRemove(NoHooksLU):
12956 """Remove exports related to the named instance.
12961 def ExpandNames(self):
12962 self.needed_locks = {}
12963 # We need all nodes to be locked in order for RemoveExport to work, but we
12964 # don't need to lock the instance itself, as nothing will happen to it (and
12965 # we can also remove exports for an already-removed instance)
12966 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12968 def Exec(self, feedback_fn):
12969 """Remove any export.
12972 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12973 # If the instance was not found we'll try with the name that was passed in.
12974 # This will only work if it was an FQDN, though.
12976 if not instance_name:
12978 instance_name = self.op.instance_name
12980 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12981 exportlist = self.rpc.call_export_list(locked_nodes)
12983 for node in exportlist:
12984 msg = exportlist[node].fail_msg
12986 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12988 if instance_name in exportlist[node].payload:
12990 result = self.rpc.call_export_remove(node, instance_name)
12991 msg = result.fail_msg
12993 logging.error("Could not remove export for instance %s"
12994 " on node %s: %s", instance_name, node, msg)
12996 if fqdn_warn and not found:
12997 feedback_fn("Export not found. If trying to remove an export belonging"
12998 " to a deleted instance please use its Fully Qualified Domain Name.")
13002 class LUGroupAdd(LogicalUnit):
13003 """Logical unit for creating node groups.
13006 HPATH = "group-add"
13007 HTYPE = constants.HTYPE_GROUP
13010 def ExpandNames(self):
13011 # We need the new group's UUID here so that we can create and acquire the
13012 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13013 # that it should not check whether the UUID exists in the configuration.
13014 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13015 self.needed_locks = {}
13016 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13018 def CheckPrereq(self):
13019 """Check prerequisites.
13021 This checks that the given group name is not an existing node group
13026 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13027 except errors.OpPrereqError:
13030 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13031 " node group (UUID: %s)" %
13032 (self.op.group_name, existing_uuid),
13033 errors.ECODE_EXISTS)
13035 if self.op.ndparams:
13036 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13038 if self.op.hv_state:
13039 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13041 self.new_hv_state = None
13043 if self.op.disk_state:
13044 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13046 self.new_disk_state = None
13048 if self.op.diskparams:
13049 for templ in constants.DISK_TEMPLATES:
13050 if templ not in self.op.diskparams:
13051 self.op.diskparams[templ] = {}
13052 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13054 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13056 if self.op.ipolicy:
13057 cluster = self.cfg.GetClusterInfo()
13058 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13060 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13061 except errors.ConfigurationError, err:
13062 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13063 errors.ECODE_INVAL)
13065 def BuildHooksEnv(self):
13066 """Build hooks env.
13070 "GROUP_NAME": self.op.group_name,
13073 def BuildHooksNodes(self):
13074 """Build hooks nodes.
13077 mn = self.cfg.GetMasterNode()
13078 return ([mn], [mn])
13080 def Exec(self, feedback_fn):
13081 """Add the node group to the cluster.
13084 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13085 uuid=self.group_uuid,
13086 alloc_policy=self.op.alloc_policy,
13087 ndparams=self.op.ndparams,
13088 diskparams=self.op.diskparams,
13089 ipolicy=self.op.ipolicy,
13090 hv_state_static=self.new_hv_state,
13091 disk_state_static=self.new_disk_state)
13093 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13094 del self.remove_locks[locking.LEVEL_NODEGROUP]
13097 class LUGroupAssignNodes(NoHooksLU):
13098 """Logical unit for assigning nodes to groups.
13103 def ExpandNames(self):
13104 # These raise errors.OpPrereqError on their own:
13105 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13106 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13108 # We want to lock all the affected nodes and groups. We have readily
13109 # available the list of nodes, and the *destination* group. To gather the
13110 # list of "source" groups, we need to fetch node information later on.
13111 self.needed_locks = {
13112 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13113 locking.LEVEL_NODE: self.op.nodes,
13116 def DeclareLocks(self, level):
13117 if level == locking.LEVEL_NODEGROUP:
13118 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13120 # Try to get all affected nodes' groups without having the group or node
13121 # lock yet. Needs verification later in the code flow.
13122 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13124 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13126 def CheckPrereq(self):
13127 """Check prerequisites.
13130 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13131 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13132 frozenset(self.op.nodes))
13134 expected_locks = (set([self.group_uuid]) |
13135 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13136 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13137 if actual_locks != expected_locks:
13138 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13139 " current groups are '%s', used to be '%s'" %
13140 (utils.CommaJoin(expected_locks),
13141 utils.CommaJoin(actual_locks)))
13143 self.node_data = self.cfg.GetAllNodesInfo()
13144 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13145 instance_data = self.cfg.GetAllInstancesInfo()
13147 if self.group is None:
13148 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13149 (self.op.group_name, self.group_uuid))
13151 (new_splits, previous_splits) = \
13152 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13153 for node in self.op.nodes],
13154 self.node_data, instance_data)
13157 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13159 if not self.op.force:
13160 raise errors.OpExecError("The following instances get split by this"
13161 " change and --force was not given: %s" %
13164 self.LogWarning("This operation will split the following instances: %s",
13167 if previous_splits:
13168 self.LogWarning("In addition, these already-split instances continue"
13169 " to be split across groups: %s",
13170 utils.CommaJoin(utils.NiceSort(previous_splits)))
13172 def Exec(self, feedback_fn):
13173 """Assign nodes to a new group.
13176 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13178 self.cfg.AssignGroupNodes(mods)
13181 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13182 """Check for split instances after a node assignment.
13184 This method considers a series of node assignments as an atomic operation,
13185 and returns information about split instances after applying the set of changes.
13188 In particular, it returns information about newly split instances, and
13189 instances that were already split, and remain so after the change.
13191 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
13194 @type changes: list of (node_name, new_group_uuid) pairs.
13195 @param changes: list of node assignments to consider.
13196 @param node_data: a dict with data for all nodes
13197 @param instance_data: a dict with all instances to consider
13198 @rtype: a two-tuple
13199 @return: a list of instances that were previously whole and end up split as a
13200 consequence of this change, and a list of instances that were previously
13201 split and that this change does not fix.
13204 changed_nodes = dict((node, group) for node, group in changes
13205 if node_data[node].group != group)
13207 all_split_instances = set()
13208 previously_split_instances = set()
13210 def InstanceNodes(instance):
13211 return [instance.primary_node] + list(instance.secondary_nodes)
13213 for inst in instance_data.values():
13214 if inst.disk_template not in constants.DTS_INT_MIRROR:
13217 instance_nodes = InstanceNodes(inst)
13219 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13220 previously_split_instances.add(inst.name)
13222 if len(set(changed_nodes.get(node, node_data[node].group)
13223 for node in instance_nodes)) > 1:
13224 all_split_instances.add(inst.name)
13226 return (list(all_split_instances - previously_split_instances),
13227 list(previously_split_instances & all_split_instances))
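# Worked example (hypothetical names): nodes n1 and n2 are in group g1,
# n3 is in group g2, and a DRBD instance "inst" spans n1 (primary) and
# n2 (secondary).  Then
#
#   CheckAssignmentForSplitInstances([("n2", "g2")], node_data, instance_data)
#
# returns (["inst"], []): the instance was not split before, but would be
# split between g1 and g2 after the change.  Had it already spanned two
# groups beforehand, it would appear in the second list instead.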
13230 class _GroupQuery(_QueryBase):
13231 FIELDS = query.GROUP_FIELDS
13233 def ExpandNames(self, lu):
13234 lu.needed_locks = {}
13236 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13237 self._cluster = lu.cfg.GetClusterInfo()
13238 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13241 self.wanted = [name_to_uuid[name]
13242 for name in utils.NiceSort(name_to_uuid.keys())]
13244 # Accept names to be either names or UUIDs.
13247 all_uuid = frozenset(self._all_groups.keys())
13249 for name in self.names:
13250 if name in all_uuid:
13251 self.wanted.append(name)
13252 elif name in name_to_uuid:
13253 self.wanted.append(name_to_uuid[name])
13255 missing.append(name)
13258 raise errors.OpPrereqError("Some groups do not exist: %s" %
13259 utils.CommaJoin(missing),
13260 errors.ECODE_NOENT)
13262 def DeclareLocks(self, lu, level):
13265 def _GetQueryData(self, lu):
13266 """Computes the list of node groups and their attributes.
13269 do_nodes = query.GQ_NODE in self.requested_data
13270 do_instances = query.GQ_INST in self.requested_data
13272 group_to_nodes = None
13273 group_to_instances = None
13275 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13276 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13277 # latter GetAllInstancesInfo() is not enough, for we have to go through
13278 # instance->node. Hence, we will need to process nodes even if we only need
13279 # instance information.
13280 if do_nodes or do_instances:
13281 all_nodes = lu.cfg.GetAllNodesInfo()
13282 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13285 for node in all_nodes.values():
13286 if node.group in group_to_nodes:
13287 group_to_nodes[node.group].append(node.name)
13288 node_to_group[node.name] = node.group
13291 all_instances = lu.cfg.GetAllInstancesInfo()
13292 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13294 for instance in all_instances.values():
13295 node = instance.primary_node
13296 if node in node_to_group:
13297 group_to_instances[node_to_group[node]].append(instance.name)
13300 # Do not pass on node information if it was not requested.
13301 group_to_nodes = None
13303 return query.GroupQueryData(self._cluster,
13304 [self._all_groups[uuid]
13305 for uuid in self.wanted],
13306 group_to_nodes, group_to_instances)
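# Illustrative only: with GQ_NODE and GQ_INST requested, the helper
# mappings handed to query.GroupQueryData might look like
#
#   group_to_nodes = {
#     "uuid-g1": ["node1.example.com", "node2.example.com"],
#     "uuid-g2": ["node3.example.com"],
#   }
#   group_to_instances = {
#     "uuid-g1": ["inst1.example.com"],
#     "uuid-g2": [],
#   }
#
# Instances are attributed to the group of their primary node only.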
13309 class LUGroupQuery(NoHooksLU):
13310 """Logical unit for querying node groups.
13315 def CheckArguments(self):
13316 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13317 self.op.output_fields, False)
13319 def ExpandNames(self):
13320 self.gq.ExpandNames(self)
13322 def DeclareLocks(self, level):
13323 self.gq.DeclareLocks(self, level)
13325 def Exec(self, feedback_fn):
13326 return self.gq.OldStyleQuery(self)
13329 class LUGroupSetParams(LogicalUnit):
13330 """Modifies the parameters of a node group.
13333 HPATH = "group-modify"
13334 HTYPE = constants.HTYPE_GROUP
13337 def CheckArguments(self):
13340 self.op.diskparams,
13341 self.op.alloc_policy,
13343 self.op.disk_state,
13347 if all_changes.count(None) == len(all_changes):
13348 raise errors.OpPrereqError("Please pass at least one modification",
13349 errors.ECODE_INVAL)
13351 def ExpandNames(self):
13352 # This raises errors.OpPrereqError on its own:
13353 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13355 self.needed_locks = {
13356 locking.LEVEL_NODEGROUP: [self.group_uuid],
13359 def CheckPrereq(self):
13360 """Check prerequisites.
13363 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13365 if self.group is None:
13366 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13367 (self.op.group_name, self.group_uuid))
13369 if self.op.ndparams:
13370 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13371 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13372 self.new_ndparams = new_ndparams
13374 if self.op.diskparams:
13375 self.new_diskparams = dict()
13376 for templ in constants.DISK_TEMPLATES:
13377 if templ not in self.op.diskparams:
13378 self.op.diskparams[templ] = {}
13379 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13380 self.op.diskparams[templ])
13381 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13382 self.new_diskparams[templ] = new_templ_params
13384 if self.op.hv_state:
13385 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13386 self.group.hv_state_static)
13388 if self.op.disk_state:
13389 self.new_disk_state = \
13390 _MergeAndVerifyDiskState(self.op.disk_state,
13391 self.group.disk_state_static)
13393 if self.op.ipolicy:
13394 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13398 def BuildHooksEnv(self):
13399 """Build hooks env.
13403 "GROUP_NAME": self.op.group_name,
13404 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13407 def BuildHooksNodes(self):
13408 """Build hooks nodes.
13411 mn = self.cfg.GetMasterNode()
13412 return ([mn], [mn])
13414 def Exec(self, feedback_fn):
13415 """Modifies the node group.
13420 if self.op.ndparams:
13421 self.group.ndparams = self.new_ndparams
13422 result.append(("ndparams", str(self.group.ndparams)))
13424 if self.op.diskparams:
13425 self.group.diskparams = self.new_diskparams
13426 result.append(("diskparams", str(self.group.diskparams)))
13428 if self.op.alloc_policy:
13429 self.group.alloc_policy = self.op.alloc_policy
13431 if self.op.hv_state:
13432 self.group.hv_state_static = self.new_hv_state
13434 if self.op.disk_state:
13435 self.group.disk_state_static = self.new_disk_state
13437 if self.op.ipolicy:
13438 self.group.ipolicy = self.new_ipolicy
13440 self.cfg.Update(self.group, feedback_fn)
13444 class LUGroupRemove(LogicalUnit):
13445 HPATH = "group-remove"
13446 HTYPE = constants.HTYPE_GROUP
13449 def ExpandNames(self):
13450 # This raises errors.OpPrereqError on its own:
13451 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13452 self.needed_locks = {
13453 locking.LEVEL_NODEGROUP: [self.group_uuid],
13456 def CheckPrereq(self):
13457 """Check prerequisites.
13459 This checks that the given group name exists as a node group, that it is
13460 empty (i.e., contains no nodes), and that it is not the last group of the
13464 # Verify that the group is empty.
13465 group_nodes = [node.name
13466 for node in self.cfg.GetAllNodesInfo().values()
13467 if node.group == self.group_uuid]
13470 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13472 (self.op.group_name,
13473 utils.CommaJoin(utils.NiceSort(group_nodes))),
13474 errors.ECODE_STATE)
13476 # Verify the cluster would not be left group-less.
13477 if len(self.cfg.GetNodeGroupList()) == 1:
13478 raise errors.OpPrereqError("Group '%s' is the only group,"
13479 " cannot be removed" %
13480 self.op.group_name,
13481 errors.ECODE_STATE)
13483 def BuildHooksEnv(self):
13484 """Build hooks env.
13488 "GROUP_NAME": self.op.group_name,
13491 def BuildHooksNodes(self):
13492 """Build hooks nodes.
13495 mn = self.cfg.GetMasterNode()
13496 return ([mn], [mn])
13498 def Exec(self, feedback_fn):
13499 """Remove the node group.
13503 self.cfg.RemoveNodeGroup(self.group_uuid)
13504 except errors.ConfigurationError:
13505 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13506 (self.op.group_name, self.group_uuid))
13508 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13511 class LUGroupRename(LogicalUnit):
13512 HPATH = "group-rename"
13513 HTYPE = constants.HTYPE_GROUP
13516 def ExpandNames(self):
13517 # This raises errors.OpPrereqError on its own:
13518 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13520 self.needed_locks = {
13521 locking.LEVEL_NODEGROUP: [self.group_uuid],
13524 def CheckPrereq(self):
13525 """Check prerequisites.
13527 Ensures requested new name is not yet used.
13531 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13532 except errors.OpPrereqError:
13535 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13536 " node group (UUID: %s)" %
13537 (self.op.new_name, new_name_uuid),
13538 errors.ECODE_EXISTS)
13540 def BuildHooksEnv(self):
13541 """Build hooks env.
13545 "OLD_NAME": self.op.group_name,
13546 "NEW_NAME": self.op.new_name,
13549 def BuildHooksNodes(self):
13550 """Build hooks nodes.
13553 mn = self.cfg.GetMasterNode()
13555 all_nodes = self.cfg.GetAllNodesInfo()
13556 all_nodes.pop(mn, None)
13559 run_nodes.extend(node.name for node in all_nodes.values()
13560 if node.group == self.group_uuid)
13562 return (run_nodes, run_nodes)
13564 def Exec(self, feedback_fn):
13565 """Rename the node group.
13568 group = self.cfg.GetNodeGroup(self.group_uuid)
13571 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13572 (self.op.group_name, self.group_uuid))
13574 group.name = self.op.new_name
13575 self.cfg.Update(group, feedback_fn)
13577 return self.op.new_name
13580 class LUGroupEvacuate(LogicalUnit):
13581 HPATH = "group-evacuate"
13582 HTYPE = constants.HTYPE_GROUP
13585 def ExpandNames(self):
13586 # This raises errors.OpPrereqError on its own:
13587 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13589 if self.op.target_groups:
13590 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13591 self.op.target_groups)
13593 self.req_target_uuids = []
13595 if self.group_uuid in self.req_target_uuids:
13596 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13597 " as a target group (targets are %s)" %
13599 utils.CommaJoin(self.req_target_uuids)),
13600 errors.ECODE_INVAL)
13602 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13604 self.share_locks = _ShareAll()
13605 self.needed_locks = {
13606 locking.LEVEL_INSTANCE: [],
13607 locking.LEVEL_NODEGROUP: [],
13608 locking.LEVEL_NODE: [],
13611 def DeclareLocks(self, level):
13612 if level == locking.LEVEL_INSTANCE:
13613 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13615 # Lock instances optimistically, needs verification once node and group
13616 # locks have been acquired
13617 self.needed_locks[locking.LEVEL_INSTANCE] = \
13618 self.cfg.GetNodeGroupInstances(self.group_uuid)
13620 elif level == locking.LEVEL_NODEGROUP:
13621 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13623 if self.req_target_uuids:
13624 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13626 # Lock all groups used by instances optimistically; this requires going
13627 # via the node before it's locked, requiring verification later on
13628 lock_groups.update(group_uuid
13629 for instance_name in
13630 self.owned_locks(locking.LEVEL_INSTANCE)
13632 self.cfg.GetInstanceNodeGroups(instance_name))
13634 # No target groups, need to lock all of them
13635 lock_groups = locking.ALL_SET
13637 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13639 elif level == locking.LEVEL_NODE:
13640 # This will only lock the nodes in the group to be evacuated which
13641 # contain actual instances
13642 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13643 self._LockInstancesNodes()
13645 # Lock all nodes in group to be evacuated and target groups
13646 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13647 assert self.group_uuid in owned_groups
13648 member_nodes = [node_name
13649 for group in owned_groups
13650 for node_name in self.cfg.GetNodeGroup(group).members]
13651 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13653 def CheckPrereq(self):
13654 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13655 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13656 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13658 assert owned_groups.issuperset(self.req_target_uuids)
13659 assert self.group_uuid in owned_groups
13661 # Check if locked instances are still correct
13662 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13664 # Get instance information
13665 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13667 # Check if node groups for locked instances are still correct
13668 for instance_name in owned_instances:
13669 inst = self.instances[instance_name]
13670 assert owned_nodes.issuperset(inst.all_nodes), \
13671 "Instance %s's nodes changed while we kept the lock" % instance_name
13673 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13676 assert self.group_uuid in inst_groups, \
13677 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13679 if self.req_target_uuids:
13680 # User requested specific target groups
13681 self.target_uuids = self.req_target_uuids
13683 # All groups except the one to be evacuated are potential targets
13684 self.target_uuids = [group_uuid for group_uuid in owned_groups
13685 if group_uuid != self.group_uuid]
13687 if not self.target_uuids:
13688 raise errors.OpPrereqError("There are no possible target groups",
13689 errors.ECODE_INVAL)
13691 def BuildHooksEnv(self):
13692 """Build hooks env.
13696 "GROUP_NAME": self.op.group_name,
13697 "TARGET_GROUPS": " ".join(self.target_uuids),
13700 def BuildHooksNodes(self):
13701 """Build hooks nodes.
13704 mn = self.cfg.GetMasterNode()
13706 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13708 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13710 return (run_nodes, run_nodes)
13712 def Exec(self, feedback_fn):
13713 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13715 assert self.group_uuid not in self.target_uuids
13717 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13718 instances=instances, target_groups=self.target_uuids)
13720 ial.Run(self.op.iallocator)
13722 if not ial.success:
13723 raise errors.OpPrereqError("Can't compute group evacuation using"
13724 " iallocator '%s': %s" %
13725 (self.op.iallocator, ial.info),
13726 errors.ECODE_NORES)
13728 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13730 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13731 len(jobs), self.op.group_name)
13733 return ResultWithJobs(jobs)
13736 class TagsLU(NoHooksLU): # pylint: disable=W0223
13737 """Generic tags LU.
13739 This is an abstract class which is the parent of all the other tags LUs.
13742 def ExpandNames(self):
13743 self.group_uuid = None
13744 self.needed_locks = {}
13745 if self.op.kind == constants.TAG_NODE:
13746 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13747 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13748 elif self.op.kind == constants.TAG_INSTANCE:
13749 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13750 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13751 elif self.op.kind == constants.TAG_NODEGROUP:
13752 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13754 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13755 # not possible to acquire the BGL based on opcode parameters)
13757 def CheckPrereq(self):
13758 """Check prerequisites.
13761 if self.op.kind == constants.TAG_CLUSTER:
13762 self.target = self.cfg.GetClusterInfo()
13763 elif self.op.kind == constants.TAG_NODE:
13764 self.target = self.cfg.GetNodeInfo(self.op.name)
13765 elif self.op.kind == constants.TAG_INSTANCE:
13766 self.target = self.cfg.GetInstanceInfo(self.op.name)
13767 elif self.op.kind == constants.TAG_NODEGROUP:
13768 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13770 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13771 str(self.op.kind), errors.ECODE_INVAL)
13774 class LUTagsGet(TagsLU):
13775 """Returns the tags of a given object.
13780 def ExpandNames(self):
13781 TagsLU.ExpandNames(self)
13783 # Share locks as this is only a read operation
13784 self.share_locks = _ShareAll()
13786 def Exec(self, feedback_fn):
13787 """Returns the tag list.
13790 return list(self.target.GetTags())
13793 class LUTagsSearch(NoHooksLU):
13794 """Searches the tags for a given pattern.
13799 def ExpandNames(self):
13800 self.needed_locks = {}
13802 def CheckPrereq(self):
13803 """Check prerequisites.
13805 This checks the pattern passed for validity by compiling it.
13809 self.re = re.compile(self.op.pattern)
13810 except re.error, err:
13811 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13812 (self.op.pattern, err), errors.ECODE_INVAL)
13814 def Exec(self, feedback_fn):
13815 """Returns the tag list.
13819 tgts = [("/cluster", cfg.GetClusterInfo())]
13820 ilist = cfg.GetAllInstancesInfo().values()
13821 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13822 nlist = cfg.GetAllNodesInfo().values()
13823 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13824 tgts.extend(("/nodegroup/%s" % n.name, n)
13825 for n in cfg.GetAllNodeGroupsInfo().values())
13827 for path, target in tgts:
13828 for tag in target.GetTags():
13829 if self.re.search(tag):
13830 results.append((path, tag))
13834 class LUTagsSet(TagsLU):
13835 """Sets a tag on a given object.
13840 def CheckPrereq(self):
13841 """Check prerequisites.
13843 This checks the type and length of the tag name and value.
13846 TagsLU.CheckPrereq(self)
13847 for tag in self.op.tags:
13848 objects.TaggableObject.ValidateTag(tag)
13850 def Exec(self, feedback_fn):
13855 for tag in self.op.tags:
13856 self.target.AddTag(tag)
13857 except errors.TagError, err:
13858 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13859 self.cfg.Update(self.target, feedback_fn)
13862 class LUTagsDel(TagsLU):
13863 """Delete a list of tags from a given object.
13868 def CheckPrereq(self):
13869 """Check prerequisites.
13871 This checks that we have the given tag.
13874 TagsLU.CheckPrereq(self)
13875 for tag in self.op.tags:
13876 objects.TaggableObject.ValidateTag(tag)
13877 del_tags = frozenset(self.op.tags)
13878 cur_tags = self.target.GetTags()
13880 diff_tags = del_tags - cur_tags
13882 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13883 raise errors.OpPrereqError("Tag(s) %s not found" %
13884 (utils.CommaJoin(diff_names), ),
13885 errors.ECODE_NOENT)
13887 def Exec(self, feedback_fn):
13888 """Remove the tag from the object.
13891 for tag in self.op.tags:
13892 self.target.RemoveTag(tag)
13893 self.cfg.Update(self.target, feedback_fn)
13896 class LUTestDelay(NoHooksLU):
13897 """Sleep for a specified amount of time.
13899 This LU sleeps on the master and/or nodes for a specified amount of
13905 def ExpandNames(self):
13906 """Expand names and set required locks.
13908 This expands the node list, if any.
13911 self.needed_locks = {}
13912 if self.op.on_nodes:
13913 # _GetWantedNodes can be used here, but is not always appropriate to use
13914 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13915 # more information.
13916 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13917 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13919 def _TestDelay(self):
13920 """Do the actual sleep.
13923 if self.op.on_master:
13924 if not utils.TestDelay(self.op.duration):
13925 raise errors.OpExecError("Error during master delay test")
13926 if self.op.on_nodes:
13927 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13928 for node, node_result in result.items():
13929 node_result.Raise("Failure during rpc call to node %s" % node)
13931 def Exec(self, feedback_fn):
13932 """Execute the test delay opcode, with the wanted repetitions.
13935 if self.op.repeat == 0:
13938 top_value = self.op.repeat - 1
13939 for i in range(self.op.repeat):
13940 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13944 class LUTestJqueue(NoHooksLU):
13945 """Utility LU to test some aspects of the job queue.
13950 # Must be lower than default timeout for WaitForJobChange to see whether it
13951 # notices changed jobs
13952 _CLIENT_CONNECT_TIMEOUT = 20.0
13953 _CLIENT_CONFIRM_TIMEOUT = 60.0
13956 def _NotifyUsingSocket(cls, cb, errcls):
13957 """Opens a Unix socket and waits for another program to connect.
13960 @param cb: Callback to send socket name to client
13961 @type errcls: class
13962 @param errcls: Exception class to use for errors
13965 # Using a temporary directory as there's no easy way to create temporary
13966 # sockets without writing a custom loop around tempfile.mktemp and
13968 tmpdir = tempfile.mkdtemp()
13970 tmpsock = utils.PathJoin(tmpdir, "sock")
13972 logging.debug("Creating temporary socket at %s", tmpsock)
13973 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13978 # Send details to client
13981 # Wait for client to connect before continuing
13982 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13984 (conn, _) = sock.accept()
13985 except socket.error, err:
13986 raise errcls("Client didn't connect in time (%s)" % err)
13990 # Remove as soon as client is connected
13991 shutil.rmtree(tmpdir)
13993 # Wait for client to close
13996 # pylint: disable=E1101
13997 # Instance of '_socketobject' has no ... member
13998 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14000 except socket.error, err:
14001 raise errcls("Client failed to confirm notification (%s)" % err)
14005 def _SendNotification(self, test, arg, sockname):
14006 """Sends a notification to the client.
14009 @param test: Test name
14010 @param arg: Test argument (depends on test)
14011 @type sockname: string
14012 @param sockname: Socket path
14015 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14017 def _Notify(self, prereq, test, arg):
14018 """Notifies the client of a test.
14021 @param prereq: Whether this is a prereq-phase test
14023 @param test: Test name
14024 @param arg: Test argument (depends on test)
14028 errcls = errors.OpPrereqError
14030 errcls = errors.OpExecError
14032 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14036 def CheckArguments(self):
14037 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14038 self.expandnames_calls = 0
14040 def ExpandNames(self):
14041 checkargs_calls = getattr(self, "checkargs_calls", 0)
14042 if checkargs_calls < 1:
14043 raise errors.ProgrammerError("CheckArguments was not called")
14045 self.expandnames_calls += 1
14047 if self.op.notify_waitlock:
14048 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14050 self.LogInfo("Expanding names")
14052 # Get lock on master node (just to get a lock, not for a particular reason)
14053 self.needed_locks = {
14054 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14057 def Exec(self, feedback_fn):
14058 if self.expandnames_calls < 1:
14059 raise errors.ProgrammerError("ExpandNames was not called")
14061 if self.op.notify_exec:
14062 self._Notify(False, constants.JQT_EXEC, None)
14064 self.LogInfo("Executing")
14066 if self.op.log_messages:
14067 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14068 for idx, msg in enumerate(self.op.log_messages):
14069 self.LogInfo("Sending log message %s", idx + 1)
14070 feedback_fn(constants.JQT_MSGPREFIX + msg)
14071 # Report how many test messages have been sent
14072 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14075 raise errors.OpExecError("Opcode failure was requested")
14080 class IAllocator(object):
14081 """IAllocator framework.
14083 An IAllocator instance has four sets of attributes:
14084 - cfg that is needed to query the cluster
14085 - input data (all members of the _KEYS class attribute are required)
14086 - four buffer attributes (in|out_data|text), that represent the
14087 input (to the external script) in text and data structure format,
14088 and the output from it, again in two formats
14089 - the result variables from the script (success, info, nodes) for
14093 # pylint: disable=R0902
14094 # lots of instance attributes
14096 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14098 self.rpc = rpc_runner
14099 # init buffer variables
14100 self.in_text = self.out_text = self.in_data = self.out_data = None
14101 # init all input fields so that pylint is happy
14103 self.memory = self.disks = self.disk_template = None
14104 self.os = self.tags = self.nics = self.vcpus = None
14105 self.hypervisor = None
14106 self.relocate_from = None
14108 self.instances = None
14109 self.evac_mode = None
14110 self.target_groups = []
14112 self.required_nodes = None
14113 # init result fields
14114 self.success = self.info = self.result = None
14117 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14119 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14120 " IAllocator" % self.mode)
14122 keyset = [n for (n, _) in keydata]
14125 if key not in keyset:
14126 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14127 " IAllocator" % key)
14128 setattr(self, key, kwargs[key])
14131 if key not in kwargs:
14132 raise errors.ProgrammerError("Missing input parameter '%s' to"
14133 " IAllocator" % key)
14134 self._BuildInputData(compat.partial(fn, self), keydata)
14136 def _ComputeClusterData(self):
14137 """Compute the generic allocator input data.
14139 This is the data that is independent of the actual operation.
14143 cluster_info = cfg.GetClusterInfo()
14146 "version": constants.IALLOCATOR_VERSION,
14147 "cluster_name": cfg.GetClusterName(),
14148 "cluster_tags": list(cluster_info.GetTags()),
14149 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14150 # we don't have job IDs
14152 ninfo = cfg.GetAllNodesInfo()
14153 iinfo = cfg.GetAllInstancesInfo().values()
14154 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14157 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14159 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14160 hypervisor_name = self.hypervisor
14161 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14162 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14164 hypervisor_name = cluster_info.primary_hypervisor
14166 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14169 self.rpc.call_all_instances_info(node_list,
14170 cluster_info.enabled_hypervisors)
14172 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14174 config_ndata = self._ComputeBasicNodeData(ninfo)
14175 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14176 i_list, config_ndata)
14177 assert len(data["nodes"]) == len(ninfo), \
14178 "Incomplete node data computed"
14180 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14182 self.in_data = data
14185 def _ComputeNodeGroupData(cfg):
14186 """Compute node groups data.
14189 ng = dict((guuid, {
14190 "name": gdata.name,
14191 "alloc_policy": gdata.alloc_policy,
14193 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14198 def _ComputeBasicNodeData(node_cfg):
14199 """Compute global node data.
14202 @returns: a dict of name: (node dict, node config)
14205 # fill in static (config-based) values
14206 node_results = dict((ninfo.name, {
14207 "tags": list(ninfo.GetTags()),
14208 "primary_ip": ninfo.primary_ip,
14209 "secondary_ip": ninfo.secondary_ip,
14210 "offline": ninfo.offline,
14211 "drained": ninfo.drained,
14212 "master_candidate": ninfo.master_candidate,
14213 "group": ninfo.group,
14214 "master_capable": ninfo.master_capable,
14215 "vm_capable": ninfo.vm_capable,
14217 for ninfo in node_cfg.values())
14219 return node_results
14222 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14224 """Compute global node data.
14226 @param node_results: the basic node structures as filled from the config
14229 # TODO(dynmem): compute the right data on MAX and MIN memory
14230 # make a copy of the current dict
14231 node_results = dict(node_results)
14232 for nname, nresult in node_data.items():
14233 assert nname in node_results, "Missing basic data for node %s" % nname
14234 ninfo = node_cfg[nname]
14236 if not (ninfo.offline or ninfo.drained):
14237 nresult.Raise("Can't get data for node %s" % nname)
14238 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14240 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14242 for attr in ["memory_total", "memory_free", "memory_dom0",
14243 "vg_size", "vg_free", "cpu_total"]:
14244 if attr not in remote_info:
14245 raise errors.OpExecError("Node '%s' didn't return attribute"
14246 " '%s'" % (nname, attr))
14247 if not isinstance(remote_info[attr], int):
14248 raise errors.OpExecError("Node '%s' returned invalid value"
14250 (nname, attr, remote_info[attr]))
14251 # compute memory used by primary instances
14252 i_p_mem = i_p_up_mem = 0
14253 for iinfo, beinfo in i_list:
14254 if iinfo.primary_node == nname:
14255 i_p_mem += beinfo[constants.BE_MAXMEM]
14256 if iinfo.name not in node_iinfo[nname].payload:
14259 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14260 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14261 remote_info["memory_free"] -= max(0, i_mem_diff)
14263 if iinfo.admin_state == constants.ADMINST_UP:
14264 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14266 # compute memory used by instances
14268 "total_memory": remote_info["memory_total"],
14269 "reserved_memory": remote_info["memory_dom0"],
14270 "free_memory": remote_info["memory_free"],
14271 "total_disk": remote_info["vg_size"],
14272 "free_disk": remote_info["vg_free"],
14273 "total_cpus": remote_info["cpu_total"],
14274 "i_pri_memory": i_p_mem,
14275 "i_pri_up_memory": i_p_up_mem,
14277 pnr_dyn.update(node_results[nname])
14278 node_results[nname] = pnr_dyn
14280 return node_results
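# Worked example of the free-memory adjustment above (numbers are
# hypothetical): a node reports memory_free=4096 and hosts a primary
# instance with BE_MAXMEM=2048 that currently uses 1536 according to the
# hypervisor.  The difference of 512 is subtracted, so the allocator is
# given free_memory=3584, i.e. the memory that would remain if the
# instance grew back to its configured maximum.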
14283 def _ComputeInstanceData(cluster_info, i_list):
14284 """Compute global instance data.
14288 for iinfo, beinfo in i_list:
14290 for nic in iinfo.nics:
14291 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14295 "mode": filled_params[constants.NIC_MODE],
14296 "link": filled_params[constants.NIC_LINK],
14298 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14299 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14300 nic_data.append(nic_dict)
14302 "tags": list(iinfo.GetTags()),
14303 "admin_state": iinfo.admin_state,
14304 "vcpus": beinfo[constants.BE_VCPUS],
14305 "memory": beinfo[constants.BE_MAXMEM],
14307 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14309 "disks": [{constants.IDISK_SIZE: dsk.size,
14310 constants.IDISK_MODE: dsk.mode}
14311 for dsk in iinfo.disks],
14312 "disk_template": iinfo.disk_template,
14313 "hypervisor": iinfo.hypervisor,
14315 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14317 instance_data[iinfo.name] = pir
14319 return instance_data
14321 def _AddNewInstance(self):
14322 """Add new instance data to allocator structure.
14324 This in combination with _ComputeClusterData will create the
14325 correct structure needed as input for the allocator.
14327 The checks for the completeness of the opcode must have already been
14331 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14333 if self.disk_template in constants.DTS_INT_MIRROR:
14334 self.required_nodes = 2
14336 self.required_nodes = 1
14340 "disk_template": self.disk_template,
14343 "vcpus": self.vcpus,
14344 "memory": self.memory,
14345 "disks": self.disks,
14346 "disk_space_total": disk_space,
14348 "required_nodes": self.required_nodes,
14349 "hypervisor": self.hypervisor,
14354 def _AddRelocateInstance(self):
14355 """Add relocate instance data to allocator structure.
14357 This in combination with _ComputeClusterData will create the
14358 correct structure needed as input for the allocator.
14360 The checks for the completeness of the opcode must have already been
14364 instance = self.cfg.GetInstanceInfo(self.name)
14365 if instance is None:
14366 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14367 " IAllocator" % self.name)
14369 if instance.disk_template not in constants.DTS_MIRRORED:
14370 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14371 errors.ECODE_INVAL)
14373 if instance.disk_template in constants.DTS_INT_MIRROR and \
14374 len(instance.secondary_nodes) != 1:
14375 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14376 errors.ECODE_STATE)
14378 self.required_nodes = 1
14379 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14380 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14384 "disk_space_total": disk_space,
14385 "required_nodes": self.required_nodes,
14386 "relocate_from": self.relocate_from,
14390 def _AddNodeEvacuate(self):
14391 """Get data for node-evacuate requests.
14395 "instances": self.instances,
14396 "evac_mode": self.evac_mode,
14399 def _AddChangeGroup(self):
14400 """Get data for group-change requests.
14404 "instances": self.instances,
14405 "target_groups": self.target_groups,
14408 def _BuildInputData(self, fn, keydata):
14409 """Build input data structures.
14412 self._ComputeClusterData()
14415 request["type"] = self.mode
14416 for keyname, keytype in keydata:
14417 if keyname not in request:
14418 raise errors.ProgrammerError("Request parameter %s is missing" %
14420 val = request[keyname]
14421 if not keytype(val):
14422 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14423 " validation, value %s, expected"
14424 " type %s" % (keyname, val, keytype))
14425 self.in_data["request"] = request
14427 self.in_text = serializer.Dump(self.in_data)
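# Illustrative only: for IALLOCATOR_MODE_CHG_GROUP the "request" section
# appended to the cluster data might serialize to something like
#
#   "request": {
#     "type": "change-group",
#     "instances": ["inst1.example.com"],
#     "target_groups": ["uuid-of-target-group"]
#   }
#
# (values are hypothetical; the required keys are the per-mode keydata
# validated in _BuildInputData).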
14429 _STRING_LIST = ht.TListOf(ht.TString)
14430 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14431 # pylint: disable=E1101
14432 # Class '...' has no 'OP_ID' member
14433 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14434 opcodes.OpInstanceMigrate.OP_ID,
14435 opcodes.OpInstanceReplaceDisks.OP_ID])
14439 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14440 ht.TItems([ht.TNonEmptyString,
14441 ht.TNonEmptyString,
14442 ht.TListOf(ht.TNonEmptyString),
14445 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14446 ht.TItems([ht.TNonEmptyString,
14449 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14450 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14453 constants.IALLOCATOR_MODE_ALLOC:
14456 ("name", ht.TString),
14457 ("memory", ht.TInt),
14458 ("disks", ht.TListOf(ht.TDict)),
14459 ("disk_template", ht.TString),
14460 ("os", ht.TString),
14461 ("tags", _STRING_LIST),
14462 ("nics", ht.TListOf(ht.TDict)),
14463 ("vcpus", ht.TInt),
14464 ("hypervisor", ht.TString),
14466 constants.IALLOCATOR_MODE_RELOC:
14467 (_AddRelocateInstance,
14468 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14470 constants.IALLOCATOR_MODE_NODE_EVAC:
14471 (_AddNodeEvacuate, [
14472 ("instances", _STRING_LIST),
14473 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14475 constants.IALLOCATOR_MODE_CHG_GROUP:
14476 (_AddChangeGroup, [
14477 ("instances", _STRING_LIST),
14478 ("target_groups", _STRING_LIST),
14482 def Run(self, name, validate=True, call_fn=None):
14483 """Run an instance allocator and return the results.
14486 if call_fn is None:
14487 call_fn = self.rpc.call_iallocator_runner
14489 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14490 result.Raise("Failure while running the iallocator script")
14492 self.out_text = result.payload
14494 self._ValidateResult()
14496 def _ValidateResult(self):
14497 """Process the allocator results.
14499 This will process and if successful save the result in
14500 self.out_data and the other parameters.
14504 rdict = serializer.Load(self.out_text)
14505 except Exception, err:
14506 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14508 if not isinstance(rdict, dict):
14509 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14511 # TODO: remove backwards compatibility in later versions
14512 if "nodes" in rdict and "result" not in rdict:
14513 rdict["result"] = rdict["nodes"]
14516 for key in "success", "info", "result":
14517 if key not in rdict:
14518 raise errors.OpExecError("Can't parse iallocator results:"
14519 " missing key '%s'" % key)
14520 setattr(self, key, rdict[key])
14522 if not self._result_check(self.result):
14523 raise errors.OpExecError("Iallocator returned invalid result,"
14524 " expected %s, got %s" %
14525 (self._result_check, self.result),
14526 errors.ECODE_INVAL)
14528 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14529 assert self.relocate_from is not None
14530 assert self.required_nodes == 1
14532 node2group = dict((name, ndata["group"])
14533 for (name, ndata) in self.in_data["nodes"].items())
14535 fn = compat.partial(self._NodesToGroups, node2group,
14536 self.in_data["nodegroups"])
14538 instance = self.cfg.GetInstanceInfo(self.name)
14539 request_groups = fn(self.relocate_from + [instance.primary_node])
14540 result_groups = fn(rdict["result"] + [instance.primary_node])
14542 if self.success and not set(result_groups).issubset(request_groups):
14543 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14544 " differ from original groups (%s)" %
14545 (utils.CommaJoin(result_groups),
14546 utils.CommaJoin(request_groups)))
14548 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14549 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14551 self.out_data = rdict
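# Illustrative only: a minimal well-formed reply for an allocation
# request could be the JSON document
#
#   {"success": true,
#    "info": "allocation successful",
#    "result": ["node2.example.com"]}
#
# A reply that is not a dict, lacks one of the three keys, or whose
# "result" fails the per-mode _result_check is rejected with OpExecError
# above.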
14554 def _NodesToGroups(node2group, groups, nodes):
14555 """Returns a list of unique group names for a list of nodes.
14557 @type node2group: dict
14558 @param node2group: Map from node name to group UUID
14560 @param groups: Group information
14562 @param nodes: Node names
14569 group_uuid = node2group[node]
14571 # Ignore unknown node
14575 group = groups[group_uuid]
14577 # Can't find group, let's use UUID
14578 group_name = group_uuid
14580 group_name = group["name"]
14582 result.add(group_name)
14584 return sorted(result)
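# Usage sketch (hypothetical data):
#
#   node2group = {"node1": "uuid-1", "node2": "uuid-2"}
#   groups = {"uuid-1": {"name": "default"}}
#   _NodesToGroups(node2group, groups, ["node1", "node2", "node9"])
#   => ["default", "uuid-2"]
#
# "node9" is unknown and therefore ignored, and "uuid-2" has no entry in
# the group map, so its UUID is used in place of a name.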
14587 class LUTestAllocator(NoHooksLU):
14588 """Run allocator tests.
14590 This LU runs the allocator tests
14593 def CheckPrereq(self):
14594 """Check prerequisites.
14596 This checks the opcode parameters depending on the direction and mode of the test.
14599 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14600 for attr in ["memory", "disks", "disk_template",
14601 "os", "tags", "nics", "vcpus"]:
14602 if not hasattr(self.op, attr):
14603 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14604 attr, errors.ECODE_INVAL)
14605 iname = self.cfg.ExpandInstanceName(self.op.name)
14606 if iname is not None:
14607 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14608 iname, errors.ECODE_EXISTS)
14609 if not isinstance(self.op.nics, list):
14610 raise errors.OpPrereqError("Invalid parameter 'nics'",
14611 errors.ECODE_INVAL)
14612 if not isinstance(self.op.disks, list):
14613 raise errors.OpPrereqError("Invalid parameter 'disks'",
14614 errors.ECODE_INVAL)
14615 for row in self.op.disks:
14616 if (not isinstance(row, dict) or
14617 constants.IDISK_SIZE not in row or
14618 not isinstance(row[constants.IDISK_SIZE], int) or
14619 constants.IDISK_MODE not in row or
14620 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14621 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14622 " parameter", errors.ECODE_INVAL)
14623 if self.op.hypervisor is None:
14624 self.op.hypervisor = self.cfg.GetHypervisorType()
14625 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14626 fname = _ExpandInstanceName(self.cfg, self.op.name)
14627 self.op.name = fname
14628 self.relocate_from = \
14629 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14630 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14631 constants.IALLOCATOR_MODE_NODE_EVAC):
14632 if not self.op.instances:
14633 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14634 self.op.instances = _GetWantedInstances(self, self.op.instances)
14636 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14637 self.op.mode, errors.ECODE_INVAL)
14639 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14640 if self.op.allocator is None:
14641 raise errors.OpPrereqError("Missing allocator name",
14642 errors.ECODE_INVAL)
14643 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14644 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14645 self.op.direction, errors.ECODE_INVAL)
14647 def Exec(self, feedback_fn):
14648 """Run the allocator test.
14651 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14652 ial = IAllocator(self.cfg, self.rpc,
14655 memory=self.op.memory,
14656 disks=self.op.disks,
14657 disk_template=self.op.disk_template,
14661 vcpus=self.op.vcpus,
14662 hypervisor=self.op.hypervisor,
14664 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14665 ial = IAllocator(self.cfg, self.rpc,
14668 relocate_from=list(self.relocate_from),
14670 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14671 ial = IAllocator(self.cfg, self.rpc,
14673 instances=self.op.instances,
14674 target_groups=self.op.target_groups)
14675 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14676 ial = IAllocator(self.cfg, self.rpc,
14678 instances=self.op.instances,
14679 evac_mode=self.op.evac_mode)
14681 raise errors.ProgrammerError("Unhandled mode %s in"
14682 " LUTestAllocator.Exec", self.op.mode)
14684 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14685 result = ial.in_text
14687 ial.Run(self.op.allocator, validate=False)
14688 result = ial.out_text
14692 #: Query type implementations
14694 constants.QR_INSTANCE: _InstanceQuery,
14695 constants.QR_NODE: _NodeQuery,
14696 constants.QR_GROUP: _GroupQuery,
14697 constants.QR_OS: _OsQuery,
14700 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14703 def _GetQueryImplementation(name):
14704 """Returns the implementation for a query type.
14706 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14710 return _QUERY_IMPL[name]
14712 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14713 errors.ECODE_INVAL)