4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
contained in the C{jobs} attribute and include the job IDs in the opcode
  result.
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
@type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
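# Illustrative sketch (assumption, not part of the original module): an LU's
# Exec method can hand follow-up work back to the master processor by
# returning a ResultWithJobs instance; the opcode below is only an example.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()]]   # one job with one opcode
#     return ResultWithJobs(jobs)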
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
This needs to be overridden in derived classes in order to check op
    validity.
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:

  - ExpandNames is left as purely a lock-related function
  - CheckPrereq is run after we have acquired locks (and possibly
    waited for them)

The function is allowed to change the self.op attribute so that
later methods no longer need to worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
member, as a dict with lock levels as keys, and a list of needed lock names
as values. Rules:
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
    # Acquire all nodes and one instance
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: ['instance1.example.com'],
    }
    # Acquire just two nodes
    self.needed_locks = {
      locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
    }
    self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
# concurrent, so that old LUs don't need to be changed all at the same
    # time.
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
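  # Illustrative sketch (assumption, not from the original module): a minimal
  # ExpandNames for an LU that works on one instance and only needs its node
  # locks in shared mode:
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {
  #       locking.LEVEL_INSTANCE: ["instance1.example.com"],
  #       locking.LEVEL_NODE: locking.ALL_SET,
  #       }
  #     self.share_locks[locking.LEVEL_NODE] = 1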
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
it should be idempotent - no cluster or system changes are allowed.
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
@note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. No nodes should be returned as an
309 empty list (and not None).
@note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
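  # Illustrative sketch (assumption): the usual way an instance-level LU
  # combines _ExpandAndLockInstance, DeclareLocks and _LockInstancesNodes:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()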
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
"""Check prerequisites for this tasklet.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
def _ShareAll():
  """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
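# Illustrative sketch (assumption, not part of the original module): the shape
# of the data handled by _MakeLegacyNodeInfo. All values below are made up.
def _ExampleMakeLegacyNodeInfo():
  """Hypothetical helper, never called by Ganeti itself."""
  data = ("fake-boot-id",
          ({"vg_size": 102400, "vg_free": 51200},),
          ({"memory_total": 4096, "memory_free": 2048},))
  # The volume group and hypervisor dicts are merged into a single dict,
  # with the boot ID added under the "bootid" key.
  return _MakeLegacyNodeInfo(data)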
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
" '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
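# Illustrative sketch (assumption): both helpers above are typically called
# right after the locks have been acquired, to detect configuration changes
# that happened in between, e.g.:
#
#   owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
#   _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)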
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
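# Illustrative sketch (assumption, not part of the original module): expected
# behaviour of _GetUpdatedParams on plain dicts; the parameter names are made up.
def _ExampleGetUpdatedParams():
  """Hypothetical helper, never called by Ganeti itself."""
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  update = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
  # "root_path" is reset (removed so the default applies again),
  # "serial_console" is added and "kernel_path" is left untouched.
  return _GetUpdatedParams(old, update)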
724 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
"""Return the new version of an instance policy.
727 @param group_policy: whether this policy applies to a group and thus
728 we should support removal of policy entries
731 use_none = use_default = group_policy
732 ipolicy = copy.deepcopy(old_ipolicy)
733 for key, value in new_ipolicy.items():
734 if key not in constants.IPOLICY_ALL_KEYS:
735 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
737 if key in constants.IPOLICY_PARAMETERS:
738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
739 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
741 use_default=use_default)
743 # FIXME: we assume all others are lists; this should be redone
745 if not value or value == [constants.VALUE_DEFAULT]:
      raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                 " on the cluster" % key,
                                 errors.ECODE_INVAL)
    else:
      ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy)
756 except errors.ConfigurationError, err:
757 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
762 def _UpdateAndVerifySubDict(base, updates, type_check):
763 """Updates and verifies a dict with sub dicts of the same type.
765 @param base: The dict with the old data
766 @param updates: The dict with the new data
767 @param type_check: Dict suitable to ForceDictType to verify correct types
768 @returns: A new dict with updated and verified values
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new
776 ret = copy.deepcopy(base)
777 ret.update(dict((key, fn(base.get(key, {}), value))
778 for key, value in updates.items()))
782 def _MergeAndVerifyHvState(op_input, obj_input):
783 """Combines the hv state from an opcode with the one of the object
785 @param op_input: The input dict from the opcode
786 @param obj_input: The input dict from the objects
787 @return: The verified and updated dict
791 invalid_hvs = set(op_input) - constants.HYPER_TYPES
  if invalid_hvs:
    raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
794 " %s" % utils.CommaJoin(invalid_hvs),
  if obj_input is None:
    obj_input = {}
  type_check = constants.HVSTS_PARAMETER_TYPES
799 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
804 def _MergeAndVerifyDiskState(op_input, obj_input):
805 """Combines the disk state from an opcode with the one of the object
807 @param op_input: The input dict from the opcode
808 @param obj_input: The input dict from the objects
809 @return: The verified and updated dict
812 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
  if invalid_dst:
    raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
815 utils.CommaJoin(invalid_dst),
817 type_check = constants.DSS_PARAMETER_TYPES
  if obj_input is None:
    obj_input = {}
820 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
822 for key, value in op_input.items())
827 def _ReleaseLocks(lu, level, names=None, keep=None):
828 """Releases locks owned by an LU.
830 @type lu: L{LogicalUnit}
831 @param level: Lock level
832 @type names: list or None
833 @param names: Names of locks to release
834 @type keep: list or None
835 @param keep: Names of locks to retain
838 assert not (keep is not None and names is not None), \
839 "Only one of the 'names' and the 'keep' parameters can be given"
841 if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None
848 owned = lu.owned_locks(level)
850 # Not owning any lock at this level, do nothing
857 # Determine which locks to release
859 if should_release(name):
864 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
866 # Release just some locks
867 lu.glm.release(level, names=release)
869 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
872 lu.glm.release(level)
874 assert not lu.glm.is_owned(level), "No locks should be owned"
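# Illustrative sketch (assumption): typical calls either keep a few locks,
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# release an explicit set of names,
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=old_nodes)
#
# or release everything at that level by passing neither parameter.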
877 def _MapInstanceDisksToNodes(instances):
878 """Creates a map from (node, volume) to instance name.
880 @type instances: list of L{objects.Instance}
881 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
884 return dict(((node, vol), inst.name)
885 for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
890 def _RunPostHook(lu, node_name):
891 """Runs the post-hook for an opcode on a single node.
894 hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
902 def _CheckOutputFields(static, dynamic, selected):
903 """Checks whether all selected fields are valid.
905 @type static: L{utils.FieldSet}
906 @param static: static fields set
907 @type dynamic: L{utils.FieldSet}
908 @param dynamic: dynamic fields set
  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
921 def _CheckGlobalHvParams(params):
922 """Validates that given hypervisor params are not global ones.
This will ensure that instances don't get customised versions of
  global params.
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
930 msg = ("The following hypervisor parameters are global and cannot"
931 " be customized at instance level, please modify them at"
932 " cluster level: %s" % utils.CommaJoin(used_globals))
933 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
936 def _CheckNodeOnline(lu, node, msg=None):
937 """Ensure that a given node is online.
939 @param lu: the LU on behalf of which we make the check
940 @param node: the node to check
941 @param msg: if passed, should be a message to replace the default one
942 @raise errors.OpPrereqError: if the node is offline
  if msg is None:
    msg = "Can't use offline node"
947 if lu.cfg.GetNodeInfo(node).offline:
948 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
951 def _CheckNodeNotDrained(lu, node):
952 """Ensure that a given node is not drained.
954 @param lu: the LU on behalf of which we make the check
955 @param node: the node to check
956 @raise errors.OpPrereqError: if the node is drained
959 if lu.cfg.GetNodeInfo(node).drained:
960 raise errors.OpPrereqError("Can't use drained node %s" % node,
964 def _CheckNodeVmCapable(lu, node):
965 """Ensure that a given node is vm capable.
967 @param lu: the LU on behalf of which we make the check
968 @param node: the node to check
969 @raise errors.OpPrereqError: if the node is not vm capable
972 if not lu.cfg.GetNodeInfo(node).vm_capable:
973 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
977 def _CheckNodeHasOS(lu, node, os_name, force_variant):
978 """Ensure that a node supports a given OS.
980 @param lu: the LU on behalf of which we make the check
981 @param node: the node to check
982 @param os_name: the OS to query about
983 @param force_variant: whether to ignore variant errors
984 @raise errors.OpPrereqError: if the node is not supporting the OS
987 result = lu.rpc.call_os_get(node, os_name)
988 result.Raise("OS '%s' not in supported OS list for node %s" %
990 prereq=True, ecode=errors.ECODE_INVAL)
991 if not force_variant:
992 _CheckOSVariant(result.payload, os_name)
995 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
996 """Ensure that a node has the given secondary ip.
998 @type lu: L{LogicalUnit}
999 @param lu: the LU on behalf of which we make the check
1001 @param node: the node to check
1002 @type secondary_ip: string
1003 @param secondary_ip: the ip to check
1004 @type prereq: boolean
1005 @param prereq: whether to throw a prerequisite or an execute error
1006 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1007 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1010 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1011 result.Raise("Failure checking secondary ip on node %s" % node,
1012 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1013 if not result.payload:
1014 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1015 " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
1022 def _GetClusterDomainSecret():
1023 """Reads the cluster domain secret.
1026 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1030 def _CheckInstanceState(lu, instance, req_states, msg=None):
1031 """Ensure that an instance is in one of the required states.
1033 @param lu: the LU on behalf of which we make the check
1034 @param instance: the instance to check
1035 @param msg: if passed, should be a message to replace the default one
1036 @raise errors.OpPrereqError: if the instance is not in the required state
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
1041 if instance.admin_state not in req_states:
1042 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1043 (instance.name, instance.admin_state, msg),
1046 if constants.ADMINST_UP not in req_states:
1047 pnode = instance.primary_node
1048 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1049 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1050 prereq=True, ecode=errors.ECODE_ENVIRON)
1052 if instance.name in ins_l.payload:
1053 raise errors.OpPrereqError("Instance %s is running, %s" %
1054 (instance.name, msg), errors.ECODE_STATE)
1057 def _ComputeMinMaxSpec(name, ipolicy, value):
1058 """Computes if value is in the desired range.
1060 @param name: name of the parameter for which we perform the check
1061 @param ipolicy: dictionary containing min, max and std values
1062 @param value: actual value that we want to use
1063 @return: None or element not meeting the criteria
  if value in [None, constants.VALUE_AUTO]:
    return None
1069 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1070 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1071 if value > max_v or min_v > value:
1072 return ("%s value %s is not in range [%s, %s]" %
1073 (name, value, min_v, max_v))
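# Illustrative sketch (assumption, not part of the original module): how
# _ComputeMinMaxSpec reports violations; the ipolicy fragment below is made up.
def _ExampleComputeMinMaxSpec():
  """Hypothetical helper, never called by Ganeti itself."""
  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 32768},
    }
  # Inside the range: None is returned (no violation).
  assert _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 512) is None
  # Outside the range: a human-readable error string is returned.
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 65536)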
1077 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1078 nic_count, disk_sizes,
1079 _compute_fn=_ComputeMinMaxSpec):
1080 """Verifies ipolicy against provided specs.
1083 @param ipolicy: The ipolicy
1085 @param mem_size: The memory size
1086 @type cpu_count: int
1087 @param cpu_count: Used cpu cores
1088 @type disk_count: int
1089 @param disk_count: Number of disks used
1090 @type nic_count: int
1091 @param nic_count: Number of nics used
1092 @type disk_sizes: list of ints
1093 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1094 @param _compute_fn: The compute function (unittest only)
@return: A list of violations, or an empty list if no violations are found
1098 assert disk_count == len(disk_sizes)
  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
1102 (constants.ISPEC_CPU_COUNT, cpu_count),
1103 (constants.ISPEC_DISK_COUNT, disk_count),
1104 (constants.ISPEC_NIC_COUNT, nic_count),
1105 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
  return filter(None,
                (_compute_fn(name, ipolicy, value)
1109 for (name, value) in test_settings))
1112 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1113 _compute_fn=_ComputeIPolicySpecViolation):
1114 """Compute if instance meets the specs of ipolicy.
1117 @param ipolicy: The ipolicy to verify against
1118 @type instance: L{objects.Instance}
1119 @param instance: The instance to verify
1120 @param _compute_fn: The function to verify ipolicy (unittest only)
1121 @see: L{_ComputeIPolicySpecViolation}
1124 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1125 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1126 disk_count = len(instance.disks)
1127 disk_sizes = [disk.size for disk in instance.disks]
1128 nic_count = len(instance.nics)
  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
1134 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1135 _compute_fn=_ComputeIPolicySpecViolation):
1136 """Compute if instance specs meets the specs of ipolicy.
1139 @param ipolicy: The ipolicy to verify against
@type instance_spec: dict
1141 @param instance_spec: The instance spec to verify
1142 @param _compute_fn: The function to verify ipolicy (unittest only)
1143 @see: L{_ComputeIPolicySpecViolation}
1146 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1147 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1148 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1149 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1150 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
1156 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1158 _compute_fn=_ComputeIPolicyInstanceViolation):
1159 """Compute if instance meets the specs of the new target group.
1161 @param ipolicy: The ipolicy to verify
1162 @param instance: The instance object to verify
1163 @param current_group: The current group of the instance
1164 @param target_group: The new group of the instance
1165 @param _compute_fn: The function to verify ipolicy (unittest only)
1166 @see: L{_ComputeIPolicySpecViolation}
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)
1175 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1176 _compute_fn=_ComputeIPolicyNodeViolation):
1177 """Checks that the target node is correct in terms of instance policy.
1179 @param ipolicy: The ipolicy to verify
1180 @param instance: The instance object to verify
1181 @param node: The new node to relocate
1182 @param ignore: Ignore violations of the ipolicy
1183 @param _compute_fn: The function to verify ipolicy (unittest only)
1184 @see: L{_ComputeIPolicySpecViolation}
1187 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1188 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1199 def _ExpandItemName(fn, name, kind):
1200 """Expand an item name.
1202 @param fn: the function to use for expansion
1203 @param name: requested item name
1204 @param kind: text description ('Node' or 'Instance')
1205 @return: the resolved (full) name
1206 @raise errors.OpPrereqError: if the item is not found
1209 full_name = fn(name)
1210 if full_name is None:
1211 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1216 def _ExpandNodeName(cfg, name):
1217 """Wrapper over L{_ExpandItemName} for nodes."""
1218 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1221 def _ExpandInstanceName(cfg, name):
1222 """Wrapper over L{_ExpandItemName} for instance."""
1223 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
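# Illustrative sketch (assumption): LUs use these wrappers to canonicalize
# user-supplied names early, e.g. in CheckArguments or ExpandNames:
#
#   self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
#   # e.g. "node1" -> "node1.example.com"; raises OpPrereqError if unknown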
1226 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1227 minmem, maxmem, vcpus, nics, disk_template, disks,
1228 bep, hvp, hypervisor_name, tags):
1229 """Builds instance related env variables for hooks
1231 This builds the hook environment from individual variables.
1234 @param name: the name of the instance
1235 @type primary_node: string
1236 @param primary_node: the name of the instance's primary node
1237 @type secondary_nodes: list
1238 @param secondary_nodes: list of secondary nodes as strings
1239 @type os_type: string
1240 @param os_type: the name of the instance's OS
1241 @type status: string
1242 @param status: the desired status of the instance
1243 @type minmem: string
1244 @param minmem: the minimum memory size of the instance
1245 @type maxmem: string
1246 @param maxmem: the maximum memory size of the instance
1248 @param vcpus: the count of VCPUs the instance has
1250 @param nics: list of tuples (ip, mac, mode, link) representing
1251 the NICs the instance has
1252 @type disk_template: string
1253 @param disk_template: the disk template of the instance
1255 @param disks: the list of (size, mode) pairs
1257 @param bep: the backend parameters for the instance
1259 @param hvp: the hypervisor parameters for the instance
1260 @type hypervisor_name: string
1261 @param hypervisor_name: the hypervisor for the instance
1263 @param tags: list of instance tags as strings
1265 @return: the hook environment for this instance
  env = {
    "INSTANCE_NAME": name,
1271 "INSTANCE_PRIMARY": primary_node,
1272 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1273 "INSTANCE_OS_TYPE": os_type,
1274 "INSTANCE_STATUS": status,
1275 "INSTANCE_MINMEM": minmem,
1276 "INSTANCE_MAXMEM": maxmem,
1277 # TODO(2.7) remove deprecated "memory" value
1278 "INSTANCE_MEMORY": maxmem,
1279 "INSTANCE_VCPUS": vcpus,
1280 "INSTANCE_DISK_TEMPLATE": disk_template,
1281 "INSTANCE_HYPERVISOR": hypervisor_name,
1284 nic_count = len(nics)
1285 for idx, (ip, mac, mode, link) in enumerate(nics):
1288 env["INSTANCE_NIC%d_IP" % idx] = ip
1289 env["INSTANCE_NIC%d_MAC" % idx] = mac
1290 env["INSTANCE_NIC%d_MODE" % idx] = mode
1291 env["INSTANCE_NIC%d_LINK" % idx] = link
1292 if mode == constants.NIC_MODE_BRIDGED:
1293 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1297 env["INSTANCE_NIC_COUNT"] = nic_count
1300 disk_count = len(disks)
1301 for idx, (size, mode) in enumerate(disks):
1302 env["INSTANCE_DISK%d_SIZE" % idx] = size
1303 env["INSTANCE_DISK%d_MODE" % idx] = mode
1307 env["INSTANCE_DISK_COUNT"] = disk_count
1312 env["INSTANCE_TAGS"] = " ".join(tags)
1314 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1315 for key, value in source.items():
1316 env["INSTANCE_%s_%s" % (kind, key)] = value
1321 def _NICListToTuple(lu, nics):
1322 """Build a list of nic information tuples.
1324 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1325 value in LUInstanceQueryData.
1327 @type lu: L{LogicalUnit}
1328 @param lu: the logical unit on whose behalf we execute
1329 @type nics: list of L{objects.NIC}
1330 @param nics: list of nics to convert to hooks tuples
1334 cluster = lu.cfg.GetClusterInfo()
1338 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1339 mode = filled_params[constants.NIC_MODE]
1340 link = filled_params[constants.NIC_LINK]
1341 hooks_nics.append((ip, mac, mode, link))
1345 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1346 """Builds instance related env variables for hooks from an object.
1348 @type lu: L{LogicalUnit}
1349 @param lu: the logical unit on whose behalf we execute
1350 @type instance: L{objects.Instance}
1351 @param instance: the instance for which we should build the
1353 @type override: dict
1354 @param override: dictionary with key/values that will override
1357 @return: the hook environment dictionary
1360 cluster = lu.cfg.GetClusterInfo()
1361 bep = cluster.FillBE(instance)
1362 hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
1365 "primary_node": instance.primary_node,
1366 "secondary_nodes": instance.secondary_nodes,
1367 "os_type": instance.os,
1368 "status": instance.admin_state,
1369 "maxmem": bep[constants.BE_MAXMEM],
1370 "minmem": bep[constants.BE_MINMEM],
1371 "vcpus": bep[constants.BE_VCPUS],
1372 "nics": _NICListToTuple(lu, instance.nics),
1373 "disk_template": instance.disk_template,
1374 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1377 "hypervisor_name": instance.hypervisor,
1378 "tags": instance.tags,
  if override:
    args.update(override)
1382 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
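# Illustrative sketch (assumption): the override dict replaces individual
# arguments derived from the instance object, e.g. to announce the state the
# instance is about to enter rather than its current one; the value shown is
# only an example:
#
#   env = _BuildInstanceHookEnvByObject(self, instance,
#                                       override={"status": "down"})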
1385 def _AdjustCandidatePool(lu, exceptions):
1386 """Adjust the candidate pool after node operations.
1389 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1392 utils.CommaJoin(node.name for node in mod_list))
1393 for name in mod_list:
1394 lu.context.ReaddNode(name)
1395 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
1401 def _DecideSelfPromotion(lu, exceptions=None):
1402 """Decide whether I should promote myself as a master candidate.
1405 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1406 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1407 # the new node will increase mc_max with one, so:
1408 mc_should = min(mc_should + 1, cp_size)
1409 return mc_now < mc_should
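# Worked example (assumption, made-up numbers): with candidate_pool_size = 10,
# four current candidates (mc_now = 4) and four needed ones (mc_should = 4),
# adding this node bumps mc_should to min(4 + 1, 10) = 5; since 4 < 5, the new
# node promotes itself to master candidate.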
1412 def _CalculateGroupIPolicy(cluster, group):
1413 """Calculate instance policy for group.
1416 return cluster.SimpleFillIPolicy(group.ipolicy)
1419 def _CheckNicsBridgesExist(lu, target_nics, target_node):
"""Check that the bridges needed by a list of nics exist.
1423 cluster = lu.cfg.GetClusterInfo()
1424 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1425 brlist = [params[constants.NIC_LINK] for params in paramslist
1426 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1428 result = lu.rpc.call_bridges_exist(target_node, brlist)
1429 result.Raise("Error checking bridges on destination node '%s'" %
1430 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1433 def _CheckInstanceBridgesExist(lu, instance, node=None):
"""Check that the bridges needed by an instance exist.
1438 node = instance.primary_node
1439 _CheckNicsBridgesExist(lu, instance.nics, node)
1442 def _CheckOSVariant(os_obj, name):
1443 """Check whether an OS name conforms to the os variants specification.
1445 @type os_obj: L{objects.OS}
1446 @param os_obj: OS object to check
1448 @param name: OS name passed by the user, to check for validity
1451 variant = objects.OS.GetVariant(name)
1452 if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1455 " passed)" % (os_obj.name, variant),
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
1462 if variant not in os_obj.supported_variants:
1463 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1466 def _GetNodeInstancesInner(cfg, fn):
1467 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1470 def _GetNodeInstances(cfg, node_name):
1471 """Returns a list of all primary and secondary instances on a node.
1475 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1478 def _GetNodePrimaryInstances(cfg, node_name):
1479 """Returns primary instances on a node.
1482 return _GetNodeInstancesInner(cfg,
1483 lambda inst: node_name == inst.primary_node)
1486 def _GetNodeSecondaryInstances(cfg, node_name):
1487 """Returns secondary instances on a node.
1490 return _GetNodeInstancesInner(cfg,
1491 lambda inst: node_name in inst.secondary_nodes)
1494 def _GetStorageTypeArgs(cfg, storage_type):
1495 """Returns the arguments for a storage type.
1498 # Special case for file storage
1499 if storage_type == constants.ST_FILE:
1500 # storage.FileStorage wants a list of storage directories
1501 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1506 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1509 for dev in instance.disks:
1510 cfg.SetDiskID(dev, node_name)
1512 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1513 result.Raise("Failed to get disk status from node %s" % node_name,
1514 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1516 for idx, bdev_status in enumerate(result.payload):
1517 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1523 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1524 """Check the sanity of iallocator and node arguments and use the
1525 cluster-wide iallocator if appropriate.
1527 Check that at most one of (iallocator, node) is specified. If none is
1528 specified, then the LU's opcode's iallocator slot is filled with the
1529 cluster-wide default iallocator.
1531 @type iallocator_slot: string
1532 @param iallocator_slot: the name of the opcode iallocator slot
1533 @type node_slot: string
1534 @param node_slot: the name of the opcode target node slot
1537 node = getattr(lu.op, node_slot, None)
1538 iallocator = getattr(lu.op, iallocator_slot, None)
1540 if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1543 elif node is None and iallocator is None:
1544 default_iallocator = lu.cfg.GetDefaultIAllocator()
1545 if default_iallocator:
1546 setattr(lu.op, iallocator_slot, default_iallocator)
1548 raise errors.OpPrereqError("No iallocator or node given and no"
1549 " cluster-wide default iallocator found;"
1550 " please specify either an iallocator or a"
1551 " node, or set a cluster-wide default"
1555 def _GetDefaultIAllocator(cfg, iallocator):
1556 """Decides on which iallocator to use.
1558 @type cfg: L{config.ConfigWriter}
1559 @param cfg: Cluster configuration object
1560 @type iallocator: string or None
1561 @param iallocator: Iallocator specified in opcode
1563 @return: Iallocator name
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1572 " opcode nor as a cluster-wide default",
1578 class LUClusterPostInit(LogicalUnit):
1579 """Logical unit for running hooks after cluster initialization.
1582 HPATH = "cluster-init"
1583 HTYPE = constants.HTYPE_CLUSTER
1585 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
1593 def BuildHooksNodes(self):
1594 """Build hooks nodes.
1597 return ([], [self.cfg.GetMasterNode()])
1599 def Exec(self, feedback_fn):
1606 class LUClusterDestroy(LogicalUnit):
1607 """Logical unit for destroying the cluster.
1610 HPATH = "cluster-destroy"
1611 HTYPE = constants.HTYPE_CLUSTER
1613 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
1621 def BuildHooksNodes(self):
1622 """Build hooks nodes.
1627 def CheckPrereq(self):
1628 """Check prerequisites.
1630 This checks whether the cluster is empty.
1632 Any errors are signaled by raising errors.OpPrereqError.
1635 master = self.cfg.GetMasterNode()
1637 nodelist = self.cfg.GetNodeList()
1638 if len(nodelist) != 1 or nodelist[0] != master:
1639 raise errors.OpPrereqError("There are still %d node(s) in"
1640 " this cluster." % (len(nodelist) - 1),
1642 instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
1645 " this cluster." % len(instancelist),
1648 def Exec(self, feedback_fn):
1649 """Destroys the cluster.
1652 master_params = self.cfg.GetMasterNetworkParameters()
1654 # Run post hooks on master node before it's removed
1655 _RunPostHook(self, master_params.name)
1657 ems = self.cfg.GetUseExternalMipScript()
1658 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1661 self.LogWarning("Error disabling the master IP address: %s",
1664 return master_params.name
1667 def _VerifyCertificate(filename):
1668 """Verifies a certificate for L{LUClusterVerifyConfig}.
1670 @type filename: string
1671 @param filename: Path to PEM file
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1676 utils.ReadFile(filename))
1677 except Exception, err: # pylint: disable=W0703
1678 return (LUClusterVerifyConfig.ETYPE_ERROR,
1679 "Failed to load X509 certificate %s: %s" % (filename, err))
  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1683 constants.SSL_CERT_EXPIRATION_ERROR)
1686 fnamemsg = "While verifying %s: %s" % (filename, msg)
  if errcode is None:
    return (None, fnamemsg)
1692 elif errcode == utils.CERT_WARNING:
1693 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1694 elif errcode == utils.CERT_ERROR:
1695 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1697 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1700 def _GetAllHypervisorParameters(cluster, instances):
1701 """Compute the set of all hypervisor parameters.
1703 @type cluster: L{objects.Cluster}
1704 @param cluster: the cluster object
@type instances: list of L{objects.Instance}
1706 @param instances: additional instances from which to obtain parameters
1707 @rtype: list of (origin, hypervisor, parameters)
1708 @return: a list with all parameters found, indicating the hypervisor they
1709 apply to, and the origin (can be "cluster", "os X", or "instance Y")
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
1715 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1717 for os_name, os_hvp in cluster.os_hvp.items():
1718 for hv_name, hv_params in os_hvp.items():
1720 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1721 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1723 # TODO: collapse identical parameter values in a single one
1724 for instance in instances:
1725 if instance.hvparams:
1726 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1727 cluster.FillHV(instance)))
1732 class _VerifyErrors(object):
1733 """Mix-in for cluster/group verify LUs.
1735 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1736 self.op and self._feedback_fn to be available.)
1740 ETYPE_FIELD = "code"
1741 ETYPE_ERROR = "ERROR"
1742 ETYPE_WARNING = "WARNING"
1744 def _Error(self, ecode, item, msg, *args, **kwargs):
1745 """Format an error message.
1747 Based on the opcode's error_codes parameter, either format a
1748 parseable error code, or a simpler error string.
1750 This must be called only from Exec and functions called from Exec.
1753 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1754 itype, etxt, _ = ecode
1755 # first complete the msg
1758 # then format the whole message
1759 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1760 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1767 # and finally report it via the feedback_fn
1768 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1770 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1771 """Log an error message if the passed condition is True.
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    # If the error code is in the list of ignored errors, demote the error to
    # a warning
1779 (_, etxt, _) = ecode
1780 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1781 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
    if cond:
      self._Error(ecode, *args, **kwargs)
1786 # do not mark the operation as failed for WARN cases only
1787 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1788 self.bad = self.bad or cond
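  # Illustrative sketch (assumption): how verification code built on this
  # mix-in typically reports problems; the error constant is only an example.
  #
  #   self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
  #                 "configuration problem: %s", err_msg)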
1791 class LUClusterVerify(NoHooksLU):
1792 """Submits all jobs necessary to verify the cluster.
1797 def ExpandNames(self):
1798 self.needed_locks = {}
1800 def Exec(self, feedback_fn):
1803 if self.op.group_name:
1804 groups = [self.op.group_name]
1805 depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()
1809 # Verify global configuration
1811 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1814 # Always depend on global verification
1815 depends_fn = lambda: [(-len(jobs), [])]
1817 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1818 ignore_errors=self.op.ignore_errors,
1819 depends=depends_fn())]
1820 for group in groups)
1822 # Fix up all parameters
1823 for op in itertools.chain(*jobs): # pylint: disable=W0142
1824 op.debug_simulate_errors = self.op.debug_simulate_errors
1825 op.verbose = self.op.verbose
1826 op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
1829 except AttributeError:
1830 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1832 return ResultWithJobs(jobs)
1835 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1836 """Verifies the cluster config.
1841 def _VerifyHVP(self, hvp_data):
1842 """Verifies locally the syntax of the hypervisor parameters.
1845 for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
1850 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1851 hv_class.CheckParameterSyntax(hv_params)
1852 except errors.GenericError, err:
1853 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1855 def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
1858 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1859 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1860 self.all_node_info = self.cfg.GetAllNodesInfo()
1861 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1862 self.needed_locks = {}
1864 def Exec(self, feedback_fn):
"""Verify integrity of cluster, performing various tests on nodes.
1869 self._feedback_fn = feedback_fn
1871 feedback_fn("* Verifying cluster config")
1873 for msg in self.cfg.VerifyConfig():
1874 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1876 feedback_fn("* Verifying cluster certificate files")
1878 for cert_filename in constants.ALL_CERT_FILES:
1879 (errcode, msg) = _VerifyCertificate(cert_filename)
1880 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1882 feedback_fn("* Verifying hypervisor parameters")
1884 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1885 self.all_inst_info.values()))
1887 feedback_fn("* Verifying all nodes belong to an existing group")
1889 # We do this verification here because, should this bogus circumstance
1890 # occur, it would never be caught by VerifyGroup, which only acts on
1891 # nodes/instances reachable from existing node groups.
1893 dangling_nodes = set(node.name for node in self.all_node_info.values()
1894 if node.group not in self.all_group_info)
1896 dangling_instances = {}
1897 no_node_instances = []
1899 for inst in self.all_inst_info.values():
1900 if inst.primary_node in dangling_nodes:
1901 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1902 elif inst.primary_node not in self.all_node_info:
1903 no_node_instances.append(inst.name)
1908 utils.CommaJoin(dangling_instances.get(node.name,
1910 for node in dangling_nodes]
1912 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1914 "the following nodes (and their instances) belong to a non"
1915 " existing group: %s", utils.CommaJoin(pretty_dangling))
1917 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1919 "the following instances have a non-existing primary-node:"
1920 " %s", utils.CommaJoin(no_node_instances))
1925 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1926 """Verifies the status of a node group.
1929 HPATH = "cluster-verify"
1930 HTYPE = constants.HTYPE_CLUSTER
1933 _HOOKS_INDENT_RE = re.compile("^", re.M)
1935 class NodeImage(object):
1936 """A class representing the logical and physical status of a node.
1939 @ivar name: the node name to which this object refers
1940 @ivar volumes: a structure as returned from
1941 L{ganeti.backend.GetVolumeList} (runtime)
1942 @ivar instances: a list of running instances (runtime)
1943 @ivar pinst: list of configured primary instances (config)
1944 @ivar sinst: list of configured secondary instances (config)
1945 @ivar sbp: dictionary of {primary-node: list of instances} for all
1946 instances for which this node is secondary (config)
1947 @ivar mfree: free memory, as reported by hypervisor (runtime)
1948 @ivar dfree: free disk, as reported by the node (runtime)
1949 @ivar offline: the offline status (config)
1950 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call was successful (overall,
1952 not whether the individual keys were correct) (runtime)
1953 @type lvm_fail: boolean
1954 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1955 @type hyp_fail: boolean
1956 @ivar hyp_fail: whether the RPC call didn't return the instance list
1957 @type ghost: boolean
1958 @ivar ghost: whether this is a known node or not (config)
1959 @type os_fail: boolean
1960 @ivar os_fail: whether the RPC call didn't return valid OS data
1962 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1963 @type vm_capable: boolean
1964 @ivar vm_capable: whether the node can host instances
1967 def __init__(self, offline=False, name=None, vm_capable=True):
1976 self.offline = offline
1977 self.vm_capable = vm_capable
1978 self.rpc_fail = False
1979 self.lvm_fail = False
1980 self.hyp_fail = False
1982 self.os_fail = False
1985 def ExpandNames(self):
1986 # This raises errors.OpPrereqError on its own:
1987 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1989 # Get instances in node group; this is unsafe and needs verification later
1990 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1992 self.needed_locks = {
1993 locking.LEVEL_INSTANCE: inst_names,
1994 locking.LEVEL_NODEGROUP: [self.group_uuid],
1995 locking.LEVEL_NODE: [],
1998 self.share_locks = _ShareAll()
2000 def DeclareLocks(self, level):
2001 if level == locking.LEVEL_NODE:
2002 # Get members of node group; this is unsafe and needs verification later
2003 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2005 all_inst_info = self.cfg.GetAllInstancesInfo()
2007 # In Exec(), we warn about mirrored instances that have primary and
2008 # secondary living in separate node groups. To fully verify that
2009 # volumes for these instances are healthy, we will need to do an
2010 # extra call to their secondaries. We ensure here those nodes will
2012 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2013 # Important: access only the instances whose lock is owned
2014 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2015 nodes.update(all_inst_info[inst].secondary_nodes)
2017 self.needed_locks[locking.LEVEL_NODE] = nodes
2019 def CheckPrereq(self):
2020 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2021 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2023 group_nodes = set(self.group_info.members)
2024 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2027 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2029 unlocked_instances = \
2030 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2033 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2034 utils.CommaJoin(unlocked_nodes))
2036 if unlocked_instances:
2037 raise errors.OpPrereqError("Missing lock for instances: %s" %
2038 utils.CommaJoin(unlocked_instances))
2040 self.all_node_info = self.cfg.GetAllNodesInfo()
2041 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2043 self.my_node_names = utils.NiceSort(group_nodes)
2044 self.my_inst_names = utils.NiceSort(group_instances)
2046 self.my_node_info = dict((name, self.all_node_info[name])
2047 for name in self.my_node_names)
2049 self.my_inst_info = dict((name, self.all_inst_info[name])
2050 for name in self.my_inst_names)
2052 # We detect here the nodes that will need the extra RPC calls for verifying
2053 # split LV volumes; they should be locked.
2054 extra_lv_nodes = set()
2056 for inst in self.my_inst_info.values():
2057 if inst.disk_template in constants.DTS_INT_MIRROR:
2058 group = self.my_node_info[inst.primary_node].group
2059 for nname in inst.secondary_nodes:
2060 if self.all_node_info[nname].group != group:
2061 extra_lv_nodes.add(nname)
2063 unlocked_lv_nodes = \
2064 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2066 if unlocked_lv_nodes:
2067 raise errors.OpPrereqError("these nodes could be locked: %s" %
2068 utils.CommaJoin(unlocked_lv_nodes))
2069 self.extra_lv_nodes = list(extra_lv_nodes)
2071 def _VerifyNode(self, ninfo, nresult):
2072 """Perform some basic validation on data returned from a node.
2074 - check the result data structure is well formed and has all the mandatory fields
2076 - check ganeti version
2078 @type ninfo: L{objects.Node}
2079 @param ninfo: the node to check
2080 @param nresult: the results from the node
2082 @return: whether overall this call was successful (and we can expect
2083 reasonable values in the response)
2087 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2089 # main result, nresult should be a non-empty dict
2090 test = not nresult or not isinstance(nresult, dict)
2091 _ErrorIf(test, constants.CV_ENODERPC, node,
2092 "unable to verify node: no data returned")
2096 # compares ganeti version
2097 local_version = constants.PROTOCOL_VERSION
2098 remote_version = nresult.get("version", None)
2099 test = not (remote_version and
2100 isinstance(remote_version, (list, tuple)) and
2101 len(remote_version) == 2)
2102 _ErrorIf(test, constants.CV_ENODERPC, node,
2103 "connection to node returned invalid data")
2107 test = local_version != remote_version[0]
2108 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2109 "incompatible protocol versions: master %s,"
2110 " node %s", local_version, remote_version[0])
2114 # node seems compatible, we can actually try to look into its results
2116 # full package version
2117 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2118 constants.CV_ENODEVERSION, node,
2119 "software version mismatch: master %s, node %s",
2120 constants.RELEASE_VERSION, remote_version[1],
2121 code=self.ETYPE_WARNING)
2123 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2124 if ninfo.vm_capable and isinstance(hyp_result, dict):
2125 for hv_name, hv_result in hyp_result.iteritems():
2126 test = hv_result is not None
2127 _ErrorIf(test, constants.CV_ENODEHV, node,
2128 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2130 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2131 if ninfo.vm_capable and isinstance(hvp_result, list):
2132 for item, hv_name, hv_result in hvp_result:
2133 _ErrorIf(True, constants.CV_ENODEHV, node,
2134 "hypervisor %s parameter verify failure (source %s): %s",
2135 hv_name, item, hv_result)
2137 test = nresult.get(constants.NV_NODESETUP,
2138 ["Missing NODESETUP results"])
2139 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2144 def _VerifyNodeTime(self, ninfo, nresult,
2145 nvinfo_starttime, nvinfo_endtime):
2146 """Check the node time.
2148 @type ninfo: L{objects.Node}
2149 @param ninfo: the node to check
2150 @param nresult: the remote results for the node
2151 @param nvinfo_starttime: the start time of the RPC call
2152 @param nvinfo_endtime: the end time of the RPC call
2156 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2158 ntime = nresult.get(constants.NV_TIME, None)
2160 ntime_merged = utils.MergeTime(ntime)
2161 except (ValueError, TypeError):
2162 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2165 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2166 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2167 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2168 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2172 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2173 "Node time diverges by at least %s from master node time",
2176 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2177 """Check the node LVM results.
2179 @type ninfo: L{objects.Node}
2180 @param ninfo: the node to check
2181 @param nresult: the remote results for the node
2182 @param vg_name: the configured VG name
2189 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2191 # checks vg existence and size > 20G
2192 vglist = nresult.get(constants.NV_VGLIST, None)
2194 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2196 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2197 constants.MIN_VG_SIZE)
2198 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2201 pvlist = nresult.get(constants.NV_PVLIST, None)
2202 test = pvlist is None
2203 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2205 # check that ':' is not present in PV names, since it's a
2206 # special character for lvcreate (denotes the range of PEs to allocate on this PV)
2208 for _, pvname, owner_vg in pvlist:
2209 test = ":" in pvname
2210 _ErrorIf(test, constants.CV_ENODELVM, node,
2211 "Invalid character ':' in PV '%s' of VG '%s'",
2214 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2215 """Check the node bridges.
2217 @type ninfo: L{objects.Node}
2218 @param ninfo: the node to check
2219 @param nresult: the remote results for the node
2220 @param bridges: the expected list of bridges
2227 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 missing = nresult.get(constants.NV_BRIDGES, None)
2230 test = not isinstance(missing, list)
2231 _ErrorIf(test, constants.CV_ENODENET, node,
2232 "did not return valid bridge information")
2234 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2235 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2237 def _VerifyNodeUserScripts(self, ninfo, nresult):
2238 """Check the presence and executability of user scripts on the node.
2240 @type ninfo: L{objects.Node}
2241 @param ninfo: the node to check
2242 @param nresult: the remote results for the node
2247 test = not constants.NV_USERSCRIPTS in nresult
2248 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2249 "did not return user scripts information")
2251 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2253 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2254 "user scripts not present or not executable: %s" %
2255 utils.CommaJoin(sorted(broken_scripts)))
2257 def _VerifyNodeNetwork(self, ninfo, nresult):
2258 """Check the node network connectivity results.
2260 @type ninfo: L{objects.Node}
2261 @param ninfo: the node to check
2262 @param nresult: the remote results for the node
2266 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2268 test = constants.NV_NODELIST not in nresult
2269 _ErrorIf(test, constants.CV_ENODESSH, node,
2270 "node hasn't returned node ssh connectivity data")
2272 if nresult[constants.NV_NODELIST]:
2273 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2274 _ErrorIf(True, constants.CV_ENODESSH, node,
2275 "ssh communication with node '%s': %s", a_node, a_msg)
2277 test = constants.NV_NODENETTEST not in nresult
2278 _ErrorIf(test, constants.CV_ENODENET, node,
2279 "node hasn't returned node tcp connectivity data")
2281 if nresult[constants.NV_NODENETTEST]:
2282 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2284 _ErrorIf(True, constants.CV_ENODENET, node,
2285 "tcp communication with node '%s': %s",
2286 anode, nresult[constants.NV_NODENETTEST][anode])
2288 test = constants.NV_MASTERIP not in nresult
2289 _ErrorIf(test, constants.CV_ENODENET, node,
2290 "node hasn't returned node master IP reachability data")
2292 if not nresult[constants.NV_MASTERIP]:
2293 if node == self.master_node:
2294 msg = "the master node cannot reach the master IP (not configured?)"
2296 msg = "cannot reach the master IP"
2297 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2299 def _VerifyInstance(self, instance, instanceconfig, node_image,
2301 """Verify an instance.
2303 This function checks to see if the required block devices are
2304 available on the instance's node.
2307 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2308 node_current = instanceconfig.primary_node
2310 node_vol_should = {}
2311 instanceconfig.MapLVsByNode(node_vol_should)
2313 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2314 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2315 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2317 for node in node_vol_should:
2318 n_img = node_image[node]
2319 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2320 # ignore missing volumes on offline or broken nodes
2322 for volume in node_vol_should[node]:
2323 test = volume not in n_img.volumes
2324 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2325 "volume %s missing on node %s", volume, node)
2327 if instanceconfig.admin_state == constants.ADMINST_UP:
2328 pri_img = node_image[node_current]
2329 test = instance not in pri_img.instances and not pri_img.offline
2330 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2331 "instance not running on its primary node %s",
2334 diskdata = [(nname, success, status, idx)
2335 for (nname, disks) in diskstatus.items()
2336 for idx, (success, status) in enumerate(disks)]
2338 for nname, success, bdev_status, idx in diskdata:
2339 # the 'ghost node' construction in Exec() ensures that we have a node_image entry here
2341 snode = node_image[nname]
2342 bad_snode = snode.ghost or snode.offline
2343 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2344 not success and not bad_snode,
2345 constants.CV_EINSTANCEFAULTYDISK, instance,
2346 "couldn't retrieve status for disk/%s on %s: %s",
2347 idx, nname, bdev_status)
2348 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2349 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2350 constants.CV_EINSTANCEFAULTYDISK, instance,
2351 "disk/%s on %s is faulty", idx, nname)
2353 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2354 """Verify if there are any unknown volumes in the cluster.
2356 The .os, .swap and backup volumes are ignored. All other volumes are
2357 reported as unknown.
2359 @type reserved: L{ganeti.utils.FieldSet}
2360 @param reserved: a FieldSet of reserved volume names
2363 for node, n_img in node_image.items():
2364 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2365 # skip non-healthy nodes
2367 for volume in n_img.volumes:
2368 test = ((node not in node_vol_should or
2369 volume not in node_vol_should[node]) and
2370 not reserved.Matches(volume))
2371 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2372 "volume %s is unknown", volume)
2374 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2375 """Verify N+1 Memory Resilience.
2377 Check that if one single node dies we can still start all the
2378 instances it was primary for.
2381 cluster_info = self.cfg.GetClusterInfo()
2382 for node, n_img in node_image.items():
2383 # This code checks that every node which is now listed as
2384 # secondary has enough memory to host all instances it is
2385 # supposed to should a single other node in the cluster fail.
2386 # FIXME: not ready for failover to an arbitrary node
2387 # FIXME: does not support file-backed instances
2388 # WARNING: we currently take into account down instances as well
2389 # as up ones, considering that even if they're down someone
2390 # might want to start them even in the event of a node failure.
2392 # we're skipping offline nodes from the N+1 warning, since
2393 # most likely we don't have good memory information from them;
2394 # we already list instances living on such nodes, and that's enough warning
2397 #TODO(dynmem): use MINMEM for checking
2398 #TODO(dynmem): also consider ballooning out other instances
2399 for prinode, instances in n_img.sbp.items():
2401 for instance in instances:
2402 bep = cluster_info.FillBE(instance_cfg[instance])
2403 if bep[constants.BE_AUTO_BALANCE]:
2404 needed_mem += bep[constants.BE_MAXMEM]
2405 test = n_img.mfree < needed_mem
2406 self._ErrorIf(test, constants.CV_ENODEN1, node,
2407 "not enough memory to accommodate instance failovers"
2408 " should node %s fail (%dMiB needed, %dMiB available)",
2409 prinode, needed_mem, n_img.mfree)
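# Illustrative sketch only (not called by the verification flow): the N+1
# memory check above, reduced to plain data.  The parameters are assumptions
# for this sketch: mfree is the node's free memory in MiB, and
# instances_by_primary maps each primary node to a list of
# (maxmem, auto_balance) tuples for the instances this node is secondary for.
def _ExampleNPlusOneCheck(mfree, instances_by_primary):
  failing = []
  for (prinode, instances) in instances_by_primary.items():
    # only auto-balanced instances are expected to fail over automatically
    needed_mem = sum(maxmem for (maxmem, auto_balance) in instances
                     if auto_balance)
    if mfree < needed_mem:
      failing.append((prinode, needed_mem))
  return failing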
2412 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2413 (files_all, files_opt, files_mc, files_vm)):
2414 """Verifies file checksums collected from all nodes.
2416 @param errorif: Callback for reporting errors
2417 @param nodeinfo: List of L{objects.Node} objects
2418 @param master_node: Name of master node
2419 @param all_nvinfo: RPC results
2422 # Define functions determining which nodes to consider for a file
2425 (files_mc, lambda node: (node.master_candidate or
2426 node.name == master_node)),
2427 (files_vm, lambda node: node.vm_capable),
2430 # Build mapping from filename to list of nodes which should have the file
2432 for (files, fn) in files2nodefn:
2434 filenodes = nodeinfo
2436 filenodes = filter(fn, nodeinfo)
2437 nodefiles.update((filename,
2438 frozenset(map(operator.attrgetter("name"), filenodes)))
2439 for filename in files)
2441 assert set(nodefiles) == (files_all | files_mc | files_vm)
2443 fileinfo = dict((filename, {}) for filename in nodefiles)
2444 ignore_nodes = set()
2446 for node in nodeinfo:
2448 ignore_nodes.add(node.name)
2451 nresult = all_nvinfo[node.name]
2453 if nresult.fail_msg or not nresult.payload:
2456 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2458 test = not (node_files and isinstance(node_files, dict))
2459 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2460 "Node did not return file checksum data")
2462 ignore_nodes.add(node.name)
2465 # Build per-checksum mapping from filename to nodes having it
2466 for (filename, checksum) in node_files.items():
2467 assert filename in nodefiles
2468 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2470 for (filename, checksums) in fileinfo.items():
2471 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2473 # Nodes having the file
2474 with_file = frozenset(node_name
2475 for nodes in fileinfo[filename].values()
2476 for node_name in nodes) - ignore_nodes
2478 expected_nodes = nodefiles[filename] - ignore_nodes
2480 # Nodes missing file
2481 missing_file = expected_nodes - with_file
2483 if filename in files_opt:
2485 errorif(missing_file and missing_file != expected_nodes,
2486 constants.CV_ECLUSTERFILECHECK, None,
2487 "File %s is optional, but it must exist on all or no"
2488 " nodes (not found on %s)",
2489 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2491 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2492 "File %s is missing from node(s) %s", filename,
2493 utils.CommaJoin(utils.NiceSort(missing_file)))
2495 # Warn if a node has a file it shouldn't
2496 unexpected = with_file - expected_nodes
2498 constants.CV_ECLUSTERFILECHECK, None,
2499 "File %s should not exist on node(s) %s",
2500 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2502 # See if there are multiple versions of the file
2503 test = len(checksums) > 1
2505 variants = ["variant %s on %s" %
2506 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2507 for (idx, (checksum, nodes)) in
2508 enumerate(sorted(checksums.items()))]
2512 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2513 "File %s found with %s different checksums (%s)",
2514 filename, len(checksums), "; ".join(variants))
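# Illustrative sketch only: how the "different checksums" report above can be
# derived from a {checksum: set of node names} mapping.  The function name and
# return format are assumptions for this example; the real code additionally
# uses utils.NiceSort for ordering.
def _ExampleDescribeFileVariants(checksums):
  if len(checksums) <= 1:
    # all nodes agree (or nothing was reported), nothing to describe
    return None
  return ["variant %s on %s" % (idx + 1, utils.CommaJoin(sorted(nodes)))
          for (idx, (_, nodes)) in enumerate(sorted(checksums.items()))]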
2516 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2518 """Verifies the node DRBD status.
2520 @type ninfo: L{objects.Node}
2521 @param ninfo: the node to check
2522 @param nresult: the remote results for the node
2523 @param instanceinfo: the dict of instances
2524 @param drbd_helper: the configured DRBD usermode helper
2525 @param drbd_map: the DRBD map as returned by
2526 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2530 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2533 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2534 test = (helper_result is None)
2535 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2536 "no drbd usermode helper returned")
2538 status, payload = helper_result
2540 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2541 "drbd usermode helper check unsuccessful: %s", payload)
2542 test = status and (payload != drbd_helper)
2543 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2544 "wrong drbd usermode helper: %s", payload)
2546 # compute the DRBD minors
2548 for minor, instance in drbd_map[node].items():
2549 test = instance not in instanceinfo
2550 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2551 "ghost instance '%s' in temporary DRBD map", instance)
2552 # ghost instance should not be running, but otherwise we
2553 # don't give double warnings (both ghost instance and
2554 # unallocated minor in use)
2556 node_drbd[minor] = (instance, False)
2558 instance = instanceinfo[instance]
2559 node_drbd[minor] = (instance.name,
2560 instance.admin_state == constants.ADMINST_UP)
2562 # and now check them
2563 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2564 test = not isinstance(used_minors, (tuple, list))
2565 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2566 "cannot parse drbd status file: %s", str(used_minors))
2568 # we cannot check drbd status
2571 for minor, (iname, must_exist) in node_drbd.items():
2572 test = minor not in used_minors and must_exist
2573 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2574 "drbd minor %d of instance %s is not active", minor, iname)
2575 for minor in used_minors:
2576 test = minor not in node_drbd
2577 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2578 "unallocated drbd minor %d is in use", minor)
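# Illustrative sketch only: the DRBD minor cross-check above on plain data.
# node_drbd maps minor -> (instance name, must_exist) as derived from the
# configuration, used_minors is the list the node actually reported; the
# return format is an assumption made for this sketch.
def _ExampleDrbdMinorCheck(node_drbd, used_minors):
  inactive = [(minor, iname)
              for (minor, (iname, must_exist)) in node_drbd.items()
              if must_exist and minor not in used_minors]
  unallocated = [minor for minor in used_minors if minor not in node_drbd]
  return (inactive, unallocated)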
2580 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2581 """Builds the node OS structures.
2583 @type ninfo: L{objects.Node}
2584 @param ninfo: the node to check
2585 @param nresult: the remote results for the node
2586 @param nimg: the node image object
2590 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2592 remote_os = nresult.get(constants.NV_OSLIST, None)
2593 test = (not isinstance(remote_os, list) or
2594 not compat.all(isinstance(v, list) and len(v) == 7
2595 for v in remote_os))
2597 _ErrorIf(test, constants.CV_ENODEOS, node,
2598 "node hasn't returned valid OS data")
2607 for (name, os_path, status, diagnose,
2608 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2610 if name not in os_dict:
2613 # parameters is a list of lists instead of list of tuples due to
2614 # JSON lacking a real tuple type, fix it:
2615 parameters = [tuple(v) for v in parameters]
2616 os_dict[name].append((os_path, status, diagnose,
2617 set(variants), set(parameters), set(api_ver)))
2619 nimg.oslist = os_dict
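# Illustrative sketch only: converting one raw NV_OSLIST entry (a 7-element
# list, as validated above) into the tuple stored in nimg.oslist.  The helper
# name is an assumption for this example.
def _ExampleOsListEntry(raw_entry):
  (name, os_path, status, diagnose, variants, parameters, api_ver) = raw_entry
  # JSON has no tuple type, so parameters arrive as lists of lists
  parameters = [tuple(v) for v in parameters]
  return (name, (os_path, status, diagnose,
                 set(variants), set(parameters), set(api_ver)))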
2621 def _VerifyNodeOS(self, ninfo, nimg, base):
2622 """Verifies the node OS list.
2624 @type ninfo: L{objects.Node}
2625 @param ninfo: the node to check
2626 @param nimg: the node image object
2627 @param base: the 'template' node we match against (e.g. from the master)
2631 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2633 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2635 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2636 for os_name, os_data in nimg.oslist.items():
2637 assert os_data, "Empty OS status for OS %s?!" % os_name
2638 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2639 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2640 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2641 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2642 "OS '%s' has multiple entries (first one shadows the rest): %s",
2643 os_name, utils.CommaJoin([v[0] for v in os_data]))
2644 # comparisons with the 'base' image
2645 test = os_name not in base.oslist
2646 _ErrorIf(test, constants.CV_ENODEOS, node,
2647 "Extra OS %s not present on reference node (%s)",
2651 assert base.oslist[os_name], "Base node has empty OS status?"
2652 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2654 # base OS is invalid, skipping
2656 for kind, a, b in [("API version", f_api, b_api),
2657 ("variants list", f_var, b_var),
2658 ("parameters", beautify_params(f_param),
2659 beautify_params(b_param))]:
2660 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2661 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2662 kind, os_name, base.name,
2663 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2665 # check any missing OSes
2666 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2667 _ErrorIf(missing, constants.CV_ENODEOS, node,
2668 "OSes present on reference node %s but missing on this node: %s",
2669 base.name, utils.CommaJoin(missing))
2671 def _VerifyOob(self, ninfo, nresult):
2672 """Verifies out of band functionality of a node.
2674 @type ninfo: L{objects.Node}
2675 @param ninfo: the node to check
2676 @param nresult: the remote results for the node
2680 # We just have to verify the paths on master and/or master candidates
2681 # as the oob helper is invoked on the master
2682 if ((ninfo.master_candidate or ninfo.master_capable) and
2683 constants.NV_OOB_PATHS in nresult):
2684 for path_result in nresult[constants.NV_OOB_PATHS]:
2685 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2687 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2688 """Verifies and updates the node volume data.
2690 This function will update a L{NodeImage}'s internal structures
2691 with data from the remote call.
2693 @type ninfo: L{objects.Node}
2694 @param ninfo: the node to check
2695 @param nresult: the remote results for the node
2696 @param nimg: the node image object
2697 @param vg_name: the configured VG name
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 nimg.lvm_fail = True
2704 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2707 elif isinstance(lvdata, basestring):
2708 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2709 utils.SafeEncode(lvdata))
2710 elif not isinstance(lvdata, dict):
2711 _ErrorIf(True, constants.CV_ENODELVM, node,
2712 "rpc call to node failed (lvlist)")
2714 nimg.volumes = lvdata
2715 nimg.lvm_fail = False
2717 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2718 """Verifies and updates the node instance list.
2720 If the listing was successful, then updates this node's instance
2721 list. Otherwise, it marks the RPC call as failed for the instance list.
2724 @type ninfo: L{objects.Node}
2725 @param ninfo: the node to check
2726 @param nresult: the remote results for the node
2727 @param nimg: the node image object
2730 idata = nresult.get(constants.NV_INSTANCELIST, None)
2731 test = not isinstance(idata, list)
2732 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2733 "rpc call to node failed (instancelist): %s",
2734 utils.SafeEncode(str(idata)))
2736 nimg.hyp_fail = True
2738 nimg.instances = idata
2740 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2741 """Verifies and computes a node information map
2743 @type ninfo: L{objects.Node}
2744 @param ninfo: the node to check
2745 @param nresult: the remote results for the node
2746 @param nimg: the node image object
2747 @param vg_name: the configured VG name
2751 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2753 # try to read free memory (from the hypervisor)
2754 hv_info = nresult.get(constants.NV_HVINFO, None)
2755 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2756 _ErrorIf(test, constants.CV_ENODEHV, node,
2757 "rpc call to node failed (hvinfo)")
2760 nimg.mfree = int(hv_info["memory_free"])
2761 except (ValueError, TypeError):
2762 _ErrorIf(True, constants.CV_ENODERPC, node,
2763 "node returned invalid nodeinfo, check hypervisor")
2765 # FIXME: devise a free space model for file based instances as well
2766 if vg_name is not None:
2767 test = (constants.NV_VGLIST not in nresult or
2768 vg_name not in nresult[constants.NV_VGLIST])
2769 _ErrorIf(test, constants.CV_ENODELVM, node,
2770 "node didn't return data for the volume group '%s'"
2771 " - it is either missing or broken", vg_name)
2774 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2775 except (ValueError, TypeError):
2776 _ErrorIf(True, constants.CV_ENODERPC, node,
2777 "node returned invalid LVM info, check LVM status")
2779 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2780 """Gets per-disk status information for all instances.
2782 @type nodelist: list of strings
2783 @param nodelist: Node names
2784 @type node_image: dict of (name, L{objects.Node})
2785 @param node_image: Node objects
2786 @type instanceinfo: dict of (name, L{objects.Instance})
2787 @param instanceinfo: Instance objects
2788 @rtype: {instance: {node: [(success, payload)]}}
2789 @return: a dictionary of per-instance dictionaries with nodes as
2790 keys and disk information as values; the disk information is a
2791 list of tuples (success, payload)
2794 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2797 node_disks_devonly = {}
2798 diskless_instances = set()
2799 diskless = constants.DT_DISKLESS
2801 for nname in nodelist:
2802 node_instances = list(itertools.chain(node_image[nname].pinst,
2803 node_image[nname].sinst))
2804 diskless_instances.update(inst for inst in node_instances
2805 if instanceinfo[inst].disk_template == diskless)
2806 disks = [(inst, disk)
2807 for inst in node_instances
2808 for disk in instanceinfo[inst].disks]
2811 # No need to collect data
2814 node_disks[nname] = disks
2816 # Creating copies as SetDiskID below will modify the objects and that can
2817 # lead to incorrect data returned from nodes
2818 devonly = [dev.Copy() for (_, dev) in disks]
2821 self.cfg.SetDiskID(dev, nname)
2823 node_disks_devonly[nname] = devonly
2825 assert len(node_disks) == len(node_disks_devonly)
2827 # Collect data from all nodes with disks
2828 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2831 assert len(result) == len(node_disks)
2835 for (nname, nres) in result.items():
2836 disks = node_disks[nname]
2839 # No data from this node
2840 data = len(disks) * [(False, "node offline")]
2843 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2844 "while getting disk information: %s", msg)
2846 # No data from this node
2847 data = len(disks) * [(False, msg)]
2850 for idx, i in enumerate(nres.payload):
2851 if isinstance(i, (tuple, list)) and len(i) == 2:
2854 logging.warning("Invalid result from node %s, entry %d: %s",
2856 data.append((False, "Invalid result from the remote node"))
2858 for ((inst, _), status) in zip(disks, data):
2859 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2861 # Add empty entries for diskless instances.
2862 for inst in diskless_instances:
2863 assert inst not in instdisk
2866 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2867 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2868 compat.all(isinstance(s, (tuple, list)) and
2869 len(s) == 2 for s in statuses)
2870 for inst, nnames in instdisk.items()
2871 for nname, statuses in nnames.items())
2872 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
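# Illustrative example of the structure returned above; all names and values
# are invented.  Each inner list holds one (success, payload) entry per disk
# of the instance, and diskless instances map to an empty dict.
#
#   instdisk = {
#     "inst1.example.com": {
#       "node1.example.com": [(True, status_disk0), (True, status_disk1)],
#       "node2.example.com": [(False, "node offline"), (False, "node offline")],
#       },
#     "diskless.example.com": {},
#     }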
2877 def _SshNodeSelector(group_uuid, all_nodes):
2878 """Create endless iterators for all potential SSH check hosts.
2881 nodes = [node for node in all_nodes
2882 if (node.group != group_uuid and
2884 keyfunc = operator.attrgetter("group")
2886 return map(itertools.cycle,
2887 [sorted(map(operator.attrgetter("name"), names))
2888 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2892 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2893 """Choose which nodes should talk to which other nodes.
2895 We will make nodes contact all nodes in their group, and one node from every other group.
2898 @warning: This algorithm has a known issue if one node group is much
2899 smaller than others (e.g. just one node). In such a case all other
2900 nodes will talk to the single node.
2903 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2904 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2906 return (online_nodes,
2907 dict((name, sorted([i.next() for i in sel]))
2908 for name in online_nodes))
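# Illustrative sketch only: how cycling one iterator per foreign group (as
# built by _SshNodeSelector above) spreads the SSH checks over the other
# groups' nodes.  With other_groups = {"g1": ["a", "b"], "g2": ["c"]} and
# local nodes ["n1", "n2", "n3"], this yields n1 -> ["a", "c"],
# n2 -> ["b", "c"], n3 -> ["a", "c"]; all data here is invented.
def _ExampleSshTargetSelection(local_nodes, other_groups):
  selectors = [itertools.cycle(sorted(names))
               for (_, names) in sorted(other_groups.items())]
  return dict((name, [sel.next() for sel in selectors])
              for name in local_nodes)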
2910 def BuildHooksEnv(self):
2913 Cluster-Verify hooks run only in the post phase; their failure is logged
2914 in the verify output and makes the verification fail.
2918 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2921 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2922 for node in self.my_node_info.values())
2926 def BuildHooksNodes(self):
2927 """Build hooks nodes.
2930 return ([], self.my_node_names)
2932 def Exec(self, feedback_fn):
2933 """Verify integrity of the node group, performing various tests on nodes.
2936 # This method has too many local variables. pylint: disable=R0914
2937 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2939 if not self.my_node_names:
2941 feedback_fn("* Empty node group, skipping verification")
2945 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2946 verbose = self.op.verbose
2947 self._feedback_fn = feedback_fn
2949 vg_name = self.cfg.GetVGName()
2950 drbd_helper = self.cfg.GetDRBDHelper()
2951 cluster = self.cfg.GetClusterInfo()
2952 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2953 hypervisors = cluster.enabled_hypervisors
2954 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2956 i_non_redundant = [] # Non redundant instances
2957 i_non_a_balanced = [] # Non auto-balanced instances
2958 i_offline = 0 # Count of offline instances
2959 n_offline = 0 # Count of offline nodes
2960 n_drained = 0 # Count of nodes being drained
2961 node_vol_should = {}
2963 # FIXME: verify OS list
2966 filemap = _ComputeAncillaryFiles(cluster, False)
2968 # do local checksums
2969 master_node = self.master_node = self.cfg.GetMasterNode()
2970 master_ip = self.cfg.GetMasterIP()
2972 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2975 if self.cfg.GetUseExternalMipScript():
2976 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2978 node_verify_param = {
2979 constants.NV_FILELIST:
2980 utils.UniqueSequence(filename
2981 for files in filemap
2982 for filename in files),
2983 constants.NV_NODELIST:
2984 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2985 self.all_node_info.values()),
2986 constants.NV_HYPERVISOR: hypervisors,
2987 constants.NV_HVPARAMS:
2988 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2989 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2990 for node in node_data_list
2991 if not node.offline],
2992 constants.NV_INSTANCELIST: hypervisors,
2993 constants.NV_VERSION: None,
2994 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2995 constants.NV_NODESETUP: None,
2996 constants.NV_TIME: None,
2997 constants.NV_MASTERIP: (master_node, master_ip),
2998 constants.NV_OSLIST: None,
2999 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3000 constants.NV_USERSCRIPTS: user_scripts,
3003 if vg_name is not None:
3004 node_verify_param[constants.NV_VGLIST] = None
3005 node_verify_param[constants.NV_LVLIST] = vg_name
3006 node_verify_param[constants.NV_PVLIST] = [vg_name]
3007 node_verify_param[constants.NV_DRBDLIST] = None
3010 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3013 # FIXME: this needs to be changed per node-group, not cluster-wide
3015 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3016 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3017 bridges.add(default_nicpp[constants.NIC_LINK])
3018 for instance in self.my_inst_info.values():
3019 for nic in instance.nics:
3020 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3021 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3022 bridges.add(full_nic[constants.NIC_LINK])
3025 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3027 # Build our expected cluster state
3028 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3030 vm_capable=node.vm_capable))
3031 for node in node_data_list)
3035 for node in self.all_node_info.values():
3036 path = _SupportsOob(self.cfg, node)
3037 if path and path not in oob_paths:
3038 oob_paths.append(path)
3041 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3043 for instance in self.my_inst_names:
3044 inst_config = self.my_inst_info[instance]
3046 for nname in inst_config.all_nodes:
3047 if nname not in node_image:
3048 gnode = self.NodeImage(name=nname)
3049 gnode.ghost = (nname not in self.all_node_info)
3050 node_image[nname] = gnode
3052 inst_config.MapLVsByNode(node_vol_should)
3054 pnode = inst_config.primary_node
3055 node_image[pnode].pinst.append(instance)
3057 for snode in inst_config.secondary_nodes:
3058 nimg = node_image[snode]
3059 nimg.sinst.append(instance)
3060 if pnode not in nimg.sbp:
3061 nimg.sbp[pnode] = []
3062 nimg.sbp[pnode].append(instance)
3064 # At this point, we have the in-memory data structures complete,
3065 # except for the runtime information, which we'll gather next
3067 # Due to the way our RPC system works, exact response times cannot be
3068 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3069 # time before and after executing the request, we can at least have a time window
3071 nvinfo_starttime = time.time()
3072 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3074 self.cfg.GetClusterName())
3075 nvinfo_endtime = time.time()
3077 if self.extra_lv_nodes and vg_name is not None:
3079 self.rpc.call_node_verify(self.extra_lv_nodes,
3080 {constants.NV_LVLIST: vg_name},
3081 self.cfg.GetClusterName())
3083 extra_lv_nvinfo = {}
3085 all_drbd_map = self.cfg.ComputeDRBDMap()
3087 feedback_fn("* Gathering disk information (%s nodes)" %
3088 len(self.my_node_names))
3089 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3092 feedback_fn("* Verifying configuration file consistency")
3094 # If not all nodes are being checked, we need to make sure the master node
3095 # and a non-checked vm_capable node are in the list.
3096 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3098 vf_nvinfo = all_nvinfo.copy()
3099 vf_node_info = list(self.my_node_info.values())
3100 additional_nodes = []
3101 if master_node not in self.my_node_info:
3102 additional_nodes.append(master_node)
3103 vf_node_info.append(self.all_node_info[master_node])
3104 # Add the first vm_capable node we find which is not included
3105 for node in absent_nodes:
3106 nodeinfo = self.all_node_info[node]
3107 if nodeinfo.vm_capable and not nodeinfo.offline:
3108 additional_nodes.append(node)
3109 vf_node_info.append(self.all_node_info[node])
3111 key = constants.NV_FILELIST
3112 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3113 {key: node_verify_param[key]},
3114 self.cfg.GetClusterName()))
3116 vf_nvinfo = all_nvinfo
3117 vf_node_info = self.my_node_info.values()
3119 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3121 feedback_fn("* Verifying node status")
3125 for node_i in node_data_list:
3127 nimg = node_image[node]
3131 feedback_fn("* Skipping offline node %s" % (node,))
3135 if node == master_node:
3137 elif node_i.master_candidate:
3138 ntype = "master candidate"
3139 elif node_i.drained:
3145 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3147 msg = all_nvinfo[node].fail_msg
3148 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3151 nimg.rpc_fail = True
3154 nresult = all_nvinfo[node].payload
3156 nimg.call_ok = self._VerifyNode(node_i, nresult)
3157 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3158 self._VerifyNodeNetwork(node_i, nresult)
3159 self._VerifyNodeUserScripts(node_i, nresult)
3160 self._VerifyOob(node_i, nresult)
3163 self._VerifyNodeLVM(node_i, nresult, vg_name)
3164 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3167 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3168 self._UpdateNodeInstances(node_i, nresult, nimg)
3169 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3170 self._UpdateNodeOS(node_i, nresult, nimg)
3172 if not nimg.os_fail:
3173 if refos_img is None:
3175 self._VerifyNodeOS(node_i, nimg, refos_img)
3176 self._VerifyNodeBridges(node_i, nresult, bridges)
3178 # Check whether all running instances are primary for the node. (This
3179 # can no longer be done from _VerifyInstance below, since some of the
3180 # wrong instances could be from other node groups.)
3181 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3183 for inst in non_primary_inst:
3184 # FIXME: investigate best way to handle offline insts
3185 if inst.admin_state == constants.ADMINST_OFFLINE:
3187 feedback_fn("* Skipping offline instance %s" % inst.name)
3190 test = inst in self.all_inst_info
3191 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3192 "instance should not run on node %s", node_i.name)
3193 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3194 "node is running unknown instance %s", inst)
3196 for node, result in extra_lv_nvinfo.items():
3197 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3198 node_image[node], vg_name)
3200 feedback_fn("* Verifying instance status")
3201 for instance in self.my_inst_names:
3203 feedback_fn("* Verifying instance %s" % instance)
3204 inst_config = self.my_inst_info[instance]
3205 self._VerifyInstance(instance, inst_config, node_image,
3207 inst_nodes_offline = []
3209 pnode = inst_config.primary_node
3210 pnode_img = node_image[pnode]
3211 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3212 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3213 " primary node failed", instance)
3215 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3217 constants.CV_EINSTANCEBADNODE, instance,
3218 "instance is marked as running and lives on offline node %s",
3219 inst_config.primary_node)
3221 # If the instance is non-redundant we cannot survive losing its primary
3222 # node, so we are not N+1 compliant. On the other hand we have no disk
3223 # templates with more than one secondary so that situation is not well handled either.
3225 # FIXME: does not support file-backed instances
3226 if not inst_config.secondary_nodes:
3227 i_non_redundant.append(instance)
3229 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3230 constants.CV_EINSTANCELAYOUT,
3231 instance, "instance has multiple secondary nodes: %s",
3232 utils.CommaJoin(inst_config.secondary_nodes),
3233 code=self.ETYPE_WARNING)
3235 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3236 pnode = inst_config.primary_node
3237 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3238 instance_groups = {}
3240 for node in instance_nodes:
3241 instance_groups.setdefault(self.all_node_info[node].group,
3245 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3246 # Sort so that we always list the primary node first.
3247 for group, nodes in sorted(instance_groups.items(),
3248 key=lambda (_, nodes): pnode in nodes,
3251 self._ErrorIf(len(instance_groups) > 1,
3252 constants.CV_EINSTANCESPLITGROUPS,
3253 instance, "instance has primary and secondary nodes in"
3254 " different groups: %s", utils.CommaJoin(pretty_list),
3255 code=self.ETYPE_WARNING)
3257 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3258 i_non_a_balanced.append(instance)
3260 for snode in inst_config.secondary_nodes:
3261 s_img = node_image[snode]
3262 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3263 snode, "instance %s, connection to secondary node failed",
3267 inst_nodes_offline.append(snode)
3269 # warn that the instance lives on offline nodes
3270 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3271 "instance has offline secondary node(s) %s",
3272 utils.CommaJoin(inst_nodes_offline))
3273 # ... or ghost/non-vm_capable nodes
3274 for node in inst_config.all_nodes:
3275 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3276 instance, "instance lives on ghost node %s", node)
3277 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3278 instance, "instance lives on non-vm_capable node %s", node)
3280 feedback_fn("* Verifying orphan volumes")
3281 reserved = utils.FieldSet(*cluster.reserved_lvs)
3283 # We will get spurious "unknown volume" warnings if any node of this group
3284 # is secondary for an instance whose primary is in another group. To avoid
3285 # them, we find these instances and add their volumes to node_vol_should.
3286 for inst in self.all_inst_info.values():
3287 for secondary in inst.secondary_nodes:
3288 if (secondary in self.my_node_info
3289 and inst.name not in self.my_inst_info):
3290 inst.MapLVsByNode(node_vol_should)
3293 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3295 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3296 feedback_fn("* Verifying N+1 Memory redundancy")
3297 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3299 feedback_fn("* Other Notes")
3301 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3302 % len(i_non_redundant))
3304 if i_non_a_balanced:
3305 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3306 % len(i_non_a_balanced))
3309 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3312 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3315 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3319 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3320 """Analyze the post-hooks' result
3322 This method analyses the hook result, handles it, and sends some
3323 nicely-formatted feedback back to the user.
3325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3327 @param hooks_results: the results of the multi-node hooks rpc call
3328 @param feedback_fn: function used to send feedback back to the caller
3329 @param lu_result: previous Exec result
3330 @return: the new Exec result, based on the previous result
3334 # We only really run POST phase hooks, only for non-empty groups,
3335 # and are only interested in their results
3336 if not self.my_node_names:
3339 elif phase == constants.HOOKS_PHASE_POST:
3340 # Used to change hooks' output to proper indentation
3341 feedback_fn("* Hooks Results")
3342 assert hooks_results, "invalid result from hooks"
3344 for node_name in hooks_results:
3345 res = hooks_results[node_name]
3347 test = msg and not res.offline
3348 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3349 "Communication failure in hooks execution: %s", msg)
3350 if res.offline or msg:
3351 # No need to investigate payload if node is offline or gave an error
3354 for script, hkr, output in res.payload:
3355 test = hkr == constants.HKR_FAIL
3356 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3357 "Script %s failed, output:", script)
3359 output = self._HOOKS_INDENT_RE.sub(" ", output)
3360 feedback_fn("%s" % output)
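# Illustrative note: _HOOKS_INDENT_RE above anchors at the start of every
# line (re.M), so substituting a whitespace prefix indents multi-line hook
# output before it is fed back to the user.  The sample string is invented:
#
#   >>> re.compile("^", re.M).sub("  ", "line one\nline two")
#   '  line one\n  line two'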
3366 class LUClusterVerifyDisks(NoHooksLU):
3367 """Verifies the cluster disks status.
3372 def ExpandNames(self):
3373 self.share_locks = _ShareAll()
3374 self.needed_locks = {
3375 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3378 def Exec(self, feedback_fn):
3379 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3381 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3382 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3383 for group in group_names])
3386 class LUGroupVerifyDisks(NoHooksLU):
3387 """Verifies the status of all disks in a node group.
3392 def ExpandNames(self):
3393 # Raises errors.OpPrereqError on its own if group can't be found
3394 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3396 self.share_locks = _ShareAll()
3397 self.needed_locks = {
3398 locking.LEVEL_INSTANCE: [],
3399 locking.LEVEL_NODEGROUP: [],
3400 locking.LEVEL_NODE: [],
3403 def DeclareLocks(self, level):
3404 if level == locking.LEVEL_INSTANCE:
3405 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3407 # Lock instances optimistically, needs verification once node and group
3408 # locks have been acquired
3409 self.needed_locks[locking.LEVEL_INSTANCE] = \
3410 self.cfg.GetNodeGroupInstances(self.group_uuid)
3412 elif level == locking.LEVEL_NODEGROUP:
3413 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3415 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3416 set([self.group_uuid] +
3417 # Lock all groups used by instances optimistically; this requires
3418 # going via the node before it's locked, requiring verification later on
3421 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3422 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3424 elif level == locking.LEVEL_NODE:
3425 # This will only lock the nodes in the group to be verified which contain actual instances
3427 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3428 self._LockInstancesNodes()
3430 # Lock all nodes in group to be verified
3431 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3432 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3433 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3435 def CheckPrereq(self):
3436 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3437 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3438 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3440 assert self.group_uuid in owned_groups
3442 # Check if locked instances are still correct
3443 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3445 # Get instance information
3446 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3448 # Check if node groups for locked instances are still correct
3449 for (instance_name, inst) in self.instances.items():
3450 assert owned_nodes.issuperset(inst.all_nodes), \
3451 "Instance %s's nodes changed while we kept the lock" % instance_name
3453 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3456 assert self.group_uuid in inst_groups, \
3457 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3459 def Exec(self, feedback_fn):
3460 """Verify integrity of cluster disks.
3462 @rtype: tuple of three items
3463 @return: a tuple of (dict of node-to-node_error, list of instances
3464 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3469 res_instances = set()
3472 nv_dict = _MapInstanceDisksToNodes([inst
3473 for inst in self.instances.values()
3474 if inst.admin_state == constants.ADMINST_UP])
3477 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3478 set(self.cfg.GetVmCapableNodeList()))
3480 node_lvs = self.rpc.call_lv_list(nodes, [])
3482 for (node, node_res) in node_lvs.items():
3483 if node_res.offline:
3486 msg = node_res.fail_msg
3488 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3489 res_nodes[node] = msg
3492 for lv_name, (_, _, lv_online) in node_res.payload.items():
3493 inst = nv_dict.pop((node, lv_name), None)
3494 if not (lv_online or inst is None):
3495 res_instances.add(inst)
3497 # any leftover items in nv_dict are missing LVs, let's arrange the data better
3499 for key, inst in nv_dict.iteritems():
3500 res_missing.setdefault(inst, []).append(list(key))
3502 return (res_nodes, list(res_instances), res_missing)
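# Illustrative sketch only (not used by any LU): the core of
# LUGroupVerifyDisks.Exec above, on plain data.  Assumed shapes for this
# sketch: nv_dict maps (node, lv name) -> instance name for every LV an
# admin-up instance should have, node_lvs maps node -> {lv name: is_online}.
def _ExampleFindBadLvs(nv_dict, node_lvs):
  nv_dict = dict(nv_dict)  # work on a copy, we pop entries below
  need_activation = set()
  for (node, lvs) in node_lvs.items():
    for (lv_name, lv_online) in lvs.items():
      inst = nv_dict.pop((node, lv_name), None)
      if inst is not None and not lv_online:
        # the LV exists but is not active, the instance needs activate-disks
        need_activation.add(inst)
  # whatever is left was not reported by its node at all, i.e. missing
  missing = {}
  for ((node, lv_name), inst) in nv_dict.items():
    missing.setdefault(inst, []).append((node, lv_name))
  return (need_activation, missing)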
3505 class LUClusterRepairDiskSizes(NoHooksLU):
3506 """Verifies the cluster disks sizes.
3511 def ExpandNames(self):
3512 if self.op.instances:
3513 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3514 self.needed_locks = {
3515 locking.LEVEL_NODE_RES: [],
3516 locking.LEVEL_INSTANCE: self.wanted_names,
3518 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3520 self.wanted_names = None
3521 self.needed_locks = {
3522 locking.LEVEL_NODE_RES: locking.ALL_SET,
3523 locking.LEVEL_INSTANCE: locking.ALL_SET,
3525 self.share_locks = {
3526 locking.LEVEL_NODE_RES: 1,
3527 locking.LEVEL_INSTANCE: 0,
3530 def DeclareLocks(self, level):
3531 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3532 self._LockInstancesNodes(primary_only=True, level=level)
3534 def CheckPrereq(self):
3535 """Check prerequisites.
3537 This only checks the optional instance list against the existing names.
3540 if self.wanted_names is None:
3541 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3543 self.wanted_instances = \
3544 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3546 def _EnsureChildSizes(self, disk):
3547 """Ensure children of the disk have the needed disk size.
3549 This is valid mainly for DRBD8 and fixes an issue where the
3550 children have smaller disk size.
3552 @param disk: an L{ganeti.objects.Disk} object
3555 if disk.dev_type == constants.LD_DRBD8:
3556 assert disk.children, "Empty children for DRBD8?"
3557 fchild = disk.children[0]
3558 mismatch = fchild.size < disk.size
3560 self.LogInfo("Child disk has size %d, parent %d, fixing",
3561 fchild.size, disk.size)
3562 fchild.size = disk.size
3564 # and we recurse on this child only, not on the metadev
3565 return self._EnsureChildSizes(fchild) or mismatch
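# Illustrative example of the fix above on a DRBD8 disk whose data child
# ended up smaller than the parent; the dict-based stand-in is an assumption
# for this sketch, the real code operates on L{ganeti.objects.Disk} objects.
#
#   drbd = {"size": 1024, "children": [{"size": 1000}, {"size": 128}]}
#   if drbd["children"][0]["size"] < drbd["size"]:
#     drbd["children"][0]["size"] = drbd["size"]  # data child grown to 1024
#   # the second (metadata) child is deliberately left alone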
3569 def Exec(self, feedback_fn):
3570 """Verify the size of cluster disks.
3573 # TODO: check child disks too
3574 # TODO: check differences in size between primary/secondary nodes
3576 for instance in self.wanted_instances:
3577 pnode = instance.primary_node
3578 if pnode not in per_node_disks:
3579 per_node_disks[pnode] = []
3580 for idx, disk in enumerate(instance.disks):
3581 per_node_disks[pnode].append((instance, idx, disk))
3583 assert not (frozenset(per_node_disks.keys()) -
3584 self.owned_locks(locking.LEVEL_NODE_RES)), \
3585 "Not owning correct locks"
3586 assert not self.owned_locks(locking.LEVEL_NODE)
3589 for node, dskl in per_node_disks.items():
3590 newl = [v[2].Copy() for v in dskl]
3592 self.cfg.SetDiskID(dsk, node)
3593 result = self.rpc.call_blockdev_getsize(node, newl)
3595 self.LogWarning("Failure in blockdev_getsize call to node"
3596 " %s, ignoring", node)
3598 if len(result.payload) != len(dskl):
3599 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3600 " result.payload=%s", node, len(dskl), result.payload)
3601 self.LogWarning("Invalid result from node %s, ignoring node results",
3604 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3606 self.LogWarning("Disk %d of instance %s did not return size"
3607 " information, ignoring", idx, instance.name)
3609 if not isinstance(size, (int, long)):
3610 self.LogWarning("Disk %d of instance %s did not return valid"
3611 " size information, ignoring", idx, instance.name)
3614 if size != disk.size:
3615 self.LogInfo("Disk %d of instance %s has mismatched size,"
3616 " correcting: recorded %d, actual %d", idx,
3617 instance.name, disk.size, size)
3619 self.cfg.Update(instance, feedback_fn)
3620 changed.append((instance.name, idx, size))
3621 if self._EnsureChildSizes(disk):
3622 self.cfg.Update(instance, feedback_fn)
3623 changed.append((instance.name, idx, disk.size))
3627 class LUClusterRename(LogicalUnit):
3628 """Rename the cluster.
3631 HPATH = "cluster-rename"
3632 HTYPE = constants.HTYPE_CLUSTER
3634 def BuildHooksEnv(self):
3639 "OP_TARGET": self.cfg.GetClusterName(),
3640 "NEW_NAME": self.op.name,
3643 def BuildHooksNodes(self):
3644 """Build hooks nodes.
3647 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3649 def CheckPrereq(self):
3650 """Verify that the passed name is a valid one.
3653 hostname = netutils.GetHostname(name=self.op.name,
3654 family=self.cfg.GetPrimaryIPFamily())
3656 new_name = hostname.name
3657 self.ip = new_ip = hostname.ip
3658 old_name = self.cfg.GetClusterName()
3659 old_ip = self.cfg.GetMasterIP()
3660 if new_name == old_name and new_ip == old_ip:
3661 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3662 " cluster has changed",
3664 if new_ip != old_ip:
3665 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3666 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3667 " reachable on the network" %
3668 new_ip, errors.ECODE_NOTUNIQUE)
3670 self.op.name = new_name
3672 def Exec(self, feedback_fn):
3673 """Rename the cluster.
3676 clustername = self.op.name
3679 # shutdown the master IP
3680 master_params = self.cfg.GetMasterNetworkParameters()
3681 ems = self.cfg.GetUseExternalMipScript()
3682 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3684 result.Raise("Could not disable the master role")
3687 cluster = self.cfg.GetClusterInfo()
3688 cluster.cluster_name = clustername
3689 cluster.master_ip = new_ip
3690 self.cfg.Update(cluster, feedback_fn)
3692 # update the known hosts file
3693 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3694 node_list = self.cfg.GetOnlineNodeList()
3696 node_list.remove(master_params.name)
3699 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3701 master_params.ip = new_ip
3702 result = self.rpc.call_node_activate_master_ip(master_params.name,
3704 msg = result.fail_msg
3706 self.LogWarning("Could not re-enable the master role on"
3707 " the master, please restart manually: %s", msg)
3712 def _ValidateNetmask(cfg, netmask):
3713 """Checks if a netmask is valid.
3715 @type cfg: L{config.ConfigWriter}
3716 @param cfg: The cluster configuration
3718 @param netmask: the netmask to be verified
3719 @raise errors.OpPrereqError: if the validation fails
3722 ip_family = cfg.GetPrimaryIPFamily()
3724 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3725 except errors.ProgrammerError:
3726 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3728 if not ipcls.ValidateNetmask(netmask):
3729 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
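# Illustrative sketch only: for both IP families the netmask here is a CIDR
# prefix length, so the check boils down to a range test (at most 32 bits for
# IPv4, 128 for IPv6).  This stand-in does not use the netutils classes above
# and the exact bounds accepted by ValidateNetmask may be stricter.
def _ExampleValidPrefixLength(prefix, ipv6=False):
  return 0 <= prefix <= (128 if ipv6 else 32)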
3733 class LUClusterSetParams(LogicalUnit):
3734 """Change the parameters of the cluster.
3737 HPATH = "cluster-modify"
3738 HTYPE = constants.HTYPE_CLUSTER
3741 def CheckArguments(self):
3745 if self.op.uid_pool:
3746 uidpool.CheckUidPool(self.op.uid_pool)
3748 if self.op.add_uids:
3749 uidpool.CheckUidPool(self.op.add_uids)
3751 if self.op.remove_uids:
3752 uidpool.CheckUidPool(self.op.remove_uids)
3754 if self.op.master_netmask is not None:
3755 _ValidateNetmask(self.cfg, self.op.master_netmask)
3757 if self.op.diskparams:
3758 for dt_params in self.op.diskparams.values():
3759 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3761 def ExpandNames(self):
3762 # FIXME: in the future maybe other cluster params won't require checking on
3763 # all nodes to be modified.
3764 self.needed_locks = {
3765 locking.LEVEL_NODE: locking.ALL_SET,
3767 self.share_locks[locking.LEVEL_NODE] = 1
3769 def BuildHooksEnv(self):
3774 "OP_TARGET": self.cfg.GetClusterName(),
3775 "NEW_VG_NAME": self.op.vg_name,
3778 def BuildHooksNodes(self):
3779 """Build hooks nodes.
3782 mn = self.cfg.GetMasterNode()
3785 def CheckPrereq(self):
3786 """Check prerequisites.
3788 This checks whether the given params don't conflict and
3789 if the given volume group is valid.
3792 if self.op.vg_name is not None and not self.op.vg_name:
3793 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3794 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3795 " instances exist", errors.ECODE_INVAL)
3797 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3798 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3799 raise errors.OpPrereqError("Cannot disable drbd helper while"
3800 " drbd-based instances exist",
3803 node_list = self.owned_locks(locking.LEVEL_NODE)
3805 # if vg_name is not None, check the given volume group on all nodes
3807 vglist = self.rpc.call_vg_list(node_list)
3808 for node in node_list:
3809 msg = vglist[node].fail_msg
3811 # ignoring down node
3812 self.LogWarning("Error while gathering data on node %s"
3813 " (ignoring node): %s", node, msg)
3815 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3817 constants.MIN_VG_SIZE)
3819 raise errors.OpPrereqError("Error on node '%s': %s" %
3820 (node, vgstatus), errors.ECODE_ENVIRON)
3822 if self.op.drbd_helper:
3823 # checks given drbd helper on all nodes
3824 helpers = self.rpc.call_drbd_helper(node_list)
3825 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3827 self.LogInfo("Not checking drbd helper on offline node %s", node)
3829 msg = helpers[node].fail_msg
3831 raise errors.OpPrereqError("Error checking drbd helper on node"
3832 " '%s': %s" % (node, msg),
3833 errors.ECODE_ENVIRON)
3834 node_helper = helpers[node].payload
3835 if node_helper != self.op.drbd_helper:
3836 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3837 (node, node_helper), errors.ECODE_ENVIRON)
3839 self.cluster = cluster = self.cfg.GetClusterInfo()
3840 # validate params changes
3841 if self.op.beparams:
3842 objects.UpgradeBeParams(self.op.beparams)
3843 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3844 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3846 if self.op.ndparams:
3847 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3848 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3850 # TODO: we need a more general way to handle resetting
3851 # cluster-level parameters to default values
3852 if self.new_ndparams["oob_program"] == "":
3853 self.new_ndparams["oob_program"] = \
3854 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3856 if self.op.hv_state:
3857 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3858 self.cluster.hv_state_static)
3859 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3860 for hv, values in new_hv_state.items())
3862 if self.op.disk_state:
3863 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3864 self.cluster.disk_state_static)
3865 self.new_disk_state = \
3866 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3867 for name, values in svalues.items()))
3868 for storage, svalues in new_disk_state.items())
3871 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3874 if self.op.nicparams:
3875 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3876 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3877 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3880 # check all instances for consistency
3881 for instance in self.cfg.GetAllInstancesInfo().values():
3882 for nic_idx, nic in enumerate(instance.nics):
3883 params_copy = copy.deepcopy(nic.nicparams)
3884 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3886 # check parameter syntax
3888 objects.NIC.CheckParameterSyntax(params_filled)
3889 except errors.ConfigurationError, err:
3890 nic_errors.append("Instance %s, nic/%d: %s" %
3891 (instance.name, nic_idx, err))
3893 # if we're moving instances to routed, check that they have an ip
3894 target_mode = params_filled[constants.NIC_MODE]
3895 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3896 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3897 " address" % (instance.name, nic_idx))
3899 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3900 "\n".join(nic_errors))
3902 # hypervisor list/parameters
3903 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3904 if self.op.hvparams:
3905 for hv_name, hv_dict in self.op.hvparams.items():
3906 if hv_name not in self.new_hvparams:
3907 self.new_hvparams[hv_name] = hv_dict
3909 self.new_hvparams[hv_name].update(hv_dict)
3911 # disk template parameters
3912 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3913 if self.op.diskparams:
3914 for dt_name, dt_params in self.op.diskparams.items():
3915 if dt_name not in self.new_diskparams:
3916 self.new_diskparams[dt_name] = dt_params
3918 self.new_diskparams[dt_name].update(dt_params)
3920 # os hypervisor parameters
3921 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3923 for os_name, hvs in self.op.os_hvp.items():
3924 if os_name not in self.new_os_hvp:
3925 self.new_os_hvp[os_name] = hvs
3927 for hv_name, hv_dict in hvs.items():
3928 if hv_name not in self.new_os_hvp[os_name]:
3929 self.new_os_hvp[os_name][hv_name] = hv_dict
3931 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3934 self.new_osp = objects.FillDict(cluster.osparams, {})
3935 if self.op.osparams:
3936 for os_name, osp in self.op.osparams.items():
3937 if os_name not in self.new_osp:
3938 self.new_osp[os_name] = {}
3940 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3943 if not self.new_osp[os_name]:
3944 # we removed all parameters
3945 del self.new_osp[os_name]
3947 # check the parameter validity (remote check)
3948 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3949 os_name, self.new_osp[os_name])
3951 # changes to the hypervisor list
3952 if self.op.enabled_hypervisors is not None:
3953 self.hv_list = self.op.enabled_hypervisors
3954 for hv in self.hv_list:
3955 # if the hypervisor doesn't already exist in the cluster
3956 # hvparams, we initialize it to empty, and then (in both
3957 # cases) we make sure to fill the defaults, as we might not
3958 # have a complete defaults list if the hypervisor wasn't enabled before
3960 if hv not in new_hvp:
3962 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3963 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3965 self.hv_list = cluster.enabled_hypervisors
3967 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3968 # either the enabled list has changed, or the parameters have, validate
3969 for hv_name, hv_params in self.new_hvparams.items():
3970 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3971 (self.op.enabled_hypervisors and
3972 hv_name in self.op.enabled_hypervisors)):
3973 # either this is a new hypervisor, or its parameters have changed
3974 hv_class = hypervisor.GetHypervisor(hv_name)
3975 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3976 hv_class.CheckParameterSyntax(hv_params)
3977 _CheckHVParams(self, node_list, hv_name, hv_params)
3980 # no need to check any newly-enabled hypervisors, since the
3981 # defaults have already been checked in the above code-block
3982 for os_name, os_hvp in self.new_os_hvp.items():
3983 for hv_name, hv_params in os_hvp.items():
3984 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3985 # we need to fill in the new os_hvp on top of the actual hv_params
3986 cluster_defaults = self.new_hvparams.get(hv_name, {})
3987 new_osp = objects.FillDict(cluster_defaults, hv_params)
3988 hv_class = hypervisor.GetHypervisor(hv_name)
3989 hv_class.CheckParameterSyntax(new_osp)
3990 _CheckHVParams(self, node_list, hv_name, new_osp)
3992 if self.op.default_iallocator:
3993 alloc_script = utils.FindFile(self.op.default_iallocator,
3994 constants.IALLOCATOR_SEARCH_PATH,
3996 if alloc_script is None:
3997 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3998 " specified" % self.op.default_iallocator,
4001 def Exec(self, feedback_fn):
4002 """Change the parameters of the cluster.
4005 if self.op.vg_name is not None:
4006 new_volume = self.op.vg_name
4009 if new_volume != self.cfg.GetVGName():
4010 self.cfg.SetVGName(new_volume)
4012 feedback_fn("Cluster LVM configuration already in desired"
4013 " state, not changing")
4014 if self.op.drbd_helper is not None:
4015 new_helper = self.op.drbd_helper
4018 if new_helper != self.cfg.GetDRBDHelper():
4019 self.cfg.SetDRBDHelper(new_helper)
4021 feedback_fn("Cluster DRBD helper already in desired state,"
4023 if self.op.hvparams:
4024 self.cluster.hvparams = self.new_hvparams
4026 self.cluster.os_hvp = self.new_os_hvp
4027 if self.op.enabled_hypervisors is not None:
4028 self.cluster.hvparams = self.new_hvparams
4029 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4030 if self.op.beparams:
4031 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4032 if self.op.nicparams:
4033 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4035 self.cluster.ipolicy = self.new_ipolicy
4036 if self.op.osparams:
4037 self.cluster.osparams = self.new_osp
4038 if self.op.ndparams:
4039 self.cluster.ndparams = self.new_ndparams
4040 if self.op.diskparams:
4041 self.cluster.diskparams = self.new_diskparams
4042 if self.op.hv_state:
4043 self.cluster.hv_state_static = self.new_hv_state
4044 if self.op.disk_state:
4045 self.cluster.disk_state_static = self.new_disk_state
4047 if self.op.candidate_pool_size is not None:
4048 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4049 # we need to update the pool size here, otherwise the save will fail
4050 _AdjustCandidatePool(self, [])
4052 if self.op.maintain_node_health is not None:
4053 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4054 feedback_fn("Note: CONFD was disabled at build time, node health"
4055 " maintenance is not useful (still enabling it)")
4056 self.cluster.maintain_node_health = self.op.maintain_node_health
4058 if self.op.prealloc_wipe_disks is not None:
4059 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4061 if self.op.add_uids is not None:
4062 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4064 if self.op.remove_uids is not None:
4065 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4067 if self.op.uid_pool is not None:
4068 self.cluster.uid_pool = self.op.uid_pool
4070 if self.op.default_iallocator is not None:
4071 self.cluster.default_iallocator = self.op.default_iallocator
4073 if self.op.reserved_lvs is not None:
4074 self.cluster.reserved_lvs = self.op.reserved_lvs
4076 if self.op.use_external_mip_script is not None:
4077 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4079 def helper_os(aname, mods, desc):
4081 lst = getattr(self.cluster, aname)
4082 for key, val in mods:
4083 if key == constants.DDM_ADD:
4085 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4088 elif key == constants.DDM_REMOVE:
4092 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4094 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4096 if self.op.hidden_os:
4097 helper_os("hidden_os", self.op.hidden_os, "hidden")
4099 if self.op.blacklisted_os:
4100 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4102 if self.op.master_netdev:
4103 master_params = self.cfg.GetMasterNetworkParameters()
4104 ems = self.cfg.GetUseExternalMipScript()
4105 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4106 self.cluster.master_netdev)
4107 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4109 result.Raise("Could not disable the master ip")
4110 feedback_fn("Changing master_netdev from %s to %s" %
4111 (master_params.netdev, self.op.master_netdev))
4112 self.cluster.master_netdev = self.op.master_netdev
4114 if self.op.master_netmask:
4115 master_params = self.cfg.GetMasterNetworkParameters()
4116 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4117 result = self.rpc.call_node_change_master_netmask(master_params.name,
4118 master_params.netmask,
4119 self.op.master_netmask,
4121 master_params.netdev)
4123 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4126 self.cluster.master_netmask = self.op.master_netmask
4128 self.cfg.Update(self.cluster, feedback_fn)
4130 if self.op.master_netdev:
4131 master_params = self.cfg.GetMasterNetworkParameters()
4132 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4133 self.op.master_netdev)
4134 ems = self.cfg.GetUseExternalMipScript()
4135 result = self.rpc.call_node_activate_master_ip(master_params.name,
4138 self.LogWarning("Could not re-enable the master ip on"
4139 " the master, please restart manually: %s",
4143 def _UploadHelper(lu, nodes, fname):
4144 """Helper for uploading a file and showing warnings.
4147 if os.path.exists(fname):
4148 result = lu.rpc.call_upload_file(nodes, fname)
4149 for to_node, to_result in result.items():
4150 msg = to_result.fail_msg
4152 msg = ("Copy of file %s to node %s failed: %s" %
4153 (fname, to_node, msg))
4154 lu.proc.LogWarning(msg)
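
# Editor's sketch (hypothetical helper, not part of the original module):
# _UploadHelper is called once per file and per node list, as in
# LUClusterRename.Exec and _RedistributeAncillaryFiles; a caller with several
# files simply loops over them.
def _ExampleUploadMany(lu, nodes, filenames):
  """Uploads each of the given files to the given nodes, warning on errors."""
  for fname in filenames:
    _UploadHelper(lu, nodes, fname)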
4157 def _ComputeAncillaryFiles(cluster, redist):
4158 """Compute files external to Ganeti which need to be consistent.
4160 @type redist: boolean
4161 @param redist: Whether to include files which need to be redistributed
4164 # Compute files for all nodes
4166 constants.SSH_KNOWN_HOSTS_FILE,
4167 constants.CONFD_HMAC_KEY,
4168 constants.CLUSTER_DOMAIN_SECRET_FILE,
4169 constants.SPICE_CERT_FILE,
4170 constants.SPICE_CACERT_FILE,
4171 constants.RAPI_USERS_FILE,
4175 files_all.update(constants.ALL_CERT_FILES)
4176 files_all.update(ssconf.SimpleStore().GetFileList())
4178 # we need to ship at least the RAPI certificate
4179 files_all.add(constants.RAPI_CERT_FILE)
4181 if cluster.modify_etc_hosts:
4182 files_all.add(constants.ETC_HOSTS)
4184 # Files which are optional; these must:
4185 # - be present in one other category as well
4186 # - either exist or not exist on all nodes of that category (mc, vm all)
4188 constants.RAPI_USERS_FILE,
4191 # Files which should only be on master candidates
4195 files_mc.add(constants.CLUSTER_CONF_FILE)
4197 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4199 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4201 # Files which should only be on VM-capable nodes
4202 files_vm = set(filename
4203 for hv_name in cluster.enabled_hypervisors
4204 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4206 files_opt |= set(filename
4207 for hv_name in cluster.enabled_hypervisors
4208 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4210 # Filenames in each category must be unique
4211 all_files_set = files_all | files_mc | files_vm
4212 assert (len(all_files_set) ==
4213 sum(map(len, [files_all, files_mc, files_vm]))), \
4214 "Found file listed in more than one file list"
4216 # Optional files must be present in one other category
4217 assert all_files_set.issuperset(files_opt), \
4218 "Optional file not in a different required list"
4220 return (files_all, files_opt, files_mc, files_vm)
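
# Editor's note (illustrative, not part of the original module): the assertions
# above boil down to two invariants - the three required categories are
# pairwise disjoint, and every optional file also appears in one of them. A
# hypothetical re-check of a result tuple could look like this.
def _ExampleCheckAncillaryInvariants(files_all, files_opt, files_mc, files_vm):
  """Returns True if the _ComputeAncillaryFiles invariants hold."""
  categories = [set(files_all), set(files_mc), set(files_vm)]
  union = set()
  for cat in categories:
    union |= cat
  disjoint = len(union) == sum(len(cat) for cat in categories)
  return disjoint and union.issuperset(files_opt)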
4223 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4224 """Distribute additional files which are part of the cluster configuration.
4226 ConfigWriter takes care of distributing the config and ssconf files, but
4227 there are more files which should be distributed to all nodes. This function
4228 makes sure those are copied.
4230 @param lu: calling logical unit
4231 @param additional_nodes: list of nodes not in the config to distribute to
4232 @type additional_vm: boolean
4233 @param additional_vm: whether the additional nodes are vm-capable or not
4236 # Gather target nodes
4237 cluster = lu.cfg.GetClusterInfo()
4238 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4240 online_nodes = lu.cfg.GetOnlineNodeList()
4241 vm_nodes = lu.cfg.GetVmCapableNodeList()
4243 if additional_nodes is not None:
4244 online_nodes.extend(additional_nodes)
4246 vm_nodes.extend(additional_nodes)
4248 # Never distribute to master node
4249 for nodelist in [online_nodes, vm_nodes]:
4250 if master_info.name in nodelist:
4251 nodelist.remove(master_info.name)
4254 (files_all, _, files_mc, files_vm) = \
4255 _ComputeAncillaryFiles(cluster, True)
4257 # Never re-distribute configuration file from here
4258 assert not (constants.CLUSTER_CONF_FILE in files_all or
4259 constants.CLUSTER_CONF_FILE in files_vm)
4260 assert not files_mc, "Master candidates not handled in this function"
4263 (online_nodes, files_all),
4264 (vm_nodes, files_vm),
4268 for (node_list, files) in filemap:
4270 _UploadHelper(lu, node_list, fname)
4273 class LUClusterRedistConf(NoHooksLU):
4274 """Force the redistribution of cluster configuration.
4276 This is a very simple LU.
4281 def ExpandNames(self):
4282 self.needed_locks = {
4283 locking.LEVEL_NODE: locking.ALL_SET,
4285 self.share_locks[locking.LEVEL_NODE] = 1
4287 def Exec(self, feedback_fn):
4288 """Redistribute the configuration.
4291 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4292 _RedistributeAncillaryFiles(self)
4295 class LUClusterActivateMasterIp(NoHooksLU):
4296 """Activate the master IP on the master node.
4299 def Exec(self, feedback_fn):
4300 """Activate the master IP.
4303 master_params = self.cfg.GetMasterNetworkParameters()
4304 ems = self.cfg.GetUseExternalMipScript()
4305 result = self.rpc.call_node_activate_master_ip(master_params.name,
4307 result.Raise("Could not activate the master IP")
4310 class LUClusterDeactivateMasterIp(NoHooksLU):
4311 """Deactivate the master IP on the master node.
4314 def Exec(self, feedback_fn):
4315 """Deactivate the master IP.
4318 master_params = self.cfg.GetMasterNetworkParameters()
4319 ems = self.cfg.GetUseExternalMipScript()
4320 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4322 result.Raise("Could not deactivate the master IP")
4325 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4326 """Sleep and poll for an instance's disk to sync.
4329 if not instance.disks or disks is not None and not disks:
4332 disks = _ExpandCheckDisks(instance, disks)
4335 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4337 node = instance.primary_node
4340 lu.cfg.SetDiskID(dev, node)
4342 # TODO: Convert to utils.Retry
4345 degr_retries = 10 # in seconds, as we sleep 1 second each time
4349 cumul_degraded = False
4350 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4351 msg = rstats.fail_msg
4353 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4356 raise errors.RemoteError("Can't contact node %s for mirror data,"
4357 " aborting." % node)
4360 rstats = rstats.payload
4362 for i, mstat in enumerate(rstats):
4364 lu.LogWarning("Can't compute data for node %s/%s",
4365 node, disks[i].iv_name)
4368 cumul_degraded = (cumul_degraded or
4369 (mstat.is_degraded and mstat.sync_percent is None))
4370 if mstat.sync_percent is not None:
4372 if mstat.estimated_time is not None:
4373 rem_time = ("%s remaining (estimated)" %
4374 utils.FormatSeconds(mstat.estimated_time))
4375 max_time = mstat.estimated_time
4377 rem_time = "no time estimate"
4378 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4379 (disks[i].iv_name, mstat.sync_percent, rem_time))
4381 # if we're done but degraded, let's do a few small retries, to
4382 # make sure we see a stable and not transient situation; therefore
4383 # we force restart of the loop
4384 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4385 logging.info("Degraded disks found, %d retries left", degr_retries)
4393 time.sleep(min(60, max_time))
4396 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4397 return not cumul_degraded
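
# Editor's sketch (hypothetical caller, not part of the original module):
# _WaitForSync returns False when a mirror stays degraded after polling, so a
# typical caller aborts the operation in that case; "lu" and "instance" stand
# for a LogicalUnit and an instance object as used throughout this module.
def _ExampleEnsureSynced(lu, instance):
  """Waits for the instance's disks and raises if they remain degraded."""
  if not _WaitForSync(lu, instance):
    raise errors.OpExecError("Disks of instance %s are degraded" %
                             instance.name)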
4400 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4401 """Check that mirrors are not degraded.
4403 The ldisk parameter, if True, will change the test from the
4404 is_degraded attribute (which represents overall non-ok status for
4405 the device(s)) to the ldisk (representing the local storage status).
4408 lu.cfg.SetDiskID(dev, node)
4412 if on_primary or dev.AssembleOnSecondary():
4413 rstats = lu.rpc.call_blockdev_find(node, dev)
4414 msg = rstats.fail_msg
4416 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4418 elif not rstats.payload:
4419 lu.LogWarning("Can't find disk on node %s", node)
4423 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4425 result = result and not rstats.payload.is_degraded
4428 for child in dev.children:
4429 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
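
# Editor's sketch (hypothetical, not part of the original module): the ldisk
# flag switches the check from the overall is_degraded status to the local
# storage (LDS_OKAY) status of the same device, so both views can be obtained
# with two calls.
def _ExampleDiskConsistencyViews(lu, dev, node, on_primary):
  """Returns (overall_ok, local_storage_ok) for a single device."""
  overall_ok = _CheckDiskConsistency(lu, dev, node, on_primary)
  local_ok = _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=True)
  return (overall_ok, local_ok)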
4434 class LUOobCommand(NoHooksLU):
4435 """Logical unit for OOB handling.
4439 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4441 def ExpandNames(self):
4442 """Gather locks we need.
4445 if self.op.node_names:
4446 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4447 lock_names = self.op.node_names
4449 lock_names = locking.ALL_SET
4451 self.needed_locks = {
4452 locking.LEVEL_NODE: lock_names,
4455 def CheckPrereq(self):
4456 """Check prerequisites.
4459 - the node exists in the configuration
4462 Any errors are signaled by raising errors.OpPrereqError.
4466 self.master_node = self.cfg.GetMasterNode()
4468 assert self.op.power_delay >= 0.0
4470 if self.op.node_names:
4471 if (self.op.command in self._SKIP_MASTER and
4472 self.master_node in self.op.node_names):
4473 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4474 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4476 if master_oob_handler:
4477 additional_text = ("run '%s %s %s' if you want to operate on the"
4478 " master regardless") % (master_oob_handler,
4482 additional_text = "it does not support out-of-band operations"
4484 raise errors.OpPrereqError(("Operating on the master node %s is not"
4485 " allowed for %s; %s") %
4486 (self.master_node, self.op.command,
4487 additional_text), errors.ECODE_INVAL)
4489 self.op.node_names = self.cfg.GetNodeList()
4490 if self.op.command in self._SKIP_MASTER:
4491 self.op.node_names.remove(self.master_node)
4493 if self.op.command in self._SKIP_MASTER:
4494 assert self.master_node not in self.op.node_names
4496 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4498 raise errors.OpPrereqError("Node %s not found" % node_name,
4501 self.nodes.append(node)
4503 if (not self.op.ignore_status and
4504 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4505 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4506 " not marked offline") % node_name,
4509 def Exec(self, feedback_fn):
4510 """Execute OOB and return result if we expect any.
4513 master_node = self.master_node
4516 for idx, node in enumerate(utils.NiceSort(self.nodes,
4517 key=lambda node: node.name)):
4518 node_entry = [(constants.RS_NORMAL, node.name)]
4519 ret.append(node_entry)
4521 oob_program = _SupportsOob(self.cfg, node)
4524 node_entry.append((constants.RS_UNAVAIL, None))
4527 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4528 self.op.command, oob_program, node.name)
4529 result = self.rpc.call_run_oob(master_node, oob_program,
4530 self.op.command, node.name,
4534 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4535 node.name, result.fail_msg)
4536 node_entry.append((constants.RS_NODATA, None))
4539 self._CheckPayload(result)
4540 except errors.OpExecError, err:
4541 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4543 node_entry.append((constants.RS_NODATA, None))
4545 if self.op.command == constants.OOB_HEALTH:
4546 # For health we should log important events
4547 for item, status in result.payload:
4548 if status in [constants.OOB_STATUS_WARNING,
4549 constants.OOB_STATUS_CRITICAL]:
4550 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4551 item, node.name, status)
4553 if self.op.command == constants.OOB_POWER_ON:
4555 elif self.op.command == constants.OOB_POWER_OFF:
4556 node.powered = False
4557 elif self.op.command == constants.OOB_POWER_STATUS:
4558 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4559 if powered != node.powered:
4560 logging.warning(("Recorded power state (%s) of node '%s' does not"
4561 " match actual power state (%s)"), node.powered,
4564 # For configuration changing commands we should update the node
4565 if self.op.command in (constants.OOB_POWER_ON,
4566 constants.OOB_POWER_OFF):
4567 self.cfg.Update(node, feedback_fn)
4569 node_entry.append((constants.RS_NORMAL, result.payload))
4571 if (self.op.command == constants.OOB_POWER_ON and
4572 idx < len(self.nodes) - 1):
4573 time.sleep(self.op.power_delay)
4577 def _CheckPayload(self, result):
4578 """Checks if the payload is valid.
4580 @param result: RPC result
4581 @raises errors.OpExecError: If payload is not valid
4585 if self.op.command == constants.OOB_HEALTH:
4586 if not isinstance(result.payload, list):
4587 errs.append("command 'health' is expected to return a list but got %s" %
4588 type(result.payload))
4590 for item, status in result.payload:
4591 if status not in constants.OOB_STATUSES:
4592 errs.append("health item '%s' has invalid status '%s'" %
4595 if self.op.command == constants.OOB_POWER_STATUS:
4596 if not isinstance(result.payload, dict):
4597 errs.append("power-status is expected to return a dict but got %s" %
4598 type(result.payload))
4600 if self.op.command in [
4601 constants.OOB_POWER_ON,
4602 constants.OOB_POWER_OFF,
4603 constants.OOB_POWER_CYCLE,
4605 if result.payload is not None:
4606 errs.append("%s is expected to not return payload but got '%s'" %
4607 (self.op.command, result.payload))
4610 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4611 utils.CommaJoin(errs))
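
#: Editor's example (hypothetical data, unused by the code): a payload in the
#: shape accepted by LUOobCommand._CheckPayload for the "health" command - a
#: list of (item, status) pairs whose statuses are members of
#: constants.OOB_STATUSES; power-status returns a dict instead, and the
#: power-on/off/cycle commands return no payload at all
_EXAMPLE_OOB_HEALTH_PAYLOAD = [
  ("PSU1", constants.OOB_STATUS_WARNING),
  ("FAN2", constants.OOB_STATUS_CRITICAL),
  ]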
4614 class _OsQuery(_QueryBase):
4615 FIELDS = query.OS_FIELDS
4617 def ExpandNames(self, lu):
4618 # Lock all nodes in shared mode
4619 # Temporary removal of locks, should be reverted later
4620 # TODO: reintroduce locks when they are lighter-weight
4621 lu.needed_locks = {}
4622 #self.share_locks[locking.LEVEL_NODE] = 1
4623 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4625 # The following variables interact with _QueryBase._GetNames
4627 self.wanted = self.names
4629 self.wanted = locking.ALL_SET
4631 self.do_locking = self.use_locking
4633 def DeclareLocks(self, lu, level):
4637 def _DiagnoseByOS(rlist):
4638 """Remaps a per-node return list into a per-os per-node dictionary
4640 @param rlist: a map with node names as keys and OS objects as values
4643 @return: a dictionary with osnames as keys and as value another
4644 map, with nodes as keys and tuples of (path, status, diagnose,
4645 variants, parameters, api_versions) as values, eg::
4647 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4648 (/srv/..., False, "invalid api")],
4649 "node2": [(/srv/..., True, "", [], [])]}
4654 # we build here the list of nodes that didn't fail the RPC (at RPC
4655 # level), so that nodes with a non-responding node daemon don't
4656 # make all OSes invalid
4657 good_nodes = [node_name for node_name in rlist
4658 if not rlist[node_name].fail_msg]
4659 for node_name, nr in rlist.items():
4660 if nr.fail_msg or not nr.payload:
4662 for (name, path, status, diagnose, variants,
4663 params, api_versions) in nr.payload:
4664 if name not in all_os:
4665 # build a list of nodes for this os containing empty lists
4666 # for each node in node_list
4668 for nname in good_nodes:
4669 all_os[name][nname] = []
4670 # convert params from [name, help] to (name, help)
4671 params = [tuple(v) for v in params]
4672 all_os[name][node_name].append((path, status, diagnose,
4673 variants, params, api_versions))
4676 def _GetQueryData(self, lu):
4677 """Computes the list of nodes and their attributes.
4680 # Locking is not used
4681 assert not (compat.any(lu.glm.is_owned(level)
4682 for level in locking.LEVELS
4683 if level != locking.LEVEL_CLUSTER) or
4684 self.do_locking or self.use_locking)
4686 valid_nodes = [node.name
4687 for node in lu.cfg.GetAllNodesInfo().values()
4688 if not node.offline and node.vm_capable]
4689 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4690 cluster = lu.cfg.GetClusterInfo()
4694 for (os_name, os_data) in pol.items():
4695 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4696 hidden=(os_name in cluster.hidden_os),
4697 blacklisted=(os_name in cluster.blacklisted_os))
4701 api_versions = set()
4703 for idx, osl in enumerate(os_data.values()):
4704 info.valid = bool(info.valid and osl and osl[0][1])
4708 (node_variants, node_params, node_api) = osl[0][3:6]
4711 variants.update(node_variants)
4712 parameters.update(node_params)
4713 api_versions.update(node_api)
4715 # Filter out inconsistent values
4716 variants.intersection_update(node_variants)
4717 parameters.intersection_update(node_params)
4718 api_versions.intersection_update(node_api)
4720 info.variants = list(variants)
4721 info.parameters = list(parameters)
4722 info.api_versions = list(api_versions)
4724 data[os_name] = info
4726 # Prepare data in requested order
4727 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4731 class LUOsDiagnose(NoHooksLU):
4732 """Logical unit for OS diagnose/query.
4738 def _BuildFilter(fields, names):
4739 """Builds a filter for querying OSes.
4742 name_filter = qlang.MakeSimpleFilter("name", names)
4744 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4745 # respective field is not requested
4746 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4747 for fname in ["hidden", "blacklisted"]
4748 if fname not in fields]
4749 if "valid" not in fields:
4750 status_filter.append([qlang.OP_TRUE, "valid"])
4753 status_filter.insert(0, qlang.OP_AND)
4755 status_filter = None
4757 if name_filter and status_filter:
4758 return [qlang.OP_AND, name_filter, status_filter]
4762 return status_filter
4764 def CheckArguments(self):
4765 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4766 self.op.output_fields, False)
4768 def ExpandNames(self):
4769 self.oq.ExpandNames(self)
4771 def Exec(self, feedback_fn):
4772 return self.oq.OldStyleQuery(self)
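
# Editor's sketch (hypothetical, not part of the original module): the filter
# built by LUOsDiagnose._BuildFilter hides hidden, blacklisted and invalid
# OSes unless the corresponding status field is requested explicitly, so a
# default listing and a "show everything" listing use different filters.
def _ExampleDefaultOsFilter(names):
  """Builds the default OS query filter for the given OS names."""
  return LUOsDiagnose._BuildFilter(["name", "variants"], names) # pylint: disable=W0212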
4775 class LUNodeRemove(LogicalUnit):
4776 """Logical unit for removing a node.
4779 HPATH = "node-remove"
4780 HTYPE = constants.HTYPE_NODE
4782 def BuildHooksEnv(self):
4785 This doesn't run on the target node in the pre phase as a failed
4786 node would then be impossible to remove.
4790 "OP_TARGET": self.op.node_name,
4791 "NODE_NAME": self.op.node_name,
4794 def BuildHooksNodes(self):
4795 """Build hooks nodes.
4798 all_nodes = self.cfg.GetNodeList()
4800 all_nodes.remove(self.op.node_name)
4802 logging.warning("Node '%s', which is about to be removed, was not found"
4803 " in the list of all nodes", self.op.node_name)
4804 return (all_nodes, all_nodes)
4806 def CheckPrereq(self):
4807 """Check prerequisites.
4810 - the node exists in the configuration
4811 - it does not have primary or secondary instances
4812 - it's not the master
4814 Any errors are signaled by raising errors.OpPrereqError.
4817 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4818 node = self.cfg.GetNodeInfo(self.op.node_name)
4819 assert node is not None
4821 masternode = self.cfg.GetMasterNode()
4822 if node.name == masternode:
4823 raise errors.OpPrereqError("Node is the master node, failover to another"
4824 " node is required", errors.ECODE_INVAL)
4826 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4827 if node.name in instance.all_nodes:
4828 raise errors.OpPrereqError("Instance %s is still running on the node,"
4829 " please remove it first" % instance_name,
4831 self.op.node_name = node.name
4834 def Exec(self, feedback_fn):
4835 """Removes the node from the cluster.
4839 logging.info("Stopping the node daemon and removing configs from node %s",
4842 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4844 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4847 # Promote nodes to master candidate as needed
4848 _AdjustCandidatePool(self, exceptions=[node.name])
4849 self.context.RemoveNode(node.name)
4851 # Run post hooks on the node before it's removed
4852 _RunPostHook(self, node.name)
4854 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4855 msg = result.fail_msg
4857 self.LogWarning("Errors encountered on the remote node while leaving"
4858 " the cluster: %s", msg)
4860 # Remove node from our /etc/hosts
4861 if self.cfg.GetClusterInfo().modify_etc_hosts:
4862 master_node = self.cfg.GetMasterNode()
4863 result = self.rpc.call_etc_hosts_modify(master_node,
4864 constants.ETC_HOSTS_REMOVE,
4866 result.Raise("Can't update hosts file with new host data")
4867 _RedistributeAncillaryFiles(self)
4870 class _NodeQuery(_QueryBase):
4871 FIELDS = query.NODE_FIELDS
4873 def ExpandNames(self, lu):
4874 lu.needed_locks = {}
4875 lu.share_locks = _ShareAll()
4878 self.wanted = _GetWantedNodes(lu, self.names)
4880 self.wanted = locking.ALL_SET
4882 self.do_locking = (self.use_locking and
4883 query.NQ_LIVE in self.requested_data)
4886 # If any non-static field is requested we need to lock the nodes
4887 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4889 def DeclareLocks(self, lu, level):
4892 def _GetQueryData(self, lu):
4893 """Computes the list of nodes and their attributes.
4896 all_info = lu.cfg.GetAllNodesInfo()
4898 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4900 # Gather data as requested
4901 if query.NQ_LIVE in self.requested_data:
4902 # filter out non-vm_capable nodes
4903 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4905 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4906 [lu.cfg.GetHypervisorType()])
4907 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4908 for (name, nresult) in node_data.items()
4909 if not nresult.fail_msg and nresult.payload)
4913 if query.NQ_INST in self.requested_data:
4914 node_to_primary = dict([(name, set()) for name in nodenames])
4915 node_to_secondary = dict([(name, set()) for name in nodenames])
4917 inst_data = lu.cfg.GetAllInstancesInfo()
4919 for inst in inst_data.values():
4920 if inst.primary_node in node_to_primary:
4921 node_to_primary[inst.primary_node].add(inst.name)
4922 for secnode in inst.secondary_nodes:
4923 if secnode in node_to_secondary:
4924 node_to_secondary[secnode].add(inst.name)
4926 node_to_primary = None
4927 node_to_secondary = None
4929 if query.NQ_OOB in self.requested_data:
4930 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4931 for name, node in all_info.iteritems())
4935 if query.NQ_GROUP in self.requested_data:
4936 groups = lu.cfg.GetAllNodeGroupsInfo()
4940 return query.NodeQueryData([all_info[name] for name in nodenames],
4941 live_data, lu.cfg.GetMasterNode(),
4942 node_to_primary, node_to_secondary, groups,
4943 oob_support, lu.cfg.GetClusterInfo())
4946 class LUNodeQuery(NoHooksLU):
4947 """Logical unit for querying nodes.
4950 # pylint: disable=W0142
4953 def CheckArguments(self):
4954 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4955 self.op.output_fields, self.op.use_locking)
4957 def ExpandNames(self):
4958 self.nq.ExpandNames(self)
4960 def DeclareLocks(self, level):
4961 self.nq.DeclareLocks(self, level)
4963 def Exec(self, feedback_fn):
4964 return self.nq.OldStyleQuery(self)
4967 class LUNodeQueryvols(NoHooksLU):
4968 """Logical unit for getting volumes on node(s).
4972 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4973 _FIELDS_STATIC = utils.FieldSet("node")
4975 def CheckArguments(self):
4976 _CheckOutputFields(static=self._FIELDS_STATIC,
4977 dynamic=self._FIELDS_DYNAMIC,
4978 selected=self.op.output_fields)
4980 def ExpandNames(self):
4981 self.share_locks = _ShareAll()
4982 self.needed_locks = {}
4984 if not self.op.nodes:
4985 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4987 self.needed_locks[locking.LEVEL_NODE] = \
4988 _GetWantedNodes(self, self.op.nodes)
4990 def Exec(self, feedback_fn):
4991 """Computes the list of nodes and their attributes.
4994 nodenames = self.owned_locks(locking.LEVEL_NODE)
4995 volumes = self.rpc.call_node_volumes(nodenames)
4997 ilist = self.cfg.GetAllInstancesInfo()
4998 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5001 for node in nodenames:
5002 nresult = volumes[node]
5005 msg = nresult.fail_msg
5007 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5010 node_vols = sorted(nresult.payload,
5011 key=operator.itemgetter("dev"))
5013 for vol in node_vols:
5015 for field in self.op.output_fields:
5018 elif field == "phys":
5022 elif field == "name":
5024 elif field == "size":
5025 val = int(float(vol["size"]))
5026 elif field == "instance":
5027 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5029 raise errors.ParameterError(field)
5030 node_output.append(str(val))
5032 output.append(node_output)
5037 class LUNodeQueryStorage(NoHooksLU):
5038 """Logical unit for getting information on storage units on node(s).
5041 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5044 def CheckArguments(self):
5045 _CheckOutputFields(static=self._FIELDS_STATIC,
5046 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5047 selected=self.op.output_fields)
5049 def ExpandNames(self):
5050 self.share_locks = _ShareAll()
5051 self.needed_locks = {}
5054 self.needed_locks[locking.LEVEL_NODE] = \
5055 _GetWantedNodes(self, self.op.nodes)
5057 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5059 def Exec(self, feedback_fn):
5060 """Computes the list of nodes and their attributes.
5063 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5065 # Always get name to sort by
5066 if constants.SF_NAME in self.op.output_fields:
5067 fields = self.op.output_fields[:]
5069 fields = [constants.SF_NAME] + self.op.output_fields
5071 # Never ask for node or type as it's only known to the LU
5072 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5073 while extra in fields:
5074 fields.remove(extra)
5076 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5077 name_idx = field_idx[constants.SF_NAME]
5079 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5080 data = self.rpc.call_storage_list(self.nodes,
5081 self.op.storage_type, st_args,
5082 self.op.name, fields)
5086 for node in utils.NiceSort(self.nodes):
5087 nresult = data[node]
5091 msg = nresult.fail_msg
5093 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5096 rows = dict([(row[name_idx], row) for row in nresult.payload])
5098 for name in utils.NiceSort(rows.keys()):
5103 for field in self.op.output_fields:
5104 if field == constants.SF_NODE:
5106 elif field == constants.SF_TYPE:
5107 val = self.op.storage_type
5108 elif field in field_idx:
5109 val = row[field_idx[field]]
5111 raise errors.ParameterError(field)
5120 class _InstanceQuery(_QueryBase):
5121 FIELDS = query.INSTANCE_FIELDS
5123 def ExpandNames(self, lu):
5124 lu.needed_locks = {}
5125 lu.share_locks = _ShareAll()
5128 self.wanted = _GetWantedInstances(lu, self.names)
5130 self.wanted = locking.ALL_SET
5132 self.do_locking = (self.use_locking and
5133 query.IQ_LIVE in self.requested_data)
5135 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5136 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5137 lu.needed_locks[locking.LEVEL_NODE] = []
5138 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5140 self.do_grouplocks = (self.do_locking and
5141 query.IQ_NODES in self.requested_data)
5143 def DeclareLocks(self, lu, level):
5145 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5146 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5148 # Lock all groups used by instances optimistically; this requires going
5149 # via the node before it's locked, requiring verification later on
5150 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5152 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5153 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5154 elif level == locking.LEVEL_NODE:
5155 lu._LockInstancesNodes() # pylint: disable=W0212
5158 def _CheckGroupLocks(lu):
5159 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5160 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5162 # Check if node groups for locked instances are still correct
5163 for instance_name in owned_instances:
5164 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5166 def _GetQueryData(self, lu):
5167 """Computes the list of instances and their attributes.
5170 if self.do_grouplocks:
5171 self._CheckGroupLocks(lu)
5173 cluster = lu.cfg.GetClusterInfo()
5174 all_info = lu.cfg.GetAllInstancesInfo()
5176 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5178 instance_list = [all_info[name] for name in instance_names]
5179 nodes = frozenset(itertools.chain(*(inst.all_nodes
5180 for inst in instance_list)))
5181 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5184 wrongnode_inst = set()
5186 # Gather data as requested
5187 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5189 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5191 result = node_data[name]
5193 # offline nodes will be in both lists
5194 assert result.fail_msg
5195 offline_nodes.append(name)
5197 bad_nodes.append(name)
5198 elif result.payload:
5199 for inst in result.payload:
5200 if inst in all_info:
5201 if all_info[inst].primary_node == name:
5202 live_data.update(result.payload)
5204 wrongnode_inst.add(inst)
5206 # orphan instance; we don't list it here as we don't
5207 # handle this case yet in the output of instance listing
5208 logging.warning("Orphan instance '%s' found on node %s",
5210 # else no instance is alive
5214 if query.IQ_DISKUSAGE in self.requested_data:
5215 disk_usage = dict((inst.name,
5216 _ComputeDiskSize(inst.disk_template,
5217 [{constants.IDISK_SIZE: disk.size}
5218 for disk in inst.disks]))
5219 for inst in instance_list)
5223 if query.IQ_CONSOLE in self.requested_data:
5225 for inst in instance_list:
5226 if inst.name in live_data:
5227 # Instance is running
5228 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5230 consinfo[inst.name] = None
5231 assert set(consinfo.keys()) == set(instance_names)
5235 if query.IQ_NODES in self.requested_data:
5236 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5238 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5239 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5240 for uuid in set(map(operator.attrgetter("group"),
5246 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5247 disk_usage, offline_nodes, bad_nodes,
5248 live_data, wrongnode_inst, consinfo,
5252 class LUQuery(NoHooksLU):
5253 """Query for resources/items of a certain kind.
5256 # pylint: disable=W0142
5259 def CheckArguments(self):
5260 qcls = _GetQueryImplementation(self.op.what)
5262 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5264 def ExpandNames(self):
5265 self.impl.ExpandNames(self)
5267 def DeclareLocks(self, level):
5268 self.impl.DeclareLocks(self, level)
5270 def Exec(self, feedback_fn):
5271 return self.impl.NewStyleQuery(self)
5274 class LUQueryFields(NoHooksLU):
5275 """Query for resources/items of a certain kind.
5278 # pylint: disable=W0142
5281 def CheckArguments(self):
5282 self.qcls = _GetQueryImplementation(self.op.what)
5284 def ExpandNames(self):
5285 self.needed_locks = {}
5287 def Exec(self, feedback_fn):
5288 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5291 class LUNodeModifyStorage(NoHooksLU):
5292 """Logical unit for modifying a storage volume on a node.
5297 def CheckArguments(self):
5298 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5300 storage_type = self.op.storage_type
5303 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5305 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5306 " modified" % storage_type,
5309 diff = set(self.op.changes.keys()) - modifiable
5311 raise errors.OpPrereqError("The following fields can not be modified for"
5312 " storage units of type '%s': %r" %
5313 (storage_type, list(diff)),
5316 def ExpandNames(self):
5317 self.needed_locks = {
5318 locking.LEVEL_NODE: self.op.node_name,
5321 def Exec(self, feedback_fn):
5322 """Computes the list of nodes and their attributes.
5325 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5326 result = self.rpc.call_storage_modify(self.op.node_name,
5327 self.op.storage_type, st_args,
5328 self.op.name, self.op.changes)
5329 result.Raise("Failed to modify storage unit '%s' on %s" %
5330 (self.op.name, self.op.node_name))
5333 class LUNodeAdd(LogicalUnit):
5334 """Logical unit for adding node to the cluster.
5338 HTYPE = constants.HTYPE_NODE
5339 _NFLAGS = ["master_capable", "vm_capable"]
5341 def CheckArguments(self):
5342 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5343 # validate/normalize the node name
5344 self.hostname = netutils.GetHostname(name=self.op.node_name,
5345 family=self.primary_ip_family)
5346 self.op.node_name = self.hostname.name
5348 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5349 raise errors.OpPrereqError("Cannot readd the master node",
5352 if self.op.readd and self.op.group:
5353 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5354 " being readded", errors.ECODE_INVAL)
5356 def BuildHooksEnv(self):
5359 This will run on all nodes before, and on all nodes + the new node after.
5363 "OP_TARGET": self.op.node_name,
5364 "NODE_NAME": self.op.node_name,
5365 "NODE_PIP": self.op.primary_ip,
5366 "NODE_SIP": self.op.secondary_ip,
5367 "MASTER_CAPABLE": str(self.op.master_capable),
5368 "VM_CAPABLE": str(self.op.vm_capable),
5371 def BuildHooksNodes(self):
5372 """Build hooks nodes.
5375 # Exclude added node
5376 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5377 post_nodes = pre_nodes + [self.op.node_name, ]
5379 return (pre_nodes, post_nodes)
5381 def CheckPrereq(self):
5382 """Check prerequisites.
5385 - the new node is not already in the config
5387 - its parameters (single/dual homed) match the cluster
5389 Any errors are signaled by raising errors.OpPrereqError.
5393 hostname = self.hostname
5394 node = hostname.name
5395 primary_ip = self.op.primary_ip = hostname.ip
5396 if self.op.secondary_ip is None:
5397 if self.primary_ip_family == netutils.IP6Address.family:
5398 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5399 " IPv4 address must be given as secondary",
5401 self.op.secondary_ip = primary_ip
5403 secondary_ip = self.op.secondary_ip
5404 if not netutils.IP4Address.IsValid(secondary_ip):
5405 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5406 " address" % secondary_ip, errors.ECODE_INVAL)
5408 node_list = cfg.GetNodeList()
5409 if not self.op.readd and node in node_list:
5410 raise errors.OpPrereqError("Node %s is already in the configuration" %
5411 node, errors.ECODE_EXISTS)
5412 elif self.op.readd and node not in node_list:
5413 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5416 self.changed_primary_ip = False
5418 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5419 if self.op.readd and node == existing_node_name:
5420 if existing_node.secondary_ip != secondary_ip:
5421 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5422 " address configuration as before",
5424 if existing_node.primary_ip != primary_ip:
5425 self.changed_primary_ip = True
5429 if (existing_node.primary_ip == primary_ip or
5430 existing_node.secondary_ip == primary_ip or
5431 existing_node.primary_ip == secondary_ip or
5432 existing_node.secondary_ip == secondary_ip):
5433 raise errors.OpPrereqError("New node ip address(es) conflict with"
5434 " existing node %s" % existing_node.name,
5435 errors.ECODE_NOTUNIQUE)
5437 # After this 'if' block, None is no longer a valid value for the
5438 # _capable op attributes
5440 old_node = self.cfg.GetNodeInfo(node)
5441 assert old_node is not None, "Can't retrieve locked node %s" % node
5442 for attr in self._NFLAGS:
5443 if getattr(self.op, attr) is None:
5444 setattr(self.op, attr, getattr(old_node, attr))
5446 for attr in self._NFLAGS:
5447 if getattr(self.op, attr) is None:
5448 setattr(self.op, attr, True)
5450 if self.op.readd and not self.op.vm_capable:
5451 pri, sec = cfg.GetNodeInstances(node)
5453 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5454 " flag set to false, but it already holds"
5455 " instances" % node,
5458 # check that the type of the node (single versus dual homed) is the
5459 # same as for the master
5460 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5461 master_singlehomed = myself.secondary_ip == myself.primary_ip
5462 newbie_singlehomed = secondary_ip == primary_ip
5463 if master_singlehomed != newbie_singlehomed:
5464 if master_singlehomed:
5465 raise errors.OpPrereqError("The master has no secondary ip but the"
5466 " new node has one",
5469 raise errors.OpPrereqError("The master has a secondary ip but the"
5470 " new node doesn't have one",
5473 # checks reachability
5474 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5475 raise errors.OpPrereqError("Node not reachable by ping",
5476 errors.ECODE_ENVIRON)
5478 if not newbie_singlehomed:
5479 # check reachability from my secondary ip to newbie's secondary ip
5480 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5481 source=myself.secondary_ip):
5482 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5483 " based ping to node daemon port",
5484 errors.ECODE_ENVIRON)
5491 if self.op.master_capable:
5492 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5494 self.master_candidate = False
5497 self.new_node = old_node
5499 node_group = cfg.LookupNodeGroup(self.op.group)
5500 self.new_node = objects.Node(name=node,
5501 primary_ip=primary_ip,
5502 secondary_ip=secondary_ip,
5503 master_candidate=self.master_candidate,
5504 offline=False, drained=False,
5507 if self.op.ndparams:
5508 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5510 if self.op.hv_state:
5511 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5513 if self.op.disk_state:
5514 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5516 def Exec(self, feedback_fn):
5517 """Adds the new node to the cluster.
5520 new_node = self.new_node
5521 node = new_node.name
5523 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5526 # We are adding a new node, so we assume it's powered
5527 new_node.powered = True
5529 # for re-adds, reset the offline/drained/master-candidate flags;
5530 # we need to reset here, otherwise offline would prevent RPC calls
5531 # later in the procedure; this also means that if the re-add
5532 # fails, we are left with a non-offlined, broken node
5534 new_node.drained = new_node.offline = False # pylint: disable=W0201
5535 self.LogInfo("Readding a node, the offline/drained flags were reset")
5536 # if we demote the node, we do cleanup later in the procedure
5537 new_node.master_candidate = self.master_candidate
5538 if self.changed_primary_ip:
5539 new_node.primary_ip = self.op.primary_ip
5541 # copy the master/vm_capable flags
5542 for attr in self._NFLAGS:
5543 setattr(new_node, attr, getattr(self.op, attr))
5545 # notify the user about any possible mc promotion
5546 if new_node.master_candidate:
5547 self.LogInfo("Node will be a master candidate")
5549 if self.op.ndparams:
5550 new_node.ndparams = self.op.ndparams
5552 new_node.ndparams = {}
5554 if self.op.hv_state:
5555 new_node.hv_state_static = self.new_hv_state
5557 if self.op.disk_state:
5558 new_node.disk_state_static = self.new_disk_state
5560 # check connectivity
5561 result = self.rpc.call_version([node])[node]
5562 result.Raise("Can't get version information from node %s" % node)
5563 if constants.PROTOCOL_VERSION == result.payload:
5564 logging.info("Communication to node %s fine, sw version %s match",
5565 node, result.payload)
5567 raise errors.OpExecError("Version mismatch: master version %s,"
5568 " node version %s" %
5569 (constants.PROTOCOL_VERSION, result.payload))
5571 # Add node to our /etc/hosts, and add key to known_hosts
5572 if self.cfg.GetClusterInfo().modify_etc_hosts:
5573 master_node = self.cfg.GetMasterNode()
5574 result = self.rpc.call_etc_hosts_modify(master_node,
5575 constants.ETC_HOSTS_ADD,
5578 result.Raise("Can't update hosts file with new host data")
5580 if new_node.secondary_ip != new_node.primary_ip:
5581 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5584 node_verify_list = [self.cfg.GetMasterNode()]
5585 node_verify_param = {
5586 constants.NV_NODELIST: ([node], {}),
5587 # TODO: do a node-net-test as well?
5590 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5591 self.cfg.GetClusterName())
5592 for verifier in node_verify_list:
5593 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5594 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5596 for failed in nl_payload:
5597 feedback_fn("ssh/hostname verification failed"
5598 " (checking from %s): %s" %
5599 (verifier, nl_payload[failed]))
5600 raise errors.OpExecError("ssh/hostname verification failed")
5603 _RedistributeAncillaryFiles(self)
5604 self.context.ReaddNode(new_node)
5605 # make sure we redistribute the config
5606 self.cfg.Update(new_node, feedback_fn)
5607 # and make sure the new node will not have old files around
5608 if not new_node.master_candidate:
5609 result = self.rpc.call_node_demote_from_mc(new_node.name)
5610 msg = result.fail_msg
5612 self.LogWarning("Node failed to demote itself from master"
5613 " candidate status: %s" % msg)
5615 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5616 additional_vm=self.op.vm_capable)
5617 self.context.AddNode(new_node, self.proc.GetECId())
5620 class LUNodeSetParams(LogicalUnit):
5621 """Modifies the parameters of a node.
5623 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5624 to the node role (as _ROLE_*)
5625 @cvar _R2F: a dictionary from node role to tuples of flags
5626 @cvar _FLAGS: a list of attribute names corresponding to the flags
5629 HPATH = "node-modify"
5630 HTYPE = constants.HTYPE_NODE
5632 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5634 (True, False, False): _ROLE_CANDIDATE,
5635 (False, True, False): _ROLE_DRAINED,
5636 (False, False, True): _ROLE_OFFLINE,
5637 (False, False, False): _ROLE_REGULAR,
5639 _R2F = dict((v, k) for k, v in _F2R.items())
5640 _FLAGS = ["master_candidate", "drained", "offline"]
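  # Editor's note (illustrative, not part of the original class): the flag
  # tuples above are ordered as (master_candidate, drained, offline), matching
  # _FLAGS, so for example _F2R[(True, False, False)] is _ROLE_CANDIDATE and
  # _R2F[_ROLE_OFFLINE] is (False, False, True).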
5642 def CheckArguments(self):
5643 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5644 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5645 self.op.master_capable, self.op.vm_capable,
5646 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5648 if all_mods.count(None) == len(all_mods):
5649 raise errors.OpPrereqError("Please pass at least one modification",
5651 if all_mods.count(True) > 1:
5652 raise errors.OpPrereqError("Can't set the node into more than one"
5653 " state at the same time",
5656 # Boolean value that tells us whether we might be demoting from MC
5657 self.might_demote = (self.op.master_candidate == False or
5658 self.op.offline == True or
5659 self.op.drained == True or
5660 self.op.master_capable == False)
5662 if self.op.secondary_ip:
5663 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5664 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5665 " address" % self.op.secondary_ip,
5668 self.lock_all = self.op.auto_promote and self.might_demote
5669 self.lock_instances = self.op.secondary_ip is not None
5671 def _InstanceFilter(self, instance):
5672 """Filter for getting affected instances.
5675 return (instance.disk_template in constants.DTS_INT_MIRROR and
5676 self.op.node_name in instance.all_nodes)
5678 def ExpandNames(self):
5679 if self.lock_all:
5680 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5681 else:
5682 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5684 # Since modifying a node can have severe effects on currently running
5685 # operations the resource lock is at least acquired in shared mode
5686 self.needed_locks[locking.LEVEL_NODE_RES] = \
5687 self.needed_locks[locking.LEVEL_NODE]
5689 # Get node resource and instance locks in shared mode; they are not used
5690 # for anything but read-only access
5691 self.share_locks[locking.LEVEL_NODE_RES] = 1
5692 self.share_locks[locking.LEVEL_INSTANCE] = 1
5694 if self.lock_instances:
5695 self.needed_locks[locking.LEVEL_INSTANCE] = \
5696 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5698 def BuildHooksEnv(self):
5701 This runs on the master node.
5705 "OP_TARGET": self.op.node_name,
5706 "MASTER_CANDIDATE": str(self.op.master_candidate),
5707 "OFFLINE": str(self.op.offline),
5708 "DRAINED": str(self.op.drained),
5709 "MASTER_CAPABLE": str(self.op.master_capable),
5710 "VM_CAPABLE": str(self.op.vm_capable),
5713 def BuildHooksNodes(self):
5714 """Build hooks nodes.
5717 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5720 def CheckPrereq(self):
5721 """Check prerequisites.
5723 This only checks the instance list against the existing names.
5726 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5728 if self.lock_instances:
5729 affected_instances = \
5730 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5732 # Verify instance locks
5733 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5734 wanted_instances = frozenset(affected_instances.keys())
5735 if wanted_instances - owned_instances:
5736 raise errors.OpPrereqError("Instances affected by changing node %s's"
5737 " secondary IP address have changed since"
5738 " locks were acquired, wanted '%s', have"
5739 " '%s'; retry the operation" %
5741 utils.CommaJoin(wanted_instances),
5742 utils.CommaJoin(owned_instances)),
5745 affected_instances = None
5747 if (self.op.master_candidate is not None or
5748 self.op.drained is not None or
5749 self.op.offline is not None):
5750 # we can't change the master's node flags
5751 if self.op.node_name == self.cfg.GetMasterNode():
5752 raise errors.OpPrereqError("The master role can be changed"
5753 " only via master-failover",
5756 if self.op.master_candidate and not node.master_capable:
5757 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5758 " it a master candidate" % node.name,
5761 if self.op.vm_capable == False:
5762 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5764 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5765 " the vm_capable flag" % node.name,
5768 if node.master_candidate and self.might_demote and not self.lock_all:
5769 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5770 # check if after removing the current node, we're missing master
5772 (mc_remaining, mc_should, _) = \
5773 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5774 if mc_remaining < mc_should:
5775 raise errors.OpPrereqError("Not enough master candidates, please"
5776 " pass auto promote option to allow"
5777 " promotion", errors.ECODE_STATE)
5779 self.old_flags = old_flags = (node.master_candidate,
5780 node.drained, node.offline)
5781 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5782 self.old_role = old_role = self._F2R[old_flags]
5784 # Check for ineffective changes
5785 for attr in self._FLAGS:
5786 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5787 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5788 setattr(self.op, attr, None)
5790 # Past this point, any flag change to False means a transition
5791 # away from the respective state, as only real changes are kept
5793 # TODO: We might query the real power state if it supports OOB
5794 if _SupportsOob(self.cfg, node):
5795 if self.op.offline is False and not (node.powered or
5796 self.op.powered == True):
5797 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5798 " offline status can be reset") %
5800 elif self.op.powered is not None:
5801 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5802 " as it does not support out-of-band"
5803 " handling") % self.op.node_name)
5805 # If we're being deofflined/drained, we'll MC ourself if needed
5806 if (self.op.drained == False or self.op.offline == False or
5807 (self.op.master_capable and not node.master_capable)):
5808 if _DecideSelfPromotion(self):
5809 self.op.master_candidate = True
5810 self.LogInfo("Auto-promoting node to master candidate")
5812 # If we're no longer master capable, we'll demote ourselves from MC
5813 if self.op.master_capable == False and node.master_candidate:
5814 self.LogInfo("Demoting from master candidate")
5815 self.op.master_candidate = False
5818 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5819 if self.op.master_candidate:
5820 new_role = self._ROLE_CANDIDATE
5821 elif self.op.drained:
5822 new_role = self._ROLE_DRAINED
5823 elif self.op.offline:
5824 new_role = self._ROLE_OFFLINE
5825 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5826 # False is still in new flags, which means we're un-setting (the
5828 new_role = self._ROLE_REGULAR
5829 else: # no new flags, nothing, keep old role
5830 new_role = old_role
5832 self.new_role = new_role
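# Worked example (sketch): for a node currently in _ROLE_CANDIDATE, an opcode
# with drained=True yields new_role == _ROLE_DRAINED; an opcode that only sets
# master_candidate=False yields _ROLE_REGULAR; and with no flag changes at all
# new_role simply keeps the old role computed earlier.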
5834 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5835 # Trying to transition out of offline status
5836 # TODO: Use standard RPC runner, but make sure it works when the node is
5837 # still marked offline
5838 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5840 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5841 " to report its version: %s" %
5842 (node.name, result.fail_msg),
5845 self.LogWarning("Transitioning node from offline to online state"
5846 " without using re-add. Please make sure the node"
5849 if self.op.secondary_ip:
5850 # Ok even without locking, because this can't be changed by any LU
5851 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5852 master_singlehomed = master.secondary_ip == master.primary_ip
5853 if master_singlehomed and self.op.secondary_ip:
5854 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5855 " homed cluster", errors.ECODE_INVAL)
5857 assert not (frozenset(affected_instances) -
5858 self.owned_locks(locking.LEVEL_INSTANCE))
5860 if node.offline:
5861 if affected_instances:
5862 raise errors.OpPrereqError("Cannot change secondary IP address:"
5863 " offline node has instances (%s)"
5864 " configured to use it" %
5865 utils.CommaJoin(affected_instances.keys()))
5866 else:
5867 # On online nodes, check that no instances are running, and that
5868 # the node has the new ip and we can reach it.
5869 for instance in affected_instances.values():
5870 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5871 msg="cannot change secondary ip")
5873 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5874 if master.name != node.name:
5875 # check reachability from master secondary ip to new secondary ip
5876 if not netutils.TcpPing(self.op.secondary_ip,
5877 constants.DEFAULT_NODED_PORT,
5878 source=master.secondary_ip):
5879 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5880 " based ping to node daemon port",
5881 errors.ECODE_ENVIRON)
5883 if self.op.ndparams:
5884 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5885 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5886 self.new_ndparams = new_ndparams
5888 if self.op.hv_state:
5889 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5890 self.node.hv_state_static)
5892 if self.op.disk_state:
5893 self.new_disk_state = \
5894 _MergeAndVerifyDiskState(self.op.disk_state,
5895 self.node.disk_state_static)
5897 def Exec(self, feedback_fn):
5902 old_role = self.old_role
5903 new_role = self.new_role
5904 node = self.node
5905 result = []
5907 if self.op.ndparams:
5908 node.ndparams = self.new_ndparams
5910 if self.op.powered is not None:
5911 node.powered = self.op.powered
5913 if self.op.hv_state:
5914 node.hv_state_static = self.new_hv_state
5916 if self.op.disk_state:
5917 node.disk_state_static = self.new_disk_state
5919 for attr in ["master_capable", "vm_capable"]:
5920 val = getattr(self.op, attr)
5921 if val is not None:
5922 setattr(node, attr, val)
5923 result.append((attr, str(val)))
5925 if new_role != old_role:
5926 # Tell the node to demote itself, if no longer MC and not offline
5927 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5928 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5930 self.LogWarning("Node failed to demote itself: %s", msg)
5932 new_flags = self._R2F[new_role]
5933 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5934 if of != nf:
5935 result.append((desc, str(nf)))
5936 (node.master_candidate, node.drained, node.offline) = new_flags
5938 # we locked all nodes, we adjust the CP before updating this node
5939 if self.lock_all:
5940 _AdjustCandidatePool(self, [node.name])
5942 if self.op.secondary_ip:
5943 node.secondary_ip = self.op.secondary_ip
5944 result.append(("secondary_ip", self.op.secondary_ip))
5946 # this will trigger configuration file update, if needed
5947 self.cfg.Update(node, feedback_fn)
5949 # this will trigger job queue propagation or cleanup if the mc
5951 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5952 self.context.ReaddNode(node)
5954 return result
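# Illustrative return value (sketch; values are hypothetical): Exec returns the
# list of (attribute, new value) pairs accumulated above, for example
#   [("master_candidate", "False"), ("drained", "True"),
#    ("secondary_ip", "192.0.2.10")]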
5957 class LUNodePowercycle(NoHooksLU):
5958 """Powercycles a node.
5963 def CheckArguments(self):
5964 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5965 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5966 raise errors.OpPrereqError("The node is the master and the force"
5967 " parameter was not set",
5970 def ExpandNames(self):
5971 """Locking for PowercycleNode.
5973 This is a last-resort option and shouldn't block on other
5974 jobs. Therefore, we grab no locks.
5977 self.needed_locks = {}
5979 def Exec(self, feedback_fn):
5983 result = self.rpc.call_node_powercycle(self.op.node_name,
5984 self.cfg.GetHypervisorType())
5985 result.Raise("Failed to schedule the reboot")
5986 return result.payload
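# Minimal usage sketch (the node name is hypothetical): the LU is normally
# reached through its opcode, e.g.
#   op = opcodes.OpNodePowercycle(node_name="node1.example.com", force=True)
# and the value returned above is whatever the node daemon reports for the
# scheduled powercycle.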
5989 class LUClusterQuery(NoHooksLU):
5990 """Query cluster configuration.
5995 def ExpandNames(self):
5996 self.needed_locks = {}
5998 def Exec(self, feedback_fn):
5999 """Return cluster config.
6002 cluster = self.cfg.GetClusterInfo()
6004 os_hvp = {}
6005 # Filter just for enabled hypervisors
6006 for os_name, hv_dict in cluster.os_hvp.items():
6007 os_hvp[os_name] = {}
6008 for hv_name, hv_params in hv_dict.items():
6009 if hv_name in cluster.enabled_hypervisors:
6010 os_hvp[os_name][hv_name] = hv_params
6012 # Convert ip_family to ip_version
6013 primary_ip_version = constants.IP4_VERSION
6014 if cluster.primary_ip_family == netutils.IP6Address.family:
6015 primary_ip_version = constants.IP6_VERSION
6018 "software_version": constants.RELEASE_VERSION,
6019 "protocol_version": constants.PROTOCOL_VERSION,
6020 "config_version": constants.CONFIG_VERSION,
6021 "os_api_version": max(constants.OS_API_VERSIONS),
6022 "export_version": constants.EXPORT_VERSION,
6023 "architecture": (platform.architecture()[0], platform.machine()),
6024 "name": cluster.cluster_name,
6025 "master": cluster.master_node,
6026 "default_hypervisor": cluster.primary_hypervisor,
6027 "enabled_hypervisors": cluster.enabled_hypervisors,
6028 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6029 for hypervisor_name in cluster.enabled_hypervisors]),
6030 "os_hvp": os_hvp,
6031 "beparams": cluster.beparams,
6032 "osparams": cluster.osparams,
6033 "ipolicy": cluster.ipolicy,
6034 "nicparams": cluster.nicparams,
6035 "ndparams": cluster.ndparams,
6036 "candidate_pool_size": cluster.candidate_pool_size,
6037 "master_netdev": cluster.master_netdev,
6038 "master_netmask": cluster.master_netmask,
6039 "use_external_mip_script": cluster.use_external_mip_script,
6040 "volume_group_name": cluster.volume_group_name,
6041 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6042 "file_storage_dir": cluster.file_storage_dir,
6043 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6044 "maintain_node_health": cluster.maintain_node_health,
6045 "ctime": cluster.ctime,
6046 "mtime": cluster.mtime,
6047 "uuid": cluster.uuid,
6048 "tags": list(cluster.GetTags()),
6049 "uid_pool": cluster.uid_pool,
6050 "default_iallocator": cluster.default_iallocator,
6051 "reserved_lvs": cluster.reserved_lvs,
6052 "primary_ip_version": primary_ip_version,
6053 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6054 "hidden_os": cluster.hidden_os,
6055 "blacklisted_os": cluster.blacklisted_os,
6061 class LUClusterConfigQuery(NoHooksLU):
6062 """Return configuration values.
6066 _FIELDS_DYNAMIC = utils.FieldSet()
6067 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6068 "watcher_pause", "volume_group_name")
6070 def CheckArguments(self):
6071 _CheckOutputFields(static=self._FIELDS_STATIC,
6072 dynamic=self._FIELDS_DYNAMIC,
6073 selected=self.op.output_fields)
6075 def ExpandNames(self):
6076 self.needed_locks = {}
6078 def Exec(self, feedback_fn):
6079 """Dump a representation of the cluster config to the standard output.
6082 values = []
6083 for field in self.op.output_fields:
6084 if field == "cluster_name":
6085 entry = self.cfg.GetClusterName()
6086 elif field == "master_node":
6087 entry = self.cfg.GetMasterNode()
6088 elif field == "drain_flag":
6089 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6090 elif field == "watcher_pause":
6091 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6092 elif field == "volume_group_name":
6093 entry = self.cfg.GetVGName()
6094 else:
6095 raise errors.ParameterError(field)
6096 values.append(entry)
6098 return values
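# Illustrative sketch: the returned list follows the order of
# self.op.output_fields, so a query for ["cluster_name", "master_node"] might
# yield (hypothetical names) ["cluster1.example.com", "node1.example.com"].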
6100 class LUInstanceActivateDisks(NoHooksLU):
6101 """Bring up an instance's disks.
6106 def ExpandNames(self):
6107 self._ExpandAndLockInstance()
6108 self.needed_locks[locking.LEVEL_NODE] = []
6109 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6111 def DeclareLocks(self, level):
6112 if level == locking.LEVEL_NODE:
6113 self._LockInstancesNodes()
6115 def CheckPrereq(self):
6116 """Check prerequisites.
6118 This checks that the instance is in the cluster.
6121 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6122 assert self.instance is not None, \
6123 "Cannot retrieve locked instance %s" % self.op.instance_name
6124 _CheckNodeOnline(self, self.instance.primary_node)
6126 def Exec(self, feedback_fn):
6127 """Activate the disks.
6130 disks_ok, disks_info = \
6131 _AssembleInstanceDisks(self, self.instance,
6132 ignore_size=self.op.ignore_size)
6134 raise errors.OpExecError("Cannot activate block devices")
6139 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6140                            ignore_size=False):
6141 """Prepare the block devices for an instance.
6143 This sets up the block devices on all nodes.
6145 @type lu: L{LogicalUnit}
6146 @param lu: the logical unit on whose behalf we execute
6147 @type instance: L{objects.Instance}
6148 @param instance: the instance for whose disks we assemble
6149 @type disks: list of L{objects.Disk} or None
6150 @param disks: which disks to assemble (or all, if None)
6151 @type ignore_secondaries: boolean
6152 @param ignore_secondaries: if true, errors on secondary nodes
6153 won't result in an error return from the function
6154 @type ignore_size: boolean
6155 @param ignore_size: if true, the current known size of the disk
6156 will not be used during the disk activation, useful for cases
6157 when the size is wrong
6158 @return: False if the operation failed, otherwise a list of
6159 (host, instance_visible_name, node_visible_name)
6160 with the mapping from node devices to instance devices
6163 device_info = []
6164 disks_ok = True
6165 iname = instance.name
6166 disks = _ExpandCheckDisks(instance, disks)
6168 # With the two passes mechanism we try to reduce the window of
6169 # opportunity for the race condition of switching DRBD to primary
6170 # before handshaking occurred, but we do not eliminate it
6172 # The proper fix would be to wait (with some limits) until the
6173 # connection has been made and drbd transitions from WFConnection
6174 # into any other network-connected state (Connected, SyncTarget,
6177 # 1st pass, assemble on all nodes in secondary mode
6178 for idx, inst_disk in enumerate(disks):
6179 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6180 if ignore_size:
6181 node_disk = node_disk.Copy()
6182 node_disk.UnsetSize()
6183 lu.cfg.SetDiskID(node_disk, node)
6184 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6185 msg = result.fail_msg
6186 if msg:
6187 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6188                    " (is_primary=False, pass=1): %s",
6189                    inst_disk.iv_name, node, msg)
6190 if not ignore_secondaries:
6191 disks_ok = False
6193 # FIXME: race condition on drbd migration to primary
6195 # 2nd pass, do only the primary node
6196 for idx, inst_disk in enumerate(disks):
6197 dev_path = None
6199 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6200 if node != instance.primary_node:
6201 continue
6202 if ignore_size:
6203 node_disk = node_disk.Copy()
6204 node_disk.UnsetSize()
6205 lu.cfg.SetDiskID(node_disk, node)
6206 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6207 msg = result.fail_msg
6208 if msg:
6209 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6210                    " (is_primary=True, pass=2): %s",
6211                    inst_disk.iv_name, node, msg)
6212 disks_ok = False
6213 else:
6214 dev_path = result.payload
6216 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6218 # leave the disks configured for the primary node
6219 # this is a workaround that would be fixed better by
6220 # improving the logical/physical id handling
6221 for disk in disks:
6222 lu.cfg.SetDiskID(disk, instance.primary_node)
6224 return disks_ok, device_info
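# Minimal usage sketch (assuming "lu" is a LogicalUnit and "instance" an
# objects.Instance already read from the configuration):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("%s: %s assembled as %s", node, iv_name, dev_path)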
6227 def _StartInstanceDisks(lu, instance, force):
6228 """Start the disks of an instance.
6231 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6232 ignore_secondaries=force)
6234 _ShutdownInstanceDisks(lu, instance)
6235 if force is not None and not force:
6236 lu.proc.LogWarning("", hint="If the message above refers to a"
6238 " you can retry the operation using '--force'.")
6239 raise errors.OpExecError("Disk consistency error")
6242 class LUInstanceDeactivateDisks(NoHooksLU):
6243 """Shutdown an instance's disks.
6248 def ExpandNames(self):
6249 self._ExpandAndLockInstance()
6250 self.needed_locks[locking.LEVEL_NODE] = []
6251 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6253 def DeclareLocks(self, level):
6254 if level == locking.LEVEL_NODE:
6255 self._LockInstancesNodes()
6257 def CheckPrereq(self):
6258 """Check prerequisites.
6260 This checks that the instance is in the cluster.
6263 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6264 assert self.instance is not None, \
6265 "Cannot retrieve locked instance %s" % self.op.instance_name
6267 def Exec(self, feedback_fn):
6268 """Deactivate the disks
6271 instance = self.instance
6273 _ShutdownInstanceDisks(self, instance)
6275 _SafeShutdownInstanceDisks(self, instance)
6278 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6279 """Shutdown block devices of an instance.
6281 This function checks if an instance is running, before calling
6282 _ShutdownInstanceDisks.
6285 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6286 _ShutdownInstanceDisks(lu, instance, disks=disks)
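# Illustrative contrast (sketch): the "safe" variant refuses to act on a
# running instance, while the plain helper below performs no such check:
#   _SafeShutdownInstanceDisks(lu, instance)          # raises if instance is up
#   _ShutdownInstanceDisks(lu, instance, disks=None)  # shuts the disks down regardless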
6289 def _ExpandCheckDisks(instance, disks):
6290 """Return the instance disks selected by the disks list
6292 @type disks: list of L{objects.Disk} or None
6293 @param disks: selected disks
6294 @rtype: list of L{objects.Disk}
6295 @return: selected instance disks to act on
6298 if disks is None:
6299 return instance.disks
6300 else:
6301 if not set(disks).issubset(instance.disks):
6302 raise errors.ProgrammerError("Can only act on disks belonging to the"
6303                              " target instance")
6304 return disks
6307 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6308 """Shutdown block devices of an instance.
6310 This does the shutdown on all nodes of the instance.
6312 If the ignore_primary is false, errors on the primary node are
6317 disks = _ExpandCheckDisks(instance, disks)
6319 for disk in disks:
6320 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6321 lu.cfg.SetDiskID(top_disk, node)
6322 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6323 msg = result.fail_msg
6325 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6326 disk.iv_name, node, msg)
6327 if ((node == instance.primary_node and not ignore_primary) or
6328 (node != instance.primary_node and not result.offline)):
6333 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6334 """Checks if a node has enough free memory.
6336 This function checks if a given node has the needed amount of free
6337 memory. In case the node has less memory or we cannot get the
6338 information from the node, this function raises an OpPrereqError
6341 @type lu: C{LogicalUnit}
6342 @param lu: a logical unit from which we get configuration data
6344 @param node: the node to check
6345 @type reason: C{str}
6346 @param reason: string to use in the error message
6347 @type requested: C{int}
6348 @param requested: the amount of memory in MiB to check for
6349 @type hypervisor_name: C{str}
6350 @param hypervisor_name: the hypervisor to ask for memory stats
6351 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6352 we cannot check the node
6355 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6356 nodeinfo[node].Raise("Can't get data from node %s" % node,
6357 prereq=True, ecode=errors.ECODE_ENVIRON)
6358 (_, _, (hv_info, )) = nodeinfo[node].payload
6360 free_mem = hv_info.get("memory_free", None)
6361 if not isinstance(free_mem, int):
6362 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6363 " was '%s'" % (node, free_mem),
6364 errors.ECODE_ENVIRON)
6365 if requested > free_mem:
6366 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6367 " needed %s MiB, available %s MiB" %
6368 (node, reason, requested, free_mem),
6369 errors.ECODE_NORES)
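# Illustrative call (sketch, mirroring how LUInstanceStartup uses it below):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
# raises OpPrereqError unless the node reports at least that much free memory.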
6372 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6373 """Checks if nodes have enough free disk space in the all VGs.
6375 This function checks if all given nodes have the needed amount of
6376 free disk. In case any node has less disk or we cannot get the
6377 information from the node, this function raises an OpPrereqError
6380 @type lu: C{LogicalUnit}
6381 @param lu: a logical unit from which we get configuration data
6382 @type nodenames: C{list}
6383 @param nodenames: the list of node names to check
6384 @type req_sizes: C{dict}
6385 @param req_sizes: the hash of vg and corresponding amount of disk in
6387 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6388 or we cannot check the node
6391 for vg, req_size in req_sizes.items():
6392 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
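# Illustrative req_sizes value (sketch; the VG name is hypothetical): a
# request for 12 GiB of space on one volume group would look like
#   {"xenvg": 12288}
# and every (vg, size) pair is checked on each node in nodenames.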
6395 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6396 """Checks if nodes have enough free disk space in the specified VG.
6398 This function checks if all given nodes have the needed amount of
6399 free disk. In case any node has less disk or we cannot get the
6400 information from the node, this function raises an OpPrereqError
6403 @type lu: C{LogicalUnit}
6404 @param lu: a logical unit from which we get configuration data
6405 @type nodenames: C{list}
6406 @param nodenames: the list of node names to check
6408 @param vg: the volume group to check
6409 @type requested: C{int}
6410 @param requested: the amount of disk in MiB to check for
6411 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6412 or we cannot check the node
6415 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6416 for node in nodenames:
6417 info = nodeinfo[node]
6418 info.Raise("Cannot get current information from node %s" % node,
6419 prereq=True, ecode=errors.ECODE_ENVIRON)
6420 (_, (vg_info, ), _) = info.payload
6421 vg_free = vg_info.get("vg_free", None)
6422 if not isinstance(vg_free, int):
6423 raise errors.OpPrereqError("Can't compute free disk space on node"
6424 " %s for vg %s, result was '%s'" %
6425 (node, vg, vg_free), errors.ECODE_ENVIRON)
6426 if requested > vg_free:
6427 raise errors.OpPrereqError("Not enough disk space on target node %s"
6428 " vg %s: required %d MiB, available %d MiB" %
6429 (node, vg, requested, vg_free),
6433 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6434 """Checks if nodes have enough physical CPUs
6436 This function checks if all given nodes have the needed number of
6437 physical CPUs. In case any node has less CPUs or we cannot get the
6438 information from the node, this function raises an OpPrereqError
6441 @type lu: C{LogicalUnit}
6442 @param lu: a logical unit from which we get configuration data
6443 @type nodenames: C{list}
6444 @param nodenames: the list of node names to check
6445 @type requested: C{int}
6446 @param requested: the minimum acceptable number of physical CPUs
6447 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6448 or we cannot check the node
6451 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6452 for node in nodenames:
6453 info = nodeinfo[node]
6454 info.Raise("Cannot get current information from node %s" % node,
6455 prereq=True, ecode=errors.ECODE_ENVIRON)
6456 (_, _, (hv_info, )) = info.payload
6457 num_cpus = hv_info.get("cpu_total", None)
6458 if not isinstance(num_cpus, int):
6459 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6460 " on node %s, result was '%s'" %
6461 (node, num_cpus), errors.ECODE_ENVIRON)
6462 if requested > num_cpus:
6463 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6464 "required" % (node, num_cpus, requested),
6468 class LUInstanceStartup(LogicalUnit):
6469 """Starts an instance.
6472 HPATH = "instance-start"
6473 HTYPE = constants.HTYPE_INSTANCE
6476 def CheckArguments(self):
6478 if self.op.beparams:
6479 # fill the beparams dict
6480 objects.UpgradeBeParams(self.op.beparams)
6481 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6483 def ExpandNames(self):
6484 self._ExpandAndLockInstance()
6485 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6487 def DeclareLocks(self, level):
6488 if level == locking.LEVEL_NODE_RES:
6489 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6491 def BuildHooksEnv(self):
6494 This runs on master, primary and secondary nodes of the instance.
6498 "FORCE": self.op.force,
6501 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6505 def BuildHooksNodes(self):
6506 """Build hooks nodes.
6509 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6512 def CheckPrereq(self):
6513 """Check prerequisites.
6515 This checks that the instance is in the cluster.
6518 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6519 assert self.instance is not None, \
6520 "Cannot retrieve locked instance %s" % self.op.instance_name
6523 if self.op.hvparams:
6524 # check hypervisor parameter syntax (locally)
6525 cluster = self.cfg.GetClusterInfo()
6526 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6527 filled_hvp = cluster.FillHV(instance)
6528 filled_hvp.update(self.op.hvparams)
6529 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6530 hv_type.CheckParameterSyntax(filled_hvp)
6531 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6533 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6535 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6537 if self.primary_offline and self.op.ignore_offline_nodes:
6538 self.proc.LogWarning("Ignoring offline primary node")
6540 if self.op.hvparams or self.op.beparams:
6541 self.proc.LogWarning("Overridden parameters are ignored")
6543 _CheckNodeOnline(self, instance.primary_node)
6545 bep = self.cfg.GetClusterInfo().FillBE(instance)
6546 bep.update(self.op.beparams)
6548 # check bridges existence
6549 _CheckInstanceBridgesExist(self, instance)
6551 remote_info = self.rpc.call_instance_info(instance.primary_node,
6553 instance.hypervisor)
6554 remote_info.Raise("Error checking node %s" % instance.primary_node,
6555 prereq=True, ecode=errors.ECODE_ENVIRON)
6556 if not remote_info.payload: # not running already
6557 _CheckNodeFreeMemory(self, instance.primary_node,
6558 "starting instance %s" % instance.name,
6559 bep[constants.BE_MINMEM], instance.hypervisor)
6561 def Exec(self, feedback_fn):
6562 """Start the instance.
6565 instance = self.instance
6566 force = self.op.force
6568 if not self.op.no_remember:
6569 self.cfg.MarkInstanceUp(instance.name)
6571 if self.primary_offline:
6572 assert self.op.ignore_offline_nodes
6573 self.proc.LogInfo("Primary node offline, marked instance as started")
6574 else:
6575 node_current = instance.primary_node
6577 _StartInstanceDisks(self, instance, force)
6579 result = \
6580 self.rpc.call_instance_start(node_current,
6581 (instance, self.op.hvparams,
6582 self.op.beparams),
6583 self.op.startup_paused)
6584 msg = result.fail_msg
6585 if msg:
6586 _ShutdownInstanceDisks(self, instance)
6587 raise errors.OpExecError("Could not start instance: %s" % msg)
6590 class LUInstanceReboot(LogicalUnit):
6591 """Reboot an instance.
6594 HPATH = "instance-reboot"
6595 HTYPE = constants.HTYPE_INSTANCE
6598 def ExpandNames(self):
6599 self._ExpandAndLockInstance()
6601 def BuildHooksEnv(self):
6604 This runs on master, primary and secondary nodes of the instance.
6608 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6609 "REBOOT_TYPE": self.op.reboot_type,
6610 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6613 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6617 def BuildHooksNodes(self):
6618 """Build hooks nodes.
6621 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6624 def CheckPrereq(self):
6625 """Check prerequisites.
6627 This checks that the instance is in the cluster.
6630 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6631 assert self.instance is not None, \
6632 "Cannot retrieve locked instance %s" % self.op.instance_name
6633 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6634 _CheckNodeOnline(self, instance.primary_node)
6636 # check bridges existence
6637 _CheckInstanceBridgesExist(self, instance)
6639 def Exec(self, feedback_fn):
6640 """Reboot the instance.
6643 instance = self.instance
6644 ignore_secondaries = self.op.ignore_secondaries
6645 reboot_type = self.op.reboot_type
6647 remote_info = self.rpc.call_instance_info(instance.primary_node,
6649 instance.hypervisor)
6650 remote_info.Raise("Error checking node %s" % instance.primary_node)
6651 instance_running = bool(remote_info.payload)
6653 node_current = instance.primary_node
6655 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6656 constants.INSTANCE_REBOOT_HARD]:
6657 for disk in instance.disks:
6658 self.cfg.SetDiskID(disk, node_current)
6659 result = self.rpc.call_instance_reboot(node_current, instance,
6661 self.op.shutdown_timeout)
6662 result.Raise("Could not reboot instance")
6663 else:
6664 if instance_running:
6665 result = self.rpc.call_instance_shutdown(node_current, instance,
6666 self.op.shutdown_timeout)
6667 result.Raise("Could not shutdown instance for full reboot")
6668 _ShutdownInstanceDisks(self, instance)
6669 else:
6670 self.LogInfo("Instance %s was already stopped, starting now",
6671              instance.name)
6672 _StartInstanceDisks(self, instance, ignore_secondaries)
6673 result = self.rpc.call_instance_start(node_current,
6674 (instance, None, None), False)
6675 msg = result.fail_msg
6676 if msg:
6677 _ShutdownInstanceDisks(self, instance)
6678 raise errors.OpExecError("Could not start instance for"
6679 " full reboot: %s" % msg)
6681 self.cfg.MarkInstanceUp(instance.name)
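# Behaviour sketch of the reboot types handled above:
#   constants.INSTANCE_REBOOT_SOFT / _HARD: delegated to call_instance_reboot
#     on the primary node while the instance stays under the hypervisor.
#   any other requested type (full reboot): the instance is shut down, its
#     disks are deactivated and reactivated, and it is started again.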
6684 class LUInstanceShutdown(LogicalUnit):
6685 """Shutdown an instance.
6688 HPATH = "instance-stop"
6689 HTYPE = constants.HTYPE_INSTANCE
6692 def ExpandNames(self):
6693 self._ExpandAndLockInstance()
6695 def BuildHooksEnv(self):
6698 This runs on master, primary and secondary nodes of the instance.
6701 env = _BuildInstanceHookEnvByObject(self, self.instance)
6702 env["TIMEOUT"] = self.op.timeout
6705 def BuildHooksNodes(self):
6706 """Build hooks nodes.
6709 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6712 def CheckPrereq(self):
6713 """Check prerequisites.
6715 This checks that the instance is in the cluster.
6718 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6719 assert self.instance is not None, \
6720 "Cannot retrieve locked instance %s" % self.op.instance_name
6722 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6724 self.primary_offline = \
6725 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6727 if self.primary_offline and self.op.ignore_offline_nodes:
6728 self.proc.LogWarning("Ignoring offline primary node")
6730 _CheckNodeOnline(self, self.instance.primary_node)
6732 def Exec(self, feedback_fn):
6733 """Shutdown the instance.
6736 instance = self.instance
6737 node_current = instance.primary_node
6738 timeout = self.op.timeout
6740 if not self.op.no_remember:
6741 self.cfg.MarkInstanceDown(instance.name)
6743 if self.primary_offline:
6744 assert self.op.ignore_offline_nodes
6745 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6746 else:
6747 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6748 msg = result.fail_msg
6749 if msg:
6750 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6752 _ShutdownInstanceDisks(self, instance)
6755 class LUInstanceReinstall(LogicalUnit):
6756 """Reinstall an instance.
6759 HPATH = "instance-reinstall"
6760 HTYPE = constants.HTYPE_INSTANCE
6763 def ExpandNames(self):
6764 self._ExpandAndLockInstance()
6766 def BuildHooksEnv(self):
6769 This runs on master, primary and secondary nodes of the instance.
6772 return _BuildInstanceHookEnvByObject(self, self.instance)
6774 def BuildHooksNodes(self):
6775 """Build hooks nodes.
6778 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6781 def CheckPrereq(self):
6782 """Check prerequisites.
6784 This checks that the instance is in the cluster and is not running.
6787 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6788 assert instance is not None, \
6789 "Cannot retrieve locked instance %s" % self.op.instance_name
6790 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6791 " offline, cannot reinstall")
6792 for node in instance.secondary_nodes:
6793 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6794 " cannot reinstall")
6796 if instance.disk_template == constants.DT_DISKLESS:
6797 raise errors.OpPrereqError("Instance '%s' has no disks" %
6798 self.op.instance_name,
6800 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6802 if self.op.os_type is not None:
6804 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6805 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6806 instance_os = self.op.os_type
6807 else:
6808 instance_os = instance.os
6810 nodelist = list(instance.all_nodes)
6812 if self.op.osparams:
6813 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6814 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6815 self.os_inst = i_osdict # the new dict (without defaults)
6816 else:
6817 self.os_inst = {}
6819 self.instance = instance
6821 def Exec(self, feedback_fn):
6822 """Reinstall the instance.
6825 inst = self.instance
6827 if self.op.os_type is not None:
6828 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6829 inst.os = self.op.os_type
6830 # Write to configuration
6831 self.cfg.Update(inst, feedback_fn)
6833 _StartInstanceDisks(self, inst, None)
6835 feedback_fn("Running the instance OS create scripts...")
6836 # FIXME: pass debug option from opcode to backend
6837 result = self.rpc.call_instance_os_add(inst.primary_node,
6838 (inst, self.os_inst), True,
6839 self.op.debug_level)
6840 result.Raise("Could not install OS for instance %s on node %s" %
6841 (inst.name, inst.primary_node))
6843 _ShutdownInstanceDisks(self, inst)
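# Minimal usage sketch (names are hypothetical): reinstalling with a new OS
# and per-OS parameters is normally requested as
#   op = opcodes.OpInstanceReinstall(instance_name="inst1.example.com",
#                                    os_type="debootstrap+default",
#                                    osparams={})
# the instance must be stopped (checked in CheckPrereq) before Exec runs the
# OS create scripts on its primary node.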
6846 class LUInstanceRecreateDisks(LogicalUnit):
6847 """Recreate an instance's missing disks.
6850 HPATH = "instance-recreate-disks"
6851 HTYPE = constants.HTYPE_INSTANCE
6854 def CheckArguments(self):
6855 # normalise the disk list
6856 self.op.disks = sorted(frozenset(self.op.disks))
6858 def ExpandNames(self):
6859 self._ExpandAndLockInstance()
6860 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6861 if self.op.nodes:
6862 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6863 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6864 else:
6865 self.needed_locks[locking.LEVEL_NODE] = []
6866 self.needed_locks[locking.LEVEL_NODE_RES] = []
6868 def DeclareLocks(self, level):
6869 if level == locking.LEVEL_NODE:
6870 # if we replace the nodes, we only need to lock the old primary,
6871 # otherwise we need to lock all nodes for disk re-creation
6872 primary_only = bool(self.op.nodes)
6873 self._LockInstancesNodes(primary_only=primary_only)
6874 elif level == locking.LEVEL_NODE_RES:
6876 self.needed_locks[locking.LEVEL_NODE_RES] = \
6877 self.needed_locks[locking.LEVEL_NODE][:]
6879 def BuildHooksEnv(self):
6882 This runs on master, primary and secondary nodes of the instance.
6885 return _BuildInstanceHookEnvByObject(self, self.instance)
6887 def BuildHooksNodes(self):
6888 """Build hooks nodes.
6891 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6894 def CheckPrereq(self):
6895 """Check prerequisites.
6897 This checks that the instance is in the cluster and is not running.
6900 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6901 assert instance is not None, \
6902 "Cannot retrieve locked instance %s" % self.op.instance_name
6903 if self.op.nodes:
6904 if len(self.op.nodes) != len(instance.all_nodes):
6905 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6906 " %d replacement nodes were specified" %
6907 (instance.name, len(instance.all_nodes),
6908 len(self.op.nodes)),
6910 assert instance.disk_template != constants.DT_DRBD8 or \
6911 len(self.op.nodes) == 2
6912 assert instance.disk_template != constants.DT_PLAIN or \
6913 len(self.op.nodes) == 1
6914 primary_node = self.op.nodes[0]
6915 else:
6916 primary_node = instance.primary_node
6917 _CheckNodeOnline(self, primary_node)
6919 if instance.disk_template == constants.DT_DISKLESS:
6920 raise errors.OpPrereqError("Instance '%s' has no disks" %
6921 self.op.instance_name, errors.ECODE_INVAL)
6922 # if we replace nodes *and* the old primary is offline, we don't
6924 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6925 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6926 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6927 if not (self.op.nodes and old_pnode.offline):
6928 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6929 msg="cannot recreate disks")
6931 if not self.op.disks:
6932 self.op.disks = range(len(instance.disks))
6933 else:
6934 for idx in self.op.disks:
6935 if idx >= len(instance.disks):
6936 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6938 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6939 raise errors.OpPrereqError("Can't recreate disks partially and"
6940 " change the nodes at the same time",
6942 self.instance = instance
6944 def Exec(self, feedback_fn):
6945 """Recreate the disks.
6948 instance = self.instance
6950 assert (self.owned_locks(locking.LEVEL_NODE) ==
6951 self.owned_locks(locking.LEVEL_NODE_RES))
6953 to_skip = []
6954 mods = [] # keeps track of needed logical_id changes
6956 for idx, disk in enumerate(instance.disks):
6957 if idx not in self.op.disks: # disk idx has not been passed in
6958 to_skip.append(idx)
6959 continue
6960 # update secondaries for disks, if needed
6961 if self.op.nodes:
6962 if disk.dev_type == constants.LD_DRBD8:
6963 # need to update the nodes and minors
6964 assert len(self.op.nodes) == 2
6965 assert len(disk.logical_id) == 6 # otherwise disk internals
6967 (_, _, old_port, _, _, old_secret) = disk.logical_id
6968 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6969 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6970 new_minors[0], new_minors[1], old_secret)
6971 assert len(disk.logical_id) == len(new_id)
6972 mods.append((idx, new_id))
6974 # now that we have passed all asserts above, we can apply the mods
6975 # in a single run (to avoid partial changes)
6976 for idx, new_id in mods:
6977 instance.disks[idx].logical_id = new_id
6979 # change primary node, if needed
6980 if self.op.nodes:
6981 instance.primary_node = self.op.nodes[0]
6982 self.LogWarning("Changing the instance's nodes, you will have to"
6983 " remove any disks left on the older nodes manually")
6986 self.cfg.Update(instance, feedback_fn)
6988 _CreateDisks(self, instance, to_skip=to_skip)
6991 class LUInstanceRename(LogicalUnit):
6992 """Rename an instance.
6995 HPATH = "instance-rename"
6996 HTYPE = constants.HTYPE_INSTANCE
6998 def CheckArguments(self):
7002 if self.op.ip_check and not self.op.name_check:
7003 # TODO: make the ip check more flexible and not depend on the name check
7004 raise errors.OpPrereqError("IP address check requires a name check",
7007 def BuildHooksEnv(self):
7010 This runs on master, primary and secondary nodes of the instance.
7013 env = _BuildInstanceHookEnvByObject(self, self.instance)
7014 env["INSTANCE_NEW_NAME"] = self.op.new_name
7017 def BuildHooksNodes(self):
7018 """Build hooks nodes.
7021 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7024 def CheckPrereq(self):
7025 """Check prerequisites.
7027 This checks that the instance is in the cluster and is not running.
7030 self.op.instance_name = _ExpandInstanceName(self.cfg,
7031 self.op.instance_name)
7032 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7033 assert instance is not None
7034 _CheckNodeOnline(self, instance.primary_node)
7035 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7036 msg="cannot rename")
7037 self.instance = instance
7039 new_name = self.op.new_name
7040 if self.op.name_check:
7041 hostname = netutils.GetHostname(name=new_name)
7042 if hostname.name != new_name:
7043 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7045 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7046 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7047 " same as given hostname '%s'") %
7048 (hostname.name, self.op.new_name),
7050 new_name = self.op.new_name = hostname.name
7051 if (self.op.ip_check and
7052 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7053 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7054 (hostname.ip, new_name),
7055 errors.ECODE_NOTUNIQUE)
7057 instance_list = self.cfg.GetInstanceList()
7058 if new_name in instance_list and new_name != instance.name:
7059 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7060 new_name, errors.ECODE_EXISTS)
7062 def Exec(self, feedback_fn):
7063 """Rename the instance.
7066 inst = self.instance
7067 old_name = inst.name
7069 rename_file_storage = False
7070 if (inst.disk_template in constants.DTS_FILEBASED and
7071 self.op.new_name != inst.name):
7072 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7073 rename_file_storage = True
7075 self.cfg.RenameInstance(inst.name, self.op.new_name)
7076 # Change the instance lock. This is definitely safe while we hold the BGL.
7077 # Otherwise the new lock would have to be added in acquired mode.
7079 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7080 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7082 # re-read the instance from the configuration after rename
7083 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7085 if rename_file_storage:
7086 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7087 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7088 old_file_storage_dir,
7089 new_file_storage_dir)
7090 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7091 " (but the instance has been renamed in Ganeti)" %
7092 (inst.primary_node, old_file_storage_dir,
7093 new_file_storage_dir))
7095 _StartInstanceDisks(self, inst, None)
7097 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7098 old_name, self.op.debug_level)
7099 msg = result.fail_msg
7101 msg = ("Could not run OS rename script for instance %s on node %s"
7102 " (but the instance has been renamed in Ganeti): %s" %
7103 (inst.name, inst.primary_node, msg))
7104 self.proc.LogWarning(msg)
7106 _ShutdownInstanceDisks(self, inst)
7111 class LUInstanceRemove(LogicalUnit):
7112 """Remove an instance.
7115 HPATH = "instance-remove"
7116 HTYPE = constants.HTYPE_INSTANCE
7119 def ExpandNames(self):
7120 self._ExpandAndLockInstance()
7121 self.needed_locks[locking.LEVEL_NODE] = []
7122 self.needed_locks[locking.LEVEL_NODE_RES] = []
7123 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7125 def DeclareLocks(self, level):
7126 if level == locking.LEVEL_NODE:
7127 self._LockInstancesNodes()
7128 elif level == locking.LEVEL_NODE_RES:
7130 self.needed_locks[locking.LEVEL_NODE_RES] = \
7131 self.needed_locks[locking.LEVEL_NODE][:]
7133 def BuildHooksEnv(self):
7136 This runs on master, primary and secondary nodes of the instance.
7139 env = _BuildInstanceHookEnvByObject(self, self.instance)
7140 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7143 def BuildHooksNodes(self):
7144 """Build hooks nodes.
7147 nl = [self.cfg.GetMasterNode()]
7148 nl_post = list(self.instance.all_nodes) + nl
7149 return (nl, nl_post)
7151 def CheckPrereq(self):
7152 """Check prerequisites.
7154 This checks that the instance is in the cluster.
7157 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7158 assert self.instance is not None, \
7159 "Cannot retrieve locked instance %s" % self.op.instance_name
7161 def Exec(self, feedback_fn):
7162 """Remove the instance.
7165 instance = self.instance
7166 logging.info("Shutting down instance %s on node %s",
7167 instance.name, instance.primary_node)
7169 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7170 self.op.shutdown_timeout)
7171 msg = result.fail_msg
7172 if msg:
7173 if self.op.ignore_failures:
7174 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7175 else:
7176 raise errors.OpExecError("Could not shutdown instance %s on"
7177                          " node %s: %s" %
7178                          (instance.name, instance.primary_node, msg))
7180 assert (self.owned_locks(locking.LEVEL_NODE) ==
7181 self.owned_locks(locking.LEVEL_NODE_RES))
7182 assert not (set(instance.all_nodes) -
7183 self.owned_locks(locking.LEVEL_NODE)), \
7184 "Not owning correct locks"
7186 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7189 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7190 """Utility function to remove an instance.
7193 logging.info("Removing block devices for instance %s", instance.name)
7195 if not _RemoveDisks(lu, instance):
7196 if not ignore_failures:
7197 raise errors.OpExecError("Can't remove instance's disks")
7198 feedback_fn("Warning: can't remove instance's disks")
7200 logging.info("Removing instance %s out of cluster config", instance.name)
7202 lu.cfg.RemoveInstance(instance.name)
7204 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7205 "Instance lock removal conflict"
7207 # Remove lock for the instance
7208 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7211 class LUInstanceQuery(NoHooksLU):
7212 """Logical unit for querying instances.
7215 # pylint: disable=W0142
7218 def CheckArguments(self):
7219 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7220 self.op.output_fields, self.op.use_locking)
7222 def ExpandNames(self):
7223 self.iq.ExpandNames(self)
7225 def DeclareLocks(self, level):
7226 self.iq.DeclareLocks(self, level)
7228 def Exec(self, feedback_fn):
7229 return self.iq.OldStyleQuery(self)
7232 class LUInstanceFailover(LogicalUnit):
7233 """Failover an instance.
7236 HPATH = "instance-failover"
7237 HTYPE = constants.HTYPE_INSTANCE
7240 def CheckArguments(self):
7241 """Check the arguments.
7244 self.iallocator = getattr(self.op, "iallocator", None)
7245 self.target_node = getattr(self.op, "target_node", None)
7247 def ExpandNames(self):
7248 self._ExpandAndLockInstance()
7250 if self.op.target_node is not None:
7251 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7253 self.needed_locks[locking.LEVEL_NODE] = []
7254 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7256 ignore_consistency = self.op.ignore_consistency
7257 shutdown_timeout = self.op.shutdown_timeout
7258 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7261 ignore_consistency=ignore_consistency,
7262 shutdown_timeout=shutdown_timeout,
7263 ignore_ipolicy=self.op.ignore_ipolicy)
7264 self.tasklets = [self._migrater]
7266 def DeclareLocks(self, level):
7267 if level == locking.LEVEL_NODE:
7268 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7269 if instance.disk_template in constants.DTS_EXT_MIRROR:
7270 if self.op.target_node is None:
7271 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7273 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7274 self.op.target_node]
7275 del self.recalculate_locks[locking.LEVEL_NODE]
7277 self._LockInstancesNodes()
7279 def BuildHooksEnv(self):
7282 This runs on master, primary and secondary nodes of the instance.
7285 instance = self._migrater.instance
7286 source_node = instance.primary_node
7287 target_node = self.op.target_node
7289 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7290 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7291 "OLD_PRIMARY": source_node,
7292 "NEW_PRIMARY": target_node,
7295 if instance.disk_template in constants.DTS_INT_MIRROR:
7296 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7297 env["NEW_SECONDARY"] = source_node
7299 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7301 env.update(_BuildInstanceHookEnvByObject(self, instance))
7305 def BuildHooksNodes(self):
7306 """Build hooks nodes.
7309 instance = self._migrater.instance
7310 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7311 return (nl, nl + [instance.primary_node])
7314 class LUInstanceMigrate(LogicalUnit):
7315 """Migrate an instance.
7317 This is migration without shutting down, compared to the failover,
7318 which is done with shutdown.
7321 HPATH = "instance-migrate"
7322 HTYPE = constants.HTYPE_INSTANCE
7325 def ExpandNames(self):
7326 self._ExpandAndLockInstance()
7328 if self.op.target_node is not None:
7329 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7331 self.needed_locks[locking.LEVEL_NODE] = []
7332 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7334 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7335 cleanup=self.op.cleanup,
7337 fallback=self.op.allow_failover,
7338 ignore_ipolicy=self.op.ignore_ipolicy)
7339 self.tasklets = [self._migrater]
7341 def DeclareLocks(self, level):
7342 if level == locking.LEVEL_NODE:
7343 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7344 if instance.disk_template in constants.DTS_EXT_MIRROR:
7345 if self.op.target_node is None:
7346 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7348 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7349 self.op.target_node]
7350 del self.recalculate_locks[locking.LEVEL_NODE]
7352 self._LockInstancesNodes()
7354 def BuildHooksEnv(self):
7357 This runs on master, primary and secondary nodes of the instance.
7360 instance = self._migrater.instance
7361 source_node = instance.primary_node
7362 target_node = self.op.target_node
7363 env = _BuildInstanceHookEnvByObject(self, instance)
7365 "MIGRATE_LIVE": self._migrater.live,
7366 "MIGRATE_CLEANUP": self.op.cleanup,
7367 "OLD_PRIMARY": source_node,
7368 "NEW_PRIMARY": target_node,
7371 if instance.disk_template in constants.DTS_INT_MIRROR:
7372 env["OLD_SECONDARY"] = target_node
7373 env["NEW_SECONDARY"] = source_node
7375 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7379 def BuildHooksNodes(self):
7380 """Build hooks nodes.
7383 instance = self._migrater.instance
7384 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7385 return (nl, nl + [instance.primary_node])
7388 class LUInstanceMove(LogicalUnit):
7389 """Move an instance by data-copying.
7392 HPATH = "instance-move"
7393 HTYPE = constants.HTYPE_INSTANCE
7396 def ExpandNames(self):
7397 self._ExpandAndLockInstance()
7398 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7399 self.op.target_node = target_node
7400 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7401 self.needed_locks[locking.LEVEL_NODE_RES] = []
7402 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7404 def DeclareLocks(self, level):
7405 if level == locking.LEVEL_NODE:
7406 self._LockInstancesNodes(primary_only=True)
7407 elif level == locking.LEVEL_NODE_RES:
7409 self.needed_locks[locking.LEVEL_NODE_RES] = \
7410 self.needed_locks[locking.LEVEL_NODE][:]
7412 def BuildHooksEnv(self):
7415 This runs on master, primary and secondary nodes of the instance.
7419 "TARGET_NODE": self.op.target_node,
7420 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7422 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7425 def BuildHooksNodes(self):
7426 """Build hooks nodes.
7430 self.cfg.GetMasterNode(),
7431 self.instance.primary_node,
7432 self.op.target_node,
7436 def CheckPrereq(self):
7437 """Check prerequisites.
7439 This checks that the instance is in the cluster.
7442 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7443 assert self.instance is not None, \
7444 "Cannot retrieve locked instance %s" % self.op.instance_name
7446 node = self.cfg.GetNodeInfo(self.op.target_node)
7447 assert node is not None, \
7448 "Cannot retrieve locked node %s" % self.op.target_node
7450 self.target_node = target_node = node.name
7452 if target_node == instance.primary_node:
7453 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7454 (instance.name, target_node),
7457 bep = self.cfg.GetClusterInfo().FillBE(instance)
7459 for idx, dsk in enumerate(instance.disks):
7460 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7461 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7462 " cannot copy" % idx, errors.ECODE_STATE)
7464 _CheckNodeOnline(self, target_node)
7465 _CheckNodeNotDrained(self, target_node)
7466 _CheckNodeVmCapable(self, target_node)
7467 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7468 self.cfg.GetNodeGroup(node.group))
7469 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7470 ignore=self.op.ignore_ipolicy)
7472 if instance.admin_state == constants.ADMINST_UP:
7473 # check memory requirements on the secondary node
7474 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7475 instance.name, bep[constants.BE_MAXMEM],
7476 instance.hypervisor)
7478 self.LogInfo("Not checking memory on the secondary node as"
7479 " instance will not be started")
7481 # check bridge existence
7482 _CheckInstanceBridgesExist(self, instance, node=target_node)
7484 def Exec(self, feedback_fn):
7485 """Move an instance.
7487 The move is done by shutting it down on its present node, copying
7488 the data over (slow) and starting it on the new node.
7491 instance = self.instance
7493 source_node = instance.primary_node
7494 target_node = self.target_node
7496 self.LogInfo("Shutting down instance %s on source node %s",
7497 instance.name, source_node)
7499 assert (self.owned_locks(locking.LEVEL_NODE) ==
7500 self.owned_locks(locking.LEVEL_NODE_RES))
7502 result = self.rpc.call_instance_shutdown(source_node, instance,
7503 self.op.shutdown_timeout)
7504 msg = result.fail_msg
7505 if msg:
7506 if self.op.ignore_consistency:
7507 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7508                      " Proceeding anyway. Please make sure node"
7509                      " %s is down. Error details: %s",
7510                      instance.name, source_node, source_node, msg)
7511 else:
7512 raise errors.OpExecError("Could not shutdown instance %s on"
7513                          " node %s: %s" %
7514                          (instance.name, source_node, msg))
7516 # create the target disks
7517 try:
7518 _CreateDisks(self, instance, target_node=target_node)
7519 except errors.OpExecError:
7520 self.LogWarning("Device creation failed, reverting...")
7521 try:
7522 _RemoveDisks(self, instance, target_node=target_node)
7523 finally:
7524 self.cfg.ReleaseDRBDMinors(instance.name)
7525 raise
7527 cluster_name = self.cfg.GetClusterInfo().cluster_name
7530 # activate, get path, copy the data over
7531 for idx, disk in enumerate(instance.disks):
7532 self.LogInfo("Copying data for disk %d", idx)
7533 result = self.rpc.call_blockdev_assemble(target_node, disk,
7534 instance.name, True, idx)
7536 self.LogWarning("Can't assemble newly created disk %d: %s",
7537 idx, result.fail_msg)
7538 errs.append(result.fail_msg)
7540 dev_path = result.payload
7541 result = self.rpc.call_blockdev_export(source_node, disk,
7542 target_node, dev_path,
7545 self.LogWarning("Can't copy data over for disk %d: %s",
7546 idx, result.fail_msg)
7547 errs.append(result.fail_msg)
7551 self.LogWarning("Some disks failed to copy, aborting")
7553 _RemoveDisks(self, instance, target_node=target_node)
7555 self.cfg.ReleaseDRBDMinors(instance.name)
7556 raise errors.OpExecError("Errors during disk copy: %s" %
7559 instance.primary_node = target_node
7560 self.cfg.Update(instance, feedback_fn)
7562 self.LogInfo("Removing the disks on the original node")
7563 _RemoveDisks(self, instance, target_node=source_node)
7565 # Only start the instance if it's marked as up
7566 if instance.admin_state == constants.ADMINST_UP:
7567 self.LogInfo("Starting instance %s on node %s",
7568 instance.name, target_node)
7570 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7571 ignore_secondaries=True)
7573 _ShutdownInstanceDisks(self, instance)
7574 raise errors.OpExecError("Can't activate the instance's disks")
7576 result = self.rpc.call_instance_start(target_node,
7577 (instance, None, None), False)
7578 msg = result.fail_msg
7580 _ShutdownInstanceDisks(self, instance)
7581 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7582 (instance.name, target_node, msg))
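# Illustrative sketch (not part of the LU above): the per-disk copy loop in
# LUInstanceMove.Exec accumulates failures instead of aborting on the first
# one, then rolls back in a single place.  A minimal standalone model of that
# pattern; copy_disk and remove_disks are hypothetical callables.
def _ExampleCopyAllOrRollback(disks, copy_disk, remove_disks):
  errs = []
  for idx, disk in enumerate(disks):
    err = copy_disk(idx, disk)  # expected to return None on success
    if err:
      errs.append(err)
  if errs:
    remove_disks()  # best-effort cleanup of the half-created target disks
    raise errors.OpExecError("Errors during disk copy: %s" % ", ".join(errs))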
7585 class LUNodeMigrate(LogicalUnit):
7586 """Migrate all instances from a node.
7589 HPATH = "node-migrate"
7590 HTYPE = constants.HTYPE_NODE
7593 def CheckArguments(self):
7596 def ExpandNames(self):
7597 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7599 self.share_locks = _ShareAll()
7600 self.needed_locks = {
7601 locking.LEVEL_NODE: [self.op.node_name],
7604 def BuildHooksEnv(self):
7607 This runs on the master, the primary and all the secondaries.
7611 "NODE_NAME": self.op.node_name,
7614 def BuildHooksNodes(self):
7615 """Build hooks nodes.
7618 nl = [self.cfg.GetMasterNode()]
7621 def CheckPrereq(self):
7624 def Exec(self, feedback_fn):
7625 # Prepare jobs for migration instances
7627 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7630 iallocator=self.op.iallocator,
7631 target_node=self.op.target_node,
7632 ignore_ipolicy=self.op.ignore_ipolicy)]
7633 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7636 # TODO: Run iallocator in this opcode and pass correct placement options to
7637 # OpInstanceMigrate. Since other jobs can modify the cluster between
7638 # running the iallocator and the actual migration, a good consistency model
7639 # will have to be found.
7641 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7642 frozenset([self.op.node_name]))
7644 return ResultWithJobs(jobs)
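# Illustrative sketch: LUNodeMigrate.Exec above returns one single-opcode job
# per primary instance, so each migration is scheduled and can fail
# independently.  A minimal model of that shape; make_opcode stands in for
# opcodes.OpInstanceMigrate and is a hypothetical callable.
def _ExampleJobsPerInstance(instance_names, make_opcode):
  # [[op1], [op2], ...]: a list of jobs, each job being a list of opcodes
  return [[make_opcode(name)] for name in instance_names]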
7647 class TLMigrateInstance(Tasklet):
7648 """Tasklet class for instance migration.
7651 @ivar live: whether the migration will be done live or non-live;
7652 this variable is initialized only after CheckPrereq has run
7653 @type cleanup: boolean
7654 @ivar cleanup: Whether we clean up from a failed migration
7655 @type iallocator: string
7656 @ivar iallocator: The iallocator used to determine target_node
7657 @type target_node: string
7658 @ivar target_node: If given, the target_node to reallocate the instance to
7659 @type failover: boolean
7660 @ivar failover: Whether operation results in failover or migration
7661 @type fallback: boolean
7662 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7664 @type ignore_consistency: boolean
7665 @ivar ignore_consistency: Whether we should ignore consistency between the source and target node
7667 @type shutdown_timeout: int
7668 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
7669 @type ignore_ipolicy: bool
7670 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7675 _MIGRATION_POLL_INTERVAL = 1 # seconds
7676 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7678 def __init__(self, lu, instance_name, cleanup=False,
7679 failover=False, fallback=False,
7680 ignore_consistency=False,
7681 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7682 ignore_ipolicy=False):
7683 """Initializes this class.
7686 Tasklet.__init__(self, lu)
7689 self.instance_name = instance_name
7690 self.cleanup = cleanup
7691 self.live = False # will be overridden later
7692 self.failover = failover
7693 self.fallback = fallback
7694 self.ignore_consistency = ignore_consistency
7695 self.shutdown_timeout = shutdown_timeout
7696 self.ignore_ipolicy = ignore_ipolicy
7698 def CheckPrereq(self):
7699 """Check prerequisites.
7701 This checks that the instance is in the cluster.
7704 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7705 instance = self.cfg.GetInstanceInfo(instance_name)
7706 assert instance is not None
7707 self.instance = instance
7708 cluster = self.cfg.GetClusterInfo()
7710 if (not self.cleanup and
7711 not instance.admin_state == constants.ADMINST_UP and
7712 not self.failover and self.fallback):
7713 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7714 " switching to failover")
7715 self.failover = True
7717 if instance.disk_template not in constants.DTS_MIRRORED:
7722 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7723 " %s" % (instance.disk_template, text),
7726 if instance.disk_template in constants.DTS_EXT_MIRROR:
7727 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7729 if self.lu.op.iallocator:
7730 self._RunAllocator()
7732 # We set self.target_node as it is required by
7734 self.target_node = self.lu.op.target_node
7736 # Check that the target node is correct in terms of instance policy
7737 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7738 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7739 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7740 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7741 ignore=self.ignore_ipolicy)
7743 # self.target_node is already populated, either directly or by the iallocator run
7745 target_node = self.target_node
7746 if self.target_node == instance.primary_node:
7747 raise errors.OpPrereqError("Cannot migrate instance %s"
7748 " to its primary (%s)" %
7749 (instance.name, instance.primary_node))
7751 if len(self.lu.tasklets) == 1:
7752 # It is safe to release locks only when we're the only tasklet
7754 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7755 keep=[instance.primary_node, self.target_node])
7758 secondary_nodes = instance.secondary_nodes
7759 if not secondary_nodes:
7760 raise errors.ConfigurationError("No secondary node but using"
7761 " %s disk template" %
7762 instance.disk_template)
7763 target_node = secondary_nodes[0]
7764 if self.lu.op.iallocator or (self.lu.op.target_node and
7765 self.lu.op.target_node != target_node):
7767 text = "failed over"
7770 raise errors.OpPrereqError("Instances with disk template %s cannot"
7771 " be %s to arbitrary nodes"
7772 " (neither an iallocator nor a target"
7773 " node can be passed)" %
7774 (instance.disk_template, text),
7776 nodeinfo = self.cfg.GetNodeInfo(target_node)
7777 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7778 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7779 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7780 ignore=self.ignore_ipolicy)
7782 i_be = cluster.FillBE(instance)
7784 # check memory requirements on the secondary node
7785 if (not self.cleanup and
7786 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7787 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7788 instance.name, i_be[constants.BE_MAXMEM],
7789 instance.hypervisor)
7791 self.lu.LogInfo("Not checking memory on the secondary node as"
7792 " instance will not be started")
7794 # check if failover must be forced instead of migration
7795 if (not self.cleanup and not self.failover and
7796 i_be[constants.BE_ALWAYS_FAILOVER]):
7798 self.lu.LogInfo("Instance configured to always failover; fallback"
7800 self.failover = True
7802 raise errors.OpPrereqError("This instance has been configured to"
7803 " always failover, please allow failover",
7806 # check bridge existence
7807 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7809 if not self.cleanup:
7810 _CheckNodeNotDrained(self.lu, target_node)
7811 if not self.failover:
7812 result = self.rpc.call_instance_migratable(instance.primary_node,
7814 if result.fail_msg and self.fallback:
7815 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7817 self.failover = True
7819 result.Raise("Can't migrate, please use failover",
7820 prereq=True, ecode=errors.ECODE_STATE)
7822 assert not (self.failover and self.cleanup)
7824 if not self.failover:
7825 if self.lu.op.live is not None and self.lu.op.mode is not None:
7826 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7827 " parameters are accepted",
7829 if self.lu.op.live is not None:
7831 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7833 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7834 # reset the 'live' parameter to None so that repeated
7835 # invocations of CheckPrereq do not raise an exception
7836 self.lu.op.live = None
7837 elif self.lu.op.mode is None:
7838 # read the default value from the hypervisor
7839 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7840 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7842 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7844 # Failover is never live
7847 def _RunAllocator(self):
7848 """Run the allocator based on input opcode.
7851 # FIXME: add a self.ignore_ipolicy option
7852 ial = IAllocator(self.cfg, self.rpc,
7853 mode=constants.IALLOCATOR_MODE_RELOC,
7854 name=self.instance_name,
7855 # TODO See why hail breaks with a single node below
7856 relocate_from=[self.instance.primary_node,
7857 self.instance.primary_node],
7860 ial.Run(self.lu.op.iallocator)
7863 raise errors.OpPrereqError("Can't compute nodes using"
7864 " iallocator '%s': %s" %
7865 (self.lu.op.iallocator, ial.info),
7867 if len(ial.result) != ial.required_nodes:
7868 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7869 " of nodes (%s), required %s" %
7870 (self.lu.op.iallocator, len(ial.result),
7871 ial.required_nodes), errors.ECODE_FAULT)
7872 self.target_node = ial.result[0]
7873 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7874 self.instance_name, self.lu.op.iallocator,
7875 utils.CommaJoin(ial.result))
7877 def _WaitUntilSync(self):
7878 """Poll with custom rpc for disk sync.
7880 This uses our own step-based rpc call.
7883 self.feedback_fn("* wait until resync is done")
7887 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7889 self.instance.disks)
7891 for node, nres in result.items():
7892 nres.Raise("Cannot resync disks on node %s" % node)
7893 node_done, node_percent = nres.payload
7894 all_done = all_done and node_done
7895 if node_percent is not None:
7896 min_percent = min(min_percent, node_percent)
7898 if min_percent < 100:
7899 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7902 def _EnsureSecondary(self, node):
7903 """Demote a node to secondary.
7906 self.feedback_fn("* switching node %s to secondary mode" % node)
7908 for dev in self.instance.disks:
7909 self.cfg.SetDiskID(dev, node)
7911 result = self.rpc.call_blockdev_close(node, self.instance.name,
7912 self.instance.disks)
7913 result.Raise("Cannot change disk to secondary on node %s" % node)
7915 def _GoStandalone(self):
7916 """Disconnect from the network.
7919 self.feedback_fn("* changing into standalone mode")
7920 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7921 self.instance.disks)
7922 for node, nres in result.items():
7923 nres.Raise("Cannot disconnect disks node %s" % node)
7925 def _GoReconnect(self, multimaster):
7926 """Reconnect to the network.
7932 msg = "single-master"
7933 self.feedback_fn("* changing disks into %s mode" % msg)
7934 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7935 self.instance.disks,
7936 self.instance.name, multimaster)
7937 for node, nres in result.items():
7938 nres.Raise("Cannot change disks config on node %s" % node)
7940 def _ExecCleanup(self):
7941 """Try to cleanup after a failed migration.
7943 The cleanup is done by:
7944 - check that the instance is running only on one node
7945 (and update the config if needed)
7946 - change disks on its secondary node to secondary
7947 - wait until disks are fully synchronized
7948 - disconnect from the network
7949 - change disks into single-master mode
7950 - wait again until disks are fully synchronized
7953 instance = self.instance
7954 target_node = self.target_node
7955 source_node = self.source_node
7957 # check running on only one node
7958 self.feedback_fn("* checking where the instance actually runs"
7959 " (if this hangs, the hypervisor might be in"
7961 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7962 for node, result in ins_l.items():
7963 result.Raise("Can't contact node %s" % node)
7965 runningon_source = instance.name in ins_l[source_node].payload
7966 runningon_target = instance.name in ins_l[target_node].payload
7968 if runningon_source and runningon_target:
7969 raise errors.OpExecError("Instance seems to be running on two nodes,"
7970 " or the hypervisor is confused; you will have"
7971 " to ensure manually that it runs only on one"
7972 " and restart this operation")
7974 if not (runningon_source or runningon_target):
7975 raise errors.OpExecError("Instance does not seem to be running at all;"
7976 " in this case it's safer to repair by"
7977 " running 'gnt-instance stop' to ensure disk"
7978 " shutdown, and then restarting it")
7980 if runningon_target:
7981 # the migration has actually succeeded, we need to update the config
7982 self.feedback_fn("* instance running on secondary node (%s),"
7983 " updating config" % target_node)
7984 instance.primary_node = target_node
7985 self.cfg.Update(instance, self.feedback_fn)
7986 demoted_node = source_node
7988 self.feedback_fn("* instance confirmed to be running on its"
7989 " primary node (%s)" % source_node)
7990 demoted_node = target_node
7992 if instance.disk_template in constants.DTS_INT_MIRROR:
7993 self._EnsureSecondary(demoted_node)
7995 self._WaitUntilSync()
7996 except errors.OpExecError:
7997 # we ignore errors here, since if the device is standalone, it
7998 # won't be able to sync
8000 self._GoStandalone()
8001 self._GoReconnect(False)
8002 self._WaitUntilSync()
8004 self.feedback_fn("* done")
8006 def _RevertDiskStatus(self):
8007 """Try to revert the disk status after a failed migration.
8010 target_node = self.target_node
8011 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8015 self._EnsureSecondary(target_node)
8016 self._GoStandalone()
8017 self._GoReconnect(False)
8018 self._WaitUntilSync()
8019 except errors.OpExecError, err:
8020 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8021 " please try to recover the instance manually;"
8022 " error '%s'" % str(err))
8024 def _AbortMigration(self):
8025 """Call the hypervisor code to abort a started migration.
8028 instance = self.instance
8029 target_node = self.target_node
8030 source_node = self.source_node
8031 migration_info = self.migration_info
8033 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8037 abort_msg = abort_result.fail_msg
8039 logging.error("Aborting migration failed on target node %s: %s",
8040 target_node, abort_msg)
8041 # Don't raise an exception here, as we still have to try to revert the
8042 # disk status, even if this step failed.
8044 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8045 instance, False, self.live)
8046 abort_msg = abort_result.fail_msg
8048 logging.error("Aborting migration failed on source node %s: %s",
8049 source_node, abort_msg)
8051 def _ExecMigration(self):
8052 """Migrate an instance.
8054 The migrate is done by:
8055 - change the disks into dual-master mode
8056 - wait until disks are fully synchronized again
8057 - migrate the instance
8058 - change disks on the new secondary node (the old primary) to secondary
8059 - wait until disks are fully synchronized
8060 - change disks into single-master mode
8063 instance = self.instance
8064 target_node = self.target_node
8065 source_node = self.source_node
8067 # Check for hypervisor version mismatch and warn the user.
8068 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8069 None, [self.instance.hypervisor])
8070 for ninfo in nodeinfo.values():
8071 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8073 (_, _, (src_info, )) = nodeinfo[source_node].payload
8074 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8076 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8077 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8078 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8079 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8080 if src_version != dst_version:
8081 self.feedback_fn("* warning: hypervisor version mismatch between"
8082 " source (%s) and target (%s) node" %
8083 (src_version, dst_version))
8085 self.feedback_fn("* checking disk consistency between source and target")
8086 for dev in instance.disks:
8087 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8088 raise errors.OpExecError("Disk %s is degraded or not fully"
8089 " synchronized on target node,"
8090 " aborting migration" % dev.iv_name)
8092 # First get the migration information from the remote node
8093 result = self.rpc.call_migration_info(source_node, instance)
8094 msg = result.fail_msg
8096 log_err = ("Failed fetching source migration information from %s: %s" %
8098 logging.error(log_err)
8099 raise errors.OpExecError(log_err)
8101 self.migration_info = migration_info = result.payload
8103 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8104 # Then switch the disks to master/master mode
8105 self._EnsureSecondary(target_node)
8106 self._GoStandalone()
8107 self._GoReconnect(True)
8108 self._WaitUntilSync()
8110 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8111 result = self.rpc.call_accept_instance(target_node,
8114 self.nodes_ip[target_node])
8116 msg = result.fail_msg
8118 logging.error("Instance pre-migration failed, trying to revert"
8119 " disk status: %s", msg)
8120 self.feedback_fn("Pre-migration failed, aborting")
8121 self._AbortMigration()
8122 self._RevertDiskStatus()
8123 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8124 (instance.name, msg))
8126 self.feedback_fn("* migrating instance to %s" % target_node)
8127 result = self.rpc.call_instance_migrate(source_node, instance,
8128 self.nodes_ip[target_node],
8130 msg = result.fail_msg
8132 logging.error("Instance migration failed, trying to revert"
8133 " disk status: %s", msg)
8134 self.feedback_fn("Migration failed, aborting")
8135 self._AbortMigration()
8136 self._RevertDiskStatus()
8137 raise errors.OpExecError("Could not migrate instance %s: %s" %
8138 (instance.name, msg))
8140 self.feedback_fn("* starting memory transfer")
8141 last_feedback = time.time()
8143 result = self.rpc.call_instance_get_migration_status(source_node,
8145 msg = result.fail_msg
8146 ms = result.payload # MigrationStatus instance
8147 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8148 logging.error("Instance migration failed, trying to revert"
8149 " disk status: %s", msg)
8150 self.feedback_fn("Migration failed, aborting")
8151 self._AbortMigration()
8152 self._RevertDiskStatus()
8153 raise errors.OpExecError("Could not migrate instance %s: %s" %
8154 (instance.name, msg))
8156 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8157 self.feedback_fn("* memory transfer complete")
8160 if (utils.TimeoutExpired(last_feedback,
8161 self._MIGRATION_FEEDBACK_INTERVAL) and
8162 ms.transferred_ram is not None):
8163 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8164 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8165 last_feedback = time.time()
8167 time.sleep(self._MIGRATION_POLL_INTERVAL)
8169 result = self.rpc.call_instance_finalize_migration_src(source_node,
8173 msg = result.fail_msg
8175 logging.error("Instance migration succeeded, but finalization failed"
8176 " on the source node: %s", msg)
8177 raise errors.OpExecError("Could not finalize instance migration: %s" %
8180 instance.primary_node = target_node
8182 # distribute new instance config to the other nodes
8183 self.cfg.Update(instance, self.feedback_fn)
8185 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8189 msg = result.fail_msg
8191 logging.error("Instance migration succeeded, but finalization failed"
8192 " on the target node: %s", msg)
8193 raise errors.OpExecError("Could not finalize instance migration: %s" %
8196 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8197 self._EnsureSecondary(source_node)
8198 self._WaitUntilSync()
8199 self._GoStandalone()
8200 self._GoReconnect(False)
8201 self._WaitUntilSync()
8203 self.feedback_fn("* done")
8205 def _ExecFailover(self):
8206 """Failover an instance.
8208 The failover is done by shutting it down on its present node and
8209 starting it on the secondary.
8212 instance = self.instance
8213 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8215 source_node = instance.primary_node
8216 target_node = self.target_node
8218 if instance.admin_state == constants.ADMINST_UP:
8219 self.feedback_fn("* checking disk consistency between source and target")
8220 for dev in instance.disks:
8221 # for drbd, these are drbd over lvm
8222 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8223 if primary_node.offline:
8224 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8226 (primary_node.name, dev.iv_name, target_node))
8227 elif not self.ignore_consistency:
8228 raise errors.OpExecError("Disk %s is degraded on target node,"
8229 " aborting failover" % dev.iv_name)
8231 self.feedback_fn("* not checking disk consistency as instance is not"
8234 self.feedback_fn("* shutting down instance on source node")
8235 logging.info("Shutting down instance %s on node %s",
8236 instance.name, source_node)
8238 result = self.rpc.call_instance_shutdown(source_node, instance,
8239 self.shutdown_timeout)
8240 msg = result.fail_msg
8242 if self.ignore_consistency or primary_node.offline:
8243 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8244 " proceeding anyway; please make sure node"
8245 " %s is down; error details: %s",
8246 instance.name, source_node, source_node, msg)
8248 raise errors.OpExecError("Could not shutdown instance %s on"
8250 (instance.name, source_node, msg))
8252 self.feedback_fn("* deactivating the instance's disks on source node")
8253 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8254 raise errors.OpExecError("Can't shut down the instance's disks")
8256 instance.primary_node = target_node
8257 # distribute new instance config to the other nodes
8258 self.cfg.Update(instance, self.feedback_fn)
8260 # Only start the instance if it's marked as up
8261 if instance.admin_state == constants.ADMINST_UP:
8262 self.feedback_fn("* activating the instance's disks on target node %s" %
8264 logging.info("Starting instance %s on node %s",
8265 instance.name, target_node)
8267 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8268 ignore_secondaries=True)
8270 _ShutdownInstanceDisks(self.lu, instance)
8271 raise errors.OpExecError("Can't activate the instance's disks")
8273 self.feedback_fn("* starting the instance on the target node %s" %
8275 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8277 msg = result.fail_msg
8279 _ShutdownInstanceDisks(self.lu, instance)
8280 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8281 (instance.name, target_node, msg))
8283 def Exec(self, feedback_fn):
8284 """Perform the migration.
8287 self.feedback_fn = feedback_fn
8288 self.source_node = self.instance.primary_node
8290 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8291 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8292 self.target_node = self.instance.secondary_nodes[0]
8293 # Otherwise self.target_node has been populated either
8294 # directly, or through an iallocator.
8296 self.all_nodes = [self.source_node, self.target_node]
8297 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8298 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8301 feedback_fn("Failover instance %s" % self.instance.name)
8302 self._ExecFailover()
8304 feedback_fn("Migrating instance %s" % self.instance.name)
8307 return self._ExecCleanup()
8309 return self._ExecMigration()
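# Illustrative sketch (not used by the tasklet above): the memory-transfer loop
# in _ExecMigration polls the hypervisor every _MIGRATION_POLL_INTERVAL seconds
# but only reports progress every _MIGRATION_FEEDBACK_INTERVAL seconds.  A
# standalone model of that throttling; get_status is a hypothetical callable
# returning (done, transferred_ram, total_ram), and the module-level time
# import is reused.
def _ExamplePollWithThrottledFeedback(get_status, feedback_fn,
                                      poll_interval=1, feedback_interval=10):
  last_feedback = time.time()
  while True:
    (done, transferred, total) = get_status()
    if done:
      feedback_fn("memory transfer complete")
      return
    now = time.time()
    if transferred is not None and now - last_feedback >= feedback_interval:
      feedback_fn("memory transfer progress: %.2f %%" %
                  (100.0 * transferred / total))
      last_feedback = now
    time.sleep(poll_interval)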
8312 def _CreateBlockDev(lu, node, instance, device, force_create,
8314 """Create a tree of block devices on a given node.
8316 If this device type has to be created on secondaries, create it and
8319 If not, just recurse to children keeping the same 'force' value.
8321 @param lu: the lu on whose behalf we execute
8322 @param node: the node on which to create the device
8323 @type instance: L{objects.Instance}
8324 @param instance: the instance which owns the device
8325 @type device: L{objects.Disk}
8326 @param device: the device to create
8327 @type force_create: boolean
8328 @param force_create: whether to force creation of this device; this
8329 will be changed to True whenever we find a device which has the
8330 CreateOnSecondary() attribute
8331 @param info: the extra 'metadata' we should attach to the device
8332 (this will be represented as a LVM tag)
8333 @type force_open: boolean
8334 @param force_open: this parameter will be passed to the
8335 L{backend.BlockdevCreate} function where it specifies
8336 whether we run on primary or not, and it affects both
8337 the child assembly and the device's own Open() execution
8340 if device.CreateOnSecondary():
8344 for child in device.children:
8345 _CreateBlockDev(lu, node, instance, child, force_create,
8348 if not force_create:
8351 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
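# Illustrative sketch (simplified, not used above): _CreateBlockDev first flips
# force_create to True for devices that must exist on secondaries, recurses
# into the children with that flag, and only then creates the device itself if
# the flag ended up set.  A standalone model; the device argument is assumed to
# expose .children and .create_on_secondary, and create_fn is hypothetical.
def _ExampleCreateTree(device, force_create, create_fn):
  if device.create_on_secondary:
    force_create = True
  for child in device.children:
    _ExampleCreateTree(child, force_create, create_fn)
  if not force_create:
    return
  create_fn(device)  # children exist at this point (bottom-up creation)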
8354 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8355 """Create a single block device on a given node.
8357 This will not recurse over children of the device, so they must be created in advance.
8360 @param lu: the lu on whose behalf we execute
8361 @param node: the node on which to create the device
8362 @type instance: L{objects.Instance}
8363 @param instance: the instance which owns the device
8364 @type device: L{objects.Disk}
8365 @param device: the device to create
8366 @param info: the extra 'metadata' we should attach to the device
8367 (this will be represented as a LVM tag)
8368 @type force_open: boolean
8369 @param force_open: this parameter will be passed to the
8370 L{backend.BlockdevCreate} function where it specifies
8371 whether we run on primary or not, and it affects both
8372 the child assembly and the device's own Open() execution
8375 lu.cfg.SetDiskID(device, node)
8376 result = lu.rpc.call_blockdev_create(node, device, device.size,
8377 instance.name, force_open, info)
8378 result.Raise("Can't create block device %s on"
8379 " node %s for instance %s" % (device, node, instance.name))
8380 if device.physical_id is None:
8381 device.physical_id = result.payload
8384 def _GenerateUniqueNames(lu, exts):
8385 """Generate a suitable LV name.
8387 This will generate a logical volume name for the given instance.
8392 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8393 results.append("%s%s" % (new_id, val))
8397 def _ComputeLDParams(disk_template, disk_params):
8398 """Computes Logical Disk parameters from Disk Template parameters.
8400 @type disk_template: string
8401 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8402 @type disk_params: dict
8403 @param disk_params: disk template parameters; dict(template_name -> parameters)
8405 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8406 contains the LD parameters of the node. The tree is flattened in-order.
8409 if disk_template not in constants.DISK_TEMPLATES:
8410 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8413 dt_params = disk_params[disk_template]
8414 if disk_template == constants.DT_DRBD8:
8416 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8417 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8418 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8419 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8420 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8421 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8422 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8423 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8424 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8425 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8426 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8427 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8431 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8434 result.append(drbd_params)
8438 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8441 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8443 result.append(data_params)
8447 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8450 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8452 result.append(meta_params)
8454 elif (disk_template == constants.DT_FILE or
8455 disk_template == constants.DT_SHARED_FILE):
8456 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8458 elif disk_template == constants.DT_PLAIN:
8460 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8463 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8465 result.append(params)
8467 elif disk_template == constants.DT_BLOCK:
8468 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
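# Note on the value built above: the flattening is positional, so callers such
# as _GenerateDiskTemplate can unpack or index it directly -- for DT_DRBD8 it
# is [drbd_params, data_lv_params, meta_lv_params], for the single-device
# templates (plain, file, shared file, block) it is a one-element list, and
# DT_DISKLESS adds nothing.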
8473 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8474 iv_name, p_minor, s_minor, drbd_params, data_params,
8476 """Generate a drbd8 device complete with its children.
8479 assert len(vgnames) == len(names) == 2
8480 port = lu.cfg.AllocatePort()
8481 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8483 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8484 logical_id=(vgnames[0], names[0]),
8486 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8487 logical_id=(vgnames[1], names[1]),
8489 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8490 logical_id=(primary, secondary, port,
8493 children=[dev_data, dev_meta],
8494 iv_name=iv_name, params=drbd_params)
8498 def _GenerateDiskTemplate(lu, template_name,
8499 instance_name, primary_node,
8500 secondary_nodes, disk_info,
8501 file_storage_dir, file_driver,
8502 base_index, feedback_fn, disk_params):
8503 """Generate the entire disk layout for a given template type.
8506 #TODO: compute space requirements
8508 vgname = lu.cfg.GetVGName()
8509 disk_count = len(disk_info)
8511 ld_params = _ComputeLDParams(template_name, disk_params)
8512 if template_name == constants.DT_DISKLESS:
8514 elif template_name == constants.DT_PLAIN:
8516 raise errors.ProgrammerError("Wrong template configuration")
8518 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8519 for i in range(disk_count)])
8520 for idx, disk in enumerate(disk_info):
8521 disk_index = idx + base_index
8522 vg = disk.get(constants.IDISK_VG, vgname)
8523 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8524 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8525 size=disk[constants.IDISK_SIZE],
8526 logical_id=(vg, names[idx]),
8527 iv_name="disk/%d" % disk_index,
8528 mode=disk[constants.IDISK_MODE],
8529 params=ld_params[0])
8530 disks.append(disk_dev)
8531 elif template_name == constants.DT_DRBD8:
8532 drbd_params, data_params, meta_params = ld_params
8533 if len(secondary_nodes) != 1:
8534 raise errors.ProgrammerError("Wrong template configuration")
8535 remote_node = secondary_nodes[0]
8536 minors = lu.cfg.AllocateDRBDMinor(
8537 [primary_node, remote_node] * len(disk_info), instance_name)
8540 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8541 for i in range(disk_count)]):
8542 names.append(lv_prefix + "_data")
8543 names.append(lv_prefix + "_meta")
8544 for idx, disk in enumerate(disk_info):
8545 disk_index = idx + base_index
8546 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8547 data_vg = disk.get(constants.IDISK_VG, vgname)
8548 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8549 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8550 disk[constants.IDISK_SIZE],
8552 names[idx * 2:idx * 2 + 2],
8553 "disk/%d" % disk_index,
8554 minors[idx * 2], minors[idx * 2 + 1],
8555 drbd_params, data_params, meta_params)
8556 disk_dev.mode = disk[constants.IDISK_MODE]
8557 disks.append(disk_dev)
8558 elif template_name == constants.DT_FILE:
8560 raise errors.ProgrammerError("Wrong template configuration")
8562 opcodes.RequireFileStorage()
8564 for idx, disk in enumerate(disk_info):
8565 disk_index = idx + base_index
8566 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8567 size=disk[constants.IDISK_SIZE],
8568 iv_name="disk/%d" % disk_index,
8569 logical_id=(file_driver,
8570 "%s/disk%d" % (file_storage_dir,
8572 mode=disk[constants.IDISK_MODE],
8573 params=ld_params[0])
8574 disks.append(disk_dev)
8575 elif template_name == constants.DT_SHARED_FILE:
8577 raise errors.ProgrammerError("Wrong template configuration")
8579 opcodes.RequireSharedFileStorage()
8581 for idx, disk in enumerate(disk_info):
8582 disk_index = idx + base_index
8583 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8584 size=disk[constants.IDISK_SIZE],
8585 iv_name="disk/%d" % disk_index,
8586 logical_id=(file_driver,
8587 "%s/disk%d" % (file_storage_dir,
8589 mode=disk[constants.IDISK_MODE],
8590 params=ld_params[0])
8591 disks.append(disk_dev)
8592 elif template_name == constants.DT_BLOCK:
8594 raise errors.ProgrammerError("Wrong template configuration")
8596 for idx, disk in enumerate(disk_info):
8597 disk_index = idx + base_index
8598 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8599 size=disk[constants.IDISK_SIZE],
8600 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8601 disk[constants.IDISK_ADOPT]),
8602 iv_name="disk/%d" % disk_index,
8603 mode=disk[constants.IDISK_MODE],
8604 params=ld_params[0])
8605 disks.append(disk_dev)
8608 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
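# Example of the naming scheme above (values illustrative): with base_index=2
# and two requested disks, the new disks get iv_name "disk/2" and "disk/3";
# LVM-backed templates additionally get unique LV names ending in ".disk2" and
# ".disk3", with "_data"/"_meta" suffixes appended for DRBD8.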
8612 def _GetInstanceInfoText(instance):
8613 """Compute that text that should be added to the disk's metadata.
8616 return "originstname+%s" % instance.name
8619 def _CalcEta(time_taken, written, total_size):
8620 """Calculates the ETA based on size written and total size.
8622 @param time_taken: The time taken so far
8623 @param written: amount written so far
8624 @param total_size: The total size of data to be written
8625 @return: The remaining time in seconds
8628 avg_time = time_taken / float(written)
8629 return (total_size - written) * avg_time
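# Worked example for _CalcEta: 512 MiB written in 8 s out of 2048 MiB gives an
# average of 8/512 s per MiB, so the remaining 1536 MiB are estimated at
# 1536 * 8/512 = 24 s; _CalcEta(8, 512, 2048) == 24.0.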
8632 def _WipeDisks(lu, instance):
8633 """Wipes instance disks.
8635 @type lu: L{LogicalUnit}
8636 @param lu: the logical unit on whose behalf we execute
8637 @type instance: L{objects.Instance}
8638 @param instance: the instance whose disks we should create
8639 @return: the success of the wipe
8642 node = instance.primary_node
8644 for device in instance.disks:
8645 lu.cfg.SetDiskID(device, node)
8647 logging.info("Pause sync of instance %s disks", instance.name)
8648 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8650 for idx, success in enumerate(result.payload):
8652 logging.warn("pause-sync of instance %s for disks %d failed",
8656 for idx, device in enumerate(instance.disks):
8657 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8658 # but at most MAX_WIPE_CHUNK
8659 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8660 constants.MIN_WIPE_CHUNK_PERCENT)
8661 # we _must_ make this an int, otherwise rounding errors will occur
8663 wipe_chunk_size = int(wipe_chunk_size)
8665 lu.LogInfo("* Wiping disk %d", idx)
8666 logging.info("Wiping disk %d for instance %s, node %s using"
8667 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8672 start_time = time.time()
8674 while offset < size:
8675 wipe_size = min(wipe_chunk_size, size - offset)
8676 logging.debug("Wiping disk %d, offset %s, chunk %s",
8677 idx, offset, wipe_size)
8678 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8679 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8680 (idx, offset, wipe_size))
8683 if now - last_output >= 60:
8684 eta = _CalcEta(now - start_time, offset, size)
8685 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8686 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8689 logging.info("Resume sync of instance %s disks", instance.name)
8691 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8693 for idx, success in enumerate(result.payload):
8695 lu.LogWarning("Resume sync of disk %d failed, please have a"
8696 " look at the status and troubleshoot the issue", idx)
8697 logging.warn("resume-sync of instance %s for disks %d failed",
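# Illustrative sketch (standalone, not used above): the wipe loop walks each
# disk in chunks of min(MAX_WIPE_CHUNK, MIN_WIPE_CHUNK_PERCENT percent of the
# disk size).  A minimal generator that only computes the (offset, length)
# pairs; max_chunk and chunk_percent are hypothetical stand-ins for the
# constants.
def _ExampleWipeChunks(disk_size, max_chunk, chunk_percent):
  chunk = int(min(max_chunk, disk_size / 100.0 * chunk_percent))
  offset = 0
  while offset < disk_size:
    length = min(chunk, disk_size - offset)
    yield (offset, length)
    offset += length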
8701 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8702 """Create all disks for an instance.
8704 This abstracts away some work from AddInstance.
8706 @type lu: L{LogicalUnit}
8707 @param lu: the logical unit on whose behalf we execute
8708 @type instance: L{objects.Instance}
8709 @param instance: the instance whose disks we should create
8711 @param to_skip: list of indices to skip
8712 @type target_node: string
8713 @param target_node: if passed, overrides the target node for creation
8715 @return: the success of the creation
8718 info = _GetInstanceInfoText(instance)
8719 if target_node is None:
8720 pnode = instance.primary_node
8721 all_nodes = instance.all_nodes
8726 if instance.disk_template in constants.DTS_FILEBASED:
8727 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8728 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8730 result.Raise("Failed to create directory '%s' on"
8731 " node %s" % (file_storage_dir, pnode))
8733 # Note: this needs to be kept in sync with adding of disks in
8734 # LUInstanceSetParams
8735 for idx, device in enumerate(instance.disks):
8736 if to_skip and idx in to_skip:
8738 logging.info("Creating volume %s for instance %s",
8739 device.iv_name, instance.name)
8741 for node in all_nodes:
8742 f_create = node == pnode
8743 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
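# Note on the loop above: f_create is True only on the primary node, so device
# trees that do not request creation on secondaries (e.g. plain LVs) are only
# created there, while DRBD8 trees are created on both nodes through the
# force_create propagation sketched above after _CreateBlockDev.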
8746 def _RemoveDisks(lu, instance, target_node=None):
8747 """Remove all disks for an instance.
8749 This abstracts away some work from `AddInstance()` and
8750 `RemoveInstance()`. Note that in case some of the devices couldn't
8751 be removed, the removal will continue with the other ones (compare
8752 with `_CreateDisks()`).
8754 @type lu: L{LogicalUnit}
8755 @param lu: the logical unit on whose behalf we execute
8756 @type instance: L{objects.Instance}
8757 @param instance: the instance whose disks we should remove
8758 @type target_node: string
8759 @param target_node: used to override the node on which to remove the disks
8761 @return: the success of the removal
8764 logging.info("Removing block devices for instance %s", instance.name)
8767 for device in instance.disks:
8769 edata = [(target_node, device)]
8771 edata = device.ComputeNodeTree(instance.primary_node)
8772 for node, disk in edata:
8773 lu.cfg.SetDiskID(disk, node)
8774 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8776 lu.LogWarning("Could not remove block device %s on node %s,"
8777 " continuing anyway: %s", device.iv_name, node, msg)
8780 # if this is a DRBD disk, return its port to the pool
8781 if device.dev_type in constants.LDS_DRBD:
8782 tcp_port = device.logical_id[2]
8783 lu.cfg.AddTcpUdpPort(tcp_port)
8785 if instance.disk_template == constants.DT_FILE:
8786 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8790 tgt = instance.primary_node
8791 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8793 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8794 file_storage_dir, instance.primary_node, result.fail_msg)
8800 def _ComputeDiskSizePerVG(disk_template, disks):
8801 """Compute disk size requirements in the volume group
8804 def _compute(disks, payload):
8805 """Universal algorithm.
8810 vgs[disk[constants.IDISK_VG]] = \
8811 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8815 # Required free disk space as a function of disk and swap space
8817 constants.DT_DISKLESS: {},
8818 constants.DT_PLAIN: _compute(disks, 0),
8819 # 128 MB are added for drbd metadata for each disk
8820 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8821 constants.DT_FILE: {},
8822 constants.DT_SHARED_FILE: {},
8825 if disk_template not in req_size_dict:
8826 raise errors.ProgrammerError("Disk template '%s' size requirement"
8827 " is unknown" % disk_template)
8829 return req_size_dict[disk_template]
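# Worked example for _ComputeDiskSizePerVG with DT_PLAIN: disks of 1024 MiB and
# 512 MiB in "xenvg" plus 256 MiB in "fastvg" yield {"xenvg": 1536,
# "fastvg": 256}; with DT_DRBD8 each disk additionally contributes
# DRBD_META_SIZE to its volume group.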
8832 def _ComputeDiskSize(disk_template, disks):
8833 """Compute disk size requirements in the volume group
8836 # Required free disk space as a function of disk and swap space
8838 constants.DT_DISKLESS: None,
8839 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8840 # 128 MB are added for drbd metadata for each disk
8842 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8843 constants.DT_FILE: None,
8844 constants.DT_SHARED_FILE: 0,
8845 constants.DT_BLOCK: 0,
8848 if disk_template not in req_size_dict:
8849 raise errors.ProgrammerError("Disk template '%s' size requirement"
8850 " is unknown" % disk_template)
8852 return req_size_dict[disk_template]
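# Worked example for _ComputeDiskSize with DT_DRBD8: two disks of 1024 MiB and
# 2048 MiB require 1024 + 2048 + 2 * DRBD_META_SIZE MiB of free space; with
# the 128 MiB metadata size mentioned above that is 3328 MiB.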
8855 def _FilterVmNodes(lu, nodenames):
8856 """Filters out non-vm_capable nodes from a list.
8858 @type lu: L{LogicalUnit}
8859 @param lu: the logical unit for which we check
8860 @type nodenames: list
8861 @param nodenames: the list of nodes on which we should check
8863 @return: the list of vm-capable nodes
8866 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8867 return [name for name in nodenames if name not in vm_nodes]
8870 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8871 """Hypervisor parameter validation.
8873 This function abstracts the hypervisor parameter validation to be
8874 used in both instance create and instance modify.
8876 @type lu: L{LogicalUnit}
8877 @param lu: the logical unit for which we check
8878 @type nodenames: list
8879 @param nodenames: the list of nodes on which we should check
8880 @type hvname: string
8881 @param hvname: the name of the hypervisor we should use
8882 @type hvparams: dict
8883 @param hvparams: the parameters which we need to check
8884 @raise errors.OpPrereqError: if the parameters are not valid
8887 nodenames = _FilterVmNodes(lu, nodenames)
8889 cluster = lu.cfg.GetClusterInfo()
8890 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8892 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8893 for node in nodenames:
8897 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8900 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8901 """OS parameters validation.
8903 @type lu: L{LogicalUnit}
8904 @param lu: the logical unit for which we check
8905 @type required: boolean
8906 @param required: whether the validation should fail if the OS is not found
8908 @type nodenames: list
8909 @param nodenames: the list of nodes on which we should check
8910 @type osname: string
8911 @param osname: the name of the OS we should use
8912 @type osparams: dict
8913 @param osparams: the parameters which we need to check
8914 @raise errors.OpPrereqError: if the parameters are not valid
8917 nodenames = _FilterVmNodes(lu, nodenames)
8918 result = lu.rpc.call_os_validate(nodenames, required, osname,
8919 [constants.OS_VALIDATE_PARAMETERS],
8921 for node, nres in result.items():
8922 # we don't check for offline cases since this should be run only
8923 # against the master node and/or an instance's nodes
8924 nres.Raise("OS Parameters validation failed on node %s" % node)
8925 if not nres.payload:
8926 lu.LogInfo("OS %s not found on node %s, validation skipped",
8930 class LUInstanceCreate(LogicalUnit):
8931 """Create an instance.
8934 HPATH = "instance-add"
8935 HTYPE = constants.HTYPE_INSTANCE
8938 def CheckArguments(self):
8942 # do not require name_check to ease forward/backward compatibility
8944 if self.op.no_install and self.op.start:
8945 self.LogInfo("No-installation mode selected, disabling startup")
8946 self.op.start = False
8947 # validate/normalize the instance name
8948 self.op.instance_name = \
8949 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8951 if self.op.ip_check and not self.op.name_check:
8952 # TODO: make the ip check more flexible and not depend on the name check
8953 raise errors.OpPrereqError("Cannot do IP address check without a name"
8954 " check", errors.ECODE_INVAL)
8956 # check nics' parameter names
8957 for nic in self.op.nics:
8958 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8960 # check disks. parameter names and consistent adopt/no-adopt strategy
8961 has_adopt = has_no_adopt = False
8962 for disk in self.op.disks:
8963 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8964 if constants.IDISK_ADOPT in disk:
8968 if has_adopt and has_no_adopt:
8969 raise errors.OpPrereqError("Either all disks are adopted or none is",
8972 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8973 raise errors.OpPrereqError("Disk adoption is not supported for the"
8974 " '%s' disk template" %
8975 self.op.disk_template,
8977 if self.op.iallocator is not None:
8978 raise errors.OpPrereqError("Disk adoption not allowed with an"
8979 " iallocator script", errors.ECODE_INVAL)
8980 if self.op.mode == constants.INSTANCE_IMPORT:
8981 raise errors.OpPrereqError("Disk adoption not allowed for"
8982 " instance import", errors.ECODE_INVAL)
8984 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8985 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8986 " but no 'adopt' parameter given" %
8987 self.op.disk_template,
8990 self.adopt_disks = has_adopt
8992 # instance name verification
8993 if self.op.name_check:
8994 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8995 self.op.instance_name = self.hostname1.name
8996 # used in CheckPrereq for ip ping check
8997 self.check_ip = self.hostname1.ip
8999 self.check_ip = None
9001 # file storage checks
9002 if (self.op.file_driver and
9003 self.op.file_driver not in constants.FILE_DRIVER):
9004 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9005 self.op.file_driver, errors.ECODE_INVAL)
9007 if self.op.disk_template == constants.DT_FILE:
9008 opcodes.RequireFileStorage()
9009 elif self.op.disk_template == constants.DT_SHARED_FILE:
9010 opcodes.RequireSharedFileStorage()
9012 ### Node/iallocator related checks
9013 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9015 if self.op.pnode is not None:
9016 if self.op.disk_template in constants.DTS_INT_MIRROR:
9017 if self.op.snode is None:
9018 raise errors.OpPrereqError("The networked disk templates need"
9019 " a mirror node", errors.ECODE_INVAL)
9021 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9023 self.op.snode = None
9025 self._cds = _GetClusterDomainSecret()
9027 if self.op.mode == constants.INSTANCE_IMPORT:
9028 # On import force_variant must be True, because if we forced it at
9029 # initial install, our only chance when importing it back is that it
9031 self.op.force_variant = True
9033 if self.op.no_install:
9034 self.LogInfo("No-installation mode has no effect during import")
9036 elif self.op.mode == constants.INSTANCE_CREATE:
9037 if self.op.os_type is None:
9038 raise errors.OpPrereqError("No guest OS specified",
9040 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9041 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9042 " installation" % self.op.os_type,
9044 if self.op.disk_template is None:
9045 raise errors.OpPrereqError("No disk template specified",
9048 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9049 # Check handshake to ensure both clusters have the same domain secret
9050 src_handshake = self.op.source_handshake
9051 if not src_handshake:
9052 raise errors.OpPrereqError("Missing source handshake",
9055 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9058 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9061 # Load and check source CA
9062 self.source_x509_ca_pem = self.op.source_x509_ca
9063 if not self.source_x509_ca_pem:
9064 raise errors.OpPrereqError("Missing source X509 CA",
9068 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9070 except OpenSSL.crypto.Error, err:
9071 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9072 (err, ), errors.ECODE_INVAL)
9074 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9075 if errcode is not None:
9076 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9079 self.source_x509_ca = cert
9081 src_instance_name = self.op.source_instance_name
9082 if not src_instance_name:
9083 raise errors.OpPrereqError("Missing source instance name",
9086 self.source_instance_name = \
9087 netutils.GetHostname(name=src_instance_name).name
9090 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9091 self.op.mode, errors.ECODE_INVAL)
9093 def ExpandNames(self):
9094 """ExpandNames for CreateInstance.
9096 Figure out the right locks for instance creation.
9099 self.needed_locks = {}
9101 instance_name = self.op.instance_name
9102 # this is just a preventive check, but someone might still add this
9103 # instance in the meantime, and creation will fail at lock-add time
9104 if instance_name in self.cfg.GetInstanceList():
9105 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9106 instance_name, errors.ECODE_EXISTS)
9108 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9110 if self.op.iallocator:
9111 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9112 # specifying a group on instance creation and then selecting nodes from
9114 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9115 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9117 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9118 nodelist = [self.op.pnode]
9119 if self.op.snode is not None:
9120 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9121 nodelist.append(self.op.snode)
9122 self.needed_locks[locking.LEVEL_NODE] = nodelist
9123 # Lock resources of instance's primary and secondary nodes (copy to
9124 # prevent accidental modification)
9125 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9127 # in case of import lock the source node too
9128 if self.op.mode == constants.INSTANCE_IMPORT:
9129 src_node = self.op.src_node
9130 src_path = self.op.src_path
9132 if src_path is None:
9133 self.op.src_path = src_path = self.op.instance_name
9135 if src_node is None:
9136 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9137 self.op.src_node = None
9138 if os.path.isabs(src_path):
9139 raise errors.OpPrereqError("Importing an instance from a path"
9140 " requires a source node option",
9143 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9144 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9145 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9146 if not os.path.isabs(src_path):
9147 self.op.src_path = src_path = \
9148 utils.PathJoin(constants.EXPORT_DIR, src_path)
9150 def _RunAllocator(self):
9151 """Run the allocator based on input opcode.
9154 nics = [n.ToDict() for n in self.nics]
9155 ial = IAllocator(self.cfg, self.rpc,
9156 mode=constants.IALLOCATOR_MODE_ALLOC,
9157 name=self.op.instance_name,
9158 disk_template=self.op.disk_template,
9161 vcpus=self.be_full[constants.BE_VCPUS],
9162 memory=self.be_full[constants.BE_MAXMEM],
9165 hypervisor=self.op.hypervisor,
9168 ial.Run(self.op.iallocator)
9171 raise errors.OpPrereqError("Can't compute nodes using"
9172 " iallocator '%s': %s" %
9173 (self.op.iallocator, ial.info),
9175 if len(ial.result) != ial.required_nodes:
9176 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9177 " of nodes (%s), required %s" %
9178 (self.op.iallocator, len(ial.result),
9179 ial.required_nodes), errors.ECODE_FAULT)
9180 self.op.pnode = ial.result[0]
9181 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9182 self.op.instance_name, self.op.iallocator,
9183 utils.CommaJoin(ial.result))
9184 if ial.required_nodes == 2:
9185 self.op.snode = ial.result[1]
9187 def BuildHooksEnv(self):
9190 This runs on master, primary and secondary nodes of the instance.
9194 "ADD_MODE": self.op.mode,
9196 if self.op.mode == constants.INSTANCE_IMPORT:
9197 env["SRC_NODE"] = self.op.src_node
9198 env["SRC_PATH"] = self.op.src_path
9199 env["SRC_IMAGES"] = self.src_images
9201 env.update(_BuildInstanceHookEnv(
9202 name=self.op.instance_name,
9203 primary_node=self.op.pnode,
9204 secondary_nodes=self.secondaries,
9205 status=self.op.start,
9206 os_type=self.op.os_type,
9207 minmem=self.be_full[constants.BE_MINMEM],
9208 maxmem=self.be_full[constants.BE_MAXMEM],
9209 vcpus=self.be_full[constants.BE_VCPUS],
9210 nics=_NICListToTuple(self, self.nics),
9211 disk_template=self.op.disk_template,
9212 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9213 for d in self.disks],
9216 hypervisor_name=self.op.hypervisor,
9222 def BuildHooksNodes(self):
9223 """Build hooks nodes.
9226 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9229 def _ReadExportInfo(self):
9230 """Reads the export information from disk.
9232 It will override the opcode source node and path with the actual
9233 information, if these two were not specified before.
9235 @return: the export information
9238 assert self.op.mode == constants.INSTANCE_IMPORT
9240 src_node = self.op.src_node
9241 src_path = self.op.src_path
9243 if src_node is None:
9244 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9245 exp_list = self.rpc.call_export_list(locked_nodes)
9247 for node in exp_list:
9248 if exp_list[node].fail_msg:
9250 if src_path in exp_list[node].payload:
9252 self.op.src_node = src_node = node
9253 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9257 raise errors.OpPrereqError("No export found for relative path %s" %
9258 src_path, errors.ECODE_INVAL)
9260 _CheckNodeOnline(self, src_node)
9261 result = self.rpc.call_export_info(src_node, src_path)
9262 result.Raise("No export or invalid export found in dir %s" % src_path)
9264 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9265 if not export_info.has_section(constants.INISECT_EXP):
9266 raise errors.ProgrammerError("Corrupted export config",
9267 errors.ECODE_ENVIRON)
9269 ei_version = export_info.get(constants.INISECT_EXP, "version")
9270 if (int(ei_version) != constants.EXPORT_VERSION):
9271 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9272 (ei_version, constants.EXPORT_VERSION),
9273 errors.ECODE_ENVIRON)
9276 def _ReadExportParams(self, einfo):
9277 """Use export parameters as defaults.
9279 In case the opcode doesn't specify (as in override) some instance
9280 parameters, then try to use them from the export information, if
9284 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9286 if self.op.disk_template is None:
9287 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9288 self.op.disk_template = einfo.get(constants.INISECT_INS,
9290 if self.op.disk_template not in constants.DISK_TEMPLATES:
9291 raise errors.OpPrereqError("Disk template specified in configuration"
9292 " file is not one of the allowed values:"
9293 " %s" % " ".join(constants.DISK_TEMPLATES))
9295 raise errors.OpPrereqError("No disk template specified and the export"
9296 " is missing the disk_template information",
9299 if not self.op.disks:
9301 # TODO: import the disk iv_name too
9302 for idx in range(constants.MAX_DISKS):
9303 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9304 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9305 disks.append({constants.IDISK_SIZE: disk_sz})
9306 self.op.disks = disks
9307 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9308 raise errors.OpPrereqError("No disk info specified and the export"
9309 " is missing the disk information",
9312 if not self.op.nics:
9314 for idx in range(constants.MAX_NICS):
9315 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9317 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9318 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9325 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9326 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9328 if (self.op.hypervisor is None and
9329 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9330 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9332 if einfo.has_section(constants.INISECT_HYP):
9333 # use the export parameters but do not override the ones
9334 # specified by the user
9335 for name, value in einfo.items(constants.INISECT_HYP):
9336 if name not in self.op.hvparams:
9337 self.op.hvparams[name] = value
9339 if einfo.has_section(constants.INISECT_BEP):
9340 # use the parameters, without overriding
9341 for name, value in einfo.items(constants.INISECT_BEP):
9342 if name not in self.op.beparams:
9343 self.op.beparams[name] = value
9344 # Compatibility for the old "memory" be param
9345 if name == constants.BE_MEMORY:
9346 if constants.BE_MAXMEM not in self.op.beparams:
9347 self.op.beparams[constants.BE_MAXMEM] = value
9348 if constants.BE_MINMEM not in self.op.beparams:
9349 self.op.beparams[constants.BE_MINMEM] = value
9351 # try to read the parameters old style, from the main section
9352 for name in constants.BES_PARAMETERS:
9353 if (name not in self.op.beparams and
9354 einfo.has_option(constants.INISECT_INS, name)):
9355 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9357 if einfo.has_section(constants.INISECT_OSP):
9358 # use the parameters, without overriding
9359 for name, value in einfo.items(constants.INISECT_OSP):
9360 if name not in self.op.osparams:
9361 self.op.osparams[name] = value
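# Illustrative sketch (hypothetical values, not part of the original code):
# parameters given in the opcode always win, the export only fills in what
# was left unspecified. For example, with
#
#   self.op.beparams == {constants.BE_VCPUS: 4}
#
# and an export whose backend section carries vcpus=2 and memory=512, this
# method keeps vcpus at 4, imports memory=512 and, for backwards
# compatibility, also copies that value to BE_MAXMEM and BE_MINMEM, since
# neither was given in the opcode.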
9363 def _RevertToDefaults(self, cluster):
9364 """Revert the instance parameters to the default values.
9368 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9369 for name in self.op.hvparams.keys():
9370 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9371 del self.op.hvparams[name]
9373 be_defs = cluster.SimpleFillBE({})
9374 for name in self.op.beparams.keys():
9375 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9376 del self.op.beparams[name]
9378 nic_defs = cluster.SimpleFillNIC({})
9379 for nic in self.op.nics:
9380 for name in constants.NICS_PARAMETERS:
9381 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9384 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9385 for name in self.op.osparams.keys():
9386 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9387 del self.op.osparams[name]
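# Illustrative sketch (hypothetical values, not part of the original code):
# with identify_defaults set, any value identical to the current cluster
# default is dropped again so the new instance keeps tracking future default
# changes. E.g. if the cluster default for the chosen hypervisor already says
# kernel_path="/boot/vmlinuz" and the export carried exactly that value, it
# is removed from self.op.hvparams; a value that differs from the default is
# kept as an explicit override.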
9389 def _CalculateFileStorageDir(self):
9390 """Calculate final instance file storage dir.
9393 # file storage dir calculation/check
9394 self.instance_file_storage_dir = None
9395 if self.op.disk_template in constants.DTS_FILEBASED:
9396 # build the full file storage dir path
9399 if self.op.disk_template == constants.DT_SHARED_FILE:
9400 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9402 get_fsd_fn = self.cfg.GetFileStorageDir
9404 cfg_storagedir = get_fsd_fn()
9405 if not cfg_storagedir:
9406 raise errors.OpPrereqError("Cluster file storage dir not defined")
9407 joinargs.append(cfg_storagedir)
9409 if self.op.file_storage_dir is not None:
9410 joinargs.append(self.op.file_storage_dir)
9412 joinargs.append(self.op.instance_name)
9414 # pylint: disable=W0142
9415 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
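# Illustrative sketch (hypothetical paths, not part of the original code):
# for file-based templates the final directory is the cluster storage dir,
# the optional opcode override and the instance name joined together, e.g.
#
#   utils.PathJoin("/srv/ganeti/file-storage", "mydir", "inst1.example.com")
#   -> "/srv/ganeti/file-storage/mydir/inst1.example.com"
#
# where "/srv/ganeti/file-storage" stands for whatever cfg.GetFileStorageDir()
# (or the shared-file variant) returns.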
9417 def CheckPrereq(self): # pylint: disable=R0914
9418 """Check prerequisites.
9421 self._CalculateFileStorageDir()
9423 if self.op.mode == constants.INSTANCE_IMPORT:
9424 export_info = self._ReadExportInfo()
9425 self._ReadExportParams(export_info)
9427 if (not self.cfg.GetVGName() and
9428 self.op.disk_template not in constants.DTS_NOT_LVM):
9429 raise errors.OpPrereqError("Cluster does not support lvm-based"
9430 " instances", errors.ECODE_STATE)
9432 if (self.op.hypervisor is None or
9433 self.op.hypervisor == constants.VALUE_AUTO):
9434 self.op.hypervisor = self.cfg.GetHypervisorType()
9436 cluster = self.cfg.GetClusterInfo()
9437 enabled_hvs = cluster.enabled_hypervisors
9438 if self.op.hypervisor not in enabled_hvs:
9439 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9440 " cluster (%s)" % (self.op.hypervisor,
9441 ",".join(enabled_hvs)),
9444 # Check tag validity
9445 for tag in self.op.tags:
9446 objects.TaggableObject.ValidateTag(tag)
9448 # check hypervisor parameter syntax (locally)
9449 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9450 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9452 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9453 hv_type.CheckParameterSyntax(filled_hvp)
9454 self.hv_full = filled_hvp
9455 # check that we don't specify global parameters on an instance
9456 _CheckGlobalHvParams(self.op.hvparams)
9458 # fill and remember the beparams dict
9459 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9460 for param, value in self.op.beparams.iteritems():
9461 if value == constants.VALUE_AUTO:
9462 self.op.beparams[param] = default_beparams[param]
9463 objects.UpgradeBeParams(self.op.beparams)
9464 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9465 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9467 # build os parameters
9468 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9470 # now that hvp/bep are in final format, let's reset to defaults,
9472 if self.op.identify_defaults:
9473 self._RevertToDefaults(cluster)
9477 for idx, nic in enumerate(self.op.nics):
9478 nic_mode_req = nic.get(constants.INIC_MODE, None)
9479 nic_mode = nic_mode_req
9480 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9481 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9483 # in routed mode, for the first nic, the default ip is 'auto'
9484 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9485 default_ip_mode = constants.VALUE_AUTO
9487 default_ip_mode = constants.VALUE_NONE
9489 # ip validity checks
9490 ip = nic.get(constants.INIC_IP, default_ip_mode)
9491 if ip is None or ip.lower() == constants.VALUE_NONE:
9493 elif ip.lower() == constants.VALUE_AUTO:
9494 if not self.op.name_check:
9495 raise errors.OpPrereqError("IP address set to auto but name checks"
9496 " have been skipped",
9498 nic_ip = self.hostname1.ip
9500 if not netutils.IPAddress.IsValid(ip):
9501 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9505 # TODO: check the ip address for uniqueness
9506 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9507 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9510 # MAC address verification
9511 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9512 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9513 mac = utils.NormalizeAndValidateMac(mac)
9516 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9517 except errors.ReservationError:
9518 raise errors.OpPrereqError("MAC address %s already in use"
9519 " in cluster" % mac,
9520 errors.ECODE_NOTUNIQUE)
9522 # Build nic parameters
9523 link = nic.get(constants.INIC_LINK, None)
9524 if link == constants.VALUE_AUTO:
9525 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9528 nicparams[constants.NIC_MODE] = nic_mode
9530 nicparams[constants.NIC_LINK] = link
9532 check_params = cluster.SimpleFillNIC(nicparams)
9533 objects.NIC.CheckParameterSyntax(check_params)
9534 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9536 # disk checks/pre-build
9537 default_vg = self.cfg.GetVGName()
9539 for disk in self.op.disks:
9540 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9541 if mode not in constants.DISK_ACCESS_SET:
9542 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9543 mode, errors.ECODE_INVAL)
9544 size = disk.get(constants.IDISK_SIZE, None)
9546 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9549 except (TypeError, ValueError):
9550 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9553 data_vg = disk.get(constants.IDISK_VG, default_vg)
9555 constants.IDISK_SIZE: size,
9556 constants.IDISK_MODE: mode,
9557 constants.IDISK_VG: data_vg,
9559 if constants.IDISK_METAVG in disk:
9560 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9561 if constants.IDISK_ADOPT in disk:
9562 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9563 self.disks.append(new_disk)
9565 if self.op.mode == constants.INSTANCE_IMPORT:
9567 for idx in range(len(self.disks)):
9568 option = "disk%d_dump" % idx
9569 if export_info.has_option(constants.INISECT_INS, option):
9570 # FIXME: are the old os-es, disk sizes, etc. useful?
9571 export_name = export_info.get(constants.INISECT_INS, option)
9572 image = utils.PathJoin(self.op.src_path, export_name)
9573 disk_images.append(image)
9575 disk_images.append(False)
9577 self.src_images = disk_images
9579 old_name = export_info.get(constants.INISECT_INS, "name")
9580 if self.op.instance_name == old_name:
9581 for idx, nic in enumerate(self.nics):
9582 if nic.mac == constants.VALUE_AUTO:
9583 nic_mac_ini = "nic%d_mac" % idx
9584 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9586 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9588 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9589 if self.op.ip_check:
9590 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9591 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9592 (self.check_ip, self.op.instance_name),
9593 errors.ECODE_NOTUNIQUE)
9595 #### mac address generation
9596 # By generating here the mac address both the allocator and the hooks get
9597 # the real final mac address rather than the 'auto' or 'generate' value.
9598 # There is a race condition between the generation and the instance object
9599 # creation, which means that we know the mac is valid now, but we're not
9600 # sure it will be when we actually add the instance. If things go bad
9601 # adding the instance will abort because of a duplicate mac, and the
9602 # creation job will fail.
9603 for nic in self.nics:
9604 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9605 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
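# Illustrative usage sketch (not part of the original code): both calls key
# their reservation on the execution context id, e.g.
#
#   ec_id = self.proc.GetECId()
#   mac = self.cfg.GenerateMAC(ec_id)                # generate and reserve
#   self.cfg.ReserveMAC("aa:00:00:11:22:33", ec_id)  # or reserve a fixed one
#
# so the address picked here is already reserved for this job; as the comment
# above notes, a clash can still surface when the instance is finally added
# to the configuration.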
9609 if self.op.iallocator is not None:
9610 self._RunAllocator()
9612 # Release all unneeded node locks
9613 _ReleaseLocks(self, locking.LEVEL_NODE,
9614 keep=filter(None, [self.op.pnode, self.op.snode,
9616 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9617 keep=filter(None, [self.op.pnode, self.op.snode,
9620 #### node related checks
9622 # check primary node
9623 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9624 assert self.pnode is not None, \
9625 "Cannot retrieve locked node %s" % self.op.pnode
9627 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9628 pnode.name, errors.ECODE_STATE)
9630 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9631 pnode.name, errors.ECODE_STATE)
9632 if not pnode.vm_capable:
9633 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9634 " '%s'" % pnode.name, errors.ECODE_STATE)
9636 self.secondaries = []
9638 # mirror node verification
9639 if self.op.disk_template in constants.DTS_INT_MIRROR:
9640 if self.op.snode == pnode.name:
9641 raise errors.OpPrereqError("The secondary node cannot be the"
9642 " primary node", errors.ECODE_INVAL)
9643 _CheckNodeOnline(self, self.op.snode)
9644 _CheckNodeNotDrained(self, self.op.snode)
9645 _CheckNodeVmCapable(self, self.op.snode)
9646 self.secondaries.append(self.op.snode)
9648 snode = self.cfg.GetNodeInfo(self.op.snode)
9649 if pnode.group != snode.group:
9650 self.LogWarning("The primary and secondary nodes are in two"
9651 " different node groups; the disk parameters"
9652 " from the first disk's node group will be"
9655 nodenames = [pnode.name] + self.secondaries
9657 # Verify instance specs
9659 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9660 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9661 constants.ISPEC_DISK_COUNT: len(self.disks),
9662 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9663 constants.ISPEC_NIC_COUNT: len(self.nics),
9666 group_info = self.cfg.GetNodeGroup(pnode.group)
9667 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9668 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9669 if not self.op.ignore_ipolicy and res:
9670 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9671 " policy: %s") % (pnode.group,
9672 utils.CommaJoin(res)),
9675 # disk parameters (not customizable at instance or node level)
9676 # just use the primary node parameters, ignoring the secondary.
9677 self.diskparams = group_info.diskparams
9679 if not self.adopt_disks:
9680 # Check lv size requirements, if not adopting
9681 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9682 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9684 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9685 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9686 disk[constants.IDISK_ADOPT])
9687 for disk in self.disks])
9688 if len(all_lvs) != len(self.disks):
9689 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9691 for lv_name in all_lvs:
9693 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9694 # to ReserveLV uses the same syntax
9695 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9696 except errors.ReservationError:
9697 raise errors.OpPrereqError("LV named %s used by another instance" %
9698 lv_name, errors.ECODE_NOTUNIQUE)
9700 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9701 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9703 node_lvs = self.rpc.call_lv_list([pnode.name],
9704 vg_names.payload.keys())[pnode.name]
9705 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9706 node_lvs = node_lvs.payload
9708 delta = all_lvs.difference(node_lvs.keys())
9710 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9711 utils.CommaJoin(delta),
9713 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9715 raise errors.OpPrereqError("Online logical volumes found, cannot"
9716 " adopt: %s" % utils.CommaJoin(online_lvs),
9718 # update the size of disk based on what is found
9719 for dsk in self.disks:
9720 dsk[constants.IDISK_SIZE] = \
9721 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9722 dsk[constants.IDISK_ADOPT])][0]))
9724 elif self.op.disk_template == constants.DT_BLOCK:
9725 # Normalize and de-duplicate device paths
9726 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9727 for disk in self.disks])
9728 if len(all_disks) != len(self.disks):
9729 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9731 baddisks = [d for d in all_disks
9732 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9734 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9735 " cannot be adopted" %
9736 (", ".join(baddisks),
9737 constants.ADOPTABLE_BLOCKDEV_ROOT),
9740 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9741 list(all_disks))[pnode.name]
9742 node_disks.Raise("Cannot get block device information from node %s" %
9744 node_disks = node_disks.payload
9745 delta = all_disks.difference(node_disks.keys())
9747 raise errors.OpPrereqError("Missing block device(s): %s" %
9748 utils.CommaJoin(delta),
9750 for dsk in self.disks:
9751 dsk[constants.IDISK_SIZE] = \
9752 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9754 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9756 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9757 # check OS parameters (remotely)
9758 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9760 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9762 # memory check on primary node
9763 #TODO(dynmem): use MINMEM for checking
9765 _CheckNodeFreeMemory(self, self.pnode.name,
9766 "creating instance %s" % self.op.instance_name,
9767 self.be_full[constants.BE_MAXMEM],
9770 self.dry_run_result = list(nodenames)
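# Illustrative sketch (hypothetical numbers, not part of the original code):
# the ispec dict built earlier in this method is checked against the target
# group's instance policy, e.g.
#
#   ispec = {constants.ISPEC_MEM_SIZE: 1024,
#            constants.ISPEC_CPU_COUNT: 2,
#            constants.ISPEC_DISK_COUNT: 1,
#            constants.ISPEC_DISK_SIZE: [10240],
#            constants.ISPEC_NIC_COUNT: 1}
#
# _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec) returns a list of
# human-readable violations (empty if the spec fits); unless ignore_ipolicy
# is set, a non-empty list aborts the creation.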
9772 def Exec(self, feedback_fn):
9773 """Create and add the instance to the cluster.
9776 instance = self.op.instance_name
9777 pnode_name = self.pnode.name
9779 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9780 self.owned_locks(locking.LEVEL_NODE)), \
9781 "Node locks differ from node resource locks"
9783 ht_kind = self.op.hypervisor
9784 if ht_kind in constants.HTS_REQ_PORT:
9785 network_port = self.cfg.AllocatePort()
9789 disks = _GenerateDiskTemplate(self,
9790 self.op.disk_template,
9791 instance, pnode_name,
9794 self.instance_file_storage_dir,
9795 self.op.file_driver,
9800 iobj = objects.Instance(name=instance, os=self.op.os_type,
9801 primary_node=pnode_name,
9802 nics=self.nics, disks=disks,
9803 disk_template=self.op.disk_template,
9804 admin_state=constants.ADMINST_DOWN,
9805 network_port=network_port,
9806 beparams=self.op.beparams,
9807 hvparams=self.op.hvparams,
9808 hypervisor=self.op.hypervisor,
9809 osparams=self.op.osparams,
9813 for tag in self.op.tags:
9816 if self.adopt_disks:
9817 if self.op.disk_template == constants.DT_PLAIN:
9818 # rename LVs to the newly-generated names; we need to construct
9819 # 'fake' LV disks with the old data, plus the new unique_id
9820 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9822 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9823 rename_to.append(t_dsk.logical_id)
9824 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9825 self.cfg.SetDiskID(t_dsk, pnode_name)
9826 result = self.rpc.call_blockdev_rename(pnode_name,
9827 zip(tmp_disks, rename_to))
9828 result.Raise("Failed to rename adopted LVs")
9830 feedback_fn("* creating instance disks...")
9832 _CreateDisks(self, iobj)
9833 except errors.OpExecError:
9834 self.LogWarning("Device creation failed, reverting...")
9836 _RemoveDisks(self, iobj)
9838 self.cfg.ReleaseDRBDMinors(instance)
9841 feedback_fn("adding instance %s to cluster config" % instance)
9843 self.cfg.AddInstance(iobj, self.proc.GetECId())
9845 # Declare that we don't want to remove the instance lock anymore, as we've
9846 # added the instance to the config
9847 del self.remove_locks[locking.LEVEL_INSTANCE]
9849 if self.op.mode == constants.INSTANCE_IMPORT:
9850 # Release unused nodes
9851 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9854 _ReleaseLocks(self, locking.LEVEL_NODE)
9857 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9858 feedback_fn("* wiping instance disks...")
9860 _WipeDisks(self, iobj)
9861 except errors.OpExecError, err:
9862 logging.exception("Wiping disks failed")
9863 self.LogWarning("Wiping instance disks failed (%s)", err)
9867 # Something is already wrong with the disks, don't do anything else
9869 elif self.op.wait_for_sync:
9870 disk_abort = not _WaitForSync(self, iobj)
9871 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9872 # make sure the disks are not degraded (still sync-ing is ok)
9873 feedback_fn("* checking mirrors status")
9874 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9879 _RemoveDisks(self, iobj)
9880 self.cfg.RemoveInstance(iobj.name)
9881 # Make sure the instance lock gets removed
9882 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9883 raise errors.OpExecError("There are some degraded disks for"
9886 # Release all node resource locks
9887 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9889 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9890 if self.op.mode == constants.INSTANCE_CREATE:
9891 if not self.op.no_install:
9892 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9893 not self.op.wait_for_sync)
9895 feedback_fn("* pausing disk sync to install instance OS")
9896 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9898 for idx, success in enumerate(result.payload):
9900 logging.warn("pause-sync of instance %s for disk %d failed",
9903 feedback_fn("* running the instance OS create scripts...")
9904 # FIXME: pass debug option from opcode to backend
9905 os_add_result = \
9906 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9907 self.op.debug_level)
9909 feedback_fn("* resuming disk sync")
9910 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9912 for idx, success in enumerate(result.payload):
9914 logging.warn("resume-sync of instance %s for disk %d failed",
9917 os_add_result.Raise("Could not add os for instance %s"
9918 " on node %s" % (instance, pnode_name))
9920 elif self.op.mode == constants.INSTANCE_IMPORT:
9921 feedback_fn("* running the instance OS import scripts...")
9925 for idx, image in enumerate(self.src_images):
9929 # FIXME: pass debug option from opcode to backend
9930 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9931 constants.IEIO_FILE, (image, ),
9932 constants.IEIO_SCRIPT,
9933 (iobj.disks[idx], idx),
9935 transfers.append(dt)
9937 import_result = \
9938 masterd.instance.TransferInstanceData(self, feedback_fn,
9939 self.op.src_node, pnode_name,
9940 self.pnode.secondary_ip,
9942 if not compat.all(import_result):
9943 self.LogWarning("Some disks for instance %s on node %s were not"
9944 " imported successfully" % (instance, pnode_name))
9946 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9947 feedback_fn("* preparing remote import...")
9948 # The source cluster will stop the instance before attempting to make a
9949 # connection. In some cases stopping an instance can take a long time,
9950 # hence the shutdown timeout is added to the connection timeout.
9951 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9952 self.op.source_shutdown_timeout)
9953 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9955 assert iobj.primary_node == self.pnode.name
9956 disk_results = \
9957 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9958 self.source_x509_ca,
9959 self._cds, timeouts)
9960 if not compat.all(disk_results):
9961 # TODO: Should the instance still be started, even if some disks
9962 # failed to import (valid for local imports, too)?
9963 self.LogWarning("Some disks for instance %s on node %s were not"
9964 " imported successfully" % (instance, pnode_name))
9966 # Run rename script on newly imported instance
9967 assert iobj.name == instance
9968 feedback_fn("Running rename script for %s" % instance)
9969 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9970 self.source_instance_name,
9971 self.op.debug_level)
9973 self.LogWarning("Failed to run rename script for %s on node"
9974 " %s: %s" % (instance, pnode_name, result.fail_msg))
9977 # also checked in the prereq part
9978 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9981 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9984 iobj.admin_state = constants.ADMINST_UP
9985 self.cfg.Update(iobj, feedback_fn)
9986 logging.info("Starting instance %s on node %s", instance, pnode_name)
9987 feedback_fn("* starting instance...")
9988 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9990 result.Raise("Could not start instance")
9992 return list(iobj.all_nodes)
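# Illustrative sketch (hypothetical values, parameter list abridged; not part
# of the original code): callers reach LUInstanceCreate by submitting an
# OpInstanceCreate opcode whose fields mirror the self.op attributes used
# above, roughly:
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}],
#                                 os_type="debootstrap",
#                                 pnode="node1", snode="node2",
#                                 start=True, wait_for_sync=True)
#
# Validation and defaults for these fields live in the opcodes module; the
# iallocator field can be used instead of pnode/snode.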
9995 class LUInstanceConsole(NoHooksLU):
9996 """Connect to an instance's console.
9998 This is somewhat special in that it returns the command line that
9999 you need to run on the master node in order to connect to the console.
10005 def ExpandNames(self):
10006 self.share_locks = _ShareAll()
10007 self._ExpandAndLockInstance()
10009 def CheckPrereq(self):
10010 """Check prerequisites.
10012 This checks that the instance is in the cluster.
10015 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10016 assert self.instance is not None, \
10017 "Cannot retrieve locked instance %s" % self.op.instance_name
10018 _CheckNodeOnline(self, self.instance.primary_node)
10020 def Exec(self, feedback_fn):
10021 """Connect to the console of an instance
10024 instance = self.instance
10025 node = instance.primary_node
10027 node_insts = self.rpc.call_instance_list([node],
10028 [instance.hypervisor])[node]
10029 node_insts.Raise("Can't get node information from %s" % node)
10031 if instance.name not in node_insts.payload:
10032 if instance.admin_state == constants.ADMINST_UP:
10033 state = constants.INSTST_ERRORDOWN
10034 elif instance.admin_state == constants.ADMINST_DOWN:
10035 state = constants.INSTST_ADMINDOWN
10037 state = constants.INSTST_ADMINOFFLINE
10038 raise errors.OpExecError("Instance %s is not running (state %s)" %
10039 (instance.name, state))
10041 logging.debug("Connecting to console of %s on %s", instance.name, node)
10043 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10046 def _GetInstanceConsole(cluster, instance):
10047 """Returns console information for an instance.
10049 @type cluster: L{objects.Cluster}
10050 @type instance: L{objects.Instance}
10054 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10055 # beparams and hvparams are passed separately, to avoid editing the
10056 # instance and then saving the defaults in the instance itself.
10057 hvparams = cluster.FillHV(instance)
10058 beparams = cluster.FillBE(instance)
10059 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10061 assert console.instance == instance.name
10062 assert console.Validate()
10064 return console.ToDict()
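# Illustrative note (not part of the original code): LUInstanceConsole.Exec()
# returns the dict produced here unchanged, e.g.
#
#   console = _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
#
# The keys of that dict depend on the hypervisor's GetInstanceConsole()
# implementation (for example an SSH command line for Xen or a VNC endpoint
# for KVM); clients such as "gnt-instance console" interpret it and run or
# connect to whatever it describes.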
10067 class LUInstanceReplaceDisks(LogicalUnit):
10068 """Replace the disks of an instance.
10071 HPATH = "mirrors-replace"
10072 HTYPE = constants.HTYPE_INSTANCE
10075 def CheckArguments(self):
10076 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10077 self.op.iallocator)
10079 def ExpandNames(self):
10080 self._ExpandAndLockInstance()
10082 assert locking.LEVEL_NODE not in self.needed_locks
10083 assert locking.LEVEL_NODE_RES not in self.needed_locks
10084 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10086 assert self.op.iallocator is None or self.op.remote_node is None, \
10087 "Conflicting options"
10089 if self.op.remote_node is not None:
10090 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10092 # Warning: do not remove the locking of the new secondary here
10093 # unless DRBD8.AddChildren is changed to work in parallel;
10094 # currently it doesn't since parallel invocations of
10095 # FindUnusedMinor will conflict
10096 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10097 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10099 self.needed_locks[locking.LEVEL_NODE] = []
10100 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10102 if self.op.iallocator is not None:
10103 # iallocator will select a new node in the same group
10104 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10106 self.needed_locks[locking.LEVEL_NODE_RES] = []
10108 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10109 self.op.iallocator, self.op.remote_node,
10110 self.op.disks, False, self.op.early_release,
10111 self.op.ignore_ipolicy)
10113 self.tasklets = [self.replacer]
10115 def DeclareLocks(self, level):
10116 if level == locking.LEVEL_NODEGROUP:
10117 assert self.op.remote_node is None
10118 assert self.op.iallocator is not None
10119 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10121 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10122 # Lock all groups used by instance optimistically; this requires going
10123 # via the node before it's locked, requiring verification later on
10124 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10125 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10127 elif level == locking.LEVEL_NODE:
10128 if self.op.iallocator is not None:
10129 assert self.op.remote_node is None
10130 assert not self.needed_locks[locking.LEVEL_NODE]
10132 # Lock member nodes of all locked groups
10133 self.needed_locks[locking.LEVEL_NODE] = [node_name
10134 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10135 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10137 self._LockInstancesNodes()
10138 elif level == locking.LEVEL_NODE_RES:
10140 self.needed_locks[locking.LEVEL_NODE_RES] = \
10141 self.needed_locks[locking.LEVEL_NODE]
10143 def BuildHooksEnv(self):
10144 """Build hooks env.
10146 This runs on the master, the primary and all the secondaries.
10149 instance = self.replacer.instance
10151 "MODE": self.op.mode,
10152 "NEW_SECONDARY": self.op.remote_node,
10153 "OLD_SECONDARY": instance.secondary_nodes[0],
10155 env.update(_BuildInstanceHookEnvByObject(self, instance))
10158 def BuildHooksNodes(self):
10159 """Build hooks nodes.
10162 instance = self.replacer.instance
10164 self.cfg.GetMasterNode(),
10165 instance.primary_node,
10167 if self.op.remote_node is not None:
10168 nl.append(self.op.remote_node)
10171 def CheckPrereq(self):
10172 """Check prerequisites.
10175 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10176 self.op.iallocator is None)
10178 # Verify if node group locks are still correct
10179 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10181 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10183 return LogicalUnit.CheckPrereq(self)
10186 class TLReplaceDisks(Tasklet):
10187 """Replaces disks for an instance.
10189 Note: Locking is not within the scope of this class.
10192 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10193 disks, delay_iallocator, early_release, ignore_ipolicy):
10194 """Initializes this class.
10197 Tasklet.__init__(self, lu)
10200 self.instance_name = instance_name
10202 self.iallocator_name = iallocator_name
10203 self.remote_node = remote_node
10205 self.delay_iallocator = delay_iallocator
10206 self.early_release = early_release
10207 self.ignore_ipolicy = ignore_ipolicy
10210 self.instance = None
10211 self.new_node = None
10212 self.target_node = None
10213 self.other_node = None
10214 self.remote_node_info = None
10215 self.node_secondary_ip = None
10218 def CheckArguments(mode, remote_node, iallocator):
10219 """Helper function for users of this class.
10222 # check for valid parameter combination
10223 if mode == constants.REPLACE_DISK_CHG:
10224 if remote_node is None and iallocator is None:
10225 raise errors.OpPrereqError("When changing the secondary either an"
10226 " iallocator script must be used or the"
10227 " new node given", errors.ECODE_INVAL)
10229 if remote_node is not None and iallocator is not None:
10230 raise errors.OpPrereqError("Give either the iallocator or the new"
10231 " secondary, not both", errors.ECODE_INVAL)
10233 elif remote_node is not None or iallocator is not None:
10234 # Not replacing the secondary
10235 raise errors.OpPrereqError("The iallocator and new node options can"
10236 " only be used when changing the"
10237 " secondary node", errors.ECODE_INVAL)
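# Illustrative summary (not part of the original code) of the combinations
# accepted by CheckArguments above, as (mode, remote_node, iallocator):
#
#   (REPLACE_DISK_PRI, None,    None)    -> ok, replace on the primary
#   (REPLACE_DISK_SEC, None,    None)    -> ok, replace on the secondary
#   (REPLACE_DISK_CHG, "node3", None)    -> ok, explicit new secondary
#   (REPLACE_DISK_CHG, None,    "hail")  -> ok, iallocator picks the node
#   (REPLACE_DISK_CHG, None,    None)    -> OpPrereqError (need one of them)
#   (REPLACE_DISK_CHG, "node3", "hail")  -> OpPrereqError (not both)
#   (REPLACE_DISK_SEC, "node3", None)    -> OpPrereqError (only allowed when
#                                           changing the secondary)
#
# "node3" and "hail" are just example values.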
10240 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10241 """Compute a new secondary node using an IAllocator.
10244 ial = IAllocator(lu.cfg, lu.rpc,
10245 mode=constants.IALLOCATOR_MODE_RELOC,
10246 name=instance_name,
10247 relocate_from=list(relocate_from))
10249 ial.Run(iallocator_name)
10251 if not ial.success:
10252 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10253 " %s" % (iallocator_name, ial.info),
10254 errors.ECODE_NORES)
10256 if len(ial.result) != ial.required_nodes:
10257 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10258 " of nodes (%s), required %s" %
10260 len(ial.result), ial.required_nodes),
10261 errors.ECODE_FAULT)
10263 remote_node_name = ial.result[0]
10265 lu.LogInfo("Selected new secondary for instance '%s': %s",
10266 instance_name, remote_node_name)
10268 return remote_node_name
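# Illustrative sketch (hypothetical names, not part of the original code):
# for an IALLOCATOR_MODE_RELOC request the allocator must return exactly one
# node, so a successful run looks like
#
#   ial.success == True, ial.required_nodes == 1, ial.result == ["node3"]
#
# and "node3" becomes the new secondary; a failure, or a result list whose
# length differs from required_nodes, raises OpPrereqError as above.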
10270 def _FindFaultyDisks(self, node_name):
10271 """Wrapper for L{_FindFaultyInstanceDisks}.
10274 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10277 def _CheckDisksActivated(self, instance):
10278 """Checks if the instance disks are activated.
10280 @param instance: The instance to check disks
10281 @return: True if they are activated, False otherwise
10284 nodes = instance.all_nodes
10286 for idx, dev in enumerate(instance.disks):
10288 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10289 self.cfg.SetDiskID(dev, node)
10291 result = self.rpc.call_blockdev_find(node, dev)
10295 elif result.fail_msg or not result.payload:
10300 def CheckPrereq(self):
10301 """Check prerequisites.
10303 This checks that the instance is in the cluster.
10306 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10307 assert instance is not None, \
10308 "Cannot retrieve locked instance %s" % self.instance_name
10310 if instance.disk_template != constants.DT_DRBD8:
10311 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10312 " instances", errors.ECODE_INVAL)
10314 if len(instance.secondary_nodes) != 1:
10315 raise errors.OpPrereqError("The instance has a strange layout,"
10316 " expected one secondary but found %d" %
10317 len(instance.secondary_nodes),
10318 errors.ECODE_FAULT)
10320 if not self.delay_iallocator:
10321 self._CheckPrereq2()
10323 def _CheckPrereq2(self):
10324 """Check prerequisites, second part.
10326 This function should always be part of CheckPrereq. It was separated and is
10327 now called from Exec because during node evacuation iallocator was only
10328 called with an unmodified cluster model, not taking planned changes into
10329 account.
10331 """
10332 instance = self.instance
10333 secondary_node = instance.secondary_nodes[0]
10335 if self.iallocator_name is None:
10336 remote_node = self.remote_node
10338 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10339 instance.name, instance.secondary_nodes)
10341 if remote_node is None:
10342 self.remote_node_info = None
10344 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10345 "Remote node '%s' is not locked" % remote_node
10347 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10348 assert self.remote_node_info is not None, \
10349 "Cannot retrieve locked node %s" % remote_node
10351 if remote_node == self.instance.primary_node:
10352 raise errors.OpPrereqError("The specified node is the primary node of"
10353 " the instance", errors.ECODE_INVAL)
10355 if remote_node == secondary_node:
10356 raise errors.OpPrereqError("The specified node is already the"
10357 " secondary node of the instance",
10358 errors.ECODE_INVAL)
10360 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10361 constants.REPLACE_DISK_CHG):
10362 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10363 errors.ECODE_INVAL)
10365 if self.mode == constants.REPLACE_DISK_AUTO:
10366 if not self._CheckDisksActivated(instance):
10367 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10368 " first" % self.instance_name,
10369 errors.ECODE_STATE)
10370 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10371 faulty_secondary = self._FindFaultyDisks(secondary_node)
10373 if faulty_primary and faulty_secondary:
10374 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10375 " one node and can not be repaired"
10376 " automatically" % self.instance_name,
10377 errors.ECODE_STATE)
10380 self.disks = faulty_primary
10381 self.target_node = instance.primary_node
10382 self.other_node = secondary_node
10383 check_nodes = [self.target_node, self.other_node]
10384 elif faulty_secondary:
10385 self.disks = faulty_secondary
10386 self.target_node = secondary_node
10387 self.other_node = instance.primary_node
10388 check_nodes = [self.target_node, self.other_node]
10394 # Non-automatic modes
10395 if self.mode == constants.REPLACE_DISK_PRI:
10396 self.target_node = instance.primary_node
10397 self.other_node = secondary_node
10398 check_nodes = [self.target_node, self.other_node]
10400 elif self.mode == constants.REPLACE_DISK_SEC:
10401 self.target_node = secondary_node
10402 self.other_node = instance.primary_node
10403 check_nodes = [self.target_node, self.other_node]
10405 elif self.mode == constants.REPLACE_DISK_CHG:
10406 self.new_node = remote_node
10407 self.other_node = instance.primary_node
10408 self.target_node = secondary_node
10409 check_nodes = [self.new_node, self.other_node]
10411 _CheckNodeNotDrained(self.lu, remote_node)
10412 _CheckNodeVmCapable(self.lu, remote_node)
10414 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10415 assert old_node_info is not None
10416 if old_node_info.offline and not self.early_release:
10417 # doesn't make sense to delay the release
10418 self.early_release = True
10419 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10420 " early-release mode", secondary_node)
10423 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10426 # If not specified all disks should be replaced
10428 self.disks = range(len(self.instance.disks))
10430 # TODO: This is ugly, but right now we can't distinguish between an
10431 # internally submitted opcode and an external one. We should fix that.
10432 if self.remote_node_info:
10433 # We change the node, lets verify it still meets instance policy
10434 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10435 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10437 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10438 ignore=self.ignore_ipolicy)
10440 # TODO: compute disk parameters
10441 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10442 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10443 if primary_node_info.group != secondary_node_info.group:
10444 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10445 " different node groups; the disk parameters of the"
10446 " primary node's group will be applied.")
10448 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10450 for node in check_nodes:
10451 _CheckNodeOnline(self.lu, node)
10453 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10456 if node_name is not None)
10458 # Release unneeded node and node resource locks
10459 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10460 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10462 # Release any owned node group
10463 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10464 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10466 # Check whether disks are valid
10467 for disk_idx in self.disks:
10468 instance.FindDisk(disk_idx)
10470 # Get secondary node IP addresses
10471 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10472 in self.cfg.GetMultiNodeInfo(touched_nodes))
10474 def Exec(self, feedback_fn):
10475 """Execute disk replacement.
10477 This dispatches the disk replacement to the appropriate handler.
10480 if self.delay_iallocator:
10481 self._CheckPrereq2()
10484 # Verify owned locks before starting operation
10485 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10486 assert set(owned_nodes) == set(self.node_secondary_ip), \
10487 ("Incorrect node locks, owning %s, expected %s" %
10488 (owned_nodes, self.node_secondary_ip.keys()))
10489 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10490 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10492 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10493 assert list(owned_instances) == [self.instance_name], \
10494 "Instance '%s' not locked" % self.instance_name
10496 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10497 "Should not own any node group lock at this point"
10500 feedback_fn("No disks need replacement")
10503 feedback_fn("Replacing disk(s) %s for %s" %
10504 (utils.CommaJoin(self.disks), self.instance.name))
10506 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10508 # Activate the instance disks if we're replacing them on a down instance
10510 _StartInstanceDisks(self.lu, self.instance, True)
10513 # Should we replace the secondary node?
10514 if self.new_node is not None:
10515 fn = self._ExecDrbd8Secondary
10517 fn = self._ExecDrbd8DiskOnly
10519 result = fn(feedback_fn)
10521 # Deactivate the instance disks if we're replacing them on a
10524 _SafeShutdownInstanceDisks(self.lu, self.instance)
10526 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10529 # Verify owned locks
10530 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10531 nodes = frozenset(self.node_secondary_ip)
10532 assert ((self.early_release and not owned_nodes) or
10533 (not self.early_release and not (set(owned_nodes) - nodes))), \
10534 ("Not owning the correct locks, early_release=%s, owned=%r,"
10535 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10539 def _CheckVolumeGroup(self, nodes):
10540 self.lu.LogInfo("Checking volume groups")
10542 vgname = self.cfg.GetVGName()
10544 # Make sure volume group exists on all involved nodes
10545 results = self.rpc.call_vg_list(nodes)
10547 raise errors.OpExecError("Can't list volume groups on the nodes")
10550 res = results[node]
10551 res.Raise("Error checking node %s" % node)
10552 if vgname not in res.payload:
10553 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10556 def _CheckDisksExistence(self, nodes):
10557 # Check disk existence
10558 for idx, dev in enumerate(self.instance.disks):
10559 if idx not in self.disks:
10563 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10564 self.cfg.SetDiskID(dev, node)
10566 result = self.rpc.call_blockdev_find(node, dev)
10568 msg = result.fail_msg
10569 if msg or not result.payload:
10571 msg = "disk not found"
10572 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10575 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10576 for idx, dev in enumerate(self.instance.disks):
10577 if idx not in self.disks:
10580 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10583 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10585 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10586 " replace disks for instance %s" %
10587 (node_name, self.instance.name))
10589 def _CreateNewStorage(self, node_name):
10590 """Create new storage on the primary or secondary node.
10592 This is only used for same-node replaces, not for changing the
10593 secondary node, hence we don't want to modify the existing disk.
10598 for idx, dev in enumerate(self.instance.disks):
10599 if idx not in self.disks:
10602 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10604 self.cfg.SetDiskID(dev, node_name)
10606 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10607 names = _GenerateUniqueNames(self.lu, lv_names)
10609 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10611 vg_data = dev.children[0].logical_id[0]
10612 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10613 logical_id=(vg_data, names[0]), params=data_p)
10614 vg_meta = dev.children[1].logical_id[0]
10615 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10616 logical_id=(vg_meta, names[1]), params=meta_p)
10618 new_lvs = [lv_data, lv_meta]
10619 old_lvs = [child.Copy() for child in dev.children]
10620 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10622 # we pass force_create=True to force the LVM creation
10623 for new_lv in new_lvs:
10624 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10625 _GetInstanceInfoText(self.instance), False)
10627 return iv_names
10629 def _CheckDevices(self, node_name, iv_names):
10630 for name, (dev, _, _) in iv_names.iteritems():
10631 self.cfg.SetDiskID(dev, node_name)
10633 result = self.rpc.call_blockdev_find(node_name, dev)
10635 msg = result.fail_msg
10636 if msg or not result.payload:
10638 msg = "disk not found"
10639 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10642 if result.payload.is_degraded:
10643 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10645 def _RemoveOldStorage(self, node_name, iv_names):
10646 for name, (_, old_lvs, _) in iv_names.iteritems():
10647 self.lu.LogInfo("Remove logical volumes for %s" % name)
10650 self.cfg.SetDiskID(lv, node_name)
10652 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10654 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10655 hint="remove unused LVs manually")
10657 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10658 """Replace a disk on the primary or secondary for DRBD 8.
10660 The algorithm for replace is quite complicated:
10662 1. for each disk to be replaced:
10664 1. create new LVs on the target node with unique names
10665 1. detach old LVs from the drbd device
10666 1. rename old LVs to name_replaced.<time_t>
10667 1. rename new LVs to old LVs
10668 1. attach the new LVs (with the old names now) to the drbd device
10670 1. wait for sync across all devices
10672 1. for each modified disk:
10674 1. remove old LVs (which have the name name_replaces.<time_t>)
10676 Failures are not very well handled.
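# Illustrative sketch (hypothetical LV names, not part of the original code)
# of the detach/rename/attach dance performed below for one disk backed by a
# data and a meta LV:
#
#   old LVs: xenvg/aaaa.disk0_data  xenvg/aaaa.disk0_meta
#   new LVs: xenvg/bbbb.disk0_data  xenvg/bbbb.disk0_meta
#
#   1. detach the old LVs from the drbd device
#   2. rename the old LVs to <name>_replaced-<time_t>
#   3. rename the new LVs to the original old names
#   4. re-attach them to the drbd device and wait for sync
#
# The drbd device ends up on fresh LVs carrying the original names, while the
# old data stays around under the *_replaced-* names until
# _RemoveOldStorage() deletes it.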
10681 # Step: check device activation
10682 self.lu.LogStep(1, steps_total, "Check device existence")
10683 self._CheckDisksExistence([self.other_node, self.target_node])
10684 self._CheckVolumeGroup([self.target_node, self.other_node])
10686 # Step: check other node consistency
10687 self.lu.LogStep(2, steps_total, "Check peer consistency")
10688 self._CheckDisksConsistency(self.other_node,
10689 self.other_node == self.instance.primary_node,
10692 # Step: create new storage
10693 self.lu.LogStep(3, steps_total, "Allocate new storage")
10694 iv_names = self._CreateNewStorage(self.target_node)
10696 # Step: for each lv, detach+rename*2+attach
10697 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10698 for dev, old_lvs, new_lvs in iv_names.itervalues():
10699 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10701 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10703 result.Raise("Can't detach drbd from local storage on node"
10704 " %s for device %s" % (self.target_node, dev.iv_name))
10706 #cfg.Update(instance)
10708 # ok, we created the new LVs, so now we know we have the needed
10709 # storage; as such, we proceed on the target node to rename
10710 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10711 # using the assumption that logical_id == physical_id (which in
10712 # turn is the unique_id on that node)
10714 # FIXME(iustin): use a better name for the replaced LVs
10715 temp_suffix = int(time.time())
10716 ren_fn = lambda d, suff: (d.physical_id[0],
10717 d.physical_id[1] + "_replaced-%s" % suff)
10719 # Build the rename list based on what LVs exist on the node
10720 rename_old_to_new = []
10721 for to_ren in old_lvs:
10722 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10723 if not result.fail_msg and result.payload:
10725 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10727 self.lu.LogInfo("Renaming the old LVs on the target node")
10728 result = self.rpc.call_blockdev_rename(self.target_node,
10730 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10732 # Now we rename the new LVs to the old LVs
10733 self.lu.LogInfo("Renaming the new LVs on the target node")
10734 rename_new_to_old = [(new, old.physical_id)
10735 for old, new in zip(old_lvs, new_lvs)]
10736 result = self.rpc.call_blockdev_rename(self.target_node,
10738 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10740 # Intermediate steps of in memory modifications
10741 for old, new in zip(old_lvs, new_lvs):
10742 new.logical_id = old.logical_id
10743 self.cfg.SetDiskID(new, self.target_node)
10745 # We need to modify old_lvs so that removal later removes the
10746 # right LVs, not the newly added ones; note that old_lvs is a
10748 for disk in old_lvs:
10749 disk.logical_id = ren_fn(disk, temp_suffix)
10750 self.cfg.SetDiskID(disk, self.target_node)
10752 # Now that the new lvs have the old name, we can add them to the device
10753 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10754 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10756 msg = result.fail_msg
10758 for new_lv in new_lvs:
10759 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10762 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10763 hint=("cleanup manually the unused logical"
10765 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10767 cstep = itertools.count(5)
10769 if self.early_release:
10770 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10771 self._RemoveOldStorage(self.target_node, iv_names)
10772 # TODO: Check if releasing locks early still makes sense
10773 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10775 # Release all resource locks except those used by the instance
10776 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10777 keep=self.node_secondary_ip.keys())
10779 # Release all node locks while waiting for sync
10780 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10782 # TODO: Can the instance lock be downgraded here? Take the optional disk
10783 # shutdown in the caller into consideration.
10786 # This can fail as the old devices are degraded and _WaitForSync
10787 # does a combined result over all disks, so we don't check its return value
10788 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10789 _WaitForSync(self.lu, self.instance)
10791 # Check all devices manually
10792 self._CheckDevices(self.instance.primary_node, iv_names)
10794 # Step: remove old storage
10795 if not self.early_release:
10796 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10797 self._RemoveOldStorage(self.target_node, iv_names)
10799 def _ExecDrbd8Secondary(self, feedback_fn):
10800 """Replace the secondary node for DRBD 8.
10802 The algorithm for replace is quite complicated:
10803 - for all disks of the instance:
10804 - create new LVs on the new node with same names
10805 - shutdown the drbd device on the old secondary
10806 - disconnect the drbd network on the primary
10807 - create the drbd device on the new secondary
10808 - network attach the drbd on the primary, using an artifice:
10809 the drbd code for Attach() will connect to the network if it
10810 finds a device which is connected to the good local disks but
10811 not network enabled
10812 - wait for sync across all devices
10813 - remove all disks from the old secondary
10815 Failures are not very well handled.
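# Illustrative sketch (hypothetical values, not part of the original code):
# a drbd8 disk's logical_id is the 6-tuple
# (nodeA, nodeB, port, minorA, minorB, secret). Replacing the secondary keeps
# the primary's half and swaps in the new node plus a freshly allocated
# minor, e.g.
#
#   old logical_id: ("node1", "node2", 11000, 0, 3, "secret")
#   new_alone_id  : ("node1", "node3", None,  0, 7, "secret")
#   new_net_id    : ("node1", "node3", 11000, 0, 7, "secret")
#
# The device is first created on the new node with new_alone_id (no port, so
# it comes up standalone) and only switched to new_net_id once the primary
# re-attaches the network further down.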
10820 pnode = self.instance.primary_node
10822 # Step: check device activation
10823 self.lu.LogStep(1, steps_total, "Check device existence")
10824 self._CheckDisksExistence([self.instance.primary_node])
10825 self._CheckVolumeGroup([self.instance.primary_node])
10827 # Step: check other node consistency
10828 self.lu.LogStep(2, steps_total, "Check peer consistency")
10829 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10831 # Step: create new storage
10832 self.lu.LogStep(3, steps_total, "Allocate new storage")
10833 for idx, dev in enumerate(self.instance.disks):
10834 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10835 (self.new_node, idx))
10836 # we pass force_create=True to force LVM creation
10837 for new_lv in dev.children:
10838 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10839 _GetInstanceInfoText(self.instance), False)
10841 # Step 4: dbrd minors and drbd setups changes
10842 # after this, we must manually remove the drbd minors on both the
10843 # error and the success paths
10844 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10845 minors = self.cfg.AllocateDRBDMinor([self.new_node
10846 for dev in self.instance.disks],
10847 self.instance.name)
10848 logging.debug("Allocated minors %r", minors)
10851 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10852 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10853 (self.new_node, idx))
10854 # create new devices on new_node; note that we create two IDs:
10855 # one without port, so the drbd will be activated without
10856 # networking information on the new node at this stage, and one
10857 # with network, for the latter activation in step 4
10858 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10859 if self.instance.primary_node == o_node1:
10862 assert self.instance.primary_node == o_node2, "Three-node instance?"
10865 new_alone_id = (self.instance.primary_node, self.new_node, None,
10866 p_minor, new_minor, o_secret)
10867 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10868 p_minor, new_minor, o_secret)
10870 iv_names[idx] = (dev, dev.children, new_net_id)
10871 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10873 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10874 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10875 logical_id=new_alone_id,
10876 children=dev.children,
10878 params=drbd_params)
10880 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10881 _GetInstanceInfoText(self.instance), False)
10882 except errors.GenericError:
10883 self.cfg.ReleaseDRBDMinors(self.instance.name)
10886 # We have new devices, shut down the drbd on the old secondary
10887 for idx, dev in enumerate(self.instance.disks):
10888 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10889 self.cfg.SetDiskID(dev, self.target_node)
10890 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10892 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10893 "node: %s" % (idx, msg),
10894 hint=("Please cleanup this device manually as"
10895 " soon as possible"))
10897 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10898 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10899 self.instance.disks)[pnode]
10901 msg = result.fail_msg
10903 # detaches didn't succeed (unlikely)
10904 self.cfg.ReleaseDRBDMinors(self.instance.name)
10905 raise errors.OpExecError("Can't detach the disks from the network on"
10906 " old node: %s" % (msg,))
10908 # if we managed to detach at least one, we update all the disks of
10909 # the instance to point to the new secondary
10910 self.lu.LogInfo("Updating instance configuration")
10911 for dev, _, new_logical_id in iv_names.itervalues():
10912 dev.logical_id = new_logical_id
10913 self.cfg.SetDiskID(dev, self.instance.primary_node)
10915 self.cfg.Update(self.instance, feedback_fn)
10917 # Release all node locks (the configuration has been updated)
10918 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10920 # and now perform the drbd attach
10921 self.lu.LogInfo("Attaching primary drbds to new secondary"
10922 " (standalone => connected)")
10923 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10925 self.node_secondary_ip,
10926 self.instance.disks,
10927 self.instance.name,
10929 for to_node, to_result in result.items():
10930 msg = to_result.fail_msg
10932 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10934 hint=("please do a gnt-instance info to see the"
10935 " status of disks"))
10937 cstep = itertools.count(5)
10939 if self.early_release:
10940 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10941 self._RemoveOldStorage(self.target_node, iv_names)
10942 # TODO: Check if releasing locks early still makes sense
10943 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10945 # Release all resource locks except those used by the instance
10946 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10947 keep=self.node_secondary_ip.keys())
10949 # TODO: Can the instance lock be downgraded here? Take the optional disk
10950 # shutdown in the caller into consideration.
10953 # This can fail as the old devices are degraded and _WaitForSync
10954 # does a combined result over all disks, so we don't check its return value
10955 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10956 _WaitForSync(self.lu, self.instance)
10958 # Check all devices manually
10959 self._CheckDevices(self.instance.primary_node, iv_names)
10961 # Step: remove old storage
10962 if not self.early_release:
10963 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10964 self._RemoveOldStorage(self.target_node, iv_names)
10967 class LURepairNodeStorage(NoHooksLU):
10968 """Repairs the volume group on a node.
10973 def CheckArguments(self):
10974 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10976 storage_type = self.op.storage_type
10978 if (constants.SO_FIX_CONSISTENCY not in
10979 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10980 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10981 " repaired" % storage_type,
10982 errors.ECODE_INVAL)
10984 def ExpandNames(self):
10985 self.needed_locks = {
10986 locking.LEVEL_NODE: [self.op.node_name],
10989 def _CheckFaultyDisks(self, instance, node_name):
10990 """Ensure faulty disks abort the opcode or at least warn."""
10992 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10994 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10995 " node '%s'" % (instance.name, node_name),
10996 errors.ECODE_STATE)
10997 except errors.OpPrereqError, err:
10998 if self.op.ignore_consistency:
10999 self.proc.LogWarning(str(err.args[0]))
11003 def CheckPrereq(self):
11004 """Check prerequisites.
11007 # Check whether any instance on this node has faulty disks
11008 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11009 if inst.admin_state != constants.ADMINST_UP:
11011 check_nodes = set(inst.all_nodes)
11012 check_nodes.discard(self.op.node_name)
11013 for inst_node_name in check_nodes:
11014 self._CheckFaultyDisks(inst, inst_node_name)
11016 def Exec(self, feedback_fn):
11017 feedback_fn("Repairing storage unit '%s' on %s ..." %
11018 (self.op.name, self.op.node_name))
11020 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11021 result = self.rpc.call_storage_execute(self.op.node_name,
11022 self.op.storage_type, st_args,
11024 constants.SO_FIX_CONSISTENCY)
11025 result.Raise("Failed to repair storage unit '%s' on %s" %
11026 (self.op.name, self.op.node_name))
11029 class LUNodeEvacuate(NoHooksLU):
11030 """Evacuates instances off a list of nodes.
11035 _MODE2IALLOCATOR = {
11036 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11037 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11038 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11040 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11041 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11042 constants.IALLOCATOR_NEVAC_MODES)
11044 def CheckArguments(self):
11045 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11047 def ExpandNames(self):
11048 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11050 if self.op.remote_node is not None:
11051 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11052 assert self.op.remote_node
11054 if self.op.remote_node == self.op.node_name:
11055 raise errors.OpPrereqError("Can not use evacuated node as a new"
11056 " secondary node", errors.ECODE_INVAL)
11058 if self.op.mode != constants.NODE_EVAC_SEC:
11059 raise errors.OpPrereqError("Without the use of an iallocator only"
11060 " secondary instances can be evacuated",
11061 errors.ECODE_INVAL)
11064 self.share_locks = _ShareAll()
11065 self.needed_locks = {
11066 locking.LEVEL_INSTANCE: [],
11067 locking.LEVEL_NODEGROUP: [],
11068 locking.LEVEL_NODE: [],
11071 # Determine nodes (via group) optimistically, needs verification once locks
11072 # have been acquired
11073 self.lock_nodes = self._DetermineNodes()
11075 def _DetermineNodes(self):
11076 """Gets the list of nodes to operate on.
11079 if self.op.remote_node is None:
11080 # Iallocator will choose any node(s) in the same group
11081 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11083 group_nodes = frozenset([self.op.remote_node])
11085 # Determine nodes to be locked
11086 return set([self.op.node_name]) | group_nodes
11088 def _DetermineInstances(self):
11089 """Builds list of instances to operate on.
11092 assert self.op.mode in constants.NODE_EVAC_MODES
11094 if self.op.mode == constants.NODE_EVAC_PRI:
11095 # Primary instances only
11096 inst_fn = _GetNodePrimaryInstances
11097 assert self.op.remote_node is None, \
11098 "Evacuating primary instances requires iallocator"
11099 elif self.op.mode == constants.NODE_EVAC_SEC:
11100 # Secondary instances only
11101 inst_fn = _GetNodeSecondaryInstances
11104 assert self.op.mode == constants.NODE_EVAC_ALL
11105 inst_fn = _GetNodeInstances
11106 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11108 raise errors.OpPrereqError("Due to an issue with the iallocator"
11109 " interface it is not possible to evacuate"
11110 " all instances at once; specify explicitly"
11111 " whether to evacuate primary or secondary"
11113 errors.ECODE_INVAL)
11115 return inst_fn(self.cfg, self.op.node_name)
11117 def DeclareLocks(self, level):
11118 if level == locking.LEVEL_INSTANCE:
11119 # Lock instances optimistically, needs verification once node and group
11120 # locks have been acquired
11121 self.needed_locks[locking.LEVEL_INSTANCE] = \
11122 set(i.name for i in self._DetermineInstances())
11124 elif level == locking.LEVEL_NODEGROUP:
11125 # Lock node groups for all potential target nodes optimistically, needs
11126 # verification once nodes have been acquired
11127 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11128 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11130 elif level == locking.LEVEL_NODE:
11131 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
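# Note: the instance, group and node lock sets above are computed before any
# locks are actually held, so they may be stale by the time they are granted.
# CheckPrereq below therefore recomputes the node/group/instance sets and asks
# the caller to retry the operation if they no longer match what was locked.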
11133 def CheckPrereq(self):
11135 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11136 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11137 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11139 need_nodes = self._DetermineNodes()
11141 if not owned_nodes.issuperset(need_nodes):
11142 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11143 " locks were acquired, current nodes are"
11144 " are '%s', used to be '%s'; retry the"
11146 (self.op.node_name,
11147 utils.CommaJoin(need_nodes),
11148 utils.CommaJoin(owned_nodes)),
11149 errors.ECODE_STATE)
11151 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11152 if owned_groups != wanted_groups:
11153 raise errors.OpExecError("Node groups changed since locks were acquired,"
11154 " current groups are '%s', used to be '%s';"
11155 " retry the operation" %
11156 (utils.CommaJoin(wanted_groups),
11157 utils.CommaJoin(owned_groups)))
11159 # Determine affected instances
11160 self.instances = self._DetermineInstances()
11161 self.instance_names = [i.name for i in self.instances]
11163 if set(self.instance_names) != owned_instances:
11164 raise errors.OpExecError("Instances on node '%s' changed since locks"
11165 " were acquired, current instances are '%s',"
11166 " used to be '%s'; retry the operation" %
11167 (self.op.node_name,
11168 utils.CommaJoin(self.instance_names),
11169 utils.CommaJoin(owned_instances)))
11171 if self.instance_names:
11172 self.LogInfo("Evacuating instances from node '%s': %s",
11174 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11176 self.LogInfo("No instances to evacuate from node '%s'",
11179 if self.op.remote_node is not None:
11180 for i in self.instances:
11181 if i.primary_node == self.op.remote_node:
11182 raise errors.OpPrereqError("Node %s is the primary node of"
11183 " instance %s, cannot use it as"
11185 (self.op.remote_node, i.name),
11186 errors.ECODE_INVAL)
11188 def Exec(self, feedback_fn):
11189 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11191 if not self.instance_names:
11192 # No instances to evacuate
11195 elif self.op.iallocator is not None:
11196 # TODO: Implement relocation to other group
11197 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11198 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11199 instances=list(self.instance_names))
11201 ial.Run(self.op.iallocator)
11203 if not ial.success:
11204 raise errors.OpPrereqError("Can't compute node evacuation using"
11205 " iallocator '%s': %s" %
11206 (self.op.iallocator, ial.info),
11207 errors.ECODE_NORES)
11209 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11211 elif self.op.remote_node is not None:
11212 assert self.op.mode == constants.NODE_EVAC_SEC
11214 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11215 remote_node=self.op.remote_node,
11217 mode=constants.REPLACE_DISK_CHG,
11218 early_release=self.op.early_release)]
11219 for instance_name in self.instance_names
11223 raise errors.ProgrammerError("No iallocator or remote node")
11225 return ResultWithJobs(jobs)
11228 def _SetOpEarlyRelease(early_release, op):
11229 """Sets C{early_release} flag on opcodes if available.
11233 op.early_release = early_release
11234 except AttributeError:
11235 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11240 def _NodeEvacDest(use_nodes, group, nodes):
11241 """Returns group or nodes depending on caller's choice.
11245 return utils.CommaJoin(nodes)
11250 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11251 """Unpacks the result of change-group and node-evacuate iallocator requests.
11253 Handles results from iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11254 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11256 @type lu: L{LogicalUnit}
11257 @param lu: Logical unit instance
11258 @type alloc_result: tuple/list
11259 @param alloc_result: Result from iallocator
11260 @type early_release: bool
11261 @param early_release: Whether to release locks early if possible
11262 @type use_nodes: bool
11263 @param use_nodes: Whether to display node names instead of groups
11266 (moved, failed, jobs) = alloc_result
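# For reference, alloc_result for these modes is a triple: "moved" is a list
# of (instance name, target group, target nodes), "failed" is a list of
# (instance name, failure reason) and "jobs" is a list of job definitions,
# each being a list of serialized opcodes. An illustrative (made-up) value:
#   ([("inst1", "uuid-of-group2", ["node3", "node4"])],
#    [("inst2", "not enough memory")],
#    [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS"}]])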
11269 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11270 for (name, reason) in failed)
11271 lu.LogWarning("Unable to evacuate instances %s", failreason)
11272 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11275 lu.LogInfo("Instances to be moved: %s",
11276 utils.CommaJoin("%s (to %s)" %
11277 (name, _NodeEvacDest(use_nodes, group, nodes))
11278 for (name, group, nodes) in moved))
11280 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11281 map(opcodes.OpCode.LoadOpCode, ops))
11285 class LUInstanceGrowDisk(LogicalUnit):
11286 """Grow a disk of an instance.
11289 HPATH = "disk-grow"
11290 HTYPE = constants.HTYPE_INSTANCE
11293 def ExpandNames(self):
11294 self._ExpandAndLockInstance()
11295 self.needed_locks[locking.LEVEL_NODE] = []
11296 self.needed_locks[locking.LEVEL_NODE_RES] = []
11297 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11298 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11300 def DeclareLocks(self, level):
11301 if level == locking.LEVEL_NODE:
11302 self._LockInstancesNodes()
11303 elif level == locking.LEVEL_NODE_RES:
11305 self.needed_locks[locking.LEVEL_NODE_RES] = \
11306 self.needed_locks[locking.LEVEL_NODE][:]
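# The copy (note the [:]) is deliberate: the NODE_RES level gets its own list
# so that node locks can later be released while the node resource locks are
# still held (see Exec below).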
11308 def BuildHooksEnv(self):
11309 """Build hooks env.
11311 This runs on the master, the primary and all the secondaries.
11315 "DISK": self.op.disk,
11316 "AMOUNT": self.op.amount,
11318 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11321 def BuildHooksNodes(self):
11322 """Build hooks nodes.
11325 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11328 def CheckPrereq(self):
11329 """Check prerequisites.
11331 This checks that the instance is in the cluster.
11334 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11335 assert instance is not None, \
11336 "Cannot retrieve locked instance %s" % self.op.instance_name
11337 nodenames = list(instance.all_nodes)
11338 for node in nodenames:
11339 _CheckNodeOnline(self, node)
11341 self.instance = instance
11343 if instance.disk_template not in constants.DTS_GROWABLE:
11344 raise errors.OpPrereqError("Instance's disk layout does not support"
11345 " growing", errors.ECODE_INVAL)
11347 self.disk = instance.FindDisk(self.op.disk)
11349 if instance.disk_template not in (constants.DT_FILE,
11350 constants.DT_SHARED_FILE):
11351 # TODO: check the free disk space for file, when that feature will be
11353 _CheckNodesFreeDiskPerVG(self, nodenames,
11354 self.disk.ComputeGrowth(self.op.amount))
11356 def Exec(self, feedback_fn):
11357 """Execute disk grow.
11360 instance = self.instance
11363 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11364 assert (self.owned_locks(locking.LEVEL_NODE) ==
11365 self.owned_locks(locking.LEVEL_NODE_RES))
11367 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11369 raise errors.OpExecError("Cannot activate block device to grow")
11371 feedback_fn("Growing disk %s of instance '%s' by %s" %
11372 (self.op.disk, instance.name,
11373 utils.FormatUnit(self.op.amount, "h")))
11375 # First run all grow ops in dry-run mode
11376 for node in instance.all_nodes:
11377 self.cfg.SetDiskID(disk, node)
11378 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11379 result.Raise("Grow request failed to node %s" % node)
11381 # We know that (as far as we can test) operations across different
11382 # nodes will succeed; time to run it for real
11383 for node in instance.all_nodes:
11384 self.cfg.SetDiskID(disk, node)
11385 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11386 result.Raise("Grow request failed to node %s" % node)
11388 # TODO: Rewrite code to work properly
11389 # DRBD goes into sync mode for a short amount of time after executing the
11390 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11391 # calling "resize" in sync mode fails. Sleeping for a short amount of
11392 # time is a work-around.
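# (A short pause, e.g. time.sleep(5), is the intended work-around at this
# point; the call itself is not shown in this excerpt.)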
11395 disk.RecordGrow(self.op.amount)
11396 self.cfg.Update(instance, feedback_fn)
11398 # Changes have been recorded, release node lock
11399 _ReleaseLocks(self, locking.LEVEL_NODE)
11401 # Downgrade lock while waiting for sync
11402 self.glm.downgrade(locking.LEVEL_INSTANCE)
11404 if self.op.wait_for_sync:
11405 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11407 self.proc.LogWarning("Disk sync-ing has not returned a good"
11408 " status; please check the instance")
11409 if instance.admin_state != constants.ADMINST_UP:
11410 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11411 elif instance.admin_state != constants.ADMINST_UP:
11412 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11413 " not supposed to be running because no wait for"
11414 " sync mode was requested")
11416 assert self.owned_locks(locking.LEVEL_NODE_RES)
11417 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11420 class LUInstanceQueryData(NoHooksLU):
11421 """Query runtime instance data.
11426 def ExpandNames(self):
11427 self.needed_locks = {}
11429 # Use locking if requested or when non-static information is wanted
11430 if not (self.op.static or self.op.use_locking):
11431 self.LogWarning("Non-static data requested, locks need to be acquired")
11432 self.op.use_locking = True
11434 if self.op.instances or not self.op.use_locking:
11435 # Expand instance names right here
11436 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11438 # Will use acquired locks
11439 self.wanted_names = None
11441 if self.op.use_locking:
11442 self.share_locks = _ShareAll()
11444 if self.wanted_names is None:
11445 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11447 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11449 self.needed_locks[locking.LEVEL_NODE] = []
11450 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11452 def DeclareLocks(self, level):
11453 if self.op.use_locking and level == locking.LEVEL_NODE:
11454 self._LockInstancesNodes()
11456 def CheckPrereq(self):
11457 """Check prerequisites.
11459 This only checks the optional instance list against the existing names.
11462 if self.wanted_names is None:
11463 assert self.op.use_locking, "Locking was not used"
11464 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11466 self.wanted_instances = \
11467 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11469 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11470 """Returns the status of a block device
11473 if self.op.static or not node:
11476 self.cfg.SetDiskID(dev, node)
11478 result = self.rpc.call_blockdev_find(node, dev)
11482 result.Raise("Can't compute disk status for %s" % instance_name)
11484 status = result.payload
11488 return (status.dev_path, status.major, status.minor,
11489 status.sync_percent, status.estimated_time,
11490 status.is_degraded, status.ldisk_status)
11492 def _ComputeDiskStatus(self, instance, snode, dev):
11493 """Compute block device status.
11496 if dev.dev_type in constants.LDS_DRBD:
11497 # for DRBD we determine the snode ourselves (instead of the one passed in)
11498 if dev.logical_id[0] == instance.primary_node:
11499 snode = dev.logical_id[1]
11501 snode = dev.logical_id[0]
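# For DRBD8 devices the logical_id is
# (node_a, node_b, port, minor_a, minor_b, shared_secret), so whichever of the
# first two entries is not the primary node is the secondary to report on.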
11503 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11504 instance.name, dev)
11505 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11508 dev_children = map(compat.partial(self._ComputeDiskStatus,
11515 "iv_name": dev.iv_name,
11516 "dev_type": dev.dev_type,
11517 "logical_id": dev.logical_id,
11518 "physical_id": dev.physical_id,
11519 "pstatus": dev_pstatus,
11520 "sstatus": dev_sstatus,
11521 "children": dev_children,
11526 def Exec(self, feedback_fn):
11527 """Gather and return data"""
11530 cluster = self.cfg.GetClusterInfo()
11532 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11533 for i in self.wanted_instances)
11534 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11535 if self.op.static or pnode.offline:
11536 remote_state = None
11538 self.LogWarning("Primary node %s is marked offline, returning static"
11539 " information only for instance %s" %
11540 (pnode.name, instance.name))
11542 remote_info = self.rpc.call_instance_info(instance.primary_node,
11544 instance.hypervisor)
11545 remote_info.Raise("Error checking node %s" % instance.primary_node)
11546 remote_info = remote_info.payload
11547 if remote_info and "state" in remote_info:
11548 remote_state = "up"
11550 if instance.admin_state == constants.ADMINST_UP:
11551 remote_state = "down"
11553 remote_state = instance.admin_state
11555 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11558 result[instance.name] = {
11559 "name": instance.name,
11560 "config_state": instance.admin_state,
11561 "run_state": remote_state,
11562 "pnode": instance.primary_node,
11563 "snodes": instance.secondary_nodes,
11565 # this happens to be the same format used for hooks
11566 "nics": _NICListToTuple(self, instance.nics),
11567 "disk_template": instance.disk_template,
11569 "hypervisor": instance.hypervisor,
11570 "network_port": instance.network_port,
11571 "hv_instance": instance.hvparams,
11572 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11573 "be_instance": instance.beparams,
11574 "be_actual": cluster.FillBE(instance),
11575 "os_instance": instance.osparams,
11576 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11577 "serial_no": instance.serial_no,
11578 "mtime": instance.mtime,
11579 "ctime": instance.ctime,
11580 "uuid": instance.uuid,
11586 class LUInstanceSetParams(LogicalUnit):
11587 """Modifies an instances's parameters.
11590 HPATH = "instance-modify"
11591 HTYPE = constants.HTYPE_INSTANCE
11594 def CheckArguments(self):
11595 if not (self.op.nics or self.op.disks or self.op.disk_template or
11596 self.op.hvparams or self.op.beparams or self.op.os_name or
11597 self.op.online_inst or self.op.offline_inst or
11598 self.op.runtime_mem):
11599 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11601 if self.op.hvparams:
11602 _CheckGlobalHvParams(self.op.hvparams)
11606 for disk_op, disk_dict in self.op.disks:
11607 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11608 if disk_op == constants.DDM_REMOVE:
11609 disk_addremove += 1
11611 elif disk_op == constants.DDM_ADD:
11612 disk_addremove += 1
11614 if not isinstance(disk_op, int):
11615 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11616 if not isinstance(disk_dict, dict):
11617 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11618 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11620 if disk_op == constants.DDM_ADD:
11621 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11622 if mode not in constants.DISK_ACCESS_SET:
11623 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11624 errors.ECODE_INVAL)
11625 size = disk_dict.get(constants.IDISK_SIZE, None)
11627 raise errors.OpPrereqError("Required disk parameter size missing",
11628 errors.ECODE_INVAL)
11631 except (TypeError, ValueError), err:
11632 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11633 str(err), errors.ECODE_INVAL)
11634 disk_dict[constants.IDISK_SIZE] = size
11636 # modification of disk
11637 if constants.IDISK_SIZE in disk_dict:
11638 raise errors.OpPrereqError("Disk size change not possible, use"
11639 " grow-disk", errors.ECODE_INVAL)
11641 if disk_addremove > 1:
11642 raise errors.OpPrereqError("Only one disk add or remove operation"
11643 " supported at a time", errors.ECODE_INVAL)
11645 if self.op.disks and self.op.disk_template is not None:
11646 raise errors.OpPrereqError("Disk template conversion and other disk"
11647 " changes not supported at the same time",
11648 errors.ECODE_INVAL)
11650 if (self.op.disk_template and
11651 self.op.disk_template in constants.DTS_INT_MIRROR and
11652 self.op.remote_node is None):
11653 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11654 " one requires specifying a secondary node",
11655 errors.ECODE_INVAL)
11659 for nic_op, nic_dict in self.op.nics:
11660 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11661 if nic_op == constants.DDM_REMOVE:
11664 elif nic_op == constants.DDM_ADD:
11667 if not isinstance(nic_op, int):
11668 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11669 if not isinstance(nic_dict, dict):
11670 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11671 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11673 # nic_dict should be a dict
11674 nic_ip = nic_dict.get(constants.INIC_IP, None)
11675 if nic_ip is not None:
11676 if nic_ip.lower() == constants.VALUE_NONE:
11677 nic_dict[constants.INIC_IP] = None
11679 if not netutils.IPAddress.IsValid(nic_ip):
11680 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11681 errors.ECODE_INVAL)
11683 nic_bridge = nic_dict.get("bridge", None)
11684 nic_link = nic_dict.get(constants.INIC_LINK, None)
11685 if nic_bridge and nic_link:
11686 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11687 " at the same time", errors.ECODE_INVAL)
11688 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11689 nic_dict["bridge"] = None
11690 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11691 nic_dict[constants.INIC_LINK] = None
11693 if nic_op == constants.DDM_ADD:
11694 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11695 if nic_mac is None:
11696 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11698 if constants.INIC_MAC in nic_dict:
11699 nic_mac = nic_dict[constants.INIC_MAC]
11700 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11701 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11703 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11704 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11705 " modifying an existing nic",
11706 errors.ECODE_INVAL)
11708 if nic_addremove > 1:
11709 raise errors.OpPrereqError("Only one NIC add or remove operation"
11710 " supported at a time", errors.ECODE_INVAL)
11712 def ExpandNames(self):
11713 self._ExpandAndLockInstance()
11714 # Can't even acquire node locks in shared mode as upcoming changes in
11715 # Ganeti 2.6 will start to modify the node object on disk conversion
11716 self.needed_locks[locking.LEVEL_NODE] = []
11717 self.needed_locks[locking.LEVEL_NODE_RES] = []
11718 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11720 def DeclareLocks(self, level):
11721 if level == locking.LEVEL_NODE:
11722 self._LockInstancesNodes()
11723 if self.op.disk_template and self.op.remote_node:
11724 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11725 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11726 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11728 self.needed_locks[locking.LEVEL_NODE_RES] = \
11729 self.needed_locks[locking.LEVEL_NODE][:]
11731 def BuildHooksEnv(self):
11732 """Build hooks env.
11734 This runs on the master, primary and secondaries.
11738 if constants.BE_MINMEM in self.be_new:
11739 args["minmem"] = self.be_new[constants.BE_MINMEM]
11740 if constants.BE_MAXMEM in self.be_new:
11741 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11742 if constants.BE_VCPUS in self.be_new:
11743 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11744 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11745 # information at all.
11748 nic_override = dict(self.op.nics)
11749 for idx, nic in enumerate(self.instance.nics):
11750 if idx in nic_override:
11751 this_nic_override = nic_override[idx]
11753 this_nic_override = {}
11754 if constants.INIC_IP in this_nic_override:
11755 ip = this_nic_override[constants.INIC_IP]
11758 if constants.INIC_MAC in this_nic_override:
11759 mac = this_nic_override[constants.INIC_MAC]
11762 if idx in self.nic_pnew:
11763 nicparams = self.nic_pnew[idx]
11765 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11766 mode = nicparams[constants.NIC_MODE]
11767 link = nicparams[constants.NIC_LINK]
11768 args["nics"].append((ip, mac, mode, link))
11769 if constants.DDM_ADD in nic_override:
11770 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11771 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11772 nicparams = self.nic_pnew[constants.DDM_ADD]
11773 mode = nicparams[constants.NIC_MODE]
11774 link = nicparams[constants.NIC_LINK]
11775 args["nics"].append((ip, mac, mode, link))
11776 elif constants.DDM_REMOVE in nic_override:
11777 del args["nics"][-1]
11779 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11780 if self.op.disk_template:
11781 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11782 if self.op.runtime_mem:
11783 env["RUNTIME_MEMORY"] = self.op.runtime_mem
11787 def BuildHooksNodes(self):
11788 """Build hooks nodes.
11791 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11794 def CheckPrereq(self):
11795 """Check prerequisites.
11797 This checks the requested changes against the instance's current state and the cluster configuration.
11800 # checking the new params on the primary/secondary nodes
11802 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11803 cluster = self.cluster = self.cfg.GetClusterInfo()
11804 assert self.instance is not None, \
11805 "Cannot retrieve locked instance %s" % self.op.instance_name
11806 pnode = instance.primary_node
11807 nodelist = list(instance.all_nodes)
11808 pnode_info = self.cfg.GetNodeInfo(pnode)
11809 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11812 if self.op.os_name and not self.op.force:
11813 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11814 self.op.force_variant)
11815 instance_os = self.op.os_name
11817 instance_os = instance.os
11819 if self.op.disk_template:
11820 if instance.disk_template == self.op.disk_template:
11821 raise errors.OpPrereqError("Instance already has disk template %s" %
11822 instance.disk_template, errors.ECODE_INVAL)
11824 if (instance.disk_template,
11825 self.op.disk_template) not in self._DISK_CONVERSIONS:
11826 raise errors.OpPrereqError("Unsupported disk template conversion from"
11827 " %s to %s" % (instance.disk_template,
11828 self.op.disk_template),
11829 errors.ECODE_INVAL)
11830 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11831 msg="cannot change disk template")
11832 if self.op.disk_template in constants.DTS_INT_MIRROR:
11833 if self.op.remote_node == pnode:
11834 raise errors.OpPrereqError("Given new secondary node %s is the same"
11835 " as the primary node of the instance" %
11836 self.op.remote_node, errors.ECODE_STATE)
11837 _CheckNodeOnline(self, self.op.remote_node)
11838 _CheckNodeNotDrained(self, self.op.remote_node)
11839 # FIXME: here we assume that the old instance type is DT_PLAIN
11840 assert instance.disk_template == constants.DT_PLAIN
11841 disks = [{constants.IDISK_SIZE: d.size,
11842 constants.IDISK_VG: d.logical_id[0]}
11843 for d in instance.disks]
11844 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11845 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11847 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11848 snode_group = self.cfg.GetNodeGroup(snode_info.group)
11849 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
11850 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
11851 ignore=self.op.ignore_ipolicy)
11852 if pnode_info.group != snode_info.group:
11853 self.LogWarning("The primary and secondary nodes are in two"
11854 " different node groups; the disk parameters"
11855 " from the first disk's node group will be"
11858 # hvparams processing
11859 if self.op.hvparams:
11860 hv_type = instance.hypervisor
11861 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11862 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11863 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11866 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11867 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11868 self.hv_proposed = self.hv_new = hv_new # the new actual values
11869 self.hv_inst = i_hvdict # the new dict (without defaults)
11871 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11873 self.hv_new = self.hv_inst = {}
11875 # beparams processing
11876 if self.op.beparams:
11877 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11879 objects.UpgradeBeParams(i_bedict)
11880 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11881 be_new = cluster.SimpleFillBE(i_bedict)
11882 self.be_proposed = self.be_new = be_new # the new actual values
11883 self.be_inst = i_bedict # the new dict (without defaults)
11885 self.be_new = self.be_inst = {}
11886 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11887 be_old = cluster.FillBE(instance)
11889 # CPU param validation -- checking every time a parameter is
11890 # changed to cover all cases where either CPU mask or vcpus have been changed
11892 if (constants.BE_VCPUS in self.be_proposed and
11893 constants.HV_CPU_MASK in self.hv_proposed):
11895 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11896 # Verify mask is consistent with number of vCPUs. Can skip this
11897 # test if only 1 entry in the CPU mask, which means same mask
11898 # is applied to all vCPUs.
11899 if (len(cpu_list) > 1 and
11900 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11901 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11903 (self.be_proposed[constants.BE_VCPUS],
11904 self.hv_proposed[constants.HV_CPU_MASK]),
11905 errors.ECODE_INVAL)
11907 # Only perform this test if a new CPU mask is given
11908 if constants.HV_CPU_MASK in self.hv_new:
11909 # Calculate the largest CPU number requested
11910 max_requested_cpu = max(map(max, cpu_list))
11911 # Check that all of the instance's nodes have enough physical CPUs to
11912 # satisfy the requested CPU mask
11913 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11914 max_requested_cpu + 1, instance.hypervisor)
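# Worked example (made-up values): with BE_VCPUS=2, a mask of "0-1" (a single
# entry) applies to both vCPUs and is accepted, "1:3" (two entries, one per
# vCPU) is accepted, while "1:3:5" (three entries) is rejected; for "1:3" the
# highest CPU number is 3, so every node must have at least 4 physical CPUs.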
11916 # osparams processing
11917 if self.op.osparams:
11918 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11919 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11920 self.os_inst = i_osdict # the new dict (without defaults)
11926 #TODO(dynmem): do the appropriate check involving MINMEM
11927 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11928 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11929 mem_check_list = [pnode]
11930 if be_new[constants.BE_AUTO_BALANCE]:
11931 # either we changed auto_balance to yes or it was from before
11932 mem_check_list.extend(instance.secondary_nodes)
11933 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11934 instance.hypervisor)
11935 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11936 [instance.hypervisor])
11937 pninfo = nodeinfo[pnode]
11938 msg = pninfo.fail_msg
11940 # Assume the primary node is unreachable and go ahead
11941 self.warn.append("Can't get info from primary node %s: %s" %
11944 (_, _, (pnhvinfo, )) = pninfo.payload
11945 if not isinstance(pnhvinfo.get("memory_free", None), int):
11946 self.warn.append("Node data from primary node %s doesn't contain"
11947 " free memory information" % pnode)
11948 elif instance_info.fail_msg:
11949 self.warn.append("Can't get instance runtime information: %s" %
11950 instance_info.fail_msg)
11952 if instance_info.payload:
11953 current_mem = int(instance_info.payload["memory"])
11955 # Assume instance not running
11956 # (there is a slight race condition here, but it's not very
11957 # probable, and we have no other way to check)
11958 # TODO: Describe race condition
11960 #TODO(dynmem): do the appropriate check involving MINMEM
11961 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11962 pnhvinfo["memory_free"])
11964 raise errors.OpPrereqError("This change will prevent the instance"
11965 " from starting, due to %d MB of memory"
11966 " missing on its primary node" %
11968 errors.ECODE_NORES)
11970 if be_new[constants.BE_AUTO_BALANCE]:
11971 for node, nres in nodeinfo.items():
11972 if node not in instance.secondary_nodes:
11974 nres.Raise("Can't get info from secondary node %s" % node,
11975 prereq=True, ecode=errors.ECODE_STATE)
11976 (_, _, (nhvinfo, )) = nres.payload
11977 if not isinstance(nhvinfo.get("memory_free", None), int):
11978 raise errors.OpPrereqError("Secondary node %s didn't return free"
11979 " memory information" % node,
11980 errors.ECODE_STATE)
11981 #TODO(dynmem): do the appropriate check involving MINMEM
11982 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11983 raise errors.OpPrereqError("This change will prevent the instance"
11984 " from failover to its secondary node"
11985 " %s, due to not enough memory" % node,
11986 errors.ECODE_STATE)
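# Worked example for the primary-node check above (made-up numbers): raising
# BE_MAXMEM from 1024 to 4096 MB while the instance currently uses 1024 MB and
# the node reports 2048 MB free gives miss_mem = 4096 - 1024 - 2048 = 1024 > 0,
# so the change is refused unless the force flag is set.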
11988 if self.op.runtime_mem:
11989 remote_info = self.rpc.call_instance_info(instance.primary_node,
11991 instance.hypervisor)
11992 remote_info.Raise("Error checking node %s" % instance.primary_node)
11993 if not remote_info.payload: # not running already
11994 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
11995 errors.ECODE_STATE)
11997 current_memory = remote_info.payload["memory"]
11998 if (not self.op.force and
11999 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12000 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12001 raise errors.OpPrereqError("Instance %s must have memory between %d"
12002 " and %d MB of memory unless --force is"
12003 " given" % (instance.name,
12004 self.be_proposed[constants.BE_MINMEM],
12005 self.be_proposed[constants.BE_MAXMEM]),
12006 errors.ECODE_INVAL)
12008 if self.op.runtime_mem > current_memory:
12009 _CheckNodeFreeMemory(self, instance.primary_node,
12010 "ballooning memory for instance %s" %
12012 self.op.runtime_mem - current_memory,
12013 instance.hypervisor)
12017 self.nic_pinst = {}
12018 for nic_op, nic_dict in self.op.nics:
12019 if nic_op == constants.DDM_REMOVE:
12020 if not instance.nics:
12021 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
12022 errors.ECODE_INVAL)
12024 if nic_op != constants.DDM_ADD:
12026 if not instance.nics:
12027 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
12028 " no NICs" % nic_op,
12029 errors.ECODE_INVAL)
12030 if nic_op < 0 or nic_op >= len(instance.nics):
12031 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
12033 (nic_op, len(instance.nics) - 1),
12034 errors.ECODE_INVAL)
12035 old_nic_params = instance.nics[nic_op].nicparams
12036 old_nic_ip = instance.nics[nic_op].ip
12038 old_nic_params = {}
12041 update_params_dict = dict([(key, nic_dict[key])
12042 for key in constants.NICS_PARAMETERS
12043 if key in nic_dict])
12045 if "bridge" in nic_dict:
12046 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12048 new_nic_params = _GetUpdatedParams(old_nic_params,
12049 update_params_dict)
12050 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12051 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12052 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12053 self.nic_pinst[nic_op] = new_nic_params
12054 self.nic_pnew[nic_op] = new_filled_nic_params
12055 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12057 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12058 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12059 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12061 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12063 self.warn.append(msg)
12065 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12066 if new_nic_mode == constants.NIC_MODE_ROUTED:
12067 if constants.INIC_IP in nic_dict:
12068 nic_ip = nic_dict[constants.INIC_IP]
12070 nic_ip = old_nic_ip
12072 raise errors.OpPrereqError("Cannot set the nic ip to None"
12073 " on a routed nic", errors.ECODE_INVAL)
12074 if constants.INIC_MAC in nic_dict:
12075 nic_mac = nic_dict[constants.INIC_MAC]
12076 if nic_mac is None:
12077 raise errors.OpPrereqError("Cannot set the nic mac to None",
12078 errors.ECODE_INVAL)
12079 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12080 # otherwise generate the mac
12081 nic_dict[constants.INIC_MAC] = \
12082 self.cfg.GenerateMAC(self.proc.GetECId())
12084 # or validate/reserve the current one
12086 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12087 except errors.ReservationError:
12088 raise errors.OpPrereqError("MAC address %s already in use"
12089 " in cluster" % nic_mac,
12090 errors.ECODE_NOTUNIQUE)
12093 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12094 raise errors.OpPrereqError("Disk operations not supported for"
12095 " diskless instances",
12096 errors.ECODE_INVAL)
12097 for disk_op, _ in self.op.disks:
12098 if disk_op == constants.DDM_REMOVE:
12099 if len(instance.disks) == 1:
12100 raise errors.OpPrereqError("Cannot remove the last disk of"
12101 " an instance", errors.ECODE_INVAL)
12102 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12103 msg="cannot remove disks")
12105 if (disk_op == constants.DDM_ADD and
12106 len(instance.disks) >= constants.MAX_DISKS):
12107 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12108 " add more" % constants.MAX_DISKS,
12109 errors.ECODE_STATE)
12110 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12112 if disk_op < 0 or disk_op >= len(instance.disks):
12113 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12115 (disk_op, len(instance.disks)),
12116 errors.ECODE_INVAL)
12118 # disabling the instance
12119 if self.op.offline_inst:
12120 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12121 msg="cannot change instance state to offline")
12123 # enabling the instance
12124 if self.op.online_inst:
12125 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12126 msg="cannot make instance go online")
12128 def _ConvertPlainToDrbd(self, feedback_fn):
12129 """Converts an instance from plain to drbd.
12132 feedback_fn("Converting template to drbd")
12133 instance = self.instance
12134 pnode = instance.primary_node
12135 snode = self.op.remote_node
12137 assert instance.disk_template == constants.DT_PLAIN
12139 # create a fake disk info for _GenerateDiskTemplate
12140 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12141 constants.IDISK_VG: d.logical_id[0]}
12142 for d in instance.disks]
12143 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12144 instance.name, pnode, [snode],
12145 disk_info, None, None, 0, feedback_fn,
12147 info = _GetInstanceInfoText(instance)
12148 feedback_fn("Creating aditional volumes...")
12149 # first, create the missing data and meta devices
12150 for disk in new_disks:
12151 # unfortunately this is... not too nice
12152 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12154 for child in disk.children:
12155 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12156 # at this stage, all new LVs have been created, we can rename the
12158 feedback_fn("Renaming original volumes...")
12159 rename_list = [(o, n.children[0].logical_id)
12160 for (o, n) in zip(instance.disks, new_disks)]
12161 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12162 result.Raise("Failed to rename original LVs")
12164 feedback_fn("Initializing DRBD devices...")
12165 # all child devices are in place, we can now create the DRBD devices
12166 for disk in new_disks:
12167 for node in [pnode, snode]:
12168 f_create = node == pnode
12169 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12171 # at this point, the instance has been modified
12172 instance.disk_template = constants.DT_DRBD8
12173 instance.disks = new_disks
12174 self.cfg.Update(instance, feedback_fn)
12176 # Release node locks while waiting for sync
12177 _ReleaseLocks(self, locking.LEVEL_NODE)
12179 # disks are created, waiting for sync
12180 disk_abort = not _WaitForSync(self, instance,
12181 oneshot=not self.op.wait_for_sync)
12183 raise errors.OpExecError("There are some degraded disks for"
12184 " this instance, please cleanup manually")
12186 # Node resource locks will be released by caller
12188 def _ConvertDrbdToPlain(self, feedback_fn):
12189 """Converts an instance from drbd to plain.
12192 instance = self.instance
12194 assert len(instance.secondary_nodes) == 1
12195 assert instance.disk_template == constants.DT_DRBD8
12197 pnode = instance.primary_node
12198 snode = instance.secondary_nodes[0]
12199 feedback_fn("Converting template to plain")
12201 old_disks = instance.disks
12202 new_disks = [d.children[0] for d in old_disks]
12204 # copy over size and mode
12205 for parent, child in zip(old_disks, new_disks):
12206 child.size = parent.size
12207 child.mode = parent.mode
12209 # update instance structure
12210 instance.disks = new_disks
12211 instance.disk_template = constants.DT_PLAIN
12212 self.cfg.Update(instance, feedback_fn)
12214 # Release locks in case removing disks takes a while
12215 _ReleaseLocks(self, locking.LEVEL_NODE)
12217 feedback_fn("Removing volumes on the secondary node...")
12218 for disk in old_disks:
12219 self.cfg.SetDiskID(disk, snode)
12220 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12222 self.LogWarning("Could not remove block device %s on node %s,"
12223 " continuing anyway: %s", disk.iv_name, snode, msg)
12225 feedback_fn("Removing unneeded volumes on the primary node...")
12226 for idx, disk in enumerate(old_disks):
12227 meta = disk.children[1]
12228 self.cfg.SetDiskID(meta, pnode)
12229 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12231 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12232 " continuing anyway: %s", idx, pnode, msg)
12234 # this is a DRBD disk, return its port to the pool
12235 for disk in old_disks:
12236 tcp_port = disk.logical_id[2]
12237 self.cfg.AddTcpUdpPort(tcp_port)
12239 # Node resource locks will be released by caller
12241 def Exec(self, feedback_fn):
12242 """Modifies an instance.
12244 All parameters take effect only at the next restart of the instance.
12247 # Process here the warnings from CheckPrereq, as we don't have a
12248 # feedback_fn there.
12249 for warn in self.warn:
12250 feedback_fn("WARNING: %s" % warn)
12252 assert ((self.op.disk_template is None) ^
12253 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12254 "Not owning any node resource locks"
12257 instance = self.instance
12260 if self.op.runtime_mem:
12261 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12263 self.op.runtime_mem)
12264 rpcres.Raise("Cannot modify instance runtime memory")
12265 result.append(("runtime_memory", self.op.runtime_mem))
12268 for disk_op, disk_dict in self.op.disks:
12269 if disk_op == constants.DDM_REMOVE:
12270 # remove the last disk
12271 device = instance.disks.pop()
12272 device_idx = len(instance.disks)
12273 for node, disk in device.ComputeNodeTree(instance.primary_node):
12274 self.cfg.SetDiskID(disk, node)
12275 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12277 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12278 " continuing anyway", device_idx, node, msg)
12279 result.append(("disk/%d" % device_idx, "remove"))
12281 # if this is a DRBD disk, return its port to the pool
12282 if device.dev_type in constants.LDS_DRBD:
12283 tcp_port = device.logical_id[2]
12284 self.cfg.AddTcpUdpPort(tcp_port)
12285 elif disk_op == constants.DDM_ADD:
12287 if instance.disk_template in (constants.DT_FILE,
12288 constants.DT_SHARED_FILE):
12289 file_driver, file_path = instance.disks[0].logical_id
12290 file_path = os.path.dirname(file_path)
12292 file_driver = file_path = None
12293 disk_idx_base = len(instance.disks)
12294 new_disk = _GenerateDiskTemplate(self,
12295 instance.disk_template,
12296 instance.name, instance.primary_node,
12297 instance.secondary_nodes,
12303 self.diskparams)[0]
12304 instance.disks.append(new_disk)
12305 info = _GetInstanceInfoText(instance)
12307 logging.info("Creating volume %s for instance %s",
12308 new_disk.iv_name, instance.name)
12309 # Note: this needs to be kept in sync with _CreateDisks
12311 for node in instance.all_nodes:
12312 f_create = node == instance.primary_node
12314 _CreateBlockDev(self, node, instance, new_disk,
12315 f_create, info, f_create)
12316 except errors.OpExecError, err:
12317 self.LogWarning("Failed to create volume %s (%s) on"
12319 new_disk.iv_name, new_disk, node, err)
12320 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12321 (new_disk.size, new_disk.mode)))
12323 # change a given disk
12324 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12325 result.append(("disk.mode/%d" % disk_op,
12326 disk_dict[constants.IDISK_MODE]))
12328 if self.op.disk_template:
12330 check_nodes = set(instance.all_nodes)
12331 if self.op.remote_node:
12332 check_nodes.add(self.op.remote_node)
12333 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12334 owned = self.owned_locks(level)
12335 assert not (check_nodes - owned), \
12336 ("Not owning the correct locks, owning %r, expected at least %r" %
12337 (owned, check_nodes))
12339 r_shut = _ShutdownInstanceDisks(self, instance)
12341 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12342 " proceed with disk template conversion")
12343 mode = (instance.disk_template, self.op.disk_template)
12345 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12347 self.cfg.ReleaseDRBDMinors(instance.name)
12349 result.append(("disk_template", self.op.disk_template))
12351 assert instance.disk_template == self.op.disk_template, \
12352 ("Expected disk template '%s', found '%s'" %
12353 (self.op.disk_template, instance.disk_template))
12355 # Release node and resource locks if there are any (they might already have
12356 # been released during disk conversion)
12357 _ReleaseLocks(self, locking.LEVEL_NODE)
12358 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12361 for nic_op, nic_dict in self.op.nics:
12362 if nic_op == constants.DDM_REMOVE:
12363 # remove the last nic
12364 del instance.nics[-1]
12365 result.append(("nic.%d" % len(instance.nics), "remove"))
12366 elif nic_op == constants.DDM_ADD:
12367 # mac and bridge should be set by now
12368 mac = nic_dict[constants.INIC_MAC]
12369 ip = nic_dict.get(constants.INIC_IP, None)
12370 nicparams = self.nic_pinst[constants.DDM_ADD]
12371 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12372 instance.nics.append(new_nic)
12373 result.append(("nic.%d" % (len(instance.nics) - 1),
12374 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12375 (new_nic.mac, new_nic.ip,
12376 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12377 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12380 for key in (constants.INIC_MAC, constants.INIC_IP):
12381 if key in nic_dict:
12382 setattr(instance.nics[nic_op], key, nic_dict[key])
12383 if nic_op in self.nic_pinst:
12384 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12385 for key, val in nic_dict.iteritems():
12386 result.append(("nic.%s/%d" % (key, nic_op), val))
12389 if self.op.hvparams:
12390 instance.hvparams = self.hv_inst
12391 for key, val in self.op.hvparams.iteritems():
12392 result.append(("hv/%s" % key, val))
12395 if self.op.beparams:
12396 instance.beparams = self.be_inst
12397 for key, val in self.op.beparams.iteritems():
12398 result.append(("be/%s" % key, val))
12401 if self.op.os_name:
12402 instance.os = self.op.os_name
12405 if self.op.osparams:
12406 instance.osparams = self.os_inst
12407 for key, val in self.op.osparams.iteritems():
12408 result.append(("os/%s" % key, val))
12410 # online/offline instance
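# Note that "online" here only clears the offline flag: the admin state goes
# back to "down" (the instance is not started), while "offline" marks the
# instance as administratively offline.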
12411 if self.op.online_inst:
12412 self.cfg.MarkInstanceDown(instance.name)
12413 result.append(("admin_state", constants.ADMINST_DOWN))
12414 if self.op.offline_inst:
12415 self.cfg.MarkInstanceOffline(instance.name)
12416 result.append(("admin_state", constants.ADMINST_OFFLINE))
12418 self.cfg.Update(instance, feedback_fn)
12420 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12421 self.owned_locks(locking.LEVEL_NODE)), \
12422 "All node locks should have been released by now"
12426 _DISK_CONVERSIONS = {
12427 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12428 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12432 class LUInstanceChangeGroup(LogicalUnit):
12433 HPATH = "instance-change-group"
12434 HTYPE = constants.HTYPE_INSTANCE
12437 def ExpandNames(self):
12438 self.share_locks = _ShareAll()
12439 self.needed_locks = {
12440 locking.LEVEL_NODEGROUP: [],
12441 locking.LEVEL_NODE: [],
12444 self._ExpandAndLockInstance()
12446 if self.op.target_groups:
12447 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12448 self.op.target_groups)
12450 self.req_target_uuids = None
12452 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12454 def DeclareLocks(self, level):
12455 if level == locking.LEVEL_NODEGROUP:
12456 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12458 if self.req_target_uuids:
12459 lock_groups = set(self.req_target_uuids)
12461 # Lock all groups used by instance optimistically; this requires going
12462 # via the node before it's locked, requiring verification later on
12463 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12464 lock_groups.update(instance_groups)
12466 # No target groups, need to lock all of them
12467 lock_groups = locking.ALL_SET
12469 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12471 elif level == locking.LEVEL_NODE:
12472 if self.req_target_uuids:
12473 # Lock all nodes used by instances
12474 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12475 self._LockInstancesNodes()
12477 # Lock all nodes in all potential target groups
12478 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12479 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12480 member_nodes = [node_name
12481 for group in lock_groups
12482 for node_name in self.cfg.GetNodeGroup(group).members]
12483 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12485 # Lock all nodes as all groups are potential targets
12486 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12488 def CheckPrereq(self):
12489 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12490 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12491 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12493 assert (self.req_target_uuids is None or
12494 owned_groups.issuperset(self.req_target_uuids))
12495 assert owned_instances == set([self.op.instance_name])
12497 # Get instance information
12498 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12500 # Check if node groups for locked instance are still correct
12501 assert owned_nodes.issuperset(self.instance.all_nodes), \
12502 ("Instance %s's nodes changed while we kept the lock" %
12503 self.op.instance_name)
12505 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12508 if self.req_target_uuids:
12509 # User requested specific target groups
12510 self.target_uuids = self.req_target_uuids
12512 # All groups except those used by the instance are potential targets
12513 self.target_uuids = owned_groups - inst_groups
12515 conflicting_groups = self.target_uuids & inst_groups
12516 if conflicting_groups:
12517 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12518 " used by the instance '%s'" %
12519 (utils.CommaJoin(conflicting_groups),
12520 self.op.instance_name),
12521 errors.ECODE_INVAL)
12523 if not self.target_uuids:
12524 raise errors.OpPrereqError("There are no possible target groups",
12525 errors.ECODE_INVAL)
12527 def BuildHooksEnv(self):
12528 """Build hooks env.
12531 assert self.target_uuids
12534 "TARGET_GROUPS": " ".join(self.target_uuids),
12537 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12541 def BuildHooksNodes(self):
12542 """Build hooks nodes.
12545 mn = self.cfg.GetMasterNode()
12546 return ([mn], [mn])
12548 def Exec(self, feedback_fn):
12549 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12551 assert instances == [self.op.instance_name], "Instance not locked"
12553 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12554 instances=instances, target_groups=list(self.target_uuids))
12556 ial.Run(self.op.iallocator)
12558 if not ial.success:
12559 raise errors.OpPrereqError("Can't compute solution for changing group of"
12560 " instance '%s' using iallocator '%s': %s" %
12561 (self.op.instance_name, self.op.iallocator,
12563 errors.ECODE_NORES)
12565 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12567 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12568 " instance '%s'", len(jobs), self.op.instance_name)
12570 return ResultWithJobs(jobs)
12573 class LUBackupQuery(NoHooksLU):
12574 """Query the exports list
12579 def ExpandNames(self):
12580 self.needed_locks = {}
12581 self.share_locks[locking.LEVEL_NODE] = 1
12582 if not self.op.nodes:
12583 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12585 self.needed_locks[locking.LEVEL_NODE] = \
12586 _GetWantedNodes(self, self.op.nodes)
12588 def Exec(self, feedback_fn):
12589 """Compute the list of all the exported system images.
12592 @return: a dictionary with the structure node->(export-list)
12593 where export-list is a list of the instances exported on
12597 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12598 rpcresult = self.rpc.call_export_list(self.nodes)
12600 for node in rpcresult:
12601 if rpcresult[node].fail_msg:
12602 result[node] = False
12604 result[node] = rpcresult[node].payload
12609 class LUBackupPrepare(NoHooksLU):
12610 """Prepares an instance for an export and returns useful information.
12615 def ExpandNames(self):
12616 self._ExpandAndLockInstance()
12618 def CheckPrereq(self):
12619 """Check prerequisites.
12622 instance_name = self.op.instance_name
12624 self.instance = self.cfg.GetInstanceInfo(instance_name)
12625 assert self.instance is not None, \
12626 "Cannot retrieve locked instance %s" % self.op.instance_name
12627 _CheckNodeOnline(self, self.instance.primary_node)
12629 self._cds = _GetClusterDomainSecret()
12631 def Exec(self, feedback_fn):
12632 """Prepares an instance for an export.
12635 instance = self.instance
12637 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12638 salt = utils.GenerateSecret(8)
12640 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12641 result = self.rpc.call_x509_cert_create(instance.primary_node,
12642 constants.RIE_CERT_VALIDITY)
12643 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12645 (name, cert_pem) = result.payload
12647 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12651 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12652 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12654 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12660 class LUBackupExport(LogicalUnit):
12661 """Export an instance to an image in the cluster.
12664 HPATH = "instance-export"
12665 HTYPE = constants.HTYPE_INSTANCE
12668 def CheckArguments(self):
12669 """Check the arguments.
12672 self.x509_key_name = self.op.x509_key_name
12673 self.dest_x509_ca_pem = self.op.destination_x509_ca
12675 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12676 if not self.x509_key_name:
12677 raise errors.OpPrereqError("Missing X509 key name for encryption",
12678 errors.ECODE_INVAL)
12680 if not self.dest_x509_ca_pem:
12681 raise errors.OpPrereqError("Missing destination X509 CA",
12682 errors.ECODE_INVAL)
12684 def ExpandNames(self):
12685 self._ExpandAndLockInstance()
12687 # Lock all nodes for local exports
12688 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12689 # FIXME: lock only instance primary and destination node
12691 # Sad but true, for now we have to lock all nodes, as we don't know where
12692 # the previous export might be, and in this LU we search for it and
12693 # remove it from its current node. In the future we could fix this by:
12694 # - making a tasklet to search (share-lock all), then create the
12695 # new one, then one to remove, after
12696 # - removing the removal operation altogether
12697 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12699 def DeclareLocks(self, level):
12700 """Last minute lock declaration."""
12701 # All nodes are locked anyway, so nothing to do here.
12703 def BuildHooksEnv(self):
12704 """Build hooks env.
12706 This will run on the master, primary node and target node.
12710 "EXPORT_MODE": self.op.mode,
12711 "EXPORT_NODE": self.op.target_node,
12712 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12713 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12714 # TODO: Generic function for boolean env variables
12715 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12718 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12722 def BuildHooksNodes(self):
12723 """Build hooks nodes.
12726 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12728 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12729 nl.append(self.op.target_node)
12733 def CheckPrereq(self):
12734 """Check prerequisites.
12736 This checks that the instance and node names are valid.
12739 instance_name = self.op.instance_name
12741 self.instance = self.cfg.GetInstanceInfo(instance_name)
12742 assert self.instance is not None, \
12743 "Cannot retrieve locked instance %s" % self.op.instance_name
12744 _CheckNodeOnline(self, self.instance.primary_node)
12746 if (self.op.remove_instance and
12747 self.instance.admin_state == constants.ADMINST_UP and
12748 not self.op.shutdown):
12749 raise errors.OpPrereqError("Can not remove instance without shutting it"
12752 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12753 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12754 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12755 assert self.dst_node is not None
12757 _CheckNodeOnline(self, self.dst_node.name)
12758 _CheckNodeNotDrained(self, self.dst_node.name)
12761 self.dest_disk_info = None
12762 self.dest_x509_ca = None
12764 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12765 self.dst_node = None
12767 if len(self.op.target_node) != len(self.instance.disks):
12768 raise errors.OpPrereqError(("Received destination information for %s"
12769 " disks, but instance %s has %s disks") %
12770 (len(self.op.target_node), instance_name,
12771 len(self.instance.disks)),
12772 errors.ECODE_INVAL)
12774 cds = _GetClusterDomainSecret()
12776 # Check X509 key name
12778 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12779 except (TypeError, ValueError), err:
12780 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12782 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12783 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12784 errors.ECODE_INVAL)
12786 # Load and verify CA
12788 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12789 except OpenSSL.crypto.Error, err:
12790 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12791 (err, ), errors.ECODE_INVAL)
12793 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12794 if errcode is not None:
12795 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12796 (msg, ), errors.ECODE_INVAL)
12798 self.dest_x509_ca = cert
12800 # Verify target information
12802 for idx, disk_data in enumerate(self.op.target_node):
12804 (host, port, magic) = \
12805 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12806 except errors.GenericError, err:
12807 raise errors.OpPrereqError("Target info for disk %s: %s" %
12808 (idx, err), errors.ECODE_INVAL)
12810 disk_info.append((host, port, magic))
12812 assert len(disk_info) == len(self.op.target_node)
12813 self.dest_disk_info = disk_info
12816 raise errors.ProgrammerError("Unhandled export mode %r" %
12819 # instance disk type verification
12820 # TODO: Implement export support for file-based disks
12821 for disk in self.instance.disks:
12822 if disk.dev_type == constants.LD_FILE:
12823 raise errors.OpPrereqError("Export not supported for instances with"
12824 " file-based disks", errors.ECODE_INVAL)
12826 def _CleanupExports(self, feedback_fn):
12827 """Removes exports of current instance from all other nodes.
12829 If an instance in a cluster with nodes A..D was exported to node C, its
12830 exports will be removed from the nodes A, B and D.
12833 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12835 nodelist = self.cfg.GetNodeList()
12836 nodelist.remove(self.dst_node.name)
12838 # on one-node clusters nodelist will be empty after the removal
12839 # if we proceed, the backup would be removed because OpBackupQuery
12840 # substitutes an empty list with the full cluster node list.
12841 iname = self.instance.name
12843 feedback_fn("Removing old exports for instance %s" % iname)
12844 exportlist = self.rpc.call_export_list(nodelist)
12845 for node in exportlist:
12846 if exportlist[node].fail_msg:
12848 if iname in exportlist[node].payload:
12849 msg = self.rpc.call_export_remove(node, iname).fail_msg
12851 self.LogWarning("Could not remove older export for instance %s"
12852 " on node %s: %s", iname, node, msg)
12854 def Exec(self, feedback_fn):
12855 """Export an instance to an image in the cluster.
12858 assert self.op.mode in constants.EXPORT_MODES
12860 instance = self.instance
12861 src_node = instance.primary_node
12863 if self.op.shutdown:
12864 # shutdown the instance, but not the disks
12865 feedback_fn("Shutting down instance %s" % instance.name)
12866 result = self.rpc.call_instance_shutdown(src_node, instance,
12867 self.op.shutdown_timeout)
12868 # TODO: Maybe ignore failures if ignore_remove_failures is set
12869 result.Raise("Could not shutdown instance %s on"
12870 " node %s" % (instance.name, src_node))
12872 # set the disks ID correctly since call_instance_start needs the
12873 # correct drbd minor to create the symlinks
12874 for disk in instance.disks:
12875 self.cfg.SetDiskID(disk, src_node)
12877 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12880 # Activate the instance disks if we're exporting a stopped instance
12881 feedback_fn("Activating disks for %s" % instance.name)
12882 _StartInstanceDisks(self, instance, None)
12885 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12888 helper.CreateSnapshots()
12890 if (self.op.shutdown and
12891 instance.admin_state == constants.ADMINST_UP and
12892 not self.op.remove_instance):
12893 assert not activate_disks
12894 feedback_fn("Starting instance %s" % instance.name)
12895 result = self.rpc.call_instance_start(src_node,
12896 (instance, None, None), False)
12897 msg = result.fail_msg
12899 feedback_fn("Failed to start instance: %s" % msg)
12900 _ShutdownInstanceDisks(self, instance)
12901 raise errors.OpExecError("Could not start instance: %s" % msg)
12903 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12904 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12905 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12906 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12907 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12909 (key_name, _, _) = self.x509_key_name
12912 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12915 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12916 key_name, dest_ca_pem,
12921 # Check for backwards compatibility
12922 assert len(dresults) == len(instance.disks)
12923 assert compat.all(isinstance(i, bool) for i in dresults), \
12924 "Not all results are boolean: %r" % dresults
12928 feedback_fn("Deactivating disks for %s" % instance.name)
12929 _ShutdownInstanceDisks(self, instance)
12931 if not (compat.all(dresults) and fin_resu):
12934 failures.append("export finalization")
12935 if not compat.all(dresults):
12936 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12938 failures.append("disk export: disk(s) %s" % fdsk)
12940 raise errors.OpExecError("Export failed, errors in %s" %
12941 utils.CommaJoin(failures))
12943 # At this point, the export was successful, we can cleanup/finish
12945 # Remove instance if requested
12946 if self.op.remove_instance:
12947 feedback_fn("Removing instance %s" % instance.name)
12948 _RemoveInstance(self, feedback_fn, instance,
12949 self.op.ignore_remove_failures)
12951 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12952 self._CleanupExports(feedback_fn)
12954 return fin_resu, dresults
12957 class LUBackupRemove(NoHooksLU):
12958 """Remove exports related to the named instance.
12963 def ExpandNames(self):
12964 self.needed_locks = {}
12965 # We need all nodes to be locked in order for RemoveExport to work, but we
12966 # don't need to lock the instance itself, as nothing will happen to it (and
12967 # we can also remove exports for a removed instance)
12968 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12970 def Exec(self, feedback_fn):
12971 """Remove any export.
12974 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12975 # If the instance was not found we'll try with the name that was passed in.
12976 # This will only work if it was an FQDN, though.
12978 if not instance_name:
12980 instance_name = self.op.instance_name
12982 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12983 exportlist = self.rpc.call_export_list(locked_nodes)
12985 for node in exportlist:
12986 msg = exportlist[node].fail_msg
12988 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12990 if instance_name in exportlist[node].payload:
12992 result = self.rpc.call_export_remove(node, instance_name)
12993 msg = result.fail_msg
12995 logging.error("Could not remove export for instance %s"
12996 " on node %s: %s", instance_name, node, msg)
12998 if fqdn_warn and not found:
12999 feedback_fn("Export not found. If trying to remove an export belonging"
13000 " to a deleted instance please use its Fully Qualified"
13004 class LUGroupAdd(LogicalUnit):
13005 """Logical unit for creating node groups.
13008 HPATH = "group-add"
13009 HTYPE = constants.HTYPE_GROUP
13012 def ExpandNames(self):
13013 # We need the new group's UUID here so that we can create and acquire the
13014 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13015 # that it should not check whether the UUID exists in the configuration.
13016 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13017 self.needed_locks = {}
13018 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13020 def CheckPrereq(self):
13021 """Check prerequisites.
13023 This checks that the given group name is not an existing node group
13028 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13029 except errors.OpPrereqError:
13032 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13033 " node group (UUID: %s)" %
13034 (self.op.group_name, existing_uuid),
13035 errors.ECODE_EXISTS)
13037 if self.op.ndparams:
13038 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13040 if self.op.hv_state:
13041 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13043 self.new_hv_state = None
13045 if self.op.disk_state:
13046 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13048 self.new_disk_state = None
13050 if self.op.diskparams:
13051 for templ in constants.DISK_TEMPLATES:
13052 if templ not in self.op.diskparams:
13053 self.op.diskparams[templ] = {}
13054 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13056 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13058 if self.op.ipolicy:
13059 cluster = self.cfg.GetClusterInfo()
13060 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13062 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13063 except errors.ConfigurationError, err:
13064 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13065 errors.ECODE_INVAL)
13067 def BuildHooksEnv(self):
13068 """Build hooks env.
13072 "GROUP_NAME": self.op.group_name,
13075 def BuildHooksNodes(self):
13076 """Build hooks nodes.
13079 mn = self.cfg.GetMasterNode()
13080 return ([mn], [mn])
13082 def Exec(self, feedback_fn):
13083 """Add the node group to the cluster.
13086 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13087 uuid=self.group_uuid,
13088 alloc_policy=self.op.alloc_policy,
13089 ndparams=self.op.ndparams,
13090 diskparams=self.op.diskparams,
13091 ipolicy=self.op.ipolicy,
13092 hv_state_static=self.new_hv_state,
13093 disk_state_static=self.new_disk_state)
13095 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13096 del self.remove_locks[locking.LEVEL_NODEGROUP]
13099 class LUGroupAssignNodes(NoHooksLU):
13100 """Logical unit for assigning nodes to groups.
13105 def ExpandNames(self):
13106 # These raise errors.OpPrereqError on their own:
13107 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13108 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13110 # We want to lock all the affected nodes and groups. We have readily
13111 # available the list of nodes, and the *destination* group. To gather the
13112 # list of "source" groups, we need to fetch node information later on.
13113 self.needed_locks = {
13114 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13115 locking.LEVEL_NODE: self.op.nodes,
13118 def DeclareLocks(self, level):
13119 if level == locking.LEVEL_NODEGROUP:
13120 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13122 # Try to get all affected nodes' groups without having the group or node
13123 # lock yet. Needs verification later in the code flow.
13124 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13126 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13128 def CheckPrereq(self):
13129 """Check prerequisites.
13132 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13133 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13134 frozenset(self.op.nodes))
13136 expected_locks = (set([self.group_uuid]) |
13137 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13138 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13139 if actual_locks != expected_locks:
13140 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13141 " current groups are '%s', used to be '%s'" %
13142 (utils.CommaJoin(expected_locks),
13143 utils.CommaJoin(actual_locks)))
13145 self.node_data = self.cfg.GetAllNodesInfo()
13146 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13147 instance_data = self.cfg.GetAllInstancesInfo()
13149 if self.group is None:
13150 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13151 (self.op.group_name, self.group_uuid))
13153 (new_splits, previous_splits) = \
13154 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13155 for node in self.op.nodes],
13156 self.node_data, instance_data)
13159 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13161 if not self.op.force:
13162 raise errors.OpExecError("The following instances get split by this"
13163 " change and --force was not given: %s" %
13166 self.LogWarning("This operation will split the following instances: %s",
13169 if previous_splits:
13170 self.LogWarning("In addition, these already-split instances continue"
13171 " to be split across groups: %s",
13172 utils.CommaJoin(utils.NiceSort(previous_splits)))
13174 def Exec(self, feedback_fn):
13175 """Assign nodes to a new group.
13178 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13180 self.cfg.AssignGroupNodes(mods)
13183 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13184 """Check for split instances after a node assignment.
13186 This method considers a series of node assignments as an atomic operation,
13187 and returns information about split instances after applying the set of
13188 changes.
13190 In particular, it returns information about newly split instances, and
13191 instances that were already split, and remain so after the change.
13193 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13194 considered.
13196 @type changes: list of (node_name, new_group_uuid) pairs.
13197 @param changes: list of node assignments to consider.
13198 @param node_data: a dict with data for all nodes
13199 @param instance_data: a dict with all instances to consider
13200 @rtype: a two-tuple
13201 @return: a list of instances that were previously okay and end up split as a
13202 consequence of this change, and a list of instances that were previously
13203 split and that this change does not fix.
13206 changed_nodes = dict((node, group) for node, group in changes
13207 if node_data[node].group != group)
13209 all_split_instances = set()
13210 previously_split_instances = set()
13212 def InstanceNodes(instance):
13213 return [instance.primary_node] + list(instance.secondary_nodes)
13215 for inst in instance_data.values():
13216 if inst.disk_template not in constants.DTS_INT_MIRROR:
13219 instance_nodes = InstanceNodes(inst)
13221 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13222 previously_split_instances.add(inst.name)
13224 if len(set(changed_nodes.get(node, node_data[node].group)
13225 for node in instance_nodes)) > 1:
13226 all_split_instances.add(inst.name)
13228 return (list(all_split_instances - previously_split_instances),
13229 list(previously_split_instances & all_split_instances))
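# Illustrative sketch (hypothetical data, not used by the module) of the split
# detection above: a mirrored instance whose secondary node moves to another
# group becomes newly split.  Node and instance objects are stand-ins built
# with namedtuples just for this example.
#
#   >>> from collections import namedtuple
#   >>> Node = namedtuple("Node", ["group"])
#   >>> Inst = namedtuple("Inst", ["name", "disk_template", "primary_node",
#   ...                            "secondary_nodes"])
#   >>> node_data = {"node1": Node("group-a"), "node2": Node("group-a")}
#   >>> instance_data = {"inst1": Inst("inst1", "drbd", "node1", ["node2"])}
#   >>> changes = [("node2", "group-b")]
#   >>> # With these inputs the check reports "inst1" as newly split: its
#   >>> # primary stays in group-a while its secondary moves to group-b.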
13232 class _GroupQuery(_QueryBase):
13233 FIELDS = query.GROUP_FIELDS
13235 def ExpandNames(self, lu):
13236 lu.needed_locks = {}
13238 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13239 self._cluster = lu.cfg.GetClusterInfo()
13240 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13243 self.wanted = [name_to_uuid[name]
13244 for name in utils.NiceSort(name_to_uuid.keys())]
13246 # Accept names to be either names or UUIDs.
13249 all_uuid = frozenset(self._all_groups.keys())
13251 for name in self.names:
13252 if name in all_uuid:
13253 self.wanted.append(name)
13254 elif name in name_to_uuid:
13255 self.wanted.append(name_to_uuid[name])
13257 missing.append(name)
13260 raise errors.OpPrereqError("Some groups do not exist: %s" %
13261 utils.CommaJoin(missing),
13262 errors.ECODE_NOENT)
13264 def DeclareLocks(self, lu, level):
13267 def _GetQueryData(self, lu):
13268 """Computes the list of node groups and their attributes.
13271 do_nodes = query.GQ_NODE in self.requested_data
13272 do_instances = query.GQ_INST in self.requested_data
13274 group_to_nodes = None
13275 group_to_instances = None
13277 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13278 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13279 # latter GetAllInstancesInfo() is not enough, for we have to go through
13280 # instance->node. Hence, we will need to process nodes even if we only need
13281 # instance information.
13282 if do_nodes or do_instances:
13283 all_nodes = lu.cfg.GetAllNodesInfo()
13284 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13287 for node in all_nodes.values():
13288 if node.group in group_to_nodes:
13289 group_to_nodes[node.group].append(node.name)
13290 node_to_group[node.name] = node.group
13293 all_instances = lu.cfg.GetAllInstancesInfo()
13294 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13296 for instance in all_instances.values():
13297 node = instance.primary_node
13298 if node in node_to_group:
13299 group_to_instances[node_to_group[node]].append(instance.name)
13302 # Do not pass on node information if it was not requested.
13303 group_to_nodes = None
13305 return query.GroupQueryData(self._cluster,
13306 [self._all_groups[uuid]
13307 for uuid in self.wanted],
13308 group_to_nodes, group_to_instances)
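# Illustrative sketch (hypothetical names): the two optional mappings built
# above, as they would look for a two-group cluster when both GQ_NODE and
# GQ_INST data were requested.
#
#   >>> group_to_nodes = {"uuid-a": ["node1", "node2"], "uuid-b": ["node3"]}
#   >>> group_to_instances = {"uuid-a": ["inst1"], "uuid-b": []}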
13311 class LUGroupQuery(NoHooksLU):
13312 """Logical unit for querying node groups.
13317 def CheckArguments(self):
13318 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13319 self.op.output_fields, False)
13321 def ExpandNames(self):
13322 self.gq.ExpandNames(self)
13324 def DeclareLocks(self, level):
13325 self.gq.DeclareLocks(self, level)
13327 def Exec(self, feedback_fn):
13328 return self.gq.OldStyleQuery(self)
13331 class LUGroupSetParams(LogicalUnit):
13332 """Modifies the parameters of a node group.
13335 HPATH = "group-modify"
13336 HTYPE = constants.HTYPE_GROUP
13339 def CheckArguments(self):
13342 self.op.diskparams,
13343 self.op.alloc_policy,
13345 self.op.disk_state,
13349 if all_changes.count(None) == len(all_changes):
13350 raise errors.OpPrereqError("Please pass at least one modification",
13351 errors.ECODE_INVAL)
13353 def ExpandNames(self):
13354 # This raises errors.OpPrereqError on its own:
13355 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13357 self.needed_locks = {
13358 locking.LEVEL_NODEGROUP: [self.group_uuid],
13361 def CheckPrereq(self):
13362 """Check prerequisites.
13365 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13367 if self.group is None:
13368 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13369 (self.op.group_name, self.group_uuid))
13371 if self.op.ndparams:
13372 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13373 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13374 self.new_ndparams = new_ndparams
13376 if self.op.diskparams:
13377 self.new_diskparams = dict()
13378 for templ in constants.DISK_TEMPLATES:
13379 if templ not in self.op.diskparams:
13380 self.op.diskparams[templ] = {}
13381 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13382 self.op.diskparams[templ])
13383 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13384 self.new_diskparams[templ] = new_templ_params
13386 if self.op.hv_state:
13387 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13388 self.group.hv_state_static)
13390 if self.op.disk_state:
13391 self.new_disk_state = \
13392 _MergeAndVerifyDiskState(self.op.disk_state,
13393 self.group.disk_state_static)
13395 if self.op.ipolicy:
13396 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13400 def BuildHooksEnv(self):
13401 """Build hooks env.
13405 "GROUP_NAME": self.op.group_name,
13406 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13409 def BuildHooksNodes(self):
13410 """Build hooks nodes.
13413 mn = self.cfg.GetMasterNode()
13414 return ([mn], [mn])
13416 def Exec(self, feedback_fn):
13417 """Modifies the node group.
13422 if self.op.ndparams:
13423 self.group.ndparams = self.new_ndparams
13424 result.append(("ndparams", str(self.group.ndparams)))
13426 if self.op.diskparams:
13427 self.group.diskparams = self.new_diskparams
13428 result.append(("diskparams", str(self.group.diskparams)))
13430 if self.op.alloc_policy:
13431 self.group.alloc_policy = self.op.alloc_policy
13433 if self.op.hv_state:
13434 self.group.hv_state_static = self.new_hv_state
13436 if self.op.disk_state:
13437 self.group.disk_state_static = self.new_disk_state
13439 if self.op.ipolicy:
13440 self.group.ipolicy = self.new_ipolicy
13442 self.cfg.Update(self.group, feedback_fn)
13446 class LUGroupRemove(LogicalUnit):
13447 HPATH = "group-remove"
13448 HTYPE = constants.HTYPE_GROUP
13451 def ExpandNames(self):
13452 # This raises errors.OpPrereqError on its own:
13453 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13454 self.needed_locks = {
13455 locking.LEVEL_NODEGROUP: [self.group_uuid],
13458 def CheckPrereq(self):
13459 """Check prerequisites.
13461 This checks that the given group name exists as a node group, that it is
13462 empty (i.e., contains no nodes), and that it is not the last group of the
13463 cluster.
13466 # Verify that the group is empty.
13467 group_nodes = [node.name
13468 for node in self.cfg.GetAllNodesInfo().values()
13469 if node.group == self.group_uuid]
13472 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13474 (self.op.group_name,
13475 utils.CommaJoin(utils.NiceSort(group_nodes))),
13476 errors.ECODE_STATE)
13478 # Verify the cluster would not be left group-less.
13479 if len(self.cfg.GetNodeGroupList()) == 1:
13480 raise errors.OpPrereqError("Group '%s' is the only group,"
13481 " cannot be removed" %
13482 self.op.group_name,
13483 errors.ECODE_STATE)
13485 def BuildHooksEnv(self):
13486 """Build hooks env.
13490 "GROUP_NAME": self.op.group_name,
13493 def BuildHooksNodes(self):
13494 """Build hooks nodes.
13497 mn = self.cfg.GetMasterNode()
13498 return ([mn], [mn])
13500 def Exec(self, feedback_fn):
13501 """Remove the node group.
13505 self.cfg.RemoveNodeGroup(self.group_uuid)
13506 except errors.ConfigurationError:
13507 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13508 (self.op.group_name, self.group_uuid))
13510 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13513 class LUGroupRename(LogicalUnit):
13514 HPATH = "group-rename"
13515 HTYPE = constants.HTYPE_GROUP
13518 def ExpandNames(self):
13519 # This raises errors.OpPrereqError on its own:
13520 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13522 self.needed_locks = {
13523 locking.LEVEL_NODEGROUP: [self.group_uuid],
13526 def CheckPrereq(self):
13527 """Check prerequisites.
13529 Ensures requested new name is not yet used.
13533 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13534 except errors.OpPrereqError:
13537 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13538 " node group (UUID: %s)" %
13539 (self.op.new_name, new_name_uuid),
13540 errors.ECODE_EXISTS)
13542 def BuildHooksEnv(self):
13543 """Build hooks env.
13547 "OLD_NAME": self.op.group_name,
13548 "NEW_NAME": self.op.new_name,
13551 def BuildHooksNodes(self):
13552 """Build hooks nodes.
13555 mn = self.cfg.GetMasterNode()
13557 all_nodes = self.cfg.GetAllNodesInfo()
13558 all_nodes.pop(mn, None)
13561 run_nodes.extend(node.name for node in all_nodes.values()
13562 if node.group == self.group_uuid)
13564 return (run_nodes, run_nodes)
13566 def Exec(self, feedback_fn):
13567 """Rename the node group.
13570 group = self.cfg.GetNodeGroup(self.group_uuid)
13573 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13574 (self.op.group_name, self.group_uuid))
13576 group.name = self.op.new_name
13577 self.cfg.Update(group, feedback_fn)
13579 return self.op.new_name
13582 class LUGroupEvacuate(LogicalUnit):
13583 HPATH = "group-evacuate"
13584 HTYPE = constants.HTYPE_GROUP
13587 def ExpandNames(self):
13588 # This raises errors.OpPrereqError on its own:
13589 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13591 if self.op.target_groups:
13592 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13593 self.op.target_groups)
13595 self.req_target_uuids = []
13597 if self.group_uuid in self.req_target_uuids:
13598 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13599 " as a target group (targets are %s)" %
13601 utils.CommaJoin(self.req_target_uuids)),
13602 errors.ECODE_INVAL)
13604 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13606 self.share_locks = _ShareAll()
13607 self.needed_locks = {
13608 locking.LEVEL_INSTANCE: [],
13609 locking.LEVEL_NODEGROUP: [],
13610 locking.LEVEL_NODE: [],
13613 def DeclareLocks(self, level):
13614 if level == locking.LEVEL_INSTANCE:
13615 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13617 # Lock instances optimistically, needs verification once node and group
13618 # locks have been acquired
13619 self.needed_locks[locking.LEVEL_INSTANCE] = \
13620 self.cfg.GetNodeGroupInstances(self.group_uuid)
13622 elif level == locking.LEVEL_NODEGROUP:
13623 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13625 if self.req_target_uuids:
13626 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13628 # Lock all groups used by instances optimistically; this requires going
13629 # via the node before it's locked, requiring verification later on
13630 lock_groups.update(group_uuid
13631 for instance_name in
13632 self.owned_locks(locking.LEVEL_INSTANCE)
13634 self.cfg.GetInstanceNodeGroups(instance_name))
13636 # No target groups, need to lock all of them
13637 lock_groups = locking.ALL_SET
13639 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13641 elif level == locking.LEVEL_NODE:
13642 # This will only lock the nodes in the group to be evacuated which
13643 # contain actual instances
13644 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13645 self._LockInstancesNodes()
13647 # Lock all nodes in group to be evacuated and target groups
13648 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13649 assert self.group_uuid in owned_groups
13650 member_nodes = [node_name
13651 for group in owned_groups
13652 for node_name in self.cfg.GetNodeGroup(group).members]
13653 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13655 def CheckPrereq(self):
13656 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13657 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13658 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13660 assert owned_groups.issuperset(self.req_target_uuids)
13661 assert self.group_uuid in owned_groups
13663 # Check if locked instances are still correct
13664 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13666 # Get instance information
13667 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13669 # Check if node groups for locked instances are still correct
13670 for instance_name in owned_instances:
13671 inst = self.instances[instance_name]
13672 assert owned_nodes.issuperset(inst.all_nodes), \
13673 "Instance %s's nodes changed while we kept the lock" % instance_name
13675 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13678 assert self.group_uuid in inst_groups, \
13679 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13681 if self.req_target_uuids:
13682 # User requested specific target groups
13683 self.target_uuids = self.req_target_uuids
13685 # All groups except the one to be evacuated are potential targets
13686 self.target_uuids = [group_uuid for group_uuid in owned_groups
13687 if group_uuid != self.group_uuid]
13689 if not self.target_uuids:
13690 raise errors.OpPrereqError("There are no possible target groups",
13691 errors.ECODE_INVAL)
13693 def BuildHooksEnv(self):
13694 """Build hooks env.
13698 "GROUP_NAME": self.op.group_name,
13699 "TARGET_GROUPS": " ".join(self.target_uuids),
13702 def BuildHooksNodes(self):
13703 """Build hooks nodes.
13706 mn = self.cfg.GetMasterNode()
13708 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13710 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13712 return (run_nodes, run_nodes)
13714 def Exec(self, feedback_fn):
13715 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13717 assert self.group_uuid not in self.target_uuids
13719 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13720 instances=instances, target_groups=self.target_uuids)
13722 ial.Run(self.op.iallocator)
13724 if not ial.success:
13725 raise errors.OpPrereqError("Can't compute group evacuation using"
13726 " iallocator '%s': %s" %
13727 (self.op.iallocator, ial.info),
13728 errors.ECODE_NORES)
13730 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13732 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13733 len(jobs), self.op.group_name)
13735 return ResultWithJobs(jobs)
13738 class TagsLU(NoHooksLU): # pylint: disable=W0223
13739 """Generic tags LU.
13741 This is an abstract class which is the parent of all the other tags LUs.
13744 def ExpandNames(self):
13745 self.group_uuid = None
13746 self.needed_locks = {}
13747 if self.op.kind == constants.TAG_NODE:
13748 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13749 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13750 elif self.op.kind == constants.TAG_INSTANCE:
13751 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13752 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13753 elif self.op.kind == constants.TAG_NODEGROUP:
13754 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13756 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13757 # not possible to acquire the BGL based on opcode parameters)
13759 def CheckPrereq(self):
13760 """Check prerequisites.
13763 if self.op.kind == constants.TAG_CLUSTER:
13764 self.target = self.cfg.GetClusterInfo()
13765 elif self.op.kind == constants.TAG_NODE:
13766 self.target = self.cfg.GetNodeInfo(self.op.name)
13767 elif self.op.kind == constants.TAG_INSTANCE:
13768 self.target = self.cfg.GetInstanceInfo(self.op.name)
13769 elif self.op.kind == constants.TAG_NODEGROUP:
13770 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13772 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13773 str(self.op.kind), errors.ECODE_INVAL)
13776 class LUTagsGet(TagsLU):
13777 """Returns the tags of a given object.
13782 def ExpandNames(self):
13783 TagsLU.ExpandNames(self)
13785 # Share locks as this is only a read operation
13786 self.share_locks = _ShareAll()
13788 def Exec(self, feedback_fn):
13789 """Returns the tag list.
13792 return list(self.target.GetTags())
13795 class LUTagsSearch(NoHooksLU):
13796 """Searches the tags for a given pattern.
13801 def ExpandNames(self):
13802 self.needed_locks = {}
13804 def CheckPrereq(self):
13805 """Check prerequisites.
13807 This checks the pattern passed for validity by compiling it.
13811 self.re = re.compile(self.op.pattern)
13812 except re.error, err:
13813 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13814 (self.op.pattern, err), errors.ECODE_INVAL)
13816 def Exec(self, feedback_fn):
13817 """Returns the tag list.
13821 tgts = [("/cluster", cfg.GetClusterInfo())]
13822 ilist = cfg.GetAllInstancesInfo().values()
13823 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13824 nlist = cfg.GetAllNodesInfo().values()
13825 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13826 tgts.extend(("/nodegroup/%s" % n.name, n)
13827 for n in cfg.GetAllNodeGroupsInfo().values())
13829 for path, target in tgts:
13830 for tag in target.GetTags():
13831 if self.re.search(tag):
13832 results.append((path, tag))
13836 class LUTagsSet(TagsLU):
13837 """Sets a tag on a given object.
13842 def CheckPrereq(self):
13843 """Check prerequisites.
13845 This checks the type and length of the tag name and value.
13848 TagsLU.CheckPrereq(self)
13849 for tag in self.op.tags:
13850 objects.TaggableObject.ValidateTag(tag)
13852 def Exec(self, feedback_fn):
13857 for tag in self.op.tags:
13858 self.target.AddTag(tag)
13859 except errors.TagError, err:
13860 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13861 self.cfg.Update(self.target, feedback_fn)
13864 class LUTagsDel(TagsLU):
13865 """Delete a list of tags from a given object.
13870 def CheckPrereq(self):
13871 """Check prerequisites.
13873 This checks that we have the given tag.
13876 TagsLU.CheckPrereq(self)
13877 for tag in self.op.tags:
13878 objects.TaggableObject.ValidateTag(tag)
13879 del_tags = frozenset(self.op.tags)
13880 cur_tags = self.target.GetTags()
13882 diff_tags = del_tags - cur_tags
13884 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13885 raise errors.OpPrereqError("Tag(s) %s not found" %
13886 (utils.CommaJoin(diff_names), ),
13887 errors.ECODE_NOENT)
13889 def Exec(self, feedback_fn):
13890 """Remove the tag from the object.
13893 for tag in self.op.tags:
13894 self.target.RemoveTag(tag)
13895 self.cfg.Update(self.target, feedback_fn)
13898 class LUTestDelay(NoHooksLU):
13899 """Sleep for a specified amount of time.
13901 This LU sleeps on the master and/or nodes for a specified amount of
13902 time.
13907 def ExpandNames(self):
13908 """Expand names and set required locks.
13910 This expands the node list, if any.
13913 self.needed_locks = {}
13914 if self.op.on_nodes:
13915 # _GetWantedNodes can be used here, but is not always appropriate to use
13916 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13917 # more information.
13918 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13919 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13921 def _TestDelay(self):
13922 """Do the actual sleep.
13925 if self.op.on_master:
13926 if not utils.TestDelay(self.op.duration):
13927 raise errors.OpExecError("Error during master delay test")
13928 if self.op.on_nodes:
13929 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13930 for node, node_result in result.items():
13931 node_result.Raise("Failure during rpc call to node %s" % node)
13933 def Exec(self, feedback_fn):
13934 """Execute the test delay opcode, with the wanted repetitions.
13937 if self.op.repeat == 0:
13940 top_value = self.op.repeat - 1
13941 for i in range(self.op.repeat):
13942 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13946 class LUTestJqueue(NoHooksLU):
13947 """Utility LU to test some aspects of the job queue.
13952 # Must be lower than default timeout for WaitForJobChange to see whether it
13953 # notices changed jobs
13954 _CLIENT_CONNECT_TIMEOUT = 20.0
13955 _CLIENT_CONFIRM_TIMEOUT = 60.0
13958 def _NotifyUsingSocket(cls, cb, errcls):
13959 """Opens a Unix socket and waits for another program to connect.
13962 @param cb: Callback to send socket name to client
13963 @type errcls: class
13964 @param errcls: Exception class to use for errors
13967 # Using a temporary directory as there's no easy way to create temporary
13968 # sockets without writing a custom loop around tempfile.mktemp and
13969 # socket.bind
13970 tmpdir = tempfile.mkdtemp()
13972 tmpsock = utils.PathJoin(tmpdir, "sock")
13974 logging.debug("Creating temporary socket at %s", tmpsock)
13975 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13980 # Send details to client
13983 # Wait for client to connect before continuing
13984 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13986 (conn, _) = sock.accept()
13987 except socket.error, err:
13988 raise errcls("Client didn't connect in time (%s)" % err)
13992 # Remove as soon as client is connected
13993 shutil.rmtree(tmpdir)
13995 # Wait for client to close
13998 # pylint: disable=E1101
13999 # Instance of '_socketobject' has no ... member
14000 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14002 except socket.error, err:
14003 raise errcls("Client failed to confirm notification (%s)" % err)
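# Rough sketch of the client side expected by the helper above (assuming an
# external test driver; purely illustrative): it connects to the socket path
# it was given, performs its checks, and then closes the connection so the LU
# can continue.
#
#   >>> import socket
#   >>> def _ConfirmNotification(sockname):
#   ...   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   ...   try:
#   ...     sock.connect(sockname)  # unblocks the LU's accept()
#   ...   finally:
#   ...     sock.close()            # unblocks the LU's wait-for-close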
14007 def _SendNotification(self, test, arg, sockname):
14008 """Sends a notification to the client.
14011 @param test: Test name
14012 @param arg: Test argument (depends on test)
14013 @type sockname: string
14014 @param sockname: Socket path
14017 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14019 def _Notify(self, prereq, test, arg):
14020 """Notifies the client of a test.
14023 @param prereq: Whether this is a prereq-phase test
14025 @param test: Test name
14026 @param arg: Test argument (depends on test)
14030 errcls = errors.OpPrereqError
14032 errcls = errors.OpExecError
14034 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14038 def CheckArguments(self):
14039 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14040 self.expandnames_calls = 0
14042 def ExpandNames(self):
14043 checkargs_calls = getattr(self, "checkargs_calls", 0)
14044 if checkargs_calls < 1:
14045 raise errors.ProgrammerError("CheckArguments was not called")
14047 self.expandnames_calls += 1
14049 if self.op.notify_waitlock:
14050 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14052 self.LogInfo("Expanding names")
14054 # Get lock on master node (just to get a lock, not for a particular reason)
14055 self.needed_locks = {
14056 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14059 def Exec(self, feedback_fn):
14060 if self.expandnames_calls < 1:
14061 raise errors.ProgrammerError("ExpandNames was not called")
14063 if self.op.notify_exec:
14064 self._Notify(False, constants.JQT_EXEC, None)
14066 self.LogInfo("Executing")
14068 if self.op.log_messages:
14069 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14070 for idx, msg in enumerate(self.op.log_messages):
14071 self.LogInfo("Sending log message %s", idx + 1)
14072 feedback_fn(constants.JQT_MSGPREFIX + msg)
14073 # Report how many test messages have been sent
14074 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14077 raise errors.OpExecError("Opcode failure was requested")
14082 class IAllocator(object):
14083 """IAllocator framework.
14085 An IAllocator instance has four sets of attributes:
14086 - cfg that is needed to query the cluster
14087 - input data (all members of the _KEYS class attribute are required)
14088 - four buffer attributes (in|out_data|text), that represent the
14089 input (to the external script) in text and data structure format,
14090 and the output from it, again in two formats
14091 - the result variables from the script (success, info, nodes) for
14092 easy usage
14095 # pylint: disable=R0902
14096 # lots of instance attributes
14098 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14100 self.rpc = rpc_runner
14101 # init buffer variables
14102 self.in_text = self.out_text = self.in_data = self.out_data = None
14103 # init all input fields so that pylint is happy
14105 self.memory = self.disks = self.disk_template = None
14106 self.os = self.tags = self.nics = self.vcpus = None
14107 self.hypervisor = None
14108 self.relocate_from = None
14110 self.instances = None
14111 self.evac_mode = None
14112 self.target_groups = []
14114 self.required_nodes = None
14115 # init result fields
14116 self.success = self.info = self.result = None
14119 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14121 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14122 " IAllocator" % self.mode)
14124 keyset = [n for (n, _) in keydata]
14127 if key not in keyset:
14128 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14129 " IAllocator" % key)
14130 setattr(self, key, kwargs[key])
14133 if key not in kwargs:
14134 raise errors.ProgrammerError("Missing input parameter '%s' to"
14135 " IAllocator" % key)
14136 self._BuildInputData(compat.partial(fn, self), keydata)
14138 def _ComputeClusterData(self):
14139 """Compute the generic allocator input data.
14141 This is the data that is independent of the actual operation.
14145 cluster_info = cfg.GetClusterInfo()
14148 "version": constants.IALLOCATOR_VERSION,
14149 "cluster_name": cfg.GetClusterName(),
14150 "cluster_tags": list(cluster_info.GetTags()),
14151 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14152 # we don't have job IDs
14154 ninfo = cfg.GetAllNodesInfo()
14155 iinfo = cfg.GetAllInstancesInfo().values()
14156 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14159 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14161 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14162 hypervisor_name = self.hypervisor
14163 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14164 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14166 hypervisor_name = cluster_info.primary_hypervisor
14168 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14171 self.rpc.call_all_instances_info(node_list,
14172 cluster_info.enabled_hypervisors)
14174 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14176 config_ndata = self._ComputeBasicNodeData(ninfo)
14177 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14178 i_list, config_ndata)
14179 assert len(data["nodes"]) == len(ninfo), \
14180 "Incomplete node data computed"
14182 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14184 self.in_data = data
14187 def _ComputeNodeGroupData(cfg):
14188 """Compute node groups data.
14191 cluster = cfg.GetClusterInfo()
14192 ng = dict((guuid, {
14193 "name": gdata.name,
14194 "alloc_policy": gdata.alloc_policy,
14195 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14197 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14202 def _ComputeBasicNodeData(node_cfg):
14203 """Compute global node data.
14206 @returns: a dict of name: (node dict, node config)
14209 # fill in static (config-based) values
14210 node_results = dict((ninfo.name, {
14211 "tags": list(ninfo.GetTags()),
14212 "primary_ip": ninfo.primary_ip,
14213 "secondary_ip": ninfo.secondary_ip,
14214 "offline": ninfo.offline,
14215 "drained": ninfo.drained,
14216 "master_candidate": ninfo.master_candidate,
14217 "group": ninfo.group,
14218 "master_capable": ninfo.master_capable,
14219 "vm_capable": ninfo.vm_capable,
14221 for ninfo in node_cfg.values())
14223 return node_results
14226 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14228 """Compute global node data.
14230 @param node_results: the basic node structures as filled from the config
14233 #TODO(dynmem): compute the right data on MAX and MIN memory
14234 # make a copy of the current dict
14235 node_results = dict(node_results)
14236 for nname, nresult in node_data.items():
14237 assert nname in node_results, "Missing basic data for node %s" % nname
14238 ninfo = node_cfg[nname]
14240 if not (ninfo.offline or ninfo.drained):
14241 nresult.Raise("Can't get data for node %s" % nname)
14242 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14244 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14246 for attr in ["memory_total", "memory_free", "memory_dom0",
14247 "vg_size", "vg_free", "cpu_total"]:
14248 if attr not in remote_info:
14249 raise errors.OpExecError("Node '%s' didn't return attribute"
14250 " '%s'" % (nname, attr))
14251 if not isinstance(remote_info[attr], int):
14252 raise errors.OpExecError("Node '%s' returned invalid value"
14254 (nname, attr, remote_info[attr]))
14255 # compute memory used by primary instances
14256 i_p_mem = i_p_up_mem = 0
14257 for iinfo, beinfo in i_list:
14258 if iinfo.primary_node == nname:
14259 i_p_mem += beinfo[constants.BE_MAXMEM]
14260 if iinfo.name not in node_iinfo[nname].payload:
14263 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14264 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14265 remote_info["memory_free"] -= max(0, i_mem_diff)
14267 if iinfo.admin_state == constants.ADMINST_UP:
14268 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14270 # compute memory used by instances
14272 "total_memory": remote_info["memory_total"],
14273 "reserved_memory": remote_info["memory_dom0"],
14274 "free_memory": remote_info["memory_free"],
14275 "total_disk": remote_info["vg_size"],
14276 "free_disk": remote_info["vg_free"],
14277 "total_cpus": remote_info["cpu_total"],
14278 "i_pri_memory": i_p_mem,
14279 "i_pri_up_memory": i_p_up_mem,
14281 pnr_dyn.update(node_results[nname])
14282 node_results[nname] = pnr_dyn
14284 return node_results
14287 def _ComputeInstanceData(cluster_info, i_list):
14288 """Compute global instance data.
14292 for iinfo, beinfo in i_list:
14294 for nic in iinfo.nics:
14295 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14299 "mode": filled_params[constants.NIC_MODE],
14300 "link": filled_params[constants.NIC_LINK],
14302 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14303 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14304 nic_data.append(nic_dict)
14306 "tags": list(iinfo.GetTags()),
14307 "admin_state": iinfo.admin_state,
14308 "vcpus": beinfo[constants.BE_VCPUS],
14309 "memory": beinfo[constants.BE_MAXMEM],
14311 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14313 "disks": [{constants.IDISK_SIZE: dsk.size,
14314 constants.IDISK_MODE: dsk.mode}
14315 for dsk in iinfo.disks],
14316 "disk_template": iinfo.disk_template,
14317 "hypervisor": iinfo.hypervisor,
14319 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14321 instance_data[iinfo.name] = pir
14323 return instance_data
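# Illustrative sketch (all values hypothetical) of one entry in the
# "instances" dictionary built above:
#
#   >>> example_entry = {
#   ...   "tags": [], "admin_state": "up", "vcpus": 2, "memory": 1024,
#   ...   "os": "debian-image", "nodes": ["node1", "node2"],
#   ...   "nics": [{"mac": "aa:00:00:00:00:01", "ip": None,
#   ...             "mode": "bridged", "link": "xen-br0"}],
#   ...   "disks": [{"size": 10240, "mode": "rw"}],
#   ...   "disk_template": "drbd", "hypervisor": "xen-pvm",
#   ...   "disk_space_total": 10368,
#   ...   }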
14325 def _AddNewInstance(self):
14326 """Add new instance data to allocator structure.
14328 This in combination with _ComputeClusterData will create the
14329 correct structure needed as input for the allocator.
14331 The checks for the completeness of the opcode must have already been
14332 done.
14335 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14337 if self.disk_template in constants.DTS_INT_MIRROR:
14338 self.required_nodes = 2
14340 self.required_nodes = 1
14344 "disk_template": self.disk_template,
14347 "vcpus": self.vcpus,
14348 "memory": self.memory,
14349 "disks": self.disks,
14350 "disk_space_total": disk_space,
14352 "required_nodes": self.required_nodes,
14353 "hypervisor": self.hypervisor,
14358 def _AddRelocateInstance(self):
14359 """Add relocate instance data to allocator structure.
14361 This in combination with _ComputeClusterData will create the
14362 correct structure needed as input for the allocator.
14364 The checks for the completeness of the opcode must have already been
14365 done.
14368 instance = self.cfg.GetInstanceInfo(self.name)
14369 if instance is None:
14370 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14371 " IAllocator" % self.name)
14373 if instance.disk_template not in constants.DTS_MIRRORED:
14374 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14375 errors.ECODE_INVAL)
14377 if instance.disk_template in constants.DTS_INT_MIRROR and \
14378 len(instance.secondary_nodes) != 1:
14379 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14380 errors.ECODE_STATE)
14382 self.required_nodes = 1
14383 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14384 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14388 "disk_space_total": disk_space,
14389 "required_nodes": self.required_nodes,
14390 "relocate_from": self.relocate_from,
14394 def _AddNodeEvacuate(self):
14395 """Get data for node-evacuate requests.
14399 "instances": self.instances,
14400 "evac_mode": self.evac_mode,
14403 def _AddChangeGroup(self):
14404 """Get data for node-evacuate requests.
14408 "instances": self.instances,
14409 "target_groups": self.target_groups,
14412 def _BuildInputData(self, fn, keydata):
14413 """Build input data structures.
14416 self._ComputeClusterData()
14419 request["type"] = self.mode
14420 for keyname, keytype in keydata:
14421 if keyname not in request:
14422 raise errors.ProgrammerError("Request parameter %s is missing" %
14424 val = request[keyname]
14425 if not keytype(val):
14426 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14427 " validation, value %s, expected"
14428 " type %s" % (keyname, val, keytype))
14429 self.in_data["request"] = request
14431 self.in_text = serializer.Dump(self.in_data)
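# Minimal sketch of what self.in_data["request"] ends up looking like for the
# change-group mode used above (the "type" value and all other values are
# assumed, not taken from this module):
#
#   >>> request = {
#   ...   "type": "change-group",  # constants.IALLOCATOR_MODE_CHG_GROUP
#   ...   "instances": ["inst1.example.com"],
#   ...   "target_groups": ["target-group-uuid"],
#   ...   }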
14433 _STRING_LIST = ht.TListOf(ht.TString)
14434 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14435 # pylint: disable=E1101
14436 # Class '...' has no 'OP_ID' member
14437 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14438 opcodes.OpInstanceMigrate.OP_ID,
14439 opcodes.OpInstanceReplaceDisks.OP_ID])
14443 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14444 ht.TItems([ht.TNonEmptyString,
14445 ht.TNonEmptyString,
14446 ht.TListOf(ht.TNonEmptyString),
14449 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14450 ht.TItems([ht.TNonEmptyString,
14453 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14454 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14457 constants.IALLOCATOR_MODE_ALLOC:
14460 ("name", ht.TString),
14461 ("memory", ht.TInt),
14462 ("disks", ht.TListOf(ht.TDict)),
14463 ("disk_template", ht.TString),
14464 ("os", ht.TString),
14465 ("tags", _STRING_LIST),
14466 ("nics", ht.TListOf(ht.TDict)),
14467 ("vcpus", ht.TInt),
14468 ("hypervisor", ht.TString),
14470 constants.IALLOCATOR_MODE_RELOC:
14471 (_AddRelocateInstance,
14472 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14474 constants.IALLOCATOR_MODE_NODE_EVAC:
14475 (_AddNodeEvacuate, [
14476 ("instances", _STRING_LIST),
14477 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14479 constants.IALLOCATOR_MODE_CHG_GROUP:
14480 (_AddChangeGroup, [
14481 ("instances", _STRING_LIST),
14482 ("target_groups", _STRING_LIST),
14486 def Run(self, name, validate=True, call_fn=None):
14487 """Run an instance allocator and return the results.
14490 if call_fn is None:
14491 call_fn = self.rpc.call_iallocator_runner
14493 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14494 result.Raise("Failure while running the iallocator script")
14496 self.out_text = result.payload
14498 self._ValidateResult()
14500 def _ValidateResult(self):
14501 """Process the allocator results.
14503 This will process and, if successful, save the result in
14504 self.out_data and the other parameters.
14508 rdict = serializer.Load(self.out_text)
14509 except Exception, err:
14510 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14512 if not isinstance(rdict, dict):
14513 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14515 # TODO: remove backwards compatibility in later versions
14516 if "nodes" in rdict and "result" not in rdict:
14517 rdict["result"] = rdict["nodes"]
14520 for key in "success", "info", "result":
14521 if key not in rdict:
14522 raise errors.OpExecError("Can't parse iallocator results:"
14523 " missing key '%s'" % key)
14524 setattr(self, key, rdict[key])
14526 if not self._result_check(self.result):
14527 raise errors.OpExecError("Iallocator returned invalid result,"
14528 " expected %s, got %s" %
14529 (self._result_check, self.result),
14530 errors.ECODE_INVAL)
14532 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14533 assert self.relocate_from is not None
14534 assert self.required_nodes == 1
14536 node2group = dict((name, ndata["group"])
14537 for (name, ndata) in self.in_data["nodes"].items())
14539 fn = compat.partial(self._NodesToGroups, node2group,
14540 self.in_data["nodegroups"])
14542 instance = self.cfg.GetInstanceInfo(self.name)
14543 request_groups = fn(self.relocate_from + [instance.primary_node])
14544 result_groups = fn(rdict["result"] + [instance.primary_node])
14546 if self.success and not set(result_groups).issubset(request_groups):
14547 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14548 " differ from original groups (%s)" %
14549 (utils.CommaJoin(result_groups),
14550 utils.CommaJoin(request_groups)))
14552 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14553 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14555 self.out_data = rdict
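# Illustrative sketch (hypothetical values) of a reply that passes the
# validation above: the three mandatory keys, with "result" in whatever shape
# the mode's result check expects (a list of node names for an allocation).
#
#   >>> rdict = {
#   ...   "success": True,
#   ...   "info": "allocation successful",
#   ...   "result": ["node2.example.com"],
#   ...   }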
14558 def _NodesToGroups(node2group, groups, nodes):
14559 """Returns a list of unique group names for a list of nodes.
14561 @type node2group: dict
14562 @param node2group: Map from node name to group UUID
14564 @param groups: Group information
14566 @param nodes: Node names
14573 group_uuid = node2group[node]
14575 # Ignore unknown node
14579 group = groups[group_uuid]
14581 # Can't find group, let's use UUID
14582 group_name = group_uuid
14584 group_name = group["name"]
14586 result.add(group_name)
14588 return sorted(result)
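# Small worked example (hypothetical UUIDs) of the helper above; unknown nodes
# are ignored and groups without a known name fall back to their UUID.
#
#   >>> node2group = {"node1": "uuid-a", "node2": "uuid-b", "node3": "uuid-a"}
#   >>> groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
#   >>> IAllocator._NodesToGroups(node2group, groups,
#   ...                           ["node1", "node2", "node9"])
#   ['default', 'rack2']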
14591 class LUTestAllocator(NoHooksLU):
14592 """Run allocator tests.
14594 This LU runs the allocator tests
14597 def CheckPrereq(self):
14598 """Check prerequisites.
14600 This checks the opcode parameters depending on the direction and mode of the test.
14603 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14604 for attr in ["memory", "disks", "disk_template",
14605 "os", "tags", "nics", "vcpus"]:
14606 if not hasattr(self.op, attr):
14607 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14608 attr, errors.ECODE_INVAL)
14609 iname = self.cfg.ExpandInstanceName(self.op.name)
14610 if iname is not None:
14611 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14612 iname, errors.ECODE_EXISTS)
14613 if not isinstance(self.op.nics, list):
14614 raise errors.OpPrereqError("Invalid parameter 'nics'",
14615 errors.ECODE_INVAL)
14616 if not isinstance(self.op.disks, list):
14617 raise errors.OpPrereqError("Invalid parameter 'disks'",
14618 errors.ECODE_INVAL)
14619 for row in self.op.disks:
14620 if (not isinstance(row, dict) or
14621 constants.IDISK_SIZE not in row or
14622 not isinstance(row[constants.IDISK_SIZE], int) or
14623 constants.IDISK_MODE not in row or
14624 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14625 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14626 " parameter", errors.ECODE_INVAL)
14627 if self.op.hypervisor is None:
14628 self.op.hypervisor = self.cfg.GetHypervisorType()
14629 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14630 fname = _ExpandInstanceName(self.cfg, self.op.name)
14631 self.op.name = fname
14632 self.relocate_from = \
14633 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14634 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14635 constants.IALLOCATOR_MODE_NODE_EVAC):
14636 if not self.op.instances:
14637 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14638 self.op.instances = _GetWantedInstances(self, self.op.instances)
14640 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14641 self.op.mode, errors.ECODE_INVAL)
14643 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14644 if self.op.allocator is None:
14645 raise errors.OpPrereqError("Missing allocator name",
14646 errors.ECODE_INVAL)
14647 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14648 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14649 self.op.direction, errors.ECODE_INVAL)
14651 def Exec(self, feedback_fn):
14652 """Run the allocator test.
14655 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14656 ial = IAllocator(self.cfg, self.rpc,
14659 memory=self.op.memory,
14660 disks=self.op.disks,
14661 disk_template=self.op.disk_template,
14665 vcpus=self.op.vcpus,
14666 hypervisor=self.op.hypervisor,
14668 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14669 ial = IAllocator(self.cfg, self.rpc,
14672 relocate_from=list(self.relocate_from),
14674 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14675 ial = IAllocator(self.cfg, self.rpc,
14677 instances=self.op.instances,
14678 target_groups=self.op.target_groups)
14679 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14680 ial = IAllocator(self.cfg, self.rpc,
14682 instances=self.op.instances,
14683 evac_mode=self.op.evac_mode)
14685 raise errors.ProgrammerError("Uncatched mode %s in"
14686 " LUTestAllocator.Exec", self.op.mode)
14688 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14689 result = ial.in_text
14691 ial.Run(self.op.allocator, validate=False)
14692 result = ial.out_text
14696 #: Query type implementations
14698 constants.QR_INSTANCE: _InstanceQuery,
14699 constants.QR_NODE: _NodeQuery,
14700 constants.QR_GROUP: _GroupQuery,
14701 constants.QR_OS: _OsQuery,
14704 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14707 def _GetQueryImplementation(name):
14708 """Returns the implemtnation for a query type.
14710 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14714 return _QUERY_IMPL[name]
14716 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14717 errors.ECODE_INVAL)