4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
@type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
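# Usage sketch (not part of the original module): an LU's Exec method can
# hand follow-up work to the job queue by returning an instance of this
# class; "wanted_names" is a hypothetical attribute computed earlier.
#
#   def Exec(self, feedback_fn):
#     startup_ops = [opcodes.OpInstanceStartup(instance_name=name)
#                    for name in self.wanted_names]
#     # each inner list becomes one submitted job
#     return ResultWithJobs([startup_ops])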
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring the
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:
- ExpandNames is left as a purely lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
178 later methods can no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
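For example, to acquire the locks at the node level in shared mode::

  self.share_locks[locking.LEVEL_NODE] = 1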
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
hook should run after the execution. If there are no nodes to return, an
empty list should be used (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the "unused argument" and
# "could be a function" warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
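# Illustrative sketch (not part of the original module) of the usual way an
# LU combines ExpandNames and DeclareLocks with _LockInstancesNodes;
# "MyInstanceLU" is a hypothetical subclass:
#
#   class MyInstanceLU(LogicalUnit):
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()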
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
@type use_default: boolean
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
values as 'to be deleted' values
@type use_none: boolean
@param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
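# Usage sketch (illustrative values, not part of the original module):
#
#   old = {"maxmem": 512, "vcpus": 2}
#   update = {"maxmem": constants.VALUE_DEFAULT, "vcpus": 4}
#   # with use_default=True the "maxmem" entry is reset (removed from the
#   # result) and "vcpus" is overridden, yielding {"vcpus": 4}
#   new = _GetUpdatedParams(old, update)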
724 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
725 """Return the new version of a instance policy.
727 @param group_policy: whether this policy applies to a group and thus
728 we should support removal of policy entries
731 use_none = use_default = group_policy
732 ipolicy = copy.deepcopy(old_ipolicy)
733 for key, value in new_ipolicy.items():
734 if key not in constants.IPOLICY_ALL_KEYS:
735 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
737 if key in constants.IPOLICY_PARAMETERS:
738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
739 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
741 use_default=use_default)
743 # FIXME: we assume all others are lists; this should be redone
745 if not value or value == [constants.VALUE_DEFAULT]:
749 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
750 " on the cluster'" % key,
753 ipolicy[key] = list(value)
755 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
756 except errors.ConfigurationError, err:
757 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
762 def _UpdateAndVerifySubDict(base, updates, type_check):
763 """Updates and verifies a dict with sub dicts of the same type.
765 @param base: The dict with the old data
766 @param updates: The dict with the new data
767 @param type_check: Dict suitable to ForceDictType to verify correct types
768 @returns: A new dict with updated and verified values
772 new = _GetUpdatedParams(old, value)
773 utils.ForceDictType(new, type_check)
776 ret = copy.deepcopy(base)
777 ret.update(dict((key, fn(base.get(key, {}), value))
778 for key, value in updates.items()))
782 def _MergeAndVerifyHvState(op_input, obj_input):
783 """Combines the hv state from an opcode with the one of the object
785 @param op_input: The input dict from the opcode
786 @param obj_input: The input dict from the objects
787 @return: The verified and updated dict
791 invalid_hvs = set(op_input) - constants.HYPER_TYPES
793 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
794 " %s" % utils.CommaJoin(invalid_hvs),
796 if obj_input is None:
798 type_check = constants.HVSTS_PARAMETER_TYPES
799 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
804 def _MergeAndVerifyDiskState(op_input, obj_input):
805 """Combines the disk state from an opcode with the one of the object
807 @param op_input: The input dict from the opcode
808 @param obj_input: The input dict from the objects
809 @return: The verified and updated dict
812 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
814 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
815 utils.CommaJoin(invalid_dst),
817 type_check = constants.DSS_PARAMETER_TYPES
818 if obj_input is None:
820 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
822 for key, value in op_input.items())
827 def _ReleaseLocks(lu, level, names=None, keep=None):
828 """Releases locks owned by an LU.
830 @type lu: L{LogicalUnit}
831 @param level: Lock level
832 @type names: list or None
833 @param names: Names of locks to release
834 @type keep: list or None
835 @param keep: Names of locks to retain
838 assert not (keep is not None and names is not None), \
839 "Only one of the 'names' and the 'keep' parameters can be given"
841 if names is not None:
842 should_release = names.__contains__
844 should_release = lambda name: name not in keep
846 should_release = None
848 owned = lu.owned_locks(level)
850 # Not owning any lock at this level, do nothing
857 # Determine which locks to release
859 if should_release(name):
864 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
866 # Release just some locks
867 lu.glm.release(level, names=release)
869 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
872 lu.glm.release(level)
874 assert not lu.glm.is_owned(level), "No locks should be owned"
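# Usage sketch (not part of the original module): an LU that no longer needs
# most of its node locks can keep just the instance's own nodes, assuming
# "instance" is an L{objects.Instance} it has already looked up:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[instance.primary_node] +
#                      list(instance.secondary_nodes))
#
# or release everything held at that level:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE)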
877 def _MapInstanceDisksToNodes(instances):
878 """Creates a map from (node, volume) to instance name.
880 @type instances: list of L{objects.Instance}
881 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
884 return dict(((node, vol), inst.name)
885 for inst in instances
886 for (node, vols) in inst.MapLVsByNode().items()
890 def _RunPostHook(lu, node_name):
891 """Runs the post-hook for an opcode on a single node.
894 hm = lu.proc.BuildHooksManager(lu)
896 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
898 # pylint: disable=W0702
899 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
902 def _CheckOutputFields(static, dynamic, selected):
903 """Checks whether all selected fields are valid.
905 @type static: L{utils.FieldSet}
906 @param static: static fields set
907 @type dynamic: L{utils.FieldSet}
908 @param dynamic: dynamic fields set
915 delta = f.NonMatching(selected)
917 raise errors.OpPrereqError("Unknown output fields selected: %s"
918 % ",".join(delta), errors.ECODE_INVAL)
921 def _CheckGlobalHvParams(params):
922 """Validates that given hypervisor params are not global ones.
924 This will ensure that instances don't get customised versions of
928 used_globals = constants.HVC_GLOBALS.intersection(params)
930 msg = ("The following hypervisor parameters are global and cannot"
931 " be customized at instance level, please modify them at"
932 " cluster level: %s" % utils.CommaJoin(used_globals))
933 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
936 def _CheckNodeOnline(lu, node, msg=None):
937 """Ensure that a given node is online.
939 @param lu: the LU on behalf of which we make the check
940 @param node: the node to check
941 @param msg: if passed, should be a message to replace the default one
942 @raise errors.OpPrereqError: if the node is offline
946 msg = "Can't use offline node"
947 if lu.cfg.GetNodeInfo(node).offline:
948 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
951 def _CheckNodeNotDrained(lu, node):
952 """Ensure that a given node is not drained.
954 @param lu: the LU on behalf of which we make the check
955 @param node: the node to check
956 @raise errors.OpPrereqError: if the node is drained
959 if lu.cfg.GetNodeInfo(node).drained:
960 raise errors.OpPrereqError("Can't use drained node %s" % node,
964 def _CheckNodeVmCapable(lu, node):
965 """Ensure that a given node is vm capable.
967 @param lu: the LU on behalf of which we make the check
968 @param node: the node to check
969 @raise errors.OpPrereqError: if the node is not vm capable
972 if not lu.cfg.GetNodeInfo(node).vm_capable:
973 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
977 def _CheckNodeHasOS(lu, node, os_name, force_variant):
978 """Ensure that a node supports a given OS.
980 @param lu: the LU on behalf of which we make the check
981 @param node: the node to check
982 @param os_name: the OS to query about
983 @param force_variant: whether to ignore variant errors
@raise errors.OpPrereqError: if the node does not support the OS
987 result = lu.rpc.call_os_get(node, os_name)
988 result.Raise("OS '%s' not in supported OS list for node %s" %
990 prereq=True, ecode=errors.ECODE_INVAL)
991 if not force_variant:
992 _CheckOSVariant(result.payload, os_name)
995 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
996 """Ensure that a node has the given secondary ip.
998 @type lu: L{LogicalUnit}
999 @param lu: the LU on behalf of which we make the check
1001 @param node: the node to check
1002 @type secondary_ip: string
1003 @param secondary_ip: the ip to check
1004 @type prereq: boolean
1005 @param prereq: whether to throw a prerequisite or an execute error
1006 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1007 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1010 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1011 result.Raise("Failure checking secondary ip on node %s" % node,
1012 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1013 if not result.payload:
1014 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1015 " please fix and re-run this command" % secondary_ip)
1017 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1019 raise errors.OpExecError(msg)
1022 def _GetClusterDomainSecret():
1023 """Reads the cluster domain secret.
1026 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1030 def _CheckInstanceState(lu, instance, req_states, msg=None):
1031 """Ensure that an instance is in one of the required states.
1033 @param lu: the LU on behalf of which we make the check
1034 @param instance: the instance to check
1035 @param msg: if passed, should be a message to replace the default one
1036 @raise errors.OpPrereqError: if the instance is not in the required state
1040 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1041 if instance.admin_state not in req_states:
1042 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1043 (instance, instance.admin_state, msg),
1046 if constants.ADMINST_UP not in req_states:
1047 pnode = instance.primary_node
1048 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1049 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1050 prereq=True, ecode=errors.ECODE_ENVIRON)
1052 if instance.name in ins_l.payload:
1053 raise errors.OpPrereqError("Instance %s is running, %s" %
1054 (instance.name, msg), errors.ECODE_STATE)
1057 def _ComputeMinMaxSpec(name, ipolicy, value):
1058 """Computes if value is in the desired range.
1060 @param name: name of the parameter for which we perform the check
1061 @param ipolicy: dictionary containing min, max and std values
1062 @param value: actual value that we want to use
1063 @return: None or element not meeting the criteria
1067 if value in [None, constants.VALUE_AUTO]:
1069 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1070 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1071 if value > max_v or min_v > value:
1072 return ("%s value %s is not in range [%s, %s]" %
1073 (name, value, min_v, max_v))
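# Usage sketch (illustrative values): assuming an ipolicy whose memory-size
# bounds are min=128 and max=32768, a value inside the range yields None
# while one outside it yields the error text:
#
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 256)    # -> None
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 65536)  # -> message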
1077 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1078 nic_count, disk_sizes,
1079 _compute_fn=_ComputeMinMaxSpec):
1080 """Verifies ipolicy against provided specs.
1083 @param ipolicy: The ipolicy
1085 @param mem_size: The memory size
1086 @type cpu_count: int
1087 @param cpu_count: Used cpu cores
1088 @type disk_count: int
1089 @param disk_count: Number of disks used
1090 @type nic_count: int
1091 @param nic_count: Number of nics used
1092 @type disk_sizes: list of ints
1093 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1094 @param _compute_fn: The compute function (unittest only)
@return: A list of violations, or an empty list if no violations are found
1098 assert disk_count == len(disk_sizes)
1101 (constants.ISPEC_MEM_SIZE, mem_size),
1102 (constants.ISPEC_CPU_COUNT, cpu_count),
1103 (constants.ISPEC_DISK_COUNT, disk_count),
1104 (constants.ISPEC_NIC_COUNT, nic_count),
1105 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1108 (_compute_fn(name, ipolicy, value)
1109 for (name, value) in test_settings))
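# Usage sketch (illustrative values, not part of the original module):
# checking a 2-vCPU, 512 MiB instance with one NIC and a single 1024 MiB
# disk against a policy:
#
#   violations = _ComputeIPolicySpecViolation(ipolicy, 512, 2, 1, 1, [1024])
#   # an empty result means the specs fit the policy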
1112 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1113 _compute_fn=_ComputeIPolicySpecViolation):
1114 """Compute if instance meets the specs of ipolicy.
1117 @param ipolicy: The ipolicy to verify against
1118 @type instance: L{objects.Instance}
1119 @param instance: The instance to verify
1120 @param _compute_fn: The function to verify ipolicy (unittest only)
1121 @see: L{_ComputeIPolicySpecViolation}
1124 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1125 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1126 disk_count = len(instance.disks)
1127 disk_sizes = [disk.size for disk in instance.disks]
1128 nic_count = len(instance.nics)
1130 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1134 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1135 _compute_fn=_ComputeIPolicySpecViolation):
1136 """Compute if instance specs meets the specs of ipolicy.
1139 @param ipolicy: The ipolicy to verify against
@type instance_spec: dict
1141 @param instance_spec: The instance spec to verify
1142 @param _compute_fn: The function to verify ipolicy (unittest only)
1143 @see: L{_ComputeIPolicySpecViolation}
1146 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1147 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1148 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1149 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1150 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1152 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1156 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1158 _compute_fn=_ComputeIPolicyInstanceViolation):
1159 """Compute if instance meets the specs of the new target group.
1161 @param ipolicy: The ipolicy to verify
1162 @param instance: The instance object to verify
1163 @param current_group: The current group of the instance
1164 @param target_group: The new group of the instance
1165 @param _compute_fn: The function to verify ipolicy (unittest only)
1166 @see: L{_ComputeIPolicySpecViolation}
1169 if current_group == target_group:
1172 return _compute_fn(ipolicy, instance)
1175 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1176 _compute_fn=_ComputeIPolicyNodeViolation):
1177 """Checks that the target node is correct in terms of instance policy.
1179 @param ipolicy: The ipolicy to verify
1180 @param instance: The instance object to verify
1181 @param node: The new node to relocate
1182 @param ignore: Ignore violations of the ipolicy
1183 @param _compute_fn: The function to verify ipolicy (unittest only)
1184 @see: L{_ComputeIPolicySpecViolation}
1187 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1188 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1191 msg = ("Instance does not meet target node group's (%s) instance"
1192 " policy: %s") % (node.group, utils.CommaJoin(res))
1196 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1199 def _ExpandItemName(fn, name, kind):
1200 """Expand an item name.
1202 @param fn: the function to use for expansion
1203 @param name: requested item name
1204 @param kind: text description ('Node' or 'Instance')
1205 @return: the resolved (full) name
1206 @raise errors.OpPrereqError: if the item is not found
1209 full_name = fn(name)
1210 if full_name is None:
1211 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1216 def _ExpandNodeName(cfg, name):
1217 """Wrapper over L{_ExpandItemName} for nodes."""
1218 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1221 def _ExpandInstanceName(cfg, name):
1222 """Wrapper over L{_ExpandItemName} for instance."""
1223 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
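# Sketch (illustrative names): both wrappers turn a possibly abbreviated
# name into its canonical form, e.g. _ExpandNodeName(cfg, "node1") would
# return "node1.example.com" when that is the only matching node, and raise
# OpPrereqError if nothing matches.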
1226 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1227 minmem, maxmem, vcpus, nics, disk_template, disks,
1228 bep, hvp, hypervisor_name, tags):
1229 """Builds instance related env variables for hooks
1231 This builds the hook environment from individual variables.
1234 @param name: the name of the instance
1235 @type primary_node: string
1236 @param primary_node: the name of the instance's primary node
1237 @type secondary_nodes: list
1238 @param secondary_nodes: list of secondary nodes as strings
1239 @type os_type: string
1240 @param os_type: the name of the instance's OS
1241 @type status: string
1242 @param status: the desired status of the instance
1243 @type minmem: string
1244 @param minmem: the minimum memory size of the instance
1245 @type maxmem: string
1246 @param maxmem: the maximum memory size of the instance
1248 @param vcpus: the count of VCPUs the instance has
1250 @param nics: list of tuples (ip, mac, mode, link) representing
1251 the NICs the instance has
1252 @type disk_template: string
1253 @param disk_template: the disk template of the instance
1255 @param disks: the list of (size, mode) pairs
1257 @param bep: the backend parameters for the instance
1259 @param hvp: the hypervisor parameters for the instance
1260 @type hypervisor_name: string
1261 @param hypervisor_name: the hypervisor for the instance
1263 @param tags: list of instance tags as strings
1265 @return: the hook environment for this instance
1270 "INSTANCE_NAME": name,
1271 "INSTANCE_PRIMARY": primary_node,
1272 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1273 "INSTANCE_OS_TYPE": os_type,
1274 "INSTANCE_STATUS": status,
1275 "INSTANCE_MINMEM": minmem,
1276 "INSTANCE_MAXMEM": maxmem,
1277 # TODO(2.7) remove deprecated "memory" value
1278 "INSTANCE_MEMORY": maxmem,
1279 "INSTANCE_VCPUS": vcpus,
1280 "INSTANCE_DISK_TEMPLATE": disk_template,
1281 "INSTANCE_HYPERVISOR": hypervisor_name,
1284 nic_count = len(nics)
1285 for idx, (ip, mac, mode, link) in enumerate(nics):
1288 env["INSTANCE_NIC%d_IP" % idx] = ip
1289 env["INSTANCE_NIC%d_MAC" % idx] = mac
1290 env["INSTANCE_NIC%d_MODE" % idx] = mode
1291 env["INSTANCE_NIC%d_LINK" % idx] = link
1292 if mode == constants.NIC_MODE_BRIDGED:
1293 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1297 env["INSTANCE_NIC_COUNT"] = nic_count
1300 disk_count = len(disks)
1301 for idx, (size, mode) in enumerate(disks):
1302 env["INSTANCE_DISK%d_SIZE" % idx] = size
1303 env["INSTANCE_DISK%d_MODE" % idx] = mode
1307 env["INSTANCE_DISK_COUNT"] = disk_count
1312 env["INSTANCE_TAGS"] = " ".join(tags)
1314 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1315 for key, value in source.items():
1316 env["INSTANCE_%s_%s" % (kind, key)] = value
1321 def _NICListToTuple(lu, nics):
1322 """Build a list of nic information tuples.
1324 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1325 value in LUInstanceQueryData.
1327 @type lu: L{LogicalUnit}
1328 @param lu: the logical unit on whose behalf we execute
1329 @type nics: list of L{objects.NIC}
1330 @param nics: list of nics to convert to hooks tuples
1334 cluster = lu.cfg.GetClusterInfo()
1338 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1339 mode = filled_params[constants.NIC_MODE]
1340 link = filled_params[constants.NIC_LINK]
1341 hooks_nics.append((ip, mac, mode, link))
1345 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1346 """Builds instance related env variables for hooks from an object.
1348 @type lu: L{LogicalUnit}
1349 @param lu: the logical unit on whose behalf we execute
1350 @type instance: L{objects.Instance}
1351 @param instance: the instance for which we should build the
1353 @type override: dict
1354 @param override: dictionary with key/values that will override
1357 @return: the hook environment dictionary
1360 cluster = lu.cfg.GetClusterInfo()
1361 bep = cluster.FillBE(instance)
1362 hvp = cluster.FillHV(instance)
1364 "name": instance.name,
1365 "primary_node": instance.primary_node,
1366 "secondary_nodes": instance.secondary_nodes,
1367 "os_type": instance.os,
1368 "status": instance.admin_state,
1369 "maxmem": bep[constants.BE_MAXMEM],
1370 "minmem": bep[constants.BE_MINMEM],
1371 "vcpus": bep[constants.BE_VCPUS],
1372 "nics": _NICListToTuple(lu, instance.nics),
1373 "disk_template": instance.disk_template,
1374 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1377 "hypervisor_name": instance.hypervisor,
1378 "tags": instance.tags,
1381 args.update(override)
1382 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1385 def _AdjustCandidatePool(lu, exceptions):
1386 """Adjust the candidate pool after node operations.
1389 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1391 lu.LogInfo("Promoted nodes to master candidate role: %s",
1392 utils.CommaJoin(node.name for node in mod_list))
1393 for name in mod_list:
1394 lu.context.ReaddNode(name)
1395 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1397 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1401 def _DecideSelfPromotion(lu, exceptions=None):
1402 """Decide whether I should promote myself as a master candidate.
1405 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1406 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
# the new node will increase mc_max by one, so:
1408 mc_should = min(mc_should + 1, cp_size)
1409 return mc_now < mc_should
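# Worked example (illustrative numbers): with candidate_pool_size = 10,
# mc_now = 3 and mc_should = 5, the new node raises the target to
# min(5 + 1, 10) = 6, and since 3 < 6 the node promotes itself.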
1412 def _CalculateGroupIPolicy(cluster, group):
1413 """Calculate instance policy for group.
1416 return cluster.SimpleFillIPolicy(group.ipolicy)
1419 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1420 """Check that the brigdes needed by a list of nics exist.
1423 cluster = lu.cfg.GetClusterInfo()
1424 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1425 brlist = [params[constants.NIC_LINK] for params in paramslist
1426 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1428 result = lu.rpc.call_bridges_exist(target_node, brlist)
1429 result.Raise("Error checking bridges on destination node '%s'" %
1430 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1433 def _CheckInstanceBridgesExist(lu, instance, node=None):
1434 """Check that the brigdes needed by an instance exist.
1438 node = instance.primary_node
1439 _CheckNicsBridgesExist(lu, instance.nics, node)
1442 def _CheckOSVariant(os_obj, name):
1443 """Check whether an OS name conforms to the os variants specification.
1445 @type os_obj: L{objects.OS}
1446 @param os_obj: OS object to check
1448 @param name: OS name passed by the user, to check for validity
1451 variant = objects.OS.GetVariant(name)
1452 if not os_obj.supported_variants:
1454 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1455 " passed)" % (os_obj.name, variant),
1459 raise errors.OpPrereqError("OS name must include a variant",
1462 if variant not in os_obj.supported_variants:
1463 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1466 def _GetNodeInstancesInner(cfg, fn):
1467 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1470 def _GetNodeInstances(cfg, node_name):
1471 """Returns a list of all primary and secondary instances on a node.
1475 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1478 def _GetNodePrimaryInstances(cfg, node_name):
1479 """Returns primary instances on a node.
1482 return _GetNodeInstancesInner(cfg,
1483 lambda inst: node_name == inst.primary_node)
1486 def _GetNodeSecondaryInstances(cfg, node_name):
1487 """Returns secondary instances on a node.
1490 return _GetNodeInstancesInner(cfg,
1491 lambda inst: node_name in inst.secondary_nodes)
1494 def _GetStorageTypeArgs(cfg, storage_type):
1495 """Returns the arguments for a storage type.
1498 # Special case for file storage
1499 if storage_type == constants.ST_FILE:
1500 # storage.FileStorage wants a list of storage directories
1501 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1506 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1509 for dev in instance.disks:
1510 cfg.SetDiskID(dev, node_name)
1512 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1513 result.Raise("Failed to get disk status from node %s" % node_name,
1514 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1516 for idx, bdev_status in enumerate(result.payload):
1517 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1523 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1524 """Check the sanity of iallocator and node arguments and use the
1525 cluster-wide iallocator if appropriate.
1527 Check that at most one of (iallocator, node) is specified. If none is
1528 specified, then the LU's opcode's iallocator slot is filled with the
1529 cluster-wide default iallocator.
1531 @type iallocator_slot: string
1532 @param iallocator_slot: the name of the opcode iallocator slot
1533 @type node_slot: string
1534 @param node_slot: the name of the opcode target node slot
1537 node = getattr(lu.op, node_slot, None)
1538 iallocator = getattr(lu.op, iallocator_slot, None)
1540 if node is not None and iallocator is not None:
1541 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1543 elif node is None and iallocator is None:
1544 default_iallocator = lu.cfg.GetDefaultIAllocator()
1545 if default_iallocator:
1546 setattr(lu.op, iallocator_slot, default_iallocator)
1548 raise errors.OpPrereqError("No iallocator or node given and no"
1549 " cluster-wide default iallocator found;"
1550 " please specify either an iallocator or a"
1551 " node, or set a cluster-wide default"
1555 def _GetDefaultIAllocator(cfg, iallocator):
1556 """Decides on which iallocator to use.
1558 @type cfg: L{config.ConfigWriter}
1559 @param cfg: Cluster configuration object
1560 @type iallocator: string or None
1561 @param iallocator: Iallocator specified in opcode
1563 @return: Iallocator name
1567 # Use default iallocator
1568 iallocator = cfg.GetDefaultIAllocator()
1571 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1572 " opcode nor as a cluster-wide default",
1578 class LUClusterPostInit(LogicalUnit):
1579 """Logical unit for running hooks after cluster initialization.
1582 HPATH = "cluster-init"
1583 HTYPE = constants.HTYPE_CLUSTER
1585 def BuildHooksEnv(self):
1590 "OP_TARGET": self.cfg.GetClusterName(),
1593 def BuildHooksNodes(self):
1594 """Build hooks nodes.
1597 return ([], [self.cfg.GetMasterNode()])
1599 def Exec(self, feedback_fn):
1606 class LUClusterDestroy(LogicalUnit):
1607 """Logical unit for destroying the cluster.
1610 HPATH = "cluster-destroy"
1611 HTYPE = constants.HTYPE_CLUSTER
1613 def BuildHooksEnv(self):
1618 "OP_TARGET": self.cfg.GetClusterName(),
1621 def BuildHooksNodes(self):
1622 """Build hooks nodes.
1627 def CheckPrereq(self):
1628 """Check prerequisites.
1630 This checks whether the cluster is empty.
1632 Any errors are signaled by raising errors.OpPrereqError.
1635 master = self.cfg.GetMasterNode()
1637 nodelist = self.cfg.GetNodeList()
1638 if len(nodelist) != 1 or nodelist[0] != master:
1639 raise errors.OpPrereqError("There are still %d node(s) in"
1640 " this cluster." % (len(nodelist) - 1),
1642 instancelist = self.cfg.GetInstanceList()
1644 raise errors.OpPrereqError("There are still %d instance(s) in"
1645 " this cluster." % len(instancelist),
1648 def Exec(self, feedback_fn):
1649 """Destroys the cluster.
1652 master_params = self.cfg.GetMasterNetworkParameters()
1654 # Run post hooks on master node before it's removed
1655 _RunPostHook(self, master_params.name)
1657 ems = self.cfg.GetUseExternalMipScript()
1658 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1661 self.LogWarning("Error disabling the master IP address: %s",
1664 return master_params.name
1667 def _VerifyCertificate(filename):
1668 """Verifies a certificate for L{LUClusterVerifyConfig}.
1670 @type filename: string
1671 @param filename: Path to PEM file
1675 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1676 utils.ReadFile(filename))
1677 except Exception, err: # pylint: disable=W0703
1678 return (LUClusterVerifyConfig.ETYPE_ERROR,
1679 "Failed to load X509 certificate %s: %s" % (filename, err))
1682 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1683 constants.SSL_CERT_EXPIRATION_ERROR)
1686 fnamemsg = "While verifying %s: %s" % (filename, msg)
1691 return (None, fnamemsg)
1692 elif errcode == utils.CERT_WARNING:
1693 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1694 elif errcode == utils.CERT_ERROR:
1695 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1697 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1700 def _GetAllHypervisorParameters(cluster, instances):
1701 """Compute the set of all hypervisor parameters.
1703 @type cluster: L{objects.Cluster}
1704 @param cluster: the cluster object
@type instances: list of L{objects.Instance}
1706 @param instances: additional instances from which to obtain parameters
1707 @rtype: list of (origin, hypervisor, parameters)
1708 @return: a list with all parameters found, indicating the hypervisor they
1709 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1714 for hv_name in cluster.enabled_hypervisors:
1715 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1717 for os_name, os_hvp in cluster.os_hvp.items():
1718 for hv_name, hv_params in os_hvp.items():
1720 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1721 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1723 # TODO: collapse identical parameter values in a single one
1724 for instance in instances:
1725 if instance.hvparams:
1726 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1727 cluster.FillHV(instance)))
1732 class _VerifyErrors(object):
1733 """Mix-in for cluster/group verify LUs.
1735 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1736 self.op and self._feedback_fn to be available.)
1740 ETYPE_FIELD = "code"
1741 ETYPE_ERROR = "ERROR"
1742 ETYPE_WARNING = "WARNING"
1744 def _Error(self, ecode, item, msg, *args, **kwargs):
1745 """Format an error message.
1747 Based on the opcode's error_codes parameter, either format a
1748 parseable error code, or a simpler error string.
1750 This must be called only from Exec and functions called from Exec.
1753 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1754 itype, etxt, _ = ecode
1755 # first complete the msg
1758 # then format the whole message
1759 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1760 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1766 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1767 # and finally report it via the feedback_fn
1768 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1770 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1771 """Log an error message if the passed condition is True.
1775 or self.op.debug_simulate_errors) # pylint: disable=E1101
1777 # If the error code is in the list of ignored errors, demote the error to a
1779 (_, etxt, _) = ecode
1780 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1781 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1784 self._Error(ecode, *args, **kwargs)
1786 # do not mark the operation as failed for WARN cases only
1787 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1788 self.bad = self.bad or cond
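# Usage sketch (not part of the original module), following the call pattern
# used by the verify LUs below; "msgs" is a hypothetical list of problem
# descriptions collected earlier:
#
#   self._ErrorIf(bool(msgs), constants.CV_ECLUSTERCFG, None,
#                 "configuration problems found: %s", utils.CommaJoin(msgs))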
1791 class LUClusterVerify(NoHooksLU):
1792 """Submits all jobs necessary to verify the cluster.
1797 def ExpandNames(self):
1798 self.needed_locks = {}
1800 def Exec(self, feedback_fn):
1803 if self.op.group_name:
1804 groups = [self.op.group_name]
1805 depends_fn = lambda: None
1807 groups = self.cfg.GetNodeGroupList()
1809 # Verify global configuration
1811 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1814 # Always depend on global verification
1815 depends_fn = lambda: [(-len(jobs), [])]
1817 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1818 ignore_errors=self.op.ignore_errors,
1819 depends=depends_fn())]
1820 for group in groups)
1822 # Fix up all parameters
1823 for op in itertools.chain(*jobs): # pylint: disable=W0142
1824 op.debug_simulate_errors = self.op.debug_simulate_errors
1825 op.verbose = self.op.verbose
1826 op.error_codes = self.op.error_codes
1828 op.skip_checks = self.op.skip_checks
1829 except AttributeError:
1830 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1832 return ResultWithJobs(jobs)
1835 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1836 """Verifies the cluster config.
1841 def _VerifyHVP(self, hvp_data):
1842 """Verifies locally the syntax of the hypervisor parameters.
1845 for item, hv_name, hv_params in hvp_data:
1846 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1849 hv_class = hypervisor.GetHypervisor(hv_name)
1850 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1851 hv_class.CheckParameterSyntax(hv_params)
1852 except errors.GenericError, err:
1853 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1855 def ExpandNames(self):
1856 # Information can be safely retrieved as the BGL is acquired in exclusive
1858 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1859 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1860 self.all_node_info = self.cfg.GetAllNodesInfo()
1861 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1862 self.needed_locks = {}
1864 def Exec(self, feedback_fn):
1865 """Verify integrity of cluster, performing various test on nodes.
1869 self._feedback_fn = feedback_fn
1871 feedback_fn("* Verifying cluster config")
1873 for msg in self.cfg.VerifyConfig():
1874 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1876 feedback_fn("* Verifying cluster certificate files")
1878 for cert_filename in constants.ALL_CERT_FILES:
1879 (errcode, msg) = _VerifyCertificate(cert_filename)
1880 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1882 feedback_fn("* Verifying hypervisor parameters")
1884 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1885 self.all_inst_info.values()))
1887 feedback_fn("* Verifying all nodes belong to an existing group")
1889 # We do this verification here because, should this bogus circumstance
1890 # occur, it would never be caught by VerifyGroup, which only acts on
1891 # nodes/instances reachable from existing node groups.
1893 dangling_nodes = set(node.name for node in self.all_node_info.values()
1894 if node.group not in self.all_group_info)
1896 dangling_instances = {}
1897 no_node_instances = []
1899 for inst in self.all_inst_info.values():
1900 if inst.primary_node in dangling_nodes:
1901 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1902 elif inst.primary_node not in self.all_node_info:
1903 no_node_instances.append(inst.name)
1908 utils.CommaJoin(dangling_instances.get(node.name,
1910 for node in dangling_nodes]
1912 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1914 "the following nodes (and their instances) belong to a non"
1915 " existing group: %s", utils.CommaJoin(pretty_dangling))
1917 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1919 "the following instances have a non-existing primary-node:"
1920 " %s", utils.CommaJoin(no_node_instances))
1925 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1926 """Verifies the status of a node group.
1929 HPATH = "cluster-verify"
1930 HTYPE = constants.HTYPE_CLUSTER
1933 _HOOKS_INDENT_RE = re.compile("^", re.M)
1935 class NodeImage(object):
1936 """A class representing the logical and physical status of a node.
1939 @ivar name: the node name to which this object refers
1940 @ivar volumes: a structure as returned from
1941 L{ganeti.backend.GetVolumeList} (runtime)
1942 @ivar instances: a list of running instances (runtime)
1943 @ivar pinst: list of configured primary instances (config)
1944 @ivar sinst: list of configured secondary instances (config)
1945 @ivar sbp: dictionary of {primary-node: list of instances} for all
1946 instances for which this node is secondary (config)
1947 @ivar mfree: free memory, as reported by hypervisor (runtime)
1948 @ivar dfree: free disk, as reported by the node (runtime)
1949 @ivar offline: the offline status (config)
1950 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call was successful (overall,
1952 not whether the individual keys were correct) (runtime)
1953 @type lvm_fail: boolean
1954 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1955 @type hyp_fail: boolean
1956 @ivar hyp_fail: whether the RPC call didn't return the instance list
1957 @type ghost: boolean
1958 @ivar ghost: whether this is a known node or not (config)
1959 @type os_fail: boolean
1960 @ivar os_fail: whether the RPC call didn't return valid OS data
1962 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1963 @type vm_capable: boolean
1964 @ivar vm_capable: whether the node can host instances
1967 def __init__(self, offline=False, name=None, vm_capable=True):
1976 self.offline = offline
1977 self.vm_capable = vm_capable
1978 self.rpc_fail = False
1979 self.lvm_fail = False
1980 self.hyp_fail = False
1982 self.os_fail = False
1985 def ExpandNames(self):
1986 # This raises errors.OpPrereqError on its own:
1987 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1989 # Get instances in node group; this is unsafe and needs verification later
1990 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1992 self.needed_locks = {
1993 locking.LEVEL_INSTANCE: inst_names,
1994 locking.LEVEL_NODEGROUP: [self.group_uuid],
1995 locking.LEVEL_NODE: [],
1998 self.share_locks = _ShareAll()
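# All locks are acquired in shared mode: group verification only reads the
# configuration and node state, it never modifies them.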
2000 def DeclareLocks(self, level):
2001 if level == locking.LEVEL_NODE:
2002 # Get members of node group; this is unsafe and needs verification later
2003 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2005 all_inst_info = self.cfg.GetAllInstancesInfo()
2007 # In Exec(), we warn about mirrored instances that have primary and
2008 # secondary living in separate node groups. To fully verify that
2009 # volumes for these instances are healthy, we will need to do an
2010 # extra call to their secondaries. We ensure here those nodes will
2012 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2013 # Important: access only the instances whose lock is owned
2014 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2015 nodes.update(all_inst_info[inst].secondary_nodes)
2017 self.needed_locks[locking.LEVEL_NODE] = nodes
2019 def CheckPrereq(self):
2020 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2021 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2023 group_nodes = set(self.group_info.members)
2024 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2027 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2029 unlocked_instances = \
2030 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2033 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2034 utils.CommaJoin(unlocked_nodes))
2036 if unlocked_instances:
2037 raise errors.OpPrereqError("Missing lock for instances: %s" %
2038 utils.CommaJoin(unlocked_instances))
2040 self.all_node_info = self.cfg.GetAllNodesInfo()
2041 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2043 self.my_node_names = utils.NiceSort(group_nodes)
2044 self.my_inst_names = utils.NiceSort(group_instances)
2046 self.my_node_info = dict((name, self.all_node_info[name])
2047 for name in self.my_node_names)
2049 self.my_inst_info = dict((name, self.all_inst_info[name])
2050 for name in self.my_inst_names)
2052 # We detect here the nodes that will need the extra RPC calls for verifying
2053 # split LV volumes; they should be locked.
2054 extra_lv_nodes = set()
2056 for inst in self.my_inst_info.values():
2057 if inst.disk_template in constants.DTS_INT_MIRROR:
2058 group = self.my_node_info[inst.primary_node].group
2059 for nname in inst.secondary_nodes:
2060 if self.all_node_info[nname].group != group:
2061 extra_lv_nodes.add(nname)
2063 unlocked_lv_nodes = \
2064 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2066 if unlocked_lv_nodes:
2067 raise errors.OpPrereqError("these nodes could be locked: %s" %
2068 utils.CommaJoin(unlocked_lv_nodes))
2069 self.extra_lv_nodes = list(extra_lv_nodes)
2071 def _VerifyNode(self, ninfo, nresult):
2072 """Perform some basic validation on data returned from a node.
2074 - check the result data structure is well formed and has all the
2076 - check ganeti version
2078 @type ninfo: L{objects.Node}
2079 @param ninfo: the node to check
2080 @param nresult: the results from the node
2082 @return: whether overall this call was successful (and we can expect
2083 reasonable values in the response)
2087 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2089 # main result, nresult should be a non-empty dict
2090 test = not nresult or not isinstance(nresult, dict)
2091 _ErrorIf(test, constants.CV_ENODERPC, node,
2092 "unable to verify node: no data returned")
2096 # compares ganeti version
2097 local_version = constants.PROTOCOL_VERSION
2098 remote_version = nresult.get("version", None)
2099 test = not (remote_version and
2100 isinstance(remote_version, (list, tuple)) and
2101 len(remote_version) == 2)
2102 _ErrorIf(test, constants.CV_ENODERPC, node,
2103 "connection to node returned invalid data")
2107 test = local_version != remote_version[0]
2108 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2109 "incompatible protocol versions: master %s,"
2110 " node %s", local_version, remote_version[0])
2114 # node seems compatible, we can actually try to look into its results
2116 # full package version
2117 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2118 constants.CV_ENODEVERSION, node,
2119 "software version mismatch: master %s, node %s",
2120 constants.RELEASE_VERSION, remote_version[1],
2121 code=self.ETYPE_WARNING)
2123 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2124 if ninfo.vm_capable and isinstance(hyp_result, dict):
2125 for hv_name, hv_result in hyp_result.iteritems():
2126 test = hv_result is not None
2127 _ErrorIf(test, constants.CV_ENODEHV, node,
2128 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2130 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2131 if ninfo.vm_capable and isinstance(hvp_result, list):
2132 for item, hv_name, hv_result in hvp_result:
2133 _ErrorIf(True, constants.CV_ENODEHV, node,
2134 "hypervisor %s parameter verify failure (source %s): %s",
2135 hv_name, item, hv_result)
2137 test = nresult.get(constants.NV_NODESETUP,
2138 ["Missing NODESETUP results"])
2139 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2144 def _VerifyNodeTime(self, ninfo, nresult,
2145 nvinfo_starttime, nvinfo_endtime):
2146 """Check the node time.
2148 @type ninfo: L{objects.Node}
2149 @param ninfo: the node to check
2150 @param nresult: the remote results for the node
2151 @param nvinfo_starttime: the start time of the RPC call
2152 @param nvinfo_endtime: the end time of the RPC call
2156 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2158 ntime = nresult.get(constants.NV_TIME, None)
2160 ntime_merged = utils.MergeTime(ntime)
2161 except (ValueError, TypeError):
2162 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2165 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2166 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2167 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2168 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
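# The node's reported time must fall within NODE_MAX_CLOCK_SKEW of the
# [start, end] window of the verify RPC; anything outside is reported below
# as clock divergence.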
2172 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2173 "Node time diverges by at least %s from master node time",
2176 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2177 """Check the node LVM results.
2179 @type ninfo: L{objects.Node}
2180 @param ninfo: the node to check
2181 @param nresult: the remote results for the node
2182 @param vg_name: the configured VG name
2189 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2191 # checks vg existence and size > 20G
2192 vglist = nresult.get(constants.NV_VGLIST, None)
2194 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2196 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2197 constants.MIN_VG_SIZE)
2198 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2201 pvlist = nresult.get(constants.NV_PVLIST, None)
2202 test = pvlist is None
2203 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2205 # check that ':' is not present in PV names, since it's a
2206 # special character for lvcreate (denotes the range of PEs to
2208 for _, pvname, owner_vg in pvlist:
2209 test = ":" in pvname
2210 _ErrorIf(test, constants.CV_ENODELVM, node,
2211 "Invalid character ':' in PV '%s' of VG '%s'",
2214 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2215 """Check the node bridges.
2217 @type ninfo: L{objects.Node}
2218 @param ninfo: the node to check
2219 @param nresult: the remote results for the node
2220 @param bridges: the expected list of bridges
2227 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 missing = nresult.get(constants.NV_BRIDGES, None)
2230 test = not isinstance(missing, list)
2231 _ErrorIf(test, constants.CV_ENODENET, node,
2232 "did not return valid bridge information")
2234 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2235 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2237 def _VerifyNodeUserScripts(self, ninfo, nresult):
2238 """Check the results of user scripts presence and executability on the node
2240 @type ninfo: L{objects.Node}
2241 @param ninfo: the node to check
2242 @param nresult: the remote results for the node
2247 test = not constants.NV_USERSCRIPTS in nresult
2248 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2249 "did not return user scripts information")
2251 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2253 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2254 "user scripts not present or not executable: %s" %
2255 utils.CommaJoin(sorted(broken_scripts)))
2257 def _VerifyNodeNetwork(self, ninfo, nresult):
2258 """Check the node network connectivity results.
2260 @type ninfo: L{objects.Node}
2261 @param ninfo: the node to check
2262 @param nresult: the remote results for the node
2266 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2268 test = constants.NV_NODELIST not in nresult
2269 _ErrorIf(test, constants.CV_ENODESSH, node,
2270 "node hasn't returned node ssh connectivity data")
2272 if nresult[constants.NV_NODELIST]:
2273 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2274 _ErrorIf(True, constants.CV_ENODESSH, node,
2275 "ssh communication with node '%s': %s", a_node, a_msg)
2277 test = constants.NV_NODENETTEST not in nresult
2278 _ErrorIf(test, constants.CV_ENODENET, node,
2279 "node hasn't returned node tcp connectivity data")
2281 if nresult[constants.NV_NODENETTEST]:
2282 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2284 _ErrorIf(True, constants.CV_ENODENET, node,
2285 "tcp communication with node '%s': %s",
2286 anode, nresult[constants.NV_NODENETTEST][anode])
2288 test = constants.NV_MASTERIP not in nresult
2289 _ErrorIf(test, constants.CV_ENODENET, node,
2290 "node hasn't returned node master IP reachability data")
2292 if not nresult[constants.NV_MASTERIP]:
2293 if node == self.master_node:
2294 msg = "the master node cannot reach the master IP (not configured?)"
2296 msg = "cannot reach the master IP"
2297 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2299 def _VerifyInstance(self, instance, instanceconfig, node_image,
2301 """Verify an instance.
2303 This function checks to see if the required block devices are
2304 available on the instance's node.
2307 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2308 node_current = instanceconfig.primary_node
2310 node_vol_should = {}
2311 instanceconfig.MapLVsByNode(node_vol_should)
2313 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2314 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2315 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2317 for node in node_vol_should:
2318 n_img = node_image[node]
2319 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2320 # ignore missing volumes on offline or broken nodes
2322 for volume in node_vol_should[node]:
2323 test = volume not in n_img.volumes
2324 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2325 "volume %s missing on node %s", volume, node)
2327 if instanceconfig.admin_state == constants.ADMINST_UP:
2328 pri_img = node_image[node_current]
2329 test = instance not in pri_img.instances and not pri_img.offline
2330 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2331 "instance not running on its primary node %s",
2334 diskdata = [(nname, success, status, idx)
2335 for (nname, disks) in diskstatus.items()
2336 for idx, (success, status) in enumerate(disks)]
2338 for nname, success, bdev_status, idx in diskdata:
2339 # the 'ghost node' construction in Exec() ensures that we have a
2341 snode = node_image[nname]
2342 bad_snode = snode.ghost or snode.offline
2343 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2344 not success and not bad_snode,
2345 constants.CV_EINSTANCEFAULTYDISK, instance,
2346 "couldn't retrieve status for disk/%s on %s: %s",
2347 idx, nname, bdev_status)
2348 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2349 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2350 constants.CV_EINSTANCEFAULTYDISK, instance,
2351 "disk/%s on %s is faulty", idx, nname)
2353 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2354 """Verify if there are any unknown volumes in the cluster.
2356 The .os, .swap and backup volumes are ignored. All other volumes are
2357 reported as unknown.
2359 @type reserved: L{ganeti.utils.FieldSet}
2360 @param reserved: a FieldSet of reserved volume names
2363 for node, n_img in node_image.items():
2364 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2365 # skip non-healthy nodes
2367 for volume in n_img.volumes:
2368 test = ((node not in node_vol_should or
2369 volume not in node_vol_should[node]) and
2370 not reserved.Matches(volume))
2371 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2372 "volume %s is unknown", volume)
2374 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2375 """Verify N+1 Memory Resilience.
2377 Check that if one single node dies we can still start all the
2378 instances it was primary for.
2381 cluster_info = self.cfg.GetClusterInfo()
2382 for node, n_img in node_image.items():
2383 # This code checks that every node which is now listed as
2384 # secondary has enough memory to host all instances it is
2385 # supposed to should a single other node in the cluster fail.
2386 # FIXME: not ready for failover to an arbitrary node
2387 # FIXME: does not support file-backed instances
2388 # WARNING: we currently take into account down instances as well
2389 # as up ones, considering that even if they're down someone
2390 # might want to start them even in the event of a node failure.
2392 # we're skipping offline nodes from the N+1 warning, since
2393 # most likely we don't have good memory information from them;
2394 # we already list instances living on such nodes, and that's
2397 #TODO(dynmem): use MINMEM for checking
2398 #TODO(dynmem): also consider ballooning out other instances
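# For every primary node whose instances have this node as secondary, sum
# the maximum memory of the auto-balanced instances and check that this
# node's free memory could absorb them should that primary fail.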
2399 for prinode, instances in n_img.sbp.items():
2401 for instance in instances:
2402 bep = cluster_info.FillBE(instance_cfg[instance])
2403 if bep[constants.BE_AUTO_BALANCE]:
2404 needed_mem += bep[constants.BE_MAXMEM]
2405 test = n_img.mfree < needed_mem
2406 self._ErrorIf(test, constants.CV_ENODEN1, node,
2407 "not enough memory to accomodate instance failovers"
2408 " should node %s fail (%dMiB needed, %dMiB available)",
2409 prinode, needed_mem, n_img.mfree)
2412 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2413 (files_all, files_opt, files_mc, files_vm)):
2414 """Verifies file checksums collected from all nodes.
2416 @param errorif: Callback for reporting errors
2417 @param nodeinfo: List of L{objects.Node} objects
2418 @param master_node: Name of master node
2419 @param all_nvinfo: RPC results
2422 # Define functions determining which nodes to consider for a file
2425 (files_mc, lambda node: (node.master_candidate or
2426 node.name == master_node)),
2427 (files_vm, lambda node: node.vm_capable),
2430 # Build mapping from filename to list of nodes which should have the file
2432 for (files, fn) in files2nodefn:
2434 filenodes = nodeinfo
2436 filenodes = filter(fn, nodeinfo)
2437 nodefiles.update((filename,
2438 frozenset(map(operator.attrgetter("name"), filenodes)))
2439 for filename in files)
2441 assert set(nodefiles) == (files_all | files_mc | files_vm)
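# nodefiles now maps each filename to the frozenset of node names expected
# to hold it, e.g. master-candidate-only files map to the candidates plus
# the master node, vm_capable-only files to the vm_capable nodes.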
2443 fileinfo = dict((filename, {}) for filename in nodefiles)
2444 ignore_nodes = set()
2446 for node in nodeinfo:
2448 ignore_nodes.add(node.name)
2451 nresult = all_nvinfo[node.name]
2453 if nresult.fail_msg or not nresult.payload:
2456 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2458 test = not (node_files and isinstance(node_files, dict))
2459 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2460 "Node did not return file checksum data")
2462 ignore_nodes.add(node.name)
2465 # Build per-checksum mapping from filename to nodes having it
2466 for (filename, checksum) in node_files.items():
2467 assert filename in nodefiles
2468 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2470 for (filename, checksums) in fileinfo.items():
2471 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2473 # Nodes having the file
2474 with_file = frozenset(node_name
2475 for nodes in fileinfo[filename].values()
2476 for node_name in nodes) - ignore_nodes
2478 expected_nodes = nodefiles[filename] - ignore_nodes
2480 # Nodes missing file
2481 missing_file = expected_nodes - with_file
2483 if filename in files_opt:
2485 errorif(missing_file and missing_file != expected_nodes,
2486 constants.CV_ECLUSTERFILECHECK, None,
2487 "File %s is optional, but it must exist on all or no"
2488 " nodes (not found on %s)",
2489 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2491 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2492 "File %s is missing from node(s) %s", filename,
2493 utils.CommaJoin(utils.NiceSort(missing_file)))
2495 # Warn if a node has a file it shouldn't
2496 unexpected = with_file - expected_nodes
2498 constants.CV_ECLUSTERFILECHECK, None,
2499 "File %s should not exist on node(s) %s",
2500 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2502 # See if there are multiple versions of the file
2503 test = len(checksums) > 1
2505 variants = ["variant %s on %s" %
2506 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2507 for (idx, (checksum, nodes)) in
2508 enumerate(sorted(checksums.items()))]
2512 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2513 "File %s found with %s different checksums (%s)",
2514 filename, len(checksums), "; ".join(variants))
2516 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2518 """Verifies and the node DRBD status.
2520 @type ninfo: L{objects.Node}
2521 @param ninfo: the node to check
2522 @param nresult: the remote results for the node
2523 @param instanceinfo: the dict of instances
2524 @param drbd_helper: the configured DRBD usermode helper
2525 @param drbd_map: the DRBD map as returned by
2526 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2530 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2533 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2534 test = (helper_result is None)
2535 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2536 "no drbd usermode helper returned")
2538 status, payload = helper_result
2540 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2541 "drbd usermode helper check unsuccessful: %s", payload)
2542 test = status and (payload != drbd_helper)
2543 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2544 "wrong drbd usermode helper: %s", payload)
2546 # compute the DRBD minors
2548 for minor, instance in drbd_map[node].items():
2549 test = instance not in instanceinfo
2550 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2551 "ghost instance '%s' in temporary DRBD map", instance)
2552 # ghost instance should not be running, but otherwise we
2553 # don't give double warnings (both ghost instance and
2554 # unallocated minor in use)
2556 node_drbd[minor] = (instance, False)
2558 instance = instanceinfo[instance]
2559 node_drbd[minor] = (instance.name,
2560 instance.admin_state == constants.ADMINST_UP)
2562 # and now check them
2563 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2564 test = not isinstance(used_minors, (tuple, list))
2565 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2566 "cannot parse drbd status file: %s", str(used_minors))
2568 # we cannot check drbd status
2571 for minor, (iname, must_exist) in node_drbd.items():
2572 test = minor not in used_minors and must_exist
2573 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2574 "drbd minor %d of instance %s is not active", minor, iname)
2575 for minor in used_minors:
2576 test = minor not in node_drbd
2577 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2578 "unallocated drbd minor %d is in use", minor)
2580 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2581 """Builds the node OS structures.
2583 @type ninfo: L{objects.Node}
2584 @param ninfo: the node to check
2585 @param nresult: the remote results for the node
2586 @param nimg: the node image object
2590 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2592 remote_os = nresult.get(constants.NV_OSLIST, None)
2593 test = (not isinstance(remote_os, list) or
2594 not compat.all(isinstance(v, list) and len(v) == 7
2595 for v in remote_os))
2597 _ErrorIf(test, constants.CV_ENODEOS, node,
2598 "node hasn't returned valid OS data")
2607 for (name, os_path, status, diagnose,
2608 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2610 if name not in os_dict:
2613 # parameters is a list of lists instead of list of tuples due to
2614 # JSON lacking a real tuple type, fix it:
2615 parameters = [tuple(v) for v in parameters]
2616 os_dict[name].append((os_path, status, diagnose,
2617 set(variants), set(parameters), set(api_ver)))
2619 nimg.oslist = os_dict
2621 def _VerifyNodeOS(self, ninfo, nimg, base):
2622 """Verifies the node OS list.
2624 @type ninfo: L{objects.Node}
2625 @param ninfo: the node to check
2626 @param nimg: the node image object
2627 @param base: the 'template' node we match against (e.g. from the master)
2631 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2633 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2635 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2636 for os_name, os_data in nimg.oslist.items():
2637 assert os_data, "Empty OS status for OS %s?!" % os_name
2638 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2639 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2640 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2641 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2642 "OS '%s' has multiple entries (first one shadows the rest): %s",
2643 os_name, utils.CommaJoin([v[0] for v in os_data]))
2644 # comparisons with the 'base' image
2645 test = os_name not in base.oslist
2646 _ErrorIf(test, constants.CV_ENODEOS, node,
2647 "Extra OS %s not present on reference node (%s)",
2651 assert base.oslist[os_name], "Base node has empty OS status?"
2652 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2654 # base OS is invalid, skipping
2656 for kind, a, b in [("API version", f_api, b_api),
2657 ("variants list", f_var, b_var),
2658 ("parameters", beautify_params(f_param),
2659 beautify_params(b_param))]:
2660 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2661 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2662 kind, os_name, base.name,
2663 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2665 # check any missing OSes
2666 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2667 _ErrorIf(missing, constants.CV_ENODEOS, node,
2668 "OSes present on reference node %s but missing on this node: %s",
2669 base.name, utils.CommaJoin(missing))
2671 def _VerifyOob(self, ninfo, nresult):
2672 """Verifies out of band functionality of a node.
2674 @type ninfo: L{objects.Node}
2675 @param ninfo: the node to check
2676 @param nresult: the remote results for the node
2680 # We just have to verify the paths on master and/or master candidates
2681 # as the oob helper is invoked on the master
2682 if ((ninfo.master_candidate or ninfo.master_capable) and
2683 constants.NV_OOB_PATHS in nresult):
2684 for path_result in nresult[constants.NV_OOB_PATHS]:
2685 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2687 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2688 """Verifies and updates the node volume data.
2690 This function will update a L{NodeImage}'s internal structures
2691 with data from the remote call.
2693 @type ninfo: L{objects.Node}
2694 @param ninfo: the node to check
2695 @param nresult: the remote results for the node
2696 @param nimg: the node image object
2697 @param vg_name: the configured VG name
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 nimg.lvm_fail = True
2704 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2707 elif isinstance(lvdata, basestring):
2708 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2709 utils.SafeEncode(lvdata))
2710 elif not isinstance(lvdata, dict):
2711 _ErrorIf(True, constants.CV_ENODELVM, node,
2712 "rpc call to node failed (lvlist)")
2714 nimg.volumes = lvdata
2715 nimg.lvm_fail = False
2717 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2718 """Verifies and updates the node instance list.
2720 If the listing was successful, then updates this node's instance
2721 list. Otherwise, it marks the RPC call as failed for the instance
2724 @type ninfo: L{objects.Node}
2725 @param ninfo: the node to check
2726 @param nresult: the remote results for the node
2727 @param nimg: the node image object
2730 idata = nresult.get(constants.NV_INSTANCELIST, None)
2731 test = not isinstance(idata, list)
2732 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2733 "rpc call to node failed (instancelist): %s",
2734 utils.SafeEncode(str(idata)))
2736 nimg.hyp_fail = True
2738 nimg.instances = idata
2740 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2741 """Verifies and computes a node information map
2743 @type ninfo: L{objects.Node}
2744 @param ninfo: the node to check
2745 @param nresult: the remote results for the node
2746 @param nimg: the node image object
2747 @param vg_name: the configured VG name
2751 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2753 # try to read free memory (from the hypervisor)
2754 hv_info = nresult.get(constants.NV_HVINFO, None)
2755 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2756 _ErrorIf(test, constants.CV_ENODEHV, node,
2757 "rpc call to node failed (hvinfo)")
2760 nimg.mfree = int(hv_info["memory_free"])
2761 except (ValueError, TypeError):
2762 _ErrorIf(True, constants.CV_ENODERPC, node,
2763 "node returned invalid nodeinfo, check hypervisor")
2765 # FIXME: devise a free space model for file based instances as well
2766 if vg_name is not None:
2767 test = (constants.NV_VGLIST not in nresult or
2768 vg_name not in nresult[constants.NV_VGLIST])
2769 _ErrorIf(test, constants.CV_ENODELVM, node,
2770 "node didn't return data for the volume group '%s'"
2771 " - it is either missing or broken", vg_name)
2774 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2775 except (ValueError, TypeError):
2776 _ErrorIf(True, constants.CV_ENODERPC, node,
2777 "node returned invalid LVM info, check LVM status")
2779 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2780 """Gets per-disk status information for all instances.
2782 @type nodelist: list of strings
2783 @param nodelist: Node names
2784 @type node_image: dict of (name, L{objects.Node})
2785 @param node_image: Node objects
2786 @type instanceinfo: dict of (name, L{objects.Instance})
2787 @param instanceinfo: Instance objects
2788 @rtype: {instance: {node: [(success, payload)]}}
2789 @return: a dictionary of per-instance dictionaries with nodes as
2790 keys and disk information as values; the disk information is a
2791 list of tuples (success, payload)
2794 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2797 node_disks_devonly = {}
2798 diskless_instances = set()
2799 diskless = constants.DT_DISKLESS
2801 for nname in nodelist:
2802 node_instances = list(itertools.chain(node_image[nname].pinst,
2803 node_image[nname].sinst))
2804 diskless_instances.update(inst for inst in node_instances
2805 if instanceinfo[inst].disk_template == diskless)
2806 disks = [(inst, disk)
2807 for inst in node_instances
2808 for disk in instanceinfo[inst].disks]
2811 # No need to collect data
2814 node_disks[nname] = disks
2816 # Creating copies as SetDiskID below will modify the objects and that can
2817 # lead to incorrect data returned from nodes
2818 devonly = [dev.Copy() for (_, dev) in disks]
2821 self.cfg.SetDiskID(dev, nname)
2823 node_disks_devonly[nname] = devonly
2825 assert len(node_disks) == len(node_disks_devonly)
2827 # Collect data from all nodes with disks
2828 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2831 assert len(result) == len(node_disks)
2835 for (nname, nres) in result.items():
2836 disks = node_disks[nname]
2839 # No data from this node
2840 data = len(disks) * [(False, "node offline")]
2843 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2844 "while getting disk information: %s", msg)
2846 # No data from this node
2847 data = len(disks) * [(False, msg)]
2850 for idx, i in enumerate(nres.payload):
2851 if isinstance(i, (tuple, list)) and len(i) == 2:
2854 logging.warning("Invalid result from node %s, entry %d: %s",
2856 data.append((False, "Invalid result from the remote node"))
2858 for ((inst, _), status) in zip(disks, data):
2859 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
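# instdisk now maps instance name -> node name -> list of (success, payload)
# tuples, one entry per disk and in the same order as the instance's disks.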
2861 # Add empty entries for diskless instances.
2862 for inst in diskless_instances:
2863 assert inst not in instdisk
2866 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2867 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2868 compat.all(isinstance(s, (tuple, list)) and
2869 len(s) == 2 for s in statuses)
2870 for inst, nnames in instdisk.items()
2871 for nname, statuses in nnames.items())
2872 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2877 def _SshNodeSelector(group_uuid, all_nodes):
2878 """Create endless iterators for all potential SSH check hosts.
2881 nodes = [node for node in all_nodes
2882 if (node.group != group_uuid and
2884 keyfunc = operator.attrgetter("group")
2886 return map(itertools.cycle,
2887 [sorted(map(operator.attrgetter("name"), names))
2888 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2892 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2893 """Choose which nodes should talk to which other nodes.
2895 We will make nodes contact all nodes in their group, and one node from
2898 @warning: This algorithm has a known issue if one node group is much
2899 smaller than others (e.g. just one node). In such a case all other
2900 nodes will talk to the single node.
2903 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2904 sel = cls._SshNodeSelector(group_uuid, all_nodes)
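# Taking next() from every foreign group's cycle gives each online node one
# peer per other group, spreading the SSH checks over those groups' members.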
2906 return (online_nodes,
2907 dict((name, sorted([i.next() for i in sel]))
2908 for name in online_nodes))
2910 def BuildHooksEnv(self):
2913 Cluster-Verify hooks just ran in the post phase and their failure makes
2914 the output be logged in the verify output and the verification to fail.
2918 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2921 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2922 for node in self.my_node_info.values())
2926 def BuildHooksNodes(self):
2927 """Build hooks nodes.
2930 return ([], self.my_node_names)
2932 def Exec(self, feedback_fn):
2933 """Verify integrity of the node group, performing various test on nodes.
2936 # This method has too many local variables. pylint: disable=R0914
2937 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2939 if not self.my_node_names:
2941 feedback_fn("* Empty node group, skipping verification")
2945 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2946 verbose = self.op.verbose
2947 self._feedback_fn = feedback_fn
2949 vg_name = self.cfg.GetVGName()
2950 drbd_helper = self.cfg.GetDRBDHelper()
2951 cluster = self.cfg.GetClusterInfo()
2952 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2953 hypervisors = cluster.enabled_hypervisors
2954 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2956 i_non_redundant = [] # Non redundant instances
2957 i_non_a_balanced = [] # Non auto-balanced instances
2958 i_offline = 0 # Count of offline instances
2959 n_offline = 0 # Count of offline nodes
2960 n_drained = 0 # Count of nodes being drained
2961 node_vol_should = {}
2963 # FIXME: verify OS list
2966 filemap = _ComputeAncillaryFiles(cluster, False)
2968 # do local checksums
2969 master_node = self.master_node = self.cfg.GetMasterNode()
2970 master_ip = self.cfg.GetMasterIP()
2972 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2975 if self.cfg.GetUseExternalMipScript():
2976 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
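# Build the per-node verification request: each NV_* key asks the nodes to
# collect and report one class of data (file checksums, hypervisor state,
# network connectivity, time, ...).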
2978 node_verify_param = {
2979 constants.NV_FILELIST:
2980 utils.UniqueSequence(filename
2981 for files in filemap
2982 for filename in files),
2983 constants.NV_NODELIST:
2984 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2985 self.all_node_info.values()),
2986 constants.NV_HYPERVISOR: hypervisors,
2987 constants.NV_HVPARAMS:
2988 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2989 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2990 for node in node_data_list
2991 if not node.offline],
2992 constants.NV_INSTANCELIST: hypervisors,
2993 constants.NV_VERSION: None,
2994 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2995 constants.NV_NODESETUP: None,
2996 constants.NV_TIME: None,
2997 constants.NV_MASTERIP: (master_node, master_ip),
2998 constants.NV_OSLIST: None,
2999 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3000 constants.NV_USERSCRIPTS: user_scripts,
3003 if vg_name is not None:
3004 node_verify_param[constants.NV_VGLIST] = None
3005 node_verify_param[constants.NV_LVLIST] = vg_name
3006 node_verify_param[constants.NV_PVLIST] = [vg_name]
3007 node_verify_param[constants.NV_DRBDLIST] = None
3010 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3013 # FIXME: this needs to be changed per node-group, not cluster-wide
3015 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3016 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3017 bridges.add(default_nicpp[constants.NIC_LINK])
3018 for instance in self.my_inst_info.values():
3019 for nic in instance.nics:
3020 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3021 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3022 bridges.add(full_nic[constants.NIC_LINK])
3025 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3027 # Build our expected cluster state
3028 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3030 vm_capable=node.vm_capable))
3031 for node in node_data_list)
3035 for node in self.all_node_info.values():
3036 path = _SupportsOob(self.cfg, node)
3037 if path and path not in oob_paths:
3038 oob_paths.append(path)
3041 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3043 for instance in self.my_inst_names:
3044 inst_config = self.my_inst_info[instance]
3046 for nname in inst_config.all_nodes:
3047 if nname not in node_image:
3048 gnode = self.NodeImage(name=nname)
3049 gnode.ghost = (nname not in self.all_node_info)
3050 node_image[nname] = gnode
3052 inst_config.MapLVsByNode(node_vol_should)
3054 pnode = inst_config.primary_node
3055 node_image[pnode].pinst.append(instance)
3057 for snode in inst_config.secondary_nodes:
3058 nimg = node_image[snode]
3059 nimg.sinst.append(instance)
3060 if pnode not in nimg.sbp:
3061 nimg.sbp[pnode] = []
3062 nimg.sbp[pnode].append(instance)
3064 # At this point, we have the in-memory data structures complete,
3065 # except for the runtime information, which we'll gather next
3067 # Due to the way our RPC system works, exact response times cannot be
3068 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3069 # time before and after executing the request, we can at least have a time
3071 nvinfo_starttime = time.time()
3072 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3074 self.cfg.GetClusterName())
3075 nvinfo_endtime = time.time()
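# Mirrored instances whose secondaries live outside this group need an extra
# LV listing from those nodes so that their volumes can be verified as well.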
3077 if self.extra_lv_nodes and vg_name is not None:
3079 self.rpc.call_node_verify(self.extra_lv_nodes,
3080 {constants.NV_LVLIST: vg_name},
3081 self.cfg.GetClusterName())
3083 extra_lv_nvinfo = {}
3085 all_drbd_map = self.cfg.ComputeDRBDMap()
3087 feedback_fn("* Gathering disk information (%s nodes)" %
3088 len(self.my_node_names))
3089 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3092 feedback_fn("* Verifying configuration file consistency")
3094 # If not all nodes are being checked, we need to make sure the master node
3095 # and a non-checked vm_capable node are in the list.
3096 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3098 vf_nvinfo = all_nvinfo.copy()
3099 vf_node_info = list(self.my_node_info.values())
3100 additional_nodes = []
3101 if master_node not in self.my_node_info:
3102 additional_nodes.append(master_node)
3103 vf_node_info.append(self.all_node_info[master_node])
3104 # Add the first vm_capable node we find which is not included
3105 for node in absent_nodes:
3106 nodeinfo = self.all_node_info[node]
3107 if nodeinfo.vm_capable and not nodeinfo.offline:
3108 additional_nodes.append(node)
3109 vf_node_info.append(self.all_node_info[node])
3111 key = constants.NV_FILELIST
3112 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3113 {key: node_verify_param[key]},
3114 self.cfg.GetClusterName()))
3116 vf_nvinfo = all_nvinfo
3117 vf_node_info = self.my_node_info.values()
3119 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3121 feedback_fn("* Verifying node status")
3125 for node_i in node_data_list:
3127 nimg = node_image[node]
3131 feedback_fn("* Skipping offline node %s" % (node,))
3135 if node == master_node:
3137 elif node_i.master_candidate:
3138 ntype = "master candidate"
3139 elif node_i.drained:
3145 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3147 msg = all_nvinfo[node].fail_msg
3148 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3151 nimg.rpc_fail = True
3154 nresult = all_nvinfo[node].payload
3156 nimg.call_ok = self._VerifyNode(node_i, nresult)
3157 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3158 self._VerifyNodeNetwork(node_i, nresult)
3159 self._VerifyNodeUserScripts(node_i, nresult)
3160 self._VerifyOob(node_i, nresult)
3163 self._VerifyNodeLVM(node_i, nresult, vg_name)
3164 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3167 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3168 self._UpdateNodeInstances(node_i, nresult, nimg)
3169 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3170 self._UpdateNodeOS(node_i, nresult, nimg)
3172 if not nimg.os_fail:
3173 if refos_img is None:
3175 self._VerifyNodeOS(node_i, nimg, refos_img)
3176 self._VerifyNodeBridges(node_i, nresult, bridges)
3178 # Check whether all running instances are primary for the node. (This
3179 # can no longer be done from _VerifyInstance below, since some of the
3180 # wrong instances could be from other node groups.)
3181 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3183 for inst in non_primary_inst:
3184 # FIXME: investigate best way to handle offline insts
3185 if inst.admin_state == constants.ADMINST_OFFLINE:
3187 feedback_fn("* Skipping offline instance %s" % inst.name)
3190 test = inst in self.all_inst_info
3191 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3192 "instance should not run on node %s", node_i.name)
3193 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3194 "node is running unknown instance %s", inst)
3196 for node, result in extra_lv_nvinfo.items():
3197 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3198 node_image[node], vg_name)
3200 feedback_fn("* Verifying instance status")
3201 for instance in self.my_inst_names:
3203 feedback_fn("* Verifying instance %s" % instance)
3204 inst_config = self.my_inst_info[instance]
3205 self._VerifyInstance(instance, inst_config, node_image,
3207 inst_nodes_offline = []
3209 pnode = inst_config.primary_node
3210 pnode_img = node_image[pnode]
3211 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3212 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3213 " primary node failed", instance)
3215 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3217 constants.CV_EINSTANCEBADNODE, instance,
3218 "instance is marked as running and lives on offline node %s",
3219 inst_config.primary_node)
3221 # If the instance is non-redundant we cannot survive losing its primary
3222 # node, so we are not N+1 compliant. On the other hand we have no disk
3223 # templates with more than one secondary so that situation is not well
3225 # FIXME: does not support file-backed instances
3226 if not inst_config.secondary_nodes:
3227 i_non_redundant.append(instance)
3229 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3230 constants.CV_EINSTANCELAYOUT,
3231 instance, "instance has multiple secondary nodes: %s",
3232 utils.CommaJoin(inst_config.secondary_nodes),
3233 code=self.ETYPE_WARNING)
3235 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3236 pnode = inst_config.primary_node
3237 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3238 instance_groups = {}
3240 for node in instance_nodes:
3241 instance_groups.setdefault(self.all_node_info[node].group,
3245 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3246 # Sort so that we always list the primary node first.
3247 for group, nodes in sorted(instance_groups.items(),
3248 key=lambda (_, nodes): pnode in nodes,
3251 self._ErrorIf(len(instance_groups) > 1,
3252 constants.CV_EINSTANCESPLITGROUPS,
3253 instance, "instance has primary and secondary nodes in"
3254 " different groups: %s", utils.CommaJoin(pretty_list),
3255 code=self.ETYPE_WARNING)
3257 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3258 i_non_a_balanced.append(instance)
3260 for snode in inst_config.secondary_nodes:
3261 s_img = node_image[snode]
3262 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3263 snode, "instance %s, connection to secondary node failed",
3267 inst_nodes_offline.append(snode)
3269 # warn that the instance lives on offline nodes
3270 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3271 "instance has offline secondary node(s) %s",
3272 utils.CommaJoin(inst_nodes_offline))
3273 # ... or ghost/non-vm_capable nodes
3274 for node in inst_config.all_nodes:
3275 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3276 instance, "instance lives on ghost node %s", node)
3277 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3278 instance, "instance lives on non-vm_capable node %s", node)
3280 feedback_fn("* Verifying orphan volumes")
3281 reserved = utils.FieldSet(*cluster.reserved_lvs)
3283 # We will get spurious "unknown volume" warnings if any node of this group
3284 # is secondary for an instance whose primary is in another group. To avoid
3285 # them, we find these instances and add their volumes to node_vol_should.
3286 for inst in self.all_inst_info.values():
3287 for secondary in inst.secondary_nodes:
3288 if (secondary in self.my_node_info
3289 and inst.name not in self.my_inst_info):
3290 inst.MapLVsByNode(node_vol_should)
3293 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3295 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3296 feedback_fn("* Verifying N+1 Memory redundancy")
3297 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3299 feedback_fn("* Other Notes")
3301 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3302 % len(i_non_redundant))
3304 if i_non_a_balanced:
3305 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3306 % len(i_non_a_balanced))
3309 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3312 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3315 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3319 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3320 """Analyze the post-hooks' result
3322 This method analyses the hook result, handles it, and sends some
3323 nicely-formatted feedback back to the user.
3325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3327 @param hooks_results: the results of the multi-node hooks rpc call
3328 @param feedback_fn: function used to send feedback back to the caller
3329 @param lu_result: previous Exec result
3330 @return: the new Exec result, based on the previous result
3334 # We only really run POST phase hooks, only for non-empty groups,
3335 # and are only interested in their results
3336 if not self.my_node_names:
3339 elif phase == constants.HOOKS_PHASE_POST:
3340 # Used to change hooks' output to proper indentation
3341 feedback_fn("* Hooks Results")
3342 assert hooks_results, "invalid result from hooks"
3344 for node_name in hooks_results:
3345 res = hooks_results[node_name]
3347 test = msg and not res.offline
3348 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3349 "Communication failure in hooks execution: %s", msg)
3350 if res.offline or msg:
3351 # No need to investigate payload if node is offline or gave
3354 for script, hkr, output in res.payload:
3355 test = hkr == constants.HKR_FAIL
3356 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3357 "Script %s failed, output:", script)
3359 output = self._HOOKS_INDENT_RE.sub(" ", output)
3360 feedback_fn("%s" % output)
3366 class LUClusterVerifyDisks(NoHooksLU):
3367 """Verifies the cluster disks status.
3372 def ExpandNames(self):
3373 self.share_locks = _ShareAll()
3374 self.needed_locks = {
3375 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3378 def Exec(self, feedback_fn):
3379 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3381 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3382 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3383 for group in group_names])
3386 class LUGroupVerifyDisks(NoHooksLU):
3387 """Verifies the status of all disks in a node group.
3392 def ExpandNames(self):
3393 # Raises errors.OpPrereqError on its own if group can't be found
3394 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3396 self.share_locks = _ShareAll()
3397 self.needed_locks = {
3398 locking.LEVEL_INSTANCE: [],
3399 locking.LEVEL_NODEGROUP: [],
3400 locking.LEVEL_NODE: [],
3403 def DeclareLocks(self, level):
3404 if level == locking.LEVEL_INSTANCE:
3405 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3407 # Lock instances optimistically, needs verification once node and group
3408 # locks have been acquired
3409 self.needed_locks[locking.LEVEL_INSTANCE] = \
3410 self.cfg.GetNodeGroupInstances(self.group_uuid)
3412 elif level == locking.LEVEL_NODEGROUP:
3413 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3415 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3416 set([self.group_uuid] +
3417 # Lock all groups used by instances optimistically; this requires
3418 # going via the node before it's locked, requiring verification
3421 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3422 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3424 elif level == locking.LEVEL_NODE:
3425 # This will only lock the nodes in the group to be verified which contain
3427 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3428 self._LockInstancesNodes()
3430 # Lock all nodes in group to be verified
3431 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3432 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3433 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3435 def CheckPrereq(self):
3436 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3437 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3438 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3440 assert self.group_uuid in owned_groups
3442 # Check if locked instances are still correct
3443 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3445 # Get instance information
3446 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3448 # Check if node groups for locked instances are still correct
3449 for (instance_name, inst) in self.instances.items():
3450 assert owned_nodes.issuperset(inst.all_nodes), \
3451 "Instance %s's nodes changed while we kept the lock" % instance_name
3453 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3456 assert self.group_uuid in inst_groups, \
3457 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3459 def Exec(self, feedback_fn):
3460 """Verify integrity of cluster disks.
3462 @rtype: tuple of three items
3463 @return: a tuple of (dict of node-to-node_error, list of instances
3464 which need activate-disks, dict of instance: (node, volume) for
3469 res_instances = set()
3472 nv_dict = _MapInstanceDisksToNodes([inst
3473 for inst in self.instances.values()
3474 if inst.admin_state == constants.ADMINST_UP])
3477 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3478 set(self.cfg.GetVmCapableNodeList()))
3480 node_lvs = self.rpc.call_lv_list(nodes, [])
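# node_lvs maps node name -> RPC result; each successful payload is a dict
# of LV name to a tuple whose last element is the "online" flag used below.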
3482 for (node, node_res) in node_lvs.items():
3483 if node_res.offline:
3486 msg = node_res.fail_msg
3488 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3489 res_nodes[node] = msg
3492 for lv_name, (_, _, lv_online) in node_res.payload.items():
3493 inst = nv_dict.pop((node, lv_name), None)
3494 if not (lv_online or inst is None):
3495 res_instances.add(inst)
3497 # any leftover items in nv_dict are missing LVs, let's arrange the data
3499 for key, inst in nv_dict.iteritems():
3500 res_missing.setdefault(inst, []).append(list(key))
3502 return (res_nodes, list(res_instances), res_missing)
3505 class LUClusterRepairDiskSizes(NoHooksLU):
3506 """Verifies the cluster disks sizes.
3511 def ExpandNames(self):
3512 if self.op.instances:
3513 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3514 self.needed_locks = {
3515 locking.LEVEL_NODE_RES: [],
3516 locking.LEVEL_INSTANCE: self.wanted_names,
3518 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3520 self.wanted_names = None
3521 self.needed_locks = {
3522 locking.LEVEL_NODE_RES: locking.ALL_SET,
3523 locking.LEVEL_INSTANCE: locking.ALL_SET,
3525 self.share_locks = {
3526 locking.LEVEL_NODE_RES: 1,
3527 locking.LEVEL_INSTANCE: 0,
3530 def DeclareLocks(self, level):
3531 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3532 self._LockInstancesNodes(primary_only=True, level=level)
3534 def CheckPrereq(self):
3535 """Check prerequisites.
3537 This only checks the optional instance list against the existing names.
3540 if self.wanted_names is None:
3541 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3543 self.wanted_instances = \
3544 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3546 def _EnsureChildSizes(self, disk):
3547 """Ensure children of the disk have the needed disk size.
3549 This is valid mainly for DRBD8 and fixes an issue where the
3550 children have a smaller disk size than the parent.
3552 @param disk: an L{ganeti.objects.Disk} object
3555 if disk.dev_type == constants.LD_DRBD8:
3556 assert disk.children, "Empty children for DRBD8?"
3557 fchild = disk.children[0]
3558 mismatch = fchild.size < disk.size
3560 self.LogInfo("Child disk has size %d, parent %d, fixing",
3561 fchild.size, disk.size)
3562 fchild.size = disk.size
3564 # and we recurse on this child only, not on the metadev
3565 return self._EnsureChildSizes(fchild) or mismatch
3569 def Exec(self, feedback_fn):
3570 """Verify the size of cluster disks.
3573 # TODO: check child disks too
3574 # TODO: check differences in size between primary/secondary nodes
3576 for instance in self.wanted_instances:
3577 pnode = instance.primary_node
3578 if pnode not in per_node_disks:
3579 per_node_disks[pnode] = []
3580 for idx, disk in enumerate(instance.disks):
3581 per_node_disks[pnode].append((instance, idx, disk))
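# per_node_disks groups (instance, disk index, disk) tuples by primary node,
# so the sizes can be queried with one blockdev_getsize call per node.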
3583 assert not (frozenset(per_node_disks.keys()) -
3584 self.owned_locks(locking.LEVEL_NODE_RES)), \
3585 "Not owning correct locks"
3586 assert not self.owned_locks(locking.LEVEL_NODE)
3589 for node, dskl in per_node_disks.items():
3590 newl = [v[2].Copy() for v in dskl]
3592 self.cfg.SetDiskID(dsk, node)
3593 result = self.rpc.call_blockdev_getsize(node, newl)
3595 self.LogWarning("Failure in blockdev_getsize call to node"
3596 " %s, ignoring", node)
3598 if len(result.payload) != len(dskl):
3599 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3600 " result.payload=%s", node, len(dskl), result.payload)
3601 self.LogWarning("Invalid result from node %s, ignoring node results",
3604 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3606 self.LogWarning("Disk %d of instance %s did not return size"
3607 " information, ignoring", idx, instance.name)
3609 if not isinstance(size, (int, long)):
3610 self.LogWarning("Disk %d of instance %s did not return valid"
3611 " size information, ignoring", idx, instance.name)
3614 if size != disk.size:
3615 self.LogInfo("Disk %d of instance %s has mismatched size,"
3616 " correcting: recorded %d, actual %d", idx,
3617 instance.name, disk.size, size)
3619 self.cfg.Update(instance, feedback_fn)
3620 changed.append((instance.name, idx, size))
3621 if self._EnsureChildSizes(disk):
3622 self.cfg.Update(instance, feedback_fn)
3623 changed.append((instance.name, idx, disk.size))
3627 class LUClusterRename(LogicalUnit):
3628 """Rename the cluster.
3631 HPATH = "cluster-rename"
3632 HTYPE = constants.HTYPE_CLUSTER
3634 def BuildHooksEnv(self):
3639 "OP_TARGET": self.cfg.GetClusterName(),
3640 "NEW_NAME": self.op.name,
3643 def BuildHooksNodes(self):
3644 """Build hooks nodes.
3647 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3649 def CheckPrereq(self):
3650 """Verify that the passed name is a valid one.
3653 hostname = netutils.GetHostname(name=self.op.name,
3654 family=self.cfg.GetPrimaryIPFamily())
3656 new_name = hostname.name
3657 self.ip = new_ip = hostname.ip
3658 old_name = self.cfg.GetClusterName()
3659 old_ip = self.cfg.GetMasterIP()
3660 if new_name == old_name and new_ip == old_ip:
3661 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3662 " cluster has changed",
3664 if new_ip != old_ip:
3665 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3666 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3667 " reachable on the network" %
3668 new_ip, errors.ECODE_NOTUNIQUE)
3670 self.op.name = new_name
3672 def Exec(self, feedback_fn):
3673 """Rename the cluster.
3676 clustername = self.op.name
3679 # shutdown the master IP
3680 master_params = self.cfg.GetMasterNetworkParameters()
3681 ems = self.cfg.GetUseExternalMipScript()
3682 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3684 result.Raise("Could not disable the master role")
3687 cluster = self.cfg.GetClusterInfo()
3688 cluster.cluster_name = clustername
3689 cluster.master_ip = new_ip
3690 self.cfg.Update(cluster, feedback_fn)
3692 # update the known hosts file
3693 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3694 node_list = self.cfg.GetOnlineNodeList()
3696 node_list.remove(master_params.name)
3699 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3701 master_params.ip = new_ip
3702 result = self.rpc.call_node_activate_master_ip(master_params.name,
3704 msg = result.fail_msg
3706 self.LogWarning("Could not re-enable the master role on"
3707 " the master, please restart manually: %s", msg)
3712 def _ValidateNetmask(cfg, netmask):
3713 """Checks if a netmask is valid.
3715 @type cfg: L{config.ConfigWriter}
3716 @param cfg: The cluster configuration
3718 @param netmask: the netmask to be verified
3719 @raise errors.OpPrereqError: if the validation fails
3722 ip_family = cfg.GetPrimaryIPFamily()
3724 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3725 except errors.ProgrammerError:
3726 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3728 if not ipcls.ValidateNetmask(netmask):
3729 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)
3839 self.cluster = cluster = self.cfg.GetClusterInfo()
3840 # validate params changes
3841 if self.op.beparams:
3842 objects.UpgradeBeParams(self.op.beparams)
3843 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3844 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3846 if self.op.ndparams:
3847 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3848 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3850 # TODO: we need a more general way to handle resetting
3851 # cluster-level parameters to default values
3852 if self.new_ndparams["oob_program"] == "":
3853 self.new_ndparams["oob_program"] = \
3854 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3856 if self.op.hv_state:
3857 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3858 self.cluster.hv_state_static)
3859 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3860 for hv, values in new_hv_state.items())
3862 if self.op.disk_state:
3863 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3864 self.cluster.disk_state_static)
3865 self.new_disk_state = \
3866 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3867 for name, values in svalues.items()))
3868 for storage, svalues in new_disk_state.items())
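    # The resulting self.new_disk_state is a nested dict, storage type ->
    # device name -> filled parameter dict, roughly
    # {constants.LD_LV: {"xenvg": {...}}} (illustrative shape only; the keys
    # depend on the storage actually configured).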
    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)
3874 if self.op.nicparams:
3875 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3876 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3877 objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))
    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)
    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])
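    # After the loop above, self.new_osp maps OS names to their merged
    # parameter dicts, e.g. {"debian-image": {"dhcp": "yes"}} (illustrative
    # example); empty entries were dropped and the rest validated on the
    # master node.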
    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
3967 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3968 # either the enabled list has changed, or the parameters have, validate
3969 for hv_name, hv_params in self.new_hvparams.items():
3970 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3971 (self.op.enabled_hypervisors and
3972 hv_name in self.op.enabled_hypervisors)):
3973 # either this is a new hypervisor, or its parameters have changed
3974 hv_class = hypervisor.GetHypervisor(hv_name)
3975 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3976 hv_class.CheckParameterSyntax(hv_params)
3977 _CheckHVParams(self, node_list, hv_name, hv_params)
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)
    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
4023 if self.op.hvparams:
4024 self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
4027 if self.op.enabled_hypervisors is not None:
4028 self.cluster.hvparams = self.new_hvparams
4029 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4030 if self.op.beparams:
4031 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4032 if self.op.nicparams:
4033 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
4036 if self.op.osparams:
4037 self.cluster.osparams = self.new_osp
4038 if self.op.ndparams:
4039 self.cluster.ndparams = self.new_ndparams
4040 if self.op.diskparams:
4041 self.cluster.diskparams = self.new_diskparams
4042 if self.op.hv_state:
4043 self.cluster.hv_state_static = self.new_hv_state
4044 if self.op.disk_state:
4045 self.cluster.disk_state_static = self.new_disk_state
4047 if self.op.candidate_pool_size is not None:
4048 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4049 # we need to update the pool size here, otherwise the save will fail
4050 _AdjustCandidatePool(self, [])
4052 if self.op.maintain_node_health is not None:
4053 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4054 feedback_fn("Note: CONFD was disabled at build time, node health"
4055 " maintenance is not useful (still enabling it)")
4056 self.cluster.maintain_node_health = self.op.maintain_node_health
4058 if self.op.prealloc_wipe_disks is not None:
4059 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4061 if self.op.add_uids is not None:
4062 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4064 if self.op.remove_uids is not None:
4065 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4067 if self.op.uid_pool is not None:
4068 self.cluster.uid_pool = self.op.uid_pool
4070 if self.op.default_iallocator is not None:
4071 self.cluster.default_iallocator = self.op.default_iallocator
4073 if self.op.reserved_lvs is not None:
4074 self.cluster.reserved_lvs = self.op.reserved_lvs
4076 if self.op.use_external_mip_script is not None:
4077 self.cluster.use_external_mip_script = self.op.use_external_mip_script
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
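    # The modification lists handled by helper_os() are sequences of
    # (action, os_name) pairs, e.g. [(constants.DDM_ADD, "lenny-image")]
    # (illustrative value), as built by the opcode from the CLI arguments.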
    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4201 # Files which should only be on VM-capable nodes
4202 files_vm = set(filename
4203 for hv_name in cluster.enabled_hypervisors
4204 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4206 files_opt |= set(filename
4207 for hv_name in cluster.enabled_hypervisors
4208 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4210 # Filenames in each category must be unique
4211 all_files_set = files_all | files_mc | files_vm
4212 assert (len(all_files_set) ==
4213 sum(map(len, [files_all, files_mc, files_vm]))), \
4214 "Found file listed in more than one file list"
4216 # Optional files must be present in one other category
4217 assert all_files_set.issuperset(files_opt), \
4218 "Optional file not in a different required list"
4220 return (files_all, files_opt, files_mc, files_vm)
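# The four sets returned above drive file distribution and verification:
# files_all go to every node, files_mc only to master candidates, files_vm
# only to VM-capable nodes, and files_opt marks entries that are allowed to
# be missing on some nodes of their category.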
4223 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4224 """Distribute additional files which are part of the cluster configuration.
4226 ConfigWriter takes care of distributing the config and ssconf files, but
4227 there are more files which should be distributed to all nodes. This function
4228 makes sure those are copied.
4230 @param lu: calling logical unit
4231 @param additional_nodes: list of nodes not in the config to distribute to
4232 @type additional_vm: boolean
4233 @param additional_vm: whether the additional nodes are vm-capable or not
4236 # Gather target nodes
4237 cluster = lu.cfg.GetClusterInfo()
4238 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4240 online_nodes = lu.cfg.GetOnlineNodeList()
4241 vm_nodes = lu.cfg.GetVmCapableNodeList()
  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
4248 # Never distribute to master node
4249 for nodelist in [online_nodes, vm_nodes]:
4250 if master_info.name in nodelist:
4251 nodelist.remove(master_info.name)
4254 (files_all, _, files_mc, files_vm) = \
4255 _ComputeAncillaryFiles(cluster, True)
4257 # Never re-distribute configuration file from here
4258 assert not (constants.CLUSTER_CONF_FILE in files_all or
4259 constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
4273 class LUClusterRedistConf(NoHooksLU):
4274 """Force the redistribution of cluster configuration.
4276 This is a very simple LU.
4281 def ExpandNames(self):
4282 self.needed_locks = {
4283 locking.LEVEL_NODE: locking.ALL_SET,
4285 self.share_locks[locking.LEVEL_NODE] = 1
4287 def Exec(self, feedback_fn):
4288 """Redistribute the configuration.
4291 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4292 _RedistributeAncillaryFiles(self)
4295 class LUClusterActivateMasterIp(NoHooksLU):
4296 """Activate the master IP on the master node.
4299 def Exec(self, feedback_fn):
4300 """Activate the master IP.
4303 master_params = self.cfg.GetMasterNetworkParameters()
4304 ems = self.cfg.GetUseExternalMipScript()
4305 result = self.rpc.call_node_activate_master_ip(master_params.name,
4307 result.Raise("Could not activate the master IP")
4310 class LUClusterDeactivateMasterIp(NoHooksLU):
4311 """Deactivate the master IP on the master node.
4314 def Exec(self, feedback_fn):
4315 """Deactivate the master IP.
4318 master_params = self.cfg.GetMasterNetworkParameters()
4319 ems = self.cfg.GetUseExternalMipScript()
4320 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4322 result.Raise("Could not deactivate the master IP")


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)

  return not cumul_degraded
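# A True result means every mirror finished syncing; False means the loop gave
# up while at least one device was still reported as degraded.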


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
4434 class LUOobCommand(NoHooksLU):
4435 """Logical unit for OOB handling.
4439 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4441 def ExpandNames(self):
4442 """Gather locks we need.
4445 if self.op.node_names:
4446 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4447 lock_names = self.op.node_names
4449 lock_names = locking.ALL_SET
4451 self.needed_locks = {
4452 locking.LEVEL_NODE: lock_names,
4455 def CheckPrereq(self):
4456 """Check prerequisites.
4459 - the node exists in the configuration
4462 Any errors are signaled by raising errors.OpPrereqError.
4466 self.master_node = self.cfg.GetMasterNode()
4468 assert self.op.power_delay >= 0.0
4470 if self.op.node_names:
4471 if (self.op.command in self._SKIP_MASTER and
4472 self.master_node in self.op.node_names):
4473 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4474 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4476 if master_oob_handler:
4477 additional_text = ("run '%s %s %s' if you want to operate on the"
4478 " master regardless") % (master_oob_handler,
4482 additional_text = "it does not support out-of-band operations"
4484 raise errors.OpPrereqError(("Operating on the master node %s is not"
4485 " allowed for %s; %s") %
4486 (self.master_node, self.op.command,
4487 additional_text), errors.ECODE_INVAL)
4489 self.op.node_names = self.cfg.GetNodeList()
4490 if self.op.command in self._SKIP_MASTER:
4491 self.op.node_names.remove(self.master_node)
4493 if self.op.command in self._SKIP_MASTER:
4494 assert self.master_node not in self.op.node_names
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)

      self.nodes.append(node)
4503 if (not self.op.ignore_status and
4504 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4505 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4506 " not marked offline") % node_name,
4509 def Exec(self, feedback_fn):
4510 """Execute OOB and return result if we expect any.
4513 master_node = self.master_node
4516 for idx, node in enumerate(utils.NiceSort(self.nodes,
4517 key=lambda node: node.name)):
4518 node_entry = [(constants.RS_NORMAL, node.name)]
4519 ret.append(node_entry)
4521 oob_program = _SupportsOob(self.cfg, node)
4524 node_entry.append((constants.RS_UNAVAIL, None))
4527 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4528 self.op.command, oob_program, node.name)
4529 result = self.rpc.call_run_oob(master_node, oob_program,
4530 self.op.command, node.name,
4534 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4535 node.name, result.fail_msg)
4536 node_entry.append((constants.RS_NODATA, None))
4539 self._CheckPayload(result)
4540 except errors.OpExecError, err:
4541 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4543 node_entry.append((constants.RS_NODATA, None))
4545 if self.op.command == constants.OOB_HEALTH:
4546 # For health we should log important events
4547 for item, status in result.payload:
4548 if status in [constants.OOB_STATUS_WARNING,
4549 constants.OOB_STATUS_CRITICAL]:
4550 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4551 item, node.name, status)
          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
4557 elif self.op.command == constants.OOB_POWER_STATUS:
4558 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4559 if powered != node.powered:
4560 logging.warning(("Recorded power state (%s) of node '%s' does not"
4561 " match actual power state (%s)"), node.powered,
4564 # For configuration changing commands we should update the node
4565 if self.op.command in (constants.OOB_POWER_ON,
4566 constants.OOB_POWER_OFF):
4567 self.cfg.Update(node, feedback_fn)
4569 node_entry.append((constants.RS_NORMAL, result.payload))
4571 if (self.op.command == constants.OOB_POWER_ON and
4572 idx < len(self.nodes) - 1):
4573 time.sleep(self.op.power_delay)
4577 def _CheckPayload(self, result):
4578 """Checks if the payload is valid.
4580 @param result: RPC result
4581 @raises errors.OpExecError: If payload is not valid
    """
    errs = []

    if self.op.command == constants.OOB_HEALTH:
4586 if not isinstance(result.payload, list):
4587 errs.append("command 'health' is expected to return a list but got %s" %
4588 type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))
4595 if self.op.command == constants.OOB_POWER_STATUS:
4596 if not isinstance(result.payload, dict):
4597 errs.append("power-status is expected to return a dict but got %s" %
4598 type(result.payload))
4600 if self.op.command in [
4601 constants.OOB_POWER_ON,
4602 constants.OOB_POWER_OFF,
4603 constants.OOB_POWER_CYCLE,
4605 if result.payload is not None:
4606 errs.append("%s is expected to not return payload but got '%s'" %
4607 (self.op.command, result.payload))
    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
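    # Expected payload shapes checked above (illustrative): "health" returns a
    # list of (item, status) pairs, "power-status" a dict containing
    # constants.OOB_POWER_STATUS_POWERED, and the power on/off/cycle commands
    # are expected to return no payload at all.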
4614 class _OsQuery(_QueryBase):
4615 FIELDS = query.OS_FIELDS
4617 def ExpandNames(self, lu):
4618 # Lock all nodes in shared mode
4619 # Temporary removal of locks, should be reverted later
4620 # TODO: reintroduce locks when they are lighter-weight
4621 lu.needed_locks = {}
4622 #self.share_locks[locking.LEVEL_NODE] = 1
4623 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4625 # The following variables interact with _QueryBase._GetNames
4627 self.wanted = self.names
4629 self.wanted = locking.ALL_SET
4631 self.do_locking = self.use_locking
4633 def DeclareLocks(self, lu, level):
4637 def _DiagnoseByOS(rlist):
4638 """Remaps a per-node return list into an a per-os per-node dictionary
4640 @param rlist: a map with node names as keys and OS objects as values
4643 @return: a dictionary with osnames as keys and as value another
4644 map, with nodes as keys and tuples of (path, status, diagnose,
4645 variants, parameters, api_versions) as values, eg::
4647 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4648 (/srv/..., False, "invalid api")],
4649 "node2": [(/srv/..., True, "", [], [])]}
4654 # we build here the list of nodes that didn't fail the RPC (at RPC
4655 # level), so that nodes with a non-responding node daemon don't
4656 # make all OSes invalid
4657 good_nodes = [node_name for node_name in rlist
4658 if not rlist[node_name].fail_msg]
4659 for node_name, nr in rlist.items():
4660 if nr.fail_msg or not nr.payload:
4662 for (name, path, status, diagnose, variants,
4663 params, api_versions) in nr.payload:
4664 if name not in all_os:
4665 # build a list of nodes for this os containing empty lists
4666 # for each node in node_list
4668 for nname in good_nodes:
4669 all_os[name][nname] = []
4670 # convert params from [name, help] to (name, help)
4671 params = [tuple(v) for v in params]
4672 all_os[name][node_name].append((path, status, diagnose,
4673 variants, params, api_versions))
4676 def _GetQueryData(self, lu):
4677 """Computes the list of nodes and their attributes.
4680 # Locking is not used
4681 assert not (compat.any(lu.glm.is_owned(level)
4682 for level in locking.LEVELS
4683 if level != locking.LEVEL_CLUSTER) or
4684 self.do_locking or self.use_locking)
4686 valid_nodes = [node.name
4687 for node in lu.cfg.GetAllNodesInfo().values()
4688 if not node.offline and node.vm_capable]
4689 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4690 cluster = lu.cfg.GetClusterInfo()
4694 for (os_name, os_data) in pol.items():
4695 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4696 hidden=(os_name in cluster.hidden_os),
4697 blacklisted=(os_name in cluster.blacklisted_os))
4701 api_versions = set()
4703 for idx, osl in enumerate(os_data.values()):
4704 info.valid = bool(info.valid and osl and osl[0][1])
4708 (node_variants, node_params, node_api) = osl[0][3:6]
4711 variants.update(node_variants)
4712 parameters.update(node_params)
4713 api_versions.update(node_api)
4715 # Filter out inconsistent values
4716 variants.intersection_update(node_variants)
4717 parameters.intersection_update(node_params)
4718 api_versions.intersection_update(node_api)
4720 info.variants = list(variants)
4721 info.parameters = list(parameters)
4722 info.api_versions = list(api_versions)
4724 data[os_name] = info
4726 # Prepare data in requested order
4727 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4731 class LUOsDiagnose(NoHooksLU):
4732 """Logical unit for OS diagnose/query.
4738 def _BuildFilter(fields, names):
4739 """Builds a filter for querying OSes.
4742 name_filter = qlang.MakeSimpleFilter("name", names)
4744 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4745 # respective field is not requested
4746 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4747 for fname in ["hidden", "blacklisted"]
4748 if fname not in fields]
4749 if "valid" not in fields:
4750 status_filter.append([qlang.OP_TRUE, "valid"])
4753 status_filter.insert(0, qlang.OP_AND)
4755 status_filter = None
4757 if name_filter and status_filter:
4758 return [qlang.OP_AND, name_filter, status_filter]
4762 return status_filter
4764 def CheckArguments(self):
4765 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4766 self.op.output_fields, False)
4768 def ExpandNames(self):
4769 self.oq.ExpandNames(self)
4771 def Exec(self, feedback_fn):
4772 return self.oq.OldStyleQuery(self)
4775 class LUNodeRemove(LogicalUnit):
4776 """Logical unit for removing a node.
4779 HPATH = "node-remove"
4780 HTYPE = constants.HTYPE_NODE
4782 def BuildHooksEnv(self):
4785 This doesn't run on the target node in the pre phase as a failed
4786 node would then be impossible to remove.
4790 "OP_TARGET": self.op.node_name,
4791 "NODE_NAME": self.op.node_name,
4794 def BuildHooksNodes(self):
4795 """Build hooks nodes.
4798 all_nodes = self.cfg.GetNodeList()
4800 all_nodes.remove(self.op.node_name)
4802 logging.warning("Node '%s', which is about to be removed, was not found"
4803 " in the list of all nodes", self.op.node_name)
4804 return (all_nodes, all_nodes)
4806 def CheckPrereq(self):
4807 """Check prerequisites.
4810 - the node exists in the configuration
4811 - it does not have primary or secondary instances
4812 - it's not the master
4814 Any errors are signaled by raising errors.OpPrereqError.
4817 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4818 node = self.cfg.GetNodeInfo(self.op.node_name)
4819 assert node is not None
4821 masternode = self.cfg.GetMasterNode()
4822 if node.name == masternode:
4823 raise errors.OpPrereqError("Node is the master node, failover to another"
4824 " node is required", errors.ECODE_INVAL)
4826 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4827 if node.name in instance.all_nodes:
4828 raise errors.OpPrereqError("Instance %s is still running on the node,"
4829 " please remove first" % instance_name,
4831 self.op.node_name = node.name
4834 def Exec(self, feedback_fn):
4835 """Removes the node from the cluster.
4839 logging.info("Stopping the node daemon and removing configs from node %s",
4842 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4844 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4847 # Promote nodes to master candidate as needed
4848 _AdjustCandidatePool(self, exceptions=[node.name])
4849 self.context.RemoveNode(node.name)
4851 # Run post hooks on the node before it's removed
4852 _RunPostHook(self, node.name)
4854 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4855 msg = result.fail_msg
4857 self.LogWarning("Errors encountered on the remote node while leaving"
4858 " the cluster: %s", msg)
4860 # Remove node from our /etc/hosts
4861 if self.cfg.GetClusterInfo().modify_etc_hosts:
4862 master_node = self.cfg.GetMasterNode()
4863 result = self.rpc.call_etc_hosts_modify(master_node,
4864 constants.ETC_HOSTS_REMOVE,
4866 result.Raise("Can't update hosts file with new host data")
4867 _RedistributeAncillaryFiles(self)
4870 class _NodeQuery(_QueryBase):
4871 FIELDS = query.NODE_FIELDS
4873 def ExpandNames(self, lu):
4874 lu.needed_locks = {}
4875 lu.share_locks = _ShareAll()
4878 self.wanted = _GetWantedNodes(lu, self.names)
4880 self.wanted = locking.ALL_SET
4882 self.do_locking = (self.use_locking and
4883 query.NQ_LIVE in self.requested_data)
4886 # If any non-static field is requested we need to lock the nodes
4887 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4889 def DeclareLocks(self, lu, level):
4892 def _GetQueryData(self, lu):
4893 """Computes the list of nodes and their attributes.
4896 all_info = lu.cfg.GetAllNodesInfo()
4898 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4900 # Gather data as requested
4901 if query.NQ_LIVE in self.requested_data:
4902 # filter out non-vm_capable nodes
4903 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4905 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4906 [lu.cfg.GetHypervisorType()])
4907 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4908 for (name, nresult) in node_data.items()
4909 if not nresult.fail_msg and nresult.payload)
4913 if query.NQ_INST in self.requested_data:
4914 node_to_primary = dict([(name, set()) for name in nodenames])
4915 node_to_secondary = dict([(name, set()) for name in nodenames])
4917 inst_data = lu.cfg.GetAllInstancesInfo()
4919 for inst in inst_data.values():
4920 if inst.primary_node in node_to_primary:
4921 node_to_primary[inst.primary_node].add(inst.name)
4922 for secnode in inst.secondary_nodes:
4923 if secnode in node_to_secondary:
4924 node_to_secondary[secnode].add(inst.name)
4926 node_to_primary = None
4927 node_to_secondary = None
4929 if query.NQ_OOB in self.requested_data:
4930 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4931 for name, node in all_info.iteritems())
4935 if query.NQ_GROUP in self.requested_data:
4936 groups = lu.cfg.GetAllNodeGroupsInfo()
4940 return query.NodeQueryData([all_info[name] for name in nodenames],
4941 live_data, lu.cfg.GetMasterNode(),
4942 node_to_primary, node_to_secondary, groups,
4943 oob_support, lu.cfg.GetClusterInfo())
4946 class LUNodeQuery(NoHooksLU):
4947 """Logical unit for querying nodes.
4950 # pylint: disable=W0142
4953 def CheckArguments(self):
4954 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4955 self.op.output_fields, self.op.use_locking)
4957 def ExpandNames(self):
4958 self.nq.ExpandNames(self)
4960 def DeclareLocks(self, level):
4961 self.nq.DeclareLocks(self, level)
4963 def Exec(self, feedback_fn):
4964 return self.nq.OldStyleQuery(self)
4967 class LUNodeQueryvols(NoHooksLU):
4968 """Logical unit for getting volumes on node(s).
4972 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4973 _FIELDS_STATIC = utils.FieldSet("node")
4975 def CheckArguments(self):
4976 _CheckOutputFields(static=self._FIELDS_STATIC,
4977 dynamic=self._FIELDS_DYNAMIC,
4978 selected=self.op.output_fields)
4980 def ExpandNames(self):
4981 self.share_locks = _ShareAll()
4982 self.needed_locks = {}
4984 if not self.op.nodes:
4985 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4987 self.needed_locks[locking.LEVEL_NODE] = \
4988 _GetWantedNodes(self, self.op.nodes)
4990 def Exec(self, feedback_fn):
4991 """Computes the list of nodes and their attributes.
4994 nodenames = self.owned_locks(locking.LEVEL_NODE)
4995 volumes = self.rpc.call_node_volumes(nodenames)
4997 ilist = self.cfg.GetAllInstancesInfo()
4998 vol2inst = _MapInstanceDisksToNodes(ilist.values())
    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
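    # Each output row follows self.op.output_fields; with fields such as
    # node, phys, vg, name, size and instance a row could look like
    # ["node1.example.com", "/dev/xvda1", "xenvg", "lv0", "2048", "-"]
    # (illustrative values only).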
5037 class LUNodeQueryStorage(NoHooksLU):
5038 """Logical unit for getting information on storage units on node(s).
5041 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5044 def CheckArguments(self):
5045 _CheckOutputFields(static=self._FIELDS_STATIC,
5046 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5047 selected=self.op.output_fields)
5049 def ExpandNames(self):
5050 self.share_locks = _ShareAll()
5051 self.needed_locks = {}
5054 self.needed_locks[locking.LEVEL_NODE] = \
5055 _GetWantedNodes(self, self.op.nodes)
5057 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5059 def Exec(self, feedback_fn):
5060 """Computes the list of nodes and their attributes.
5063 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5065 # Always get name to sort by
5066 if constants.SF_NAME in self.op.output_fields:
5067 fields = self.op.output_fields[:]
5069 fields = [constants.SF_NAME] + self.op.output_fields
5071 # Never ask for node or type as it's only known to the LU
5072 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5073 while extra in fields:
5074 fields.remove(extra)
5076 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5077 name_idx = field_idx[constants.SF_NAME]
5079 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5080 data = self.rpc.call_storage_list(self.nodes,
5081 self.op.storage_type, st_args,
5082 self.op.name, fields)
5086 for node in utils.NiceSort(self.nodes):
5087 nresult = data[node]
5091 msg = nresult.fail_msg
5093 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5096 rows = dict([(row[name_idx], row) for row in nresult.payload])
5098 for name in utils.NiceSort(rows.keys()):
5103 for field in self.op.output_fields:
5104 if field == constants.SF_NODE:
5106 elif field == constants.SF_TYPE:
5107 val = self.op.storage_type
5108 elif field in field_idx:
5109 val = row[field_idx[field]]
5111 raise errors.ParameterError(field)
5120 class _InstanceQuery(_QueryBase):
5121 FIELDS = query.INSTANCE_FIELDS
5123 def ExpandNames(self, lu):
5124 lu.needed_locks = {}
5125 lu.share_locks = _ShareAll()
5128 self.wanted = _GetWantedInstances(lu, self.names)
5130 self.wanted = locking.ALL_SET
5132 self.do_locking = (self.use_locking and
5133 query.IQ_LIVE in self.requested_data)
5135 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5136 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5137 lu.needed_locks[locking.LEVEL_NODE] = []
5138 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5140 self.do_grouplocks = (self.do_locking and
5141 query.IQ_NODES in self.requested_data)
5143 def DeclareLocks(self, lu, level):
5145 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5146 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5148 # Lock all groups used by instances optimistically; this requires going
5149 # via the node before it's locked, requiring verification later on
5150 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5152 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5153 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5154 elif level == locking.LEVEL_NODE:
5155 lu._LockInstancesNodes() # pylint: disable=W0212
5158 def _CheckGroupLocks(lu):
5159 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5160 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5162 # Check if node groups for locked instances are still correct
5163 for instance_name in owned_instances:
5164 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5166 def _GetQueryData(self, lu):
5167 """Computes the list of instances and their attributes.
5170 if self.do_grouplocks:
5171 self._CheckGroupLocks(lu)
5173 cluster = lu.cfg.GetClusterInfo()
5174 all_info = lu.cfg.GetAllInstancesInfo()
5176 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5178 instance_list = [all_info[name] for name in instance_names]
5179 nodes = frozenset(itertools.chain(*(inst.all_nodes
5180 for inst in instance_list)))
5181 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5184 wrongnode_inst = set()
5186 # Gather data as requested
5187 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5189 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5191 result = node_data[name]
5193 # offline nodes will be in both lists
5194 assert result.fail_msg
5195 offline_nodes.append(name)
5197 bad_nodes.append(name)
5198 elif result.payload:
5199 for inst in result.payload:
5200 if inst in all_info:
5201 if all_info[inst].primary_node == name:
5202 live_data.update(result.payload)
5204 wrongnode_inst.add(inst)
5206 # orphan instance; we don't list it here as we don't
5207 # handle this case yet in the output of instance listing
5208 logging.warning("Orphan instance '%s' found on node %s",
5210 # else no instance is alive
5214 if query.IQ_DISKUSAGE in self.requested_data:
5215 disk_usage = dict((inst.name,
5216 _ComputeDiskSize(inst.disk_template,
5217 [{constants.IDISK_SIZE: disk.size}
5218 for disk in inst.disks]))
5219 for inst in instance_list)
5223 if query.IQ_CONSOLE in self.requested_data:
5225 for inst in instance_list:
5226 if inst.name in live_data:
5227 # Instance is running
5228 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5230 consinfo[inst.name] = None
5231 assert set(consinfo.keys()) == set(instance_names)
5235 if query.IQ_NODES in self.requested_data:
5236 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5238 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5239 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5240 for uuid in set(map(operator.attrgetter("group"),
5246 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5247 disk_usage, offline_nodes, bad_nodes,
5248 live_data, wrongnode_inst, consinfo,
5252 class LUQuery(NoHooksLU):
5253 """Query for resources/items of a certain kind.
5256 # pylint: disable=W0142
5259 def CheckArguments(self):
5260 qcls = _GetQueryImplementation(self.op.what)
5262 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5264 def ExpandNames(self):
5265 self.impl.ExpandNames(self)
5267 def DeclareLocks(self, level):
5268 self.impl.DeclareLocks(self, level)
5270 def Exec(self, feedback_fn):
5271 return self.impl.NewStyleQuery(self)
5274 class LUQueryFields(NoHooksLU):
5275 """Query for resources/items of a certain kind.
5278 # pylint: disable=W0142
5281 def CheckArguments(self):
5282 self.qcls = _GetQueryImplementation(self.op.what)
5284 def ExpandNames(self):
5285 self.needed_locks = {}
5287 def Exec(self, feedback_fn):
5288 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
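# QueryFields only describes the available fields for the requested resource
# (no locks, no node RPC), which is why ExpandNames above declares an empty
# lock set.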
5291 class LUNodeModifyStorage(NoHooksLU):
5292 """Logical unit for modifying a storage volume on a node.
5297 def CheckArguments(self):
5298 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5300 storage_type = self.op.storage_type
5303 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5305 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5306 " modified" % storage_type,
5309 diff = set(self.op.changes.keys()) - modifiable
5311 raise errors.OpPrereqError("The following fields can not be modified for"
5312 " storage units of type '%s': %r" %
5313 (storage_type, list(diff)),
5316 def ExpandNames(self):
5317 self.needed_locks = {
5318 locking.LEVEL_NODE: self.op.node_name,
5321 def Exec(self, feedback_fn):
5322 """Computes the list of nodes and their attributes.
5325 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5326 result = self.rpc.call_storage_modify(self.op.node_name,
5327 self.op.storage_type, st_args,
5328 self.op.name, self.op.changes)
5329 result.Raise("Failed to modify storage unit '%s' on %s" %
5330 (self.op.name, self.op.node_name))
5333 class LUNodeAdd(LogicalUnit):
5334 """Logical unit for adding node to the cluster.
5338 HTYPE = constants.HTYPE_NODE
5339 _NFLAGS = ["master_capable", "vm_capable"]
5341 def CheckArguments(self):
5342 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5343 # validate/normalize the node name
5344 self.hostname = netutils.GetHostname(name=self.op.node_name,
5345 family=self.primary_ip_family)
5346 self.op.node_name = self.hostname.name
5348 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5349 raise errors.OpPrereqError("Cannot readd the master node",
5352 if self.op.readd and self.op.group:
5353 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5354 " being readded", errors.ECODE_INVAL)
5356 def BuildHooksEnv(self):
5359 This will run on all nodes before, and on all nodes + the new node after.
5363 "OP_TARGET": self.op.node_name,
5364 "NODE_NAME": self.op.node_name,
5365 "NODE_PIP": self.op.primary_ip,
5366 "NODE_SIP": self.op.secondary_ip,
5367 "MASTER_CAPABLE": str(self.op.master_capable),
5368 "VM_CAPABLE": str(self.op.vm_capable),
5371 def BuildHooksNodes(self):
5372 """Build hooks nodes.
5375 # Exclude added node
5376 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5377 post_nodes = pre_nodes + [self.op.node_name, ]
5379 return (pre_nodes, post_nodes)
5381 def CheckPrereq(self):
5382 """Check prerequisites.
5385 - the new node is not already in the config
5387 - its parameters (single/dual homed) matches the cluster
5389 Any errors are signaled by raising errors.OpPrereqError.
5393 hostname = self.hostname
5394 node = hostname.name
5395 primary_ip = self.op.primary_ip = hostname.ip
5396 if self.op.secondary_ip is None:
5397 if self.primary_ip_family == netutils.IP6Address.family:
5398 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5399 " IPv4 address must be given as secondary",
5401 self.op.secondary_ip = primary_ip
5403 secondary_ip = self.op.secondary_ip
5404 if not netutils.IP4Address.IsValid(secondary_ip):
5405 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5406 " address" % secondary_ip, errors.ECODE_INVAL)
5408 node_list = cfg.GetNodeList()
5409 if not self.op.readd and node in node_list:
5410 raise errors.OpPrereqError("Node %s is already in the configuration" %
5411 node, errors.ECODE_EXISTS)
5412 elif self.op.readd and node not in node_list:
5413 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5416 self.changed_primary_ip = False
5418 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5419 if self.op.readd and node == existing_node_name:
5420 if existing_node.secondary_ip != secondary_ip:
5421 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5422 " address configuration as before",
5424 if existing_node.primary_ip != primary_ip:
5425 self.changed_primary_ip = True
5429 if (existing_node.primary_ip == primary_ip or
5430 existing_node.secondary_ip == primary_ip or
5431 existing_node.primary_ip == secondary_ip or
5432 existing_node.secondary_ip == secondary_ip):
5433 raise errors.OpPrereqError("New node ip address(es) conflict with"
5434 " existing node %s" % existing_node.name,
5435 errors.ECODE_NOTUNIQUE)
5437 # After this 'if' block, None is no longer a valid value for the
5438 # _capable op attributes
5440 old_node = self.cfg.GetNodeInfo(node)
5441 assert old_node is not None, "Can't retrieve locked node %s" % node
5442 for attr in self._NFLAGS:
5443 if getattr(self.op, attr) is None:
5444 setattr(self.op, attr, getattr(old_node, attr))
5446 for attr in self._NFLAGS:
5447 if getattr(self.op, attr) is None:
5448 setattr(self.op, attr, True)
5450 if self.op.readd and not self.op.vm_capable:
5451 pri, sec = cfg.GetNodeInstances(node)
5453 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5454 " flag set to false, but it already holds"
5455 " instances" % node,
5458 # check that the type of the node (single versus dual homed) is the
5459 # same as for the master
5460 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5461 master_singlehomed = myself.secondary_ip == myself.primary_ip
5462 newbie_singlehomed = secondary_ip == primary_ip
5463 if master_singlehomed != newbie_singlehomed:
5464 if master_singlehomed:
5465 raise errors.OpPrereqError("The master has no secondary ip but the"
5466 " new node has one",
5469 raise errors.OpPrereqError("The master has a secondary ip but the"
5470 " new node doesn't have one",
5473 # checks reachability
5474 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5475 raise errors.OpPrereqError("Node not reachable by ping",
5476 errors.ECODE_ENVIRON)
5478 if not newbie_singlehomed:
5479 # check reachability from my secondary ip to newbie's secondary ip
5480 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5481 source=myself.secondary_ip):
5482 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5483 " based ping to node daemon port",
5484 errors.ECODE_ENVIRON)
5491 if self.op.master_capable:
5492 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5494 self.master_candidate = False
5497 self.new_node = old_node
5499 node_group = cfg.LookupNodeGroup(self.op.group)
5500 self.new_node = objects.Node(name=node,
5501 primary_ip=primary_ip,
5502 secondary_ip=secondary_ip,
5503 master_candidate=self.master_candidate,
5504 offline=False, drained=False,
5507 if self.op.ndparams:
5508 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5510 if self.op.hv_state:
5511 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5513 if self.op.disk_state:
5514 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5516 def Exec(self, feedback_fn):
5517 """Adds the new node to the cluster.
5520 new_node = self.new_node
5521 node = new_node.name
5523 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5526 # We adding a new node so we assume it's powered
5527 new_node.powered = True
5529 # for re-adds, reset the offline/drained/master-candidate flags;
5530 # we need to reset here, otherwise offline would prevent RPC calls
5531 # later in the procedure; this also means that if the re-add
5532 # fails, we are left with a non-offlined, broken node
5534 new_node.drained = new_node.offline = False # pylint: disable=W0201
5535 self.LogInfo("Readding a node, the offline/drained flags were reset")
5536 # if we demote the node, we do cleanup later in the procedure
5537 new_node.master_candidate = self.master_candidate
5538 if self.changed_primary_ip:
5539 new_node.primary_ip = self.op.primary_ip
5541 # copy the master/vm_capable flags
5542 for attr in self._NFLAGS:
5543 setattr(new_node, attr, getattr(self.op, attr))
5545 # notify the user about any possible mc promotion
5546 if new_node.master_candidate:
5547 self.LogInfo("Node will be a master candidate")
5549 if self.op.ndparams:
5550 new_node.ndparams = self.op.ndparams
5552 new_node.ndparams = {}
5554 if self.op.hv_state:
5555 new_node.hv_state_static = self.new_hv_state
5557 if self.op.disk_state:
5558 new_node.disk_state_static = self.new_disk_state
5560 # check connectivity
5561 result = self.rpc.call_version([node])[node]
5562 result.Raise("Can't get version information from node %s" % node)
5563 if constants.PROTOCOL_VERSION == result.payload:
5564 logging.info("Communication to node %s fine, sw version %s match",
5565 node, result.payload)
5567 raise errors.OpExecError("Version mismatch master version %s,"
5568 " node version %s" %
5569 (constants.PROTOCOL_VERSION, result.payload))
5571 # Add node to our /etc/hosts, and add key to known_hosts
5572 if self.cfg.GetClusterInfo().modify_etc_hosts:
5573 master_node = self.cfg.GetMasterNode()
5574 result = self.rpc.call_etc_hosts_modify(master_node,
5575 constants.ETC_HOSTS_ADD,
5578 result.Raise("Can't update hosts file with new host data")
5580 if new_node.secondary_ip != new_node.primary_ip:
5581 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5584 node_verify_list = [self.cfg.GetMasterNode()]
5585 node_verify_param = {
5586 constants.NV_NODELIST: ([node], {}),
5587 # TODO: do a node-net-test as well?
5590 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5591 self.cfg.GetClusterName())
5592 for verifier in node_verify_list:
5593 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5594 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5596 for failed in nl_payload:
5597 feedback_fn("ssh/hostname verification failed"
5598 " (checking from %s): %s" %
5599 (verifier, nl_payload[failed]))
5600 raise errors.OpExecError("ssh/hostname verification failed")
5603 _RedistributeAncillaryFiles(self)
5604 self.context.ReaddNode(new_node)
5605 # make sure we redistribute the config
5606 self.cfg.Update(new_node, feedback_fn)
5607 # and make sure the new node will not have old files around
5608 if not new_node.master_candidate:
5609 result = self.rpc.call_node_demote_from_mc(new_node.name)
5610 msg = result.fail_msg
5612 self.LogWarning("Node failed to demote itself from master"
5613 " candidate status: %s" % msg)
5615 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5616 additional_vm=self.op.vm_capable)
5617 self.context.AddNode(new_node, self.proc.GetECId())
5620 class LUNodeSetParams(LogicalUnit):
5621 """Modifies the parameters of a node.
5623 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5624 to the node role (as _ROLE_*)
5625 @cvar _R2F: a dictionary from node role to tuples of flags
5626 @cvar _FLAGS: a list of attribute names corresponding to the flags
5629 HPATH = "node-modify"
5630 HTYPE = constants.HTYPE_NODE
5632 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
5639 _R2F = dict((v, k) for k, v in _F2R.items())
5640 _FLAGS = ["master_candidate", "drained", "offline"]
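  # For example, _F2R[(True, False, False)] is _ROLE_CANDIDATE and
  # _R2F[_ROLE_OFFLINE] is (False, False, True); _FLAGS lists the opcode
  # attributes that correspond to the flag positions in those tuples.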
5642 def CheckArguments(self):
5643 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5644 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5645 self.op.master_capable, self.op.vm_capable,
5646 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5648 if all_mods.count(None) == len(all_mods):
5649 raise errors.OpPrereqError("Please pass at least one modification",
5651 if all_mods.count(True) > 1:
5652 raise errors.OpPrereqError("Can't set the node into more than one"
5653 " state at the same time",
5656 # Boolean value that tells us whether we might be demoting from MC
5657 self.might_demote = (self.op.master_candidate == False or
5658 self.op.offline == True or
5659 self.op.drained == True or
5660 self.op.master_capable == False)
5662 if self.op.secondary_ip:
5663 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5664 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5665 " address" % self.op.secondary_ip,
5668 self.lock_all = self.op.auto_promote and self.might_demote
5669 self.lock_instances = self.op.secondary_ip is not None
5671 def _InstanceFilter(self, instance):
5672 """Filter for getting affected instances.
5675 return (instance.disk_template in constants.DTS_INT_MIRROR and
5676 self.op.node_name in instance.all_nodes)
5678 def ExpandNames(self):
5679 if self.lock_all:
5680 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5681 else:
5682 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5684 # Since modifying a node can have severe effects on currently running
5685 # operations the resource lock is at least acquired in shared mode
5686 self.needed_locks[locking.LEVEL_NODE_RES] = \
5687 self.needed_locks[locking.LEVEL_NODE]
5689 # Get node resource and instance locks in shared mode; they are not used
5690 # for anything but read-only access
5691 self.share_locks[locking.LEVEL_NODE_RES] = 1
5692 self.share_locks[locking.LEVEL_INSTANCE] = 1
5694 if self.lock_instances:
5695 self.needed_locks[locking.LEVEL_INSTANCE] = \
5696 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5698 def BuildHooksEnv(self):
5699 """Build hooks env.
5700 
5701 This runs on the master node.
5702 
5703 """
5704 return {
5705 "OP_TARGET": self.op.node_name,
5706 "MASTER_CANDIDATE": str(self.op.master_candidate),
5707 "OFFLINE": str(self.op.offline),
5708 "DRAINED": str(self.op.drained),
5709 "MASTER_CAPABLE": str(self.op.master_capable),
5710 "VM_CAPABLE": str(self.op.vm_capable),
5711 }
5713 def BuildHooksNodes(self):
5714 """Build hooks nodes.
5717 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5718 return (nl, nl)
5720 def CheckPrereq(self):
5721 """Check prerequisites.
5723 This only checks the instance list against the existing names.
5726 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5728 if self.lock_instances:
5729 affected_instances = \
5730 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5732 # Verify instance locks
5733 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5734 wanted_instances = frozenset(affected_instances.keys())
5735 if wanted_instances - owned_instances:
5736 raise errors.OpPrereqError("Instances affected by changing node %s's"
5737 " secondary IP address have changed since"
5738 " locks were acquired, wanted '%s', have"
5739 " '%s'; retry the operation" %
5740 (self.op.node_name,
5741 utils.CommaJoin(wanted_instances),
5742 utils.CommaJoin(owned_instances)),
5743 errors.ECODE_STATE)
5744 else:
5745 affected_instances = None
5747 if (self.op.master_candidate is not None or
5748 self.op.drained is not None or
5749 self.op.offline is not None):
5750 # we can't change the master's node flags
5751 if self.op.node_name == self.cfg.GetMasterNode():
5752 raise errors.OpPrereqError("The master role can be changed"
5753 " only via master-failover",
5756 if self.op.master_candidate and not node.master_capable:
5757 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5758 " it a master candidate" % node.name,
5761 if self.op.vm_capable == False:
5762 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5763 if ipri or isec:
5764 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5765 " the vm_capable flag" % node.name,
5768 if node.master_candidate and self.might_demote and not self.lock_all:
5769 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5770 # check if after removing the current node, we're missing master
5772 (mc_remaining, mc_should, _) = \
5773 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5774 if mc_remaining < mc_should:
5775 raise errors.OpPrereqError("Not enough master candidates, please"
5776 " pass auto promote option to allow"
5777 " promotion", errors.ECODE_STATE)
5779 self.old_flags = old_flags = (node.master_candidate,
5780 node.drained, node.offline)
5781 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5782 self.old_role = old_role = self._F2R[old_flags]
5784 # Check for ineffective changes
5785 for attr in self._FLAGS:
5786 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5787 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5788 setattr(self.op, attr, None)
5790 # Past this point, any flag change to False means a transition
5791 # away from the respective state, as only real changes are kept
5793 # TODO: We might query the real power state if it supports OOB
5794 if _SupportsOob(self.cfg, node):
5795 if self.op.offline is False and not (node.powered or
5796 self.op.powered == True):
5797 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5798 " offline status can be reset") %
5799 self.op.node_name, errors.ECODE_STATE)
5800 elif self.op.powered is not None:
5801 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5802 " as it does not support out-of-band"
5803 " handling") % self.op.node_name)
5805 # If we're being deofflined/drained, we'll MC ourself if needed
5806 if (self.op.drained == False or self.op.offline == False or
5807 (self.op.master_capable and not node.master_capable)):
5808 if _DecideSelfPromotion(self):
5809 self.op.master_candidate = True
5810 self.LogInfo("Auto-promoting node to master candidate")
5812 # If we're no longer master capable, we'll demote ourselves from MC
5813 if self.op.master_capable == False and node.master_candidate:
5814 self.LogInfo("Demoting from master candidate")
5815 self.op.master_candidate = False
5818 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5819 if self.op.master_candidate:
5820 new_role = self._ROLE_CANDIDATE
5821 elif self.op.drained:
5822 new_role = self._ROLE_DRAINED
5823 elif self.op.offline:
5824 new_role = self._ROLE_OFFLINE
5825 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5826 # False is still in new flags, which means we're un-setting (the
5828 new_role = self._ROLE_REGULAR
5829 else: # no new flags, nothing, keep old role
5830 new_role = old_role
5832 self.new_role = new_role
5834 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5835 # Trying to transition out of offline status
5836 # TODO: Use standard RPC runner, but make sure it works when the node is
5837 # still marked offline
5838 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5839 if result.fail_msg:
5840 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5841 " to report its version: %s" %
5842 (node.name, result.fail_msg),
5843 errors.ECODE_STATE)
5844 else:
5845 self.LogWarning("Transitioning node from offline to online state"
5846 " without using re-add. Please make sure the node"
5849 if self.op.secondary_ip:
5850 # Ok even without locking, because this can't be changed by any LU
5851 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5852 master_singlehomed = master.secondary_ip == master.primary_ip
5853 if master_singlehomed and self.op.secondary_ip:
5854 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5855 " homed cluster", errors.ECODE_INVAL)
5857 assert not (frozenset(affected_instances) -
5858 self.owned_locks(locking.LEVEL_INSTANCE))
5860 if node.offline:
5861 if affected_instances:
5862 raise errors.OpPrereqError("Cannot change secondary IP address:"
5863 " offline node has instances (%s)"
5864 " configured to use it" %
5865 utils.CommaJoin(affected_instances.keys()))
5866 else:
5867 # On online nodes, check that no instances are running, and that
5868 # the node has the new ip and we can reach it.
5869 for instance in affected_instances.values():
5870 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5871 msg="cannot change secondary ip")
5873 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5874 if master.name != node.name:
5875 # check reachability from master secondary ip to new secondary ip
5876 if not netutils.TcpPing(self.op.secondary_ip,
5877 constants.DEFAULT_NODED_PORT,
5878 source=master.secondary_ip):
5879 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5880 " based ping to node daemon port",
5881 errors.ECODE_ENVIRON)
5883 if self.op.ndparams:
5884 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5885 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5886 self.new_ndparams = new_ndparams
5888 if self.op.hv_state:
5889 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5890 self.node.hv_state_static)
5892 if self.op.disk_state:
5893 self.new_disk_state = \
5894 _MergeAndVerifyDiskState(self.op.disk_state,
5895 self.node.disk_state_static)
5897 def Exec(self, feedback_fn):
5898 """Modifies a node.
5899 
5900 """
5901 node = self.node
5902 old_role = self.old_role
5903 new_role = self.new_role
5904 
5905 result = []
5907 if self.op.ndparams:
5908 node.ndparams = self.new_ndparams
5910 if self.op.powered is not None:
5911 node.powered = self.op.powered
5913 if self.op.hv_state:
5914 node.hv_state_static = self.new_hv_state
5916 if self.op.disk_state:
5917 node.disk_state_static = self.new_disk_state
5919 for attr in ["master_capable", "vm_capable"]:
5920 val = getattr(self.op, attr)
5921 if val is not None:
5922 setattr(node, attr, val)
5923 result.append((attr, str(val)))
5925 if new_role != old_role:
5926 # Tell the node to demote itself, if no longer MC and not offline
5927 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5928 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5929 if msg:
5930 self.LogWarning("Node failed to demote itself: %s", msg)
5932 new_flags = self._R2F[new_role]
5933 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5934 if of != nf:
5935 result.append((desc, str(nf)))
5936 (node.master_candidate, node.drained, node.offline) = new_flags
5938 # we locked all nodes, we adjust the CP before updating this node
5939 if self.lock_all:
5940 _AdjustCandidatePool(self, [node.name])
5942 if self.op.secondary_ip:
5943 node.secondary_ip = self.op.secondary_ip
5944 result.append(("secondary_ip", self.op.secondary_ip))
5946 # this will trigger configuration file update, if needed
5947 self.cfg.Update(node, feedback_fn)
5949 # this will trigger job queue propagation or cleanup if the mc
5950 # flag changed
5951 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5952 self.context.ReaddNode(node)
5953 
5954 return result
5957 class LUNodePowercycle(NoHooksLU):
5958 """Powercycles a node.
5963 def CheckArguments(self):
5964 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5965 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5966 raise errors.OpPrereqError("The node is the master and the force"
5967 " parameter was not set",
5970 def ExpandNames(self):
5971 """Locking for PowercycleNode.
5973 This is a last-resort option and shouldn't block on other
5974 jobs. Therefore, we grab no locks.
5977 self.needed_locks = {}
5979 def Exec(self, feedback_fn):
5983 result = self.rpc.call_node_powercycle(self.op.node_name,
5984 self.cfg.GetHypervisorType())
5985 result.Raise("Failed to schedule the reboot")
5986 return result.payload
5989 class LUClusterQuery(NoHooksLU):
5990 """Query cluster configuration.
5995 def ExpandNames(self):
5996 self.needed_locks = {}
5998 def Exec(self, feedback_fn):
5999 """Return cluster config.
6002 cluster = self.cfg.GetClusterInfo()
6003 os_hvp = {}
6004 
6005 # Filter just for enabled hypervisors
6006 for os_name, hv_dict in cluster.os_hvp.items():
6007 os_hvp[os_name] = {}
6008 for hv_name, hv_params in hv_dict.items():
6009 if hv_name in cluster.enabled_hypervisors:
6010 os_hvp[os_name][hv_name] = hv_params
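# Illustrative sketch (editor's note): the loop above keeps only per-OS
# hypervisor parameters for hypervisors enabled on the cluster. Assuming,
# hypothetically, enabled_hypervisors == ["kvm"] and
# os_hvp == {"debian": {"kvm": {...}, "xen-pvm": {...}}}, the filtered
# result is {"debian": {"kvm": {...}}}.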
6012 # Convert ip_family to ip_version
6013 primary_ip_version = constants.IP4_VERSION
6014 if cluster.primary_ip_family == netutils.IP6Address.family:
6015 primary_ip_version = constants.IP6_VERSION
6016 
6017 result = {
6018 "software_version": constants.RELEASE_VERSION,
6019 "protocol_version": constants.PROTOCOL_VERSION,
6020 "config_version": constants.CONFIG_VERSION,
6021 "os_api_version": max(constants.OS_API_VERSIONS),
6022 "export_version": constants.EXPORT_VERSION,
6023 "architecture": (platform.architecture()[0], platform.machine()),
6024 "name": cluster.cluster_name,
6025 "master": cluster.master_node,
6026 "default_hypervisor": cluster.primary_hypervisor,
6027 "enabled_hypervisors": cluster.enabled_hypervisors,
6028 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6029 for hypervisor_name in cluster.enabled_hypervisors]),
6030 "os_hvp": os_hvp,
6031 "beparams": cluster.beparams,
6032 "osparams": cluster.osparams,
6033 "ipolicy": cluster.ipolicy,
6034 "nicparams": cluster.nicparams,
6035 "ndparams": cluster.ndparams,
6036 "candidate_pool_size": cluster.candidate_pool_size,
6037 "master_netdev": cluster.master_netdev,
6038 "master_netmask": cluster.master_netmask,
6039 "use_external_mip_script": cluster.use_external_mip_script,
6040 "volume_group_name": cluster.volume_group_name,
6041 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6042 "file_storage_dir": cluster.file_storage_dir,
6043 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6044 "maintain_node_health": cluster.maintain_node_health,
6045 "ctime": cluster.ctime,
6046 "mtime": cluster.mtime,
6047 "uuid": cluster.uuid,
6048 "tags": list(cluster.GetTags()),
6049 "uid_pool": cluster.uid_pool,
6050 "default_iallocator": cluster.default_iallocator,
6051 "reserved_lvs": cluster.reserved_lvs,
6052 "primary_ip_version": primary_ip_version,
6053 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6054 "hidden_os": cluster.hidden_os,
6055 "blacklisted_os": cluster.blacklisted_os,
6056 }
6057 
6058 return result
6061 class LUClusterConfigQuery(NoHooksLU):
6062 """Return configuration values.
6066 _FIELDS_DYNAMIC = utils.FieldSet()
6067 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6068 "watcher_pause", "volume_group_name")
6070 def CheckArguments(self):
6071 _CheckOutputFields(static=self._FIELDS_STATIC,
6072 dynamic=self._FIELDS_DYNAMIC,
6073 selected=self.op.output_fields)
6075 def ExpandNames(self):
6076 self.needed_locks = {}
6078 def Exec(self, feedback_fn):
6079 """Dump a representation of the cluster config to the standard output.
6080 
6081 """
6082 values = []
6083 for field in self.op.output_fields:
6084 if field == "cluster_name":
6085 entry = self.cfg.GetClusterName()
6086 elif field == "master_node":
6087 entry = self.cfg.GetMasterNode()
6088 elif field == "drain_flag":
6089 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6090 elif field == "watcher_pause":
6091 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6092 elif field == "volume_group_name":
6093 entry = self.cfg.GetVGName()
6094 else:
6095 raise errors.ParameterError(field)
6096 values.append(entry)
6097 
6098 return values
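# Illustrative sketch (editor's note): the loop above is a plain field-name
# dispatch, so output order follows self.op.output_fields; e.g. a
# hypothetical output_fields == ["cluster_name", "drain_flag"] yields
# [self.cfg.GetClusterName(), os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)]
# and any unknown field raises errors.ParameterError.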
6100 class LUInstanceActivateDisks(NoHooksLU):
6101 """Bring up an instance's disks.
6106 def ExpandNames(self):
6107 self._ExpandAndLockInstance()
6108 self.needed_locks[locking.LEVEL_NODE] = []
6109 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6111 def DeclareLocks(self, level):
6112 if level == locking.LEVEL_NODE:
6113 self._LockInstancesNodes()
6115 def CheckPrereq(self):
6116 """Check prerequisites.
6118 This checks that the instance is in the cluster.
6121 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6122 assert self.instance is not None, \
6123 "Cannot retrieve locked instance %s" % self.op.instance_name
6124 _CheckNodeOnline(self, self.instance.primary_node)
6126 def Exec(self, feedback_fn):
6127 """Activate the disks.
6130 disks_ok, disks_info = \
6131 _AssembleInstanceDisks(self, self.instance,
6132 ignore_size=self.op.ignore_size)
6133 if not disks_ok:
6134 raise errors.OpExecError("Cannot activate block devices")
6135 
6136 return disks_info
6139 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6140 ignore_size=False):
6141 """Prepare the block devices for an instance.
6143 This sets up the block devices on all nodes.
6145 @type lu: L{LogicalUnit}
6146 @param lu: the logical unit on whose behalf we execute
6147 @type instance: L{objects.Instance}
6148 @param instance: the instance for whose disks we assemble
6149 @type disks: list of L{objects.Disk} or None
6150 @param disks: which disks to assemble (or all, if None)
6151 @type ignore_secondaries: boolean
6152 @param ignore_secondaries: if true, errors on secondary nodes
6153 won't result in an error return from the function
6154 @type ignore_size: boolean
6155 @param ignore_size: if true, the current known size of the disk
6156 will not be used during the disk activation, useful for cases
6157 when the size is wrong
6158 @return: False if the operation failed, otherwise a list of
6159 (host, instance_visible_name, node_visible_name)
6160 with the mapping from node devices to instance devices
6161 
6162 """
6163 device_info = []
6164 disks_ok = True
6165 iname = instance.name
6166 disks = _ExpandCheckDisks(instance, disks)
6168 # With the two passes mechanism we try to reduce the window of
6169 # opportunity for the race condition of switching DRBD to primary
6170 before handshaking occurred, but we do not eliminate it
6172 # The proper fix would be to wait (with some limits) until the
6173 # connection has been made and drbd transitions from WFConnection
6174 # into any other network-connected state (Connected, SyncTarget,
6177 # 1st pass, assemble on all nodes in secondary mode
6178 for idx, inst_disk in enumerate(disks):
6179 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6180 if ignore_size:
6181 node_disk = node_disk.Copy()
6182 node_disk.UnsetSize()
6183 lu.cfg.SetDiskID(node_disk, node)
6184 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6185 msg = result.fail_msg
6186 if msg:
6187 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6188 " (is_primary=False, pass=1): %s",
6189 inst_disk.iv_name, node, msg)
6190 if not ignore_secondaries:
6191 disks_ok = False
6193 # FIXME: race condition on drbd migration to primary
6195 # 2nd pass, do only the primary node
6196 for idx, inst_disk in enumerate(disks):
6199 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6200 if node != instance.primary_node:
6201 continue
6202 if ignore_size:
6203 node_disk = node_disk.Copy()
6204 node_disk.UnsetSize()
6205 lu.cfg.SetDiskID(node_disk, node)
6206 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6207 msg = result.fail_msg
6208 if msg:
6209 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6210 " (is_primary=True, pass=2): %s",
6211 inst_disk.iv_name, node, msg)
6212 disks_ok = False
6213 else:
6214 dev_path = result.payload
6216 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6218 # leave the disks configured for the primary node
6219 # this is a workaround that would be fixed better by
6220 # improving the logical/physical id handling
6221 for disk in disks:
6222 lu.cfg.SetDiskID(disk, instance.primary_node)
6224 return disks_ok, device_info
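# Illustrative usage sketch (editor's note, names are hypothetical): only the
# primary node's devices end up in device_info (it is filled in the second
# pass), so a caller typically does:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if disks_ok:
#     for node, iv_name, dev_path in device_info:
#       pass  # e.g. ("node1.example.com", "disk/0", "/dev/drbd0")
#
# The two passes (secondaries first, then the primary) narrow, but do not
# eliminate, the DRBD WFConnection race described above.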
6227 def _StartInstanceDisks(lu, instance, force):
6228 """Start the disks of an instance.
6231 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6232 ignore_secondaries=force)
6233 if not disks_ok:
6234 _ShutdownInstanceDisks(lu, instance)
6235 if force is not None and not force:
6236 lu.proc.LogWarning("", hint="If the message above refers to a"
6238 " you can retry the operation using '--force'.")
6239 raise errors.OpExecError("Disk consistency error")
6242 class LUInstanceDeactivateDisks(NoHooksLU):
6243 """Shutdown an instance's disks.
6248 def ExpandNames(self):
6249 self._ExpandAndLockInstance()
6250 self.needed_locks[locking.LEVEL_NODE] = []
6251 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6253 def DeclareLocks(self, level):
6254 if level == locking.LEVEL_NODE:
6255 self._LockInstancesNodes()
6257 def CheckPrereq(self):
6258 """Check prerequisites.
6260 This checks that the instance is in the cluster.
6263 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6264 assert self.instance is not None, \
6265 "Cannot retrieve locked instance %s" % self.op.instance_name
6267 def Exec(self, feedback_fn):
6268 """Deactivate the disks.
6269 
6270 """
6271 instance = self.instance
6272 if self.op.force:
6273 _ShutdownInstanceDisks(self, instance)
6274 else:
6275 _SafeShutdownInstanceDisks(self, instance)
6278 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6279 """Shutdown block devices of an instance.
6281 This function checks if an instance is running, before calling
6282 _ShutdownInstanceDisks.
6285 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6286 _ShutdownInstanceDisks(lu, instance, disks=disks)
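# Illustrative sketch (editor's note): the "safe" variant refuses to tear
# down disks under a running instance, while the plain variant acts
# unconditionally:
#
#   _SafeShutdownInstanceDisks(lu, instance)   # raises unless INSTANCE_DOWN
#   _ShutdownInstanceDisks(lu, instance, ignore_primary=True)   # forced path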
6289 def _ExpandCheckDisks(instance, disks):
6290 """Return the instance disks selected by the disks list
6292 @type disks: list of L{objects.Disk} or None
6293 @param disks: selected disks
6294 @rtype: list of L{objects.Disk}
6295 @return: selected instance disks to act on
6296 
6297 """
6298 if disks is None:
6299 return instance.disks
6300 else:
6301 if not set(disks).issubset(instance.disks):
6302 raise errors.ProgrammerError("Can only act on disks belonging to the"
6303 " target instance")
6304 return disks
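# Illustrative sketch (editor's note, objects are hypothetical):
# _ExpandCheckDisks normalises the "disks" argument for the helpers around
# it:
#
#   _ExpandCheckDisks(instance, None)                # -> instance.disks
#   _ExpandCheckDisks(instance, instance.disks[:1])  # -> just that disk
#   _ExpandCheckDisks(instance, [foreign_disk])      # -> ProgrammerError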
6307 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6308 """Shutdown block devices of an instance.
6310 This does the shutdown on all nodes of the instance.
6312 If the ignore_primary is false, errors on the primary node are
6313 not ignored.
6314 
6315 """
6316 all_result = True
6317 disks = _ExpandCheckDisks(instance, disks)
6318 
6319 for disk in disks:
6320 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6321 lu.cfg.SetDiskID(top_disk, node)
6322 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6323 msg = result.fail_msg
6324 if msg:
6325 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6326 disk.iv_name, node, msg)
6327 if ((node == instance.primary_node and not ignore_primary) or
6328 (node != instance.primary_node and not result.offline)):
6329 all_result = False
6330 
6331 return all_result
6333 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6334 """Checks if a node has enough free memory.
6336 This function check if a given node has the needed amount of free
6337 memory. In case the node has less memory or we cannot get the
6338 information from the node, this function raises an OpPrereqError
6339 exception.
6341 @type lu: C{LogicalUnit}
6342 @param lu: a logical unit from which we get configuration data
6344 @param node: the node to check
6345 @type reason: C{str}
6346 @param reason: string to use in the error message
6347 @type requested: C{int}
6348 @param requested: the amount of memory in MiB to check for
6349 @type hypervisor_name: C{str}
6350 @param hypervisor_name: the hypervisor to ask for memory stats
6351 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6352 we cannot check the node
6355 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6356 nodeinfo[node].Raise("Can't get data from node %s" % node,
6357 prereq=True, ecode=errors.ECODE_ENVIRON)
6358 (_, _, (hv_info, )) = nodeinfo[node].payload
6360 free_mem = hv_info.get("memory_free", None)
6361 if not isinstance(free_mem, int):
6362 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6363 " was '%s'" % (node, free_mem),
6364 errors.ECODE_ENVIRON)
6365 if requested > free_mem:
6366 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6367 " needed %s MiB, available %s MiB" %
6368 (node, reason, requested, free_mem),
6369 errors.ECODE_NORES)
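# Illustrative usage sketch (editor's note): a typical caller checks the
# would-be primary node before starting an instance, as
# LUInstanceStartup.CheckPrereq does further below:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MAXMEM], instance.hypervisor)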
6372 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6373 """Checks if nodes have enough free disk space in the all VGs.
6375 This function check if all given nodes have the needed amount of
6376 free disk. In case any node has less disk or we cannot get the
6377 information from the node, this function raises an OpPrereqError
6378 exception.
6380 @type lu: C{LogicalUnit}
6381 @param lu: a logical unit from which we get configuration data
6382 @type nodenames: C{list}
6383 @param nodenames: the list of node names to check
6384 @type req_sizes: C{dict}
6385 @param req_sizes: the hash of vg and corresponding amount of disk in
6387 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6388 or we cannot check the node
6391 for vg, req_size in req_sizes.items():
6392 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
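# Illustrative sketch (editor's note, values are hypothetical): req_sizes
# maps a volume group name to the total MiB required in it, e.g.
# {"xenvg": 10240, "fastvg": 2048} results in one _CheckNodesFreeDiskOnVG
# call per VG over the same node list.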
6395 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6396 """Checks if nodes have enough free disk space in the specified VG.
6398 This function check if all given nodes have the needed amount of
6399 free disk. In case any node has less disk or we cannot get the
6400 information from the node, this function raises an OpPrereqError
6401 exception.
6403 @type lu: C{LogicalUnit}
6404 @param lu: a logical unit from which we get configuration data
6405 @type nodenames: C{list}
6406 @param nodenames: the list of node names to check
6408 @param vg: the volume group to check
6409 @type requested: C{int}
6410 @param requested: the amount of disk in MiB to check for
6411 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6412 or we cannot check the node
6415 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6416 for node in nodenames:
6417 info = nodeinfo[node]
6418 info.Raise("Cannot get current information from node %s" % node,
6419 prereq=True, ecode=errors.ECODE_ENVIRON)
6420 (_, (vg_info, ), _) = info.payload
6421 vg_free = vg_info.get("vg_free", None)
6422 if not isinstance(vg_free, int):
6423 raise errors.OpPrereqError("Can't compute free disk space on node"
6424 " %s for vg %s, result was '%s'" %
6425 (node, vg, vg_free), errors.ECODE_ENVIRON)
6426 if requested > vg_free:
6427 raise errors.OpPrereqError("Not enough disk space on target node %s"
6428 " vg %s: required %d MiB, available %d MiB" %
6429 (node, vg, requested, vg_free),
6430 errors.ECODE_NORES)
6433 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6434 """Checks if nodes have enough physical CPUs
6436 This function checks if all given nodes have the needed number of
6437 physical CPUs. In case any node has less CPUs or we cannot get the
6438 information from the node, this function raises an OpPrereqError
6441 @type lu: C{LogicalUnit}
6442 @param lu: a logical unit from which we get configuration data
6443 @type nodenames: C{list}
6444 @param nodenames: the list of node names to check
6445 @type requested: C{int}
6446 @param requested: the minimum acceptable number of physical CPUs
6447 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6448 or we cannot check the node
6451 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6452 for node in nodenames:
6453 info = nodeinfo[node]
6454 info.Raise("Cannot get current information from node %s" % node,
6455 prereq=True, ecode=errors.ECODE_ENVIRON)
6456 (_, _, (hv_info, )) = info.payload
6457 num_cpus = hv_info.get("cpu_total", None)
6458 if not isinstance(num_cpus, int):
6459 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6460 " on node %s, result was '%s'" %
6461 (node, num_cpus), errors.ECODE_ENVIRON)
6462 if requested > num_cpus:
6463 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6464 "required" % (node, num_cpus, requested),
6465 errors.ECODE_NORES)
6468 class LUInstanceStartup(LogicalUnit):
6469 """Starts an instance.
6472 HPATH = "instance-start"
6473 HTYPE = constants.HTYPE_INSTANCE
6476 def CheckArguments(self):
6478 if self.op.beparams:
6479 # fill the beparams dict
6480 objects.UpgradeBeParams(self.op.beparams)
6481 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6483 def ExpandNames(self):
6484 self._ExpandAndLockInstance()
6486 def BuildHooksEnv(self):
6489 This runs on master, primary and secondary nodes of the instance.
6490 
6491 """
6492 env = {
6493 "FORCE": self.op.force,
6494 }
6495 
6496 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6497 
6498 return env
6500 def BuildHooksNodes(self):
6501 """Build hooks nodes.
6504 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6507 def CheckPrereq(self):
6508 """Check prerequisites.
6510 This checks that the instance is in the cluster.
6513 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6514 assert self.instance is not None, \
6515 "Cannot retrieve locked instance %s" % self.op.instance_name
6518 if self.op.hvparams:
6519 # check hypervisor parameter syntax (locally)
6520 cluster = self.cfg.GetClusterInfo()
6521 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6522 filled_hvp = cluster.FillHV(instance)
6523 filled_hvp.update(self.op.hvparams)
6524 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6525 hv_type.CheckParameterSyntax(filled_hvp)
6526 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6528 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6530 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6532 if self.primary_offline and self.op.ignore_offline_nodes:
6533 self.proc.LogWarning("Ignoring offline primary node")
6535 if self.op.hvparams or self.op.beparams:
6536 self.proc.LogWarning("Overridden parameters are ignored")
6537 else:
6538 _CheckNodeOnline(self, instance.primary_node)
6540 bep = self.cfg.GetClusterInfo().FillBE(instance)
6541 bep.update(self.op.beparams)
6543 # check bridges existence
6544 _CheckInstanceBridgesExist(self, instance)
6546 remote_info = self.rpc.call_instance_info(instance.primary_node,
6548 instance.hypervisor)
6549 remote_info.Raise("Error checking node %s" % instance.primary_node,
6550 prereq=True, ecode=errors.ECODE_ENVIRON)
6551 if not remote_info.payload: # not running already
6552 _CheckNodeFreeMemory(self, instance.primary_node,
6553 "starting instance %s" % instance.name,
6554 bep[constants.BE_MAXMEM], instance.hypervisor)
6556 def Exec(self, feedback_fn):
6557 """Start the instance.
6560 instance = self.instance
6561 force = self.op.force
6563 if not self.op.no_remember:
6564 self.cfg.MarkInstanceUp(instance.name)
6566 if self.primary_offline:
6567 assert self.op.ignore_offline_nodes
6568 self.proc.LogInfo("Primary node offline, marked instance as started")
6569 else:
6570 node_current = instance.primary_node
6572 _StartInstanceDisks(self, instance, force)
6574 result = \
6575 self.rpc.call_instance_start(node_current,
6576 (instance, self.op.hvparams,
6578 self.op.startup_paused)
6579 msg = result.fail_msg
6581 _ShutdownInstanceDisks(self, instance)
6582 raise errors.OpExecError("Could not start instance: %s" % msg)
6585 class LUInstanceReboot(LogicalUnit):
6586 """Reboot an instance.
6589 HPATH = "instance-reboot"
6590 HTYPE = constants.HTYPE_INSTANCE
6593 def ExpandNames(self):
6594 self._ExpandAndLockInstance()
6596 def BuildHooksEnv(self):
6599 This runs on master, primary and secondary nodes of the instance.
6603 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6604 "REBOOT_TYPE": self.op.reboot_type,
6605 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6608 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6612 def BuildHooksNodes(self):
6613 """Build hooks nodes.
6616 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6619 def CheckPrereq(self):
6620 """Check prerequisites.
6622 This checks that the instance is in the cluster.
6625 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6626 assert self.instance is not None, \
6627 "Cannot retrieve locked instance %s" % self.op.instance_name
6628 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6629 _CheckNodeOnline(self, instance.primary_node)
6631 # check bridges existence
6632 _CheckInstanceBridgesExist(self, instance)
6634 def Exec(self, feedback_fn):
6635 """Reboot the instance.
6638 instance = self.instance
6639 ignore_secondaries = self.op.ignore_secondaries
6640 reboot_type = self.op.reboot_type
6642 remote_info = self.rpc.call_instance_info(instance.primary_node,
6644 instance.hypervisor)
6645 remote_info.Raise("Error checking node %s" % instance.primary_node)
6646 instance_running = bool(remote_info.payload)
6648 node_current = instance.primary_node
6650 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6651 constants.INSTANCE_REBOOT_HARD]:
6652 for disk in instance.disks:
6653 self.cfg.SetDiskID(disk, node_current)
6654 result = self.rpc.call_instance_reboot(node_current, instance,
6655 reboot_type,
6656 self.op.shutdown_timeout)
6657 result.Raise("Could not reboot instance")
6658 else:
6659 if instance_running:
6660 result = self.rpc.call_instance_shutdown(node_current, instance,
6661 self.op.shutdown_timeout)
6662 result.Raise("Could not shutdown instance for full reboot")
6663 _ShutdownInstanceDisks(self, instance)
6664 else:
6665 self.LogInfo("Instance %s was already stopped, starting now",
6666 instance.name)
6667 _StartInstanceDisks(self, instance, ignore_secondaries)
6668 result = self.rpc.call_instance_start(node_current,
6669 (instance, None, None), False)
6670 msg = result.fail_msg
6672 _ShutdownInstanceDisks(self, instance)
6673 raise errors.OpExecError("Could not start instance for"
6674 " full reboot: %s" % msg)
6676 self.cfg.MarkInstanceUp(instance.name)
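# Illustrative sketch (editor's note): the reboot type chooses between an
# in-hypervisor reboot and a full stop/start cycle, roughly:
#
#   INSTANCE_REBOOT_SOFT / INSTANCE_REBOOT_HARD -> call_instance_reboot
#   any other type ("full" reboot)              -> shutdown + disk teardown,
#                                                  then disk assembly + start
#
# Either way the instance ends up marked as up in the configuration.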
6679 class LUInstanceShutdown(LogicalUnit):
6680 """Shutdown an instance.
6683 HPATH = "instance-stop"
6684 HTYPE = constants.HTYPE_INSTANCE
6687 def ExpandNames(self):
6688 self._ExpandAndLockInstance()
6690 def BuildHooksEnv(self):
6693 This runs on master, primary and secondary nodes of the instance.
6696 env = _BuildInstanceHookEnvByObject(self, self.instance)
6697 env["TIMEOUT"] = self.op.timeout
6700 def BuildHooksNodes(self):
6701 """Build hooks nodes.
6704 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6707 def CheckPrereq(self):
6708 """Check prerequisites.
6710 This checks that the instance is in the cluster.
6713 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6714 assert self.instance is not None, \
6715 "Cannot retrieve locked instance %s" % self.op.instance_name
6717 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6719 self.primary_offline = \
6720 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6722 if self.primary_offline and self.op.ignore_offline_nodes:
6723 self.proc.LogWarning("Ignoring offline primary node")
6725 _CheckNodeOnline(self, self.instance.primary_node)
6727 def Exec(self, feedback_fn):
6728 """Shutdown the instance.
6731 instance = self.instance
6732 node_current = instance.primary_node
6733 timeout = self.op.timeout
6735 if not self.op.no_remember:
6736 self.cfg.MarkInstanceDown(instance.name)
6738 if self.primary_offline:
6739 assert self.op.ignore_offline_nodes
6740 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6741 else:
6742 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6743 msg = result.fail_msg
6744 if msg:
6745 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6747 _ShutdownInstanceDisks(self, instance)
6750 class LUInstanceReinstall(LogicalUnit):
6751 """Reinstall an instance.
6754 HPATH = "instance-reinstall"
6755 HTYPE = constants.HTYPE_INSTANCE
6758 def ExpandNames(self):
6759 self._ExpandAndLockInstance()
6761 def BuildHooksEnv(self):
6764 This runs on master, primary and secondary nodes of the instance.
6767 return _BuildInstanceHookEnvByObject(self, self.instance)
6769 def BuildHooksNodes(self):
6770 """Build hooks nodes.
6773 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6776 def CheckPrereq(self):
6777 """Check prerequisites.
6779 This checks that the instance is in the cluster and is not running.
6782 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6783 assert instance is not None, \
6784 "Cannot retrieve locked instance %s" % self.op.instance_name
6785 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6786 " offline, cannot reinstall")
6787 for node in instance.secondary_nodes:
6788 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6789 " cannot reinstall")
6791 if instance.disk_template == constants.DT_DISKLESS:
6792 raise errors.OpPrereqError("Instance '%s' has no disks" %
6793 self.op.instance_name,
6795 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6797 if self.op.os_type is not None:
6799 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6800 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6801 instance_os = self.op.os_type
6802 else:
6803 instance_os = instance.os
6805 nodelist = list(instance.all_nodes)
6807 if self.op.osparams:
6808 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6809 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6810 self.os_inst = i_osdict # the new dict (without defaults)
6811 else:
6812 self.os_inst = {}
6814 self.instance = instance
6816 def Exec(self, feedback_fn):
6817 """Reinstall the instance.
6820 inst = self.instance
6822 if self.op.os_type is not None:
6823 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6824 inst.os = self.op.os_type
6825 # Write to configuration
6826 self.cfg.Update(inst, feedback_fn)
6828 _StartInstanceDisks(self, inst, None)
6830 feedback_fn("Running the instance OS create scripts...")
6831 # FIXME: pass debug option from opcode to backend
6832 result = self.rpc.call_instance_os_add(inst.primary_node,
6833 (inst, self.os_inst), True,
6834 self.op.debug_level)
6835 result.Raise("Could not install OS for instance %s on node %s" %
6836 (inst.name, inst.primary_node))
6838 _ShutdownInstanceDisks(self, inst)
6841 class LUInstanceRecreateDisks(LogicalUnit):
6842 """Recreate an instance's missing disks.
6845 HPATH = "instance-recreate-disks"
6846 HTYPE = constants.HTYPE_INSTANCE
6849 def CheckArguments(self):
6850 # normalise the disk list
6851 self.op.disks = sorted(frozenset(self.op.disks))
6853 def ExpandNames(self):
6854 self._ExpandAndLockInstance()
6855 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6856 if self.op.nodes:
6857 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6858 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6859 else:
6860 self.needed_locks[locking.LEVEL_NODE] = []
6862 def DeclareLocks(self, level):
6863 if level == locking.LEVEL_NODE:
6864 # if we replace the nodes, we only need to lock the old primary,
6865 # otherwise we need to lock all nodes for disk re-creation
6866 primary_only = bool(self.op.nodes)
6867 self._LockInstancesNodes(primary_only=primary_only)
6868 elif level == locking.LEVEL_NODE_RES:
6870 self.needed_locks[locking.LEVEL_NODE_RES] = \
6871 self.needed_locks[locking.LEVEL_NODE][:]
6873 def BuildHooksEnv(self):
6876 This runs on master, primary and secondary nodes of the instance.
6879 return _BuildInstanceHookEnvByObject(self, self.instance)
6881 def BuildHooksNodes(self):
6882 """Build hooks nodes.
6885 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6888 def CheckPrereq(self):
6889 """Check prerequisites.
6891 This checks that the instance is in the cluster and is not running.
6894 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6895 assert instance is not None, \
6896 "Cannot retrieve locked instance %s" % self.op.instance_name
6897 if self.op.nodes:
6898 if len(self.op.nodes) != len(instance.all_nodes):
6899 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6900 " %d replacement nodes were specified" %
6901 (instance.name, len(instance.all_nodes),
6902 len(self.op.nodes)),
6904 assert instance.disk_template != constants.DT_DRBD8 or \
6905 len(self.op.nodes) == 2
6906 assert instance.disk_template != constants.DT_PLAIN or \
6907 len(self.op.nodes) == 1
6908 primary_node = self.op.nodes[0]
6909 else:
6910 primary_node = instance.primary_node
6911 _CheckNodeOnline(self, primary_node)
6913 if instance.disk_template == constants.DT_DISKLESS:
6914 raise errors.OpPrereqError("Instance '%s' has no disks" %
6915 self.op.instance_name, errors.ECODE_INVAL)
6916 # if we replace nodes *and* the old primary is offline, we don't
6918 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6919 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6920 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6921 if not (self.op.nodes and old_pnode.offline):
6922 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6923 msg="cannot recreate disks")
6925 if not self.op.disks:
6926 self.op.disks = range(len(instance.disks))
6927 else:
6928 for idx in self.op.disks:
6929 if idx >= len(instance.disks):
6930 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6932 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6933 raise errors.OpPrereqError("Can't recreate disks partially and"
6934 " change the nodes at the same time",
6936 self.instance = instance
6938 def Exec(self, feedback_fn):
6939 """Recreate the disks.
6942 instance = self.instance
6944 assert (self.owned_locks(locking.LEVEL_NODE) ==
6945 self.owned_locks(locking.LEVEL_NODE_RES))
6947 to_skip = []
6948 mods = [] # keeps track of needed logical_id changes
6950 for idx, disk in enumerate(instance.disks):
6951 if idx not in self.op.disks: # disk idx has not been passed in
6952 to_skip.append(idx)
6953 continue
6954 # update secondaries for disks, if needed
6955 if self.op.nodes:
6956 if disk.dev_type == constants.LD_DRBD8:
6957 # need to update the nodes and minors
6958 assert len(self.op.nodes) == 2
6959 assert len(disk.logical_id) == 6 # otherwise disk internals
6961 (_, _, old_port, _, _, old_secret) = disk.logical_id
6962 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6963 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6964 new_minors[0], new_minors[1], old_secret)
6965 assert len(disk.logical_id) == len(new_id)
6966 mods.append((idx, new_id))
6968 # now that we have passed all asserts above, we can apply the mods
6969 # in a single run (to avoid partial changes)
6970 for idx, new_id in mods:
6971 instance.disks[idx].logical_id = new_id
6973 # change primary node, if needed
6974 if self.op.nodes:
6975 instance.primary_node = self.op.nodes[0]
6976 self.LogWarning("Changing the instance's nodes, you will have to"
6977 " remove any disks left on the older nodes manually")
6980 self.cfg.Update(instance, feedback_fn)
6982 _CreateDisks(self, instance, to_skip=to_skip)
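# Illustrative sketch (editor's note, node names are hypothetical): for a
# DRBD8 disk the logical_id rewritten above is the 6-tuple
#
#   (node_a, node_b, port, minor_a, minor_b, secret)
#
# so recreating on ("nodeC", "nodeD") keeps the old port and secret but uses
# fresh minors from cfg.AllocateDRBDMinor:
#
#   ("nodeC", "nodeD", old_port, new_minors[0], new_minors[1], old_secret)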
6985 class LUInstanceRename(LogicalUnit):
6986 """Rename an instance.
6989 HPATH = "instance-rename"
6990 HTYPE = constants.HTYPE_INSTANCE
6992 def CheckArguments(self):
6996 if self.op.ip_check and not self.op.name_check:
6997 # TODO: make the ip check more flexible and not depend on the name check
6998 raise errors.OpPrereqError("IP address check requires a name check",
7001 def BuildHooksEnv(self):
7004 This runs on master, primary and secondary nodes of the instance.
7007 env = _BuildInstanceHookEnvByObject(self, self.instance)
7008 env["INSTANCE_NEW_NAME"] = self.op.new_name
7011 def BuildHooksNodes(self):
7012 """Build hooks nodes.
7015 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7018 def CheckPrereq(self):
7019 """Check prerequisites.
7021 This checks that the instance is in the cluster and is not running.
7024 self.op.instance_name = _ExpandInstanceName(self.cfg,
7025 self.op.instance_name)
7026 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7027 assert instance is not None
7028 _CheckNodeOnline(self, instance.primary_node)
7029 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7030 msg="cannot rename")
7031 self.instance = instance
7033 new_name = self.op.new_name
7034 if self.op.name_check:
7035 hostname = netutils.GetHostname(name=new_name)
7036 if hostname.name != new_name:
7037 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7038 hostname.name)
7039 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7040 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7041 " same as given hostname '%s'") %
7042 (hostname.name, self.op.new_name),
7044 new_name = self.op.new_name = hostname.name
7045 if (self.op.ip_check and
7046 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7047 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7048 (hostname.ip, new_name),
7049 errors.ECODE_NOTUNIQUE)
7051 instance_list = self.cfg.GetInstanceList()
7052 if new_name in instance_list and new_name != instance.name:
7053 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7054 new_name, errors.ECODE_EXISTS)
7056 def Exec(self, feedback_fn):
7057 """Rename the instance.
7060 inst = self.instance
7061 old_name = inst.name
7063 rename_file_storage = False
7064 if (inst.disk_template in constants.DTS_FILEBASED and
7065 self.op.new_name != inst.name):
7066 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7067 rename_file_storage = True
7069 self.cfg.RenameInstance(inst.name, self.op.new_name)
7070 # Change the instance lock. This is definitely safe while we hold the BGL.
7071 # Otherwise the new lock would have to be added in acquired mode.
7073 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7074 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7076 # re-read the instance from the configuration after rename
7077 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7079 if rename_file_storage:
7080 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7081 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7082 old_file_storage_dir,
7083 new_file_storage_dir)
7084 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7085 " (but the instance has been renamed in Ganeti)" %
7086 (inst.primary_node, old_file_storage_dir,
7087 new_file_storage_dir))
7089 _StartInstanceDisks(self, inst, None)
7090 try:
7091 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7092 old_name, self.op.debug_level)
7093 msg = result.fail_msg
7094 if msg:
7095 msg = ("Could not run OS rename script for instance %s on node %s"
7096 " (but the instance has been renamed in Ganeti): %s" %
7097 (inst.name, inst.primary_node, msg))
7098 self.proc.LogWarning(msg)
7099 finally:
7100 _ShutdownInstanceDisks(self, inst)
7101 
7102 return inst.name
7105 class LUInstanceRemove(LogicalUnit):
7106 """Remove an instance.
7109 HPATH = "instance-remove"
7110 HTYPE = constants.HTYPE_INSTANCE
7113 def ExpandNames(self):
7114 self._ExpandAndLockInstance()
7115 self.needed_locks[locking.LEVEL_NODE] = []
7116 self.needed_locks[locking.LEVEL_NODE_RES] = []
7117 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7119 def DeclareLocks(self, level):
7120 if level == locking.LEVEL_NODE:
7121 self._LockInstancesNodes()
7122 elif level == locking.LEVEL_NODE_RES:
7124 self.needed_locks[locking.LEVEL_NODE_RES] = \
7125 self.needed_locks[locking.LEVEL_NODE][:]
7127 def BuildHooksEnv(self):
7130 This runs on master, primary and secondary nodes of the instance.
7133 env = _BuildInstanceHookEnvByObject(self, self.instance)
7134 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7137 def BuildHooksNodes(self):
7138 """Build hooks nodes.
7141 nl = [self.cfg.GetMasterNode()]
7142 nl_post = list(self.instance.all_nodes) + nl
7143 return (nl, nl_post)
7145 def CheckPrereq(self):
7146 """Check prerequisites.
7148 This checks that the instance is in the cluster.
7151 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7152 assert self.instance is not None, \
7153 "Cannot retrieve locked instance %s" % self.op.instance_name
7155 def Exec(self, feedback_fn):
7156 """Remove the instance.
7159 instance = self.instance
7160 logging.info("Shutting down instance %s on node %s",
7161 instance.name, instance.primary_node)
7163 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7164 self.op.shutdown_timeout)
7165 msg = result.fail_msg
7166 if msg:
7167 if self.op.ignore_failures:
7168 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7169 else:
7170 raise errors.OpExecError("Could not shutdown instance %s on"
7171 " node %s: %s" %
7172 (instance.name, instance.primary_node, msg))
7174 assert (self.owned_locks(locking.LEVEL_NODE) ==
7175 self.owned_locks(locking.LEVEL_NODE_RES))
7176 assert not (set(instance.all_nodes) -
7177 self.owned_locks(locking.LEVEL_NODE)), \
7178 "Not owning correct locks"
7180 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7183 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7184 """Utility function to remove an instance.
7187 logging.info("Removing block devices for instance %s", instance.name)
7189 if not _RemoveDisks(lu, instance):
7190 if not ignore_failures:
7191 raise errors.OpExecError("Can't remove instance's disks")
7192 feedback_fn("Warning: can't remove instance's disks")
7194 logging.info("Removing instance %s out of cluster config", instance.name)
7196 lu.cfg.RemoveInstance(instance.name)
7198 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7199 "Instance lock removal conflict"
7201 # Remove lock for the instance
7202 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7205 class LUInstanceQuery(NoHooksLU):
7206 """Logical unit for querying instances.
7209 # pylint: disable=W0142
7212 def CheckArguments(self):
7213 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7214 self.op.output_fields, self.op.use_locking)
7216 def ExpandNames(self):
7217 self.iq.ExpandNames(self)
7219 def DeclareLocks(self, level):
7220 self.iq.DeclareLocks(self, level)
7222 def Exec(self, feedback_fn):
7223 return self.iq.OldStyleQuery(self)
7226 class LUInstanceFailover(LogicalUnit):
7227 """Failover an instance.
7230 HPATH = "instance-failover"
7231 HTYPE = constants.HTYPE_INSTANCE
7234 def CheckArguments(self):
7235 """Check the arguments.
7238 self.iallocator = getattr(self.op, "iallocator", None)
7239 self.target_node = getattr(self.op, "target_node", None)
7241 def ExpandNames(self):
7242 self._ExpandAndLockInstance()
7244 if self.op.target_node is not None:
7245 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7247 self.needed_locks[locking.LEVEL_NODE] = []
7248 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7250 ignore_consistency = self.op.ignore_consistency
7251 shutdown_timeout = self.op.shutdown_timeout
7252 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7255 ignore_consistency=ignore_consistency,
7256 shutdown_timeout=shutdown_timeout,
7257 ignore_ipolicy=self.op.ignore_ipolicy)
7258 self.tasklets = [self._migrater]
7260 def DeclareLocks(self, level):
7261 if level == locking.LEVEL_NODE:
7262 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7263 if instance.disk_template in constants.DTS_EXT_MIRROR:
7264 if self.op.target_node is None:
7265 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7267 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7268 self.op.target_node]
7269 del self.recalculate_locks[locking.LEVEL_NODE]
7271 self._LockInstancesNodes()
7273 def BuildHooksEnv(self):
7276 This runs on master, primary and secondary nodes of the instance.
7279 instance = self._migrater.instance
7280 source_node = instance.primary_node
7281 target_node = self.op.target_node
7282 env = {
7283 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7284 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7285 "OLD_PRIMARY": source_node,
7286 "NEW_PRIMARY": target_node,
7287 }
7289 if instance.disk_template in constants.DTS_INT_MIRROR:
7290 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7291 env["NEW_SECONDARY"] = source_node
7293 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7295 env.update(_BuildInstanceHookEnvByObject(self, instance))
7296 
7297 return env
7299 def BuildHooksNodes(self):
7300 """Build hooks nodes.
7303 instance = self._migrater.instance
7304 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7305 return (nl, nl + [instance.primary_node])
7308 class LUInstanceMigrate(LogicalUnit):
7309 """Migrate an instance.
7311 This is migration without shutting down, compared to the failover,
7312 which is done with shutdown.
7315 HPATH = "instance-migrate"
7316 HTYPE = constants.HTYPE_INSTANCE
7319 def ExpandNames(self):
7320 self._ExpandAndLockInstance()
7322 if self.op.target_node is not None:
7323 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7325 self.needed_locks[locking.LEVEL_NODE] = []
7326 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7328 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7329 cleanup=self.op.cleanup,
7331 fallback=self.op.allow_failover,
7332 ignore_ipolicy=self.op.ignore_ipolicy)
7333 self.tasklets = [self._migrater]
7335 def DeclareLocks(self, level):
7336 if level == locking.LEVEL_NODE:
7337 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7338 if instance.disk_template in constants.DTS_EXT_MIRROR:
7339 if self.op.target_node is None:
7340 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7342 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7343 self.op.target_node]
7344 del self.recalculate_locks[locking.LEVEL_NODE]
7346 self._LockInstancesNodes()
7348 def BuildHooksEnv(self):
7351 This runs on master, primary and secondary nodes of the instance.
7354 instance = self._migrater.instance
7355 source_node = instance.primary_node
7356 target_node = self.op.target_node
7357 env = _BuildInstanceHookEnvByObject(self, instance)
7359 "MIGRATE_LIVE": self._migrater.live,
7360 "MIGRATE_CLEANUP": self.op.cleanup,
7361 "OLD_PRIMARY": source_node,
7362 "NEW_PRIMARY": target_node,
7365 if instance.disk_template in constants.DTS_INT_MIRROR:
7366 env["OLD_SECONDARY"] = target_node
7367 env["NEW_SECONDARY"] = source_node
7369 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7373 def BuildHooksNodes(self):
7374 """Build hooks nodes.
7377 instance = self._migrater.instance
7378 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7379 return (nl, nl + [instance.primary_node])
7382 class LUInstanceMove(LogicalUnit):
7383 """Move an instance by data-copying.
7386 HPATH = "instance-move"
7387 HTYPE = constants.HTYPE_INSTANCE
7390 def ExpandNames(self):
7391 self._ExpandAndLockInstance()
7392 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7393 self.op.target_node = target_node
7394 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7395 self.needed_locks[locking.LEVEL_NODE_RES] = []
7396 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7398 def DeclareLocks(self, level):
7399 if level == locking.LEVEL_NODE:
7400 self._LockInstancesNodes(primary_only=True)
7401 elif level == locking.LEVEL_NODE_RES:
7403 self.needed_locks[locking.LEVEL_NODE_RES] = \
7404 self.needed_locks[locking.LEVEL_NODE][:]
7406 def BuildHooksEnv(self):
7409 This runs on master, primary and secondary nodes of the instance.
7413 "TARGET_NODE": self.op.target_node,
7414 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7416 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7419 def BuildHooksNodes(self):
7420 """Build hooks nodes.
7421 
7422 """
7423 nl = [
7424 self.cfg.GetMasterNode(),
7425 self.instance.primary_node,
7426 self.op.target_node,
7427 ]
7428 
7429 return (nl, nl)
7430 def CheckPrereq(self):
7431 """Check prerequisites.
7433 This checks that the instance is in the cluster.
7436 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7437 assert self.instance is not None, \
7438 "Cannot retrieve locked instance %s" % self.op.instance_name
7440 node = self.cfg.GetNodeInfo(self.op.target_node)
7441 assert node is not None, \
7442 "Cannot retrieve locked node %s" % self.op.target_node
7444 self.target_node = target_node = node.name
7446 if target_node == instance.primary_node:
7447 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7448 (instance.name, target_node),
7451 bep = self.cfg.GetClusterInfo().FillBE(instance)
7453 for idx, dsk in enumerate(instance.disks):
7454 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7455 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7456 " cannot copy" % idx, errors.ECODE_STATE)
7458 _CheckNodeOnline(self, target_node)
7459 _CheckNodeNotDrained(self, target_node)
7460 _CheckNodeVmCapable(self, target_node)
7461 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7462 self.cfg.GetNodeGroup(node.group))
7463 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7464 ignore=self.op.ignore_ipolicy)
7466 if instance.admin_state == constants.ADMINST_UP:
7467 # check memory requirements on the secondary node
7468 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7469 instance.name, bep[constants.BE_MAXMEM],
7470 instance.hypervisor)
7471 else:
7472 self.LogInfo("Not checking memory on the secondary node as"
7473 " instance will not be started")
7475 # check bridge existence
7476 _CheckInstanceBridgesExist(self, instance, node=target_node)
7478 def Exec(self, feedback_fn):
7479 """Move an instance.
7481 The move is done by shutting it down on its present node, copying
7482 the data over (slow) and starting it on the new node.
7485 instance = self.instance
7487 source_node = instance.primary_node
7488 target_node = self.target_node
7490 self.LogInfo("Shutting down instance %s on source node %s",
7491 instance.name, source_node)
7493 assert (self.owned_locks(locking.LEVEL_NODE) ==
7494 self.owned_locks(locking.LEVEL_NODE_RES))
7496 result = self.rpc.call_instance_shutdown(source_node, instance,
7497 self.op.shutdown_timeout)
7498 msg = result.fail_msg
7499 if msg:
7500 if self.op.ignore_consistency:
7501 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7502 " Proceeding anyway. Please make sure node"
7503 " %s is down. Error details: %s",
7504 instance.name, source_node, source_node, msg)
7506 raise errors.OpExecError("Could not shutdown instance %s on"
7507 " node %s: %s" %
7508 (instance.name, source_node, msg))
7510 # create the target disks
7511 try:
7512 _CreateDisks(self, instance, target_node=target_node)
7513 except errors.OpExecError:
7514 self.LogWarning("Device creation failed, reverting...")
7515 try:
7516 _RemoveDisks(self, instance, target_node=target_node)
7517 finally:
7518 self.cfg.ReleaseDRBDMinors(instance.name)
7519 raise
7521 cluster_name = self.cfg.GetClusterInfo().cluster_name
7522 
7523 errs = []
7524 # activate, get path, copy the data over
7525 for idx, disk in enumerate(instance.disks):
7526 self.LogInfo("Copying data for disk %d", idx)
7527 result = self.rpc.call_blockdev_assemble(target_node, disk,
7528 instance.name, True, idx)
7529 if result.fail_msg:
7530 self.LogWarning("Can't assemble newly created disk %d: %s",
7531 idx, result.fail_msg)
7532 errs.append(result.fail_msg)
7534 dev_path = result.payload
7535 result = self.rpc.call_blockdev_export(source_node, disk,
7536 target_node, dev_path,
7539 self.LogWarning("Can't copy data over for disk %d: %s",
7540 idx, result.fail_msg)
7541 errs.append(result.fail_msg)
7545 self.LogWarning("Some disks failed to copy, aborting")
7547 _RemoveDisks(self, instance, target_node=target_node)
7549 self.cfg.ReleaseDRBDMinors(instance.name)
7550 raise errors.OpExecError("Errors during disk copy: %s" %
7553 instance.primary_node = target_node
7554 self.cfg.Update(instance, feedback_fn)
7556 self.LogInfo("Removing the disks on the original node")
7557 _RemoveDisks(self, instance, target_node=source_node)
7559 # Only start the instance if it's marked as up
7560 if instance.admin_state == constants.ADMINST_UP:
7561 self.LogInfo("Starting instance %s on node %s",
7562 instance.name, target_node)
7564 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7565 ignore_secondaries=True)
7567 _ShutdownInstanceDisks(self, instance)
7568 raise errors.OpExecError("Can't activate the instance's disks")
7570 result = self.rpc.call_instance_start(target_node,
7571 (instance, None, None), False)
7572 msg = result.fail_msg
7574 _ShutdownInstanceDisks(self, instance)
7575 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7576 (instance.name, target_node, msg))
7579 class LUNodeMigrate(LogicalUnit):
7580 """Migrate all instances from a node.
7583 HPATH = "node-migrate"
7584 HTYPE = constants.HTYPE_NODE
7587 def CheckArguments(self):
7590 def ExpandNames(self):
7591 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7593 self.share_locks = _ShareAll()
7594 self.needed_locks = {
7595 locking.LEVEL_NODE: [self.op.node_name],
7598 def BuildHooksEnv(self):
7601 This runs on the master, the primary and all the secondaries.
7605 "NODE_NAME": self.op.node_name,
7608 def BuildHooksNodes(self):
7609 """Build hooks nodes.
7612 nl = [self.cfg.GetMasterNode()]
7615 def CheckPrereq(self):
7618 def Exec(self, feedback_fn):
7619 # Prepare jobs for migration instances
7621 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7624 iallocator=self.op.iallocator,
7625 target_node=self.op.target_node,
7626 ignore_ipolicy=self.op.ignore_ipolicy)]
7627 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7630 # TODO: Run iallocator in this opcode and pass correct placement options to
7631 # OpInstanceMigrate. Since other jobs can modify the cluster between
7632 # running the iallocator and the actual migration, a good consistency model
7633 # will have to be found.
7635 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7636 frozenset([self.op.node_name]))
7638 return ResultWithJobs(jobs)
7641 class TLMigrateInstance(Tasklet):
7642 """Tasklet class for instance migration.
7645 @ivar live: whether the migration will be done live or non-live;
7646 this variable is initialized only after CheckPrereq has run
7647 @type cleanup: boolean
7648 @ivar cleanup: Whether we clean up from a failed migration
7649 @type iallocator: string
7650 @ivar iallocator: The iallocator used to determine target_node
7651 @type target_node: string
7652 @ivar target_node: If given, the target_node to reallocate the instance to
7653 @type failover: boolean
7654 @ivar failover: Whether operation results in failover or migration
7655 @type fallback: boolean
7656 @ivar fallback: Whether fallback to failover is allowed if migration not
7658 @type ignore_consistency: boolean
7659 @ivar ignore_consistency: Whether we should ignore consistency between source
7661 @type shutdown_timeout: int
7662 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7663 @type ignore_ipolicy: bool
7664 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7669 _MIGRATION_POLL_INTERVAL = 1 # seconds
7670 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7672 def __init__(self, lu, instance_name, cleanup=False,
7673 failover=False, fallback=False,
7674 ignore_consistency=False,
7675 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7676 ignore_ipolicy=False):
7677 """Initializes this class.
7680 Tasklet.__init__(self, lu)
7683 self.instance_name = instance_name
7684 self.cleanup = cleanup
7685 self.live = False # will be overridden later
7686 self.failover = failover
7687 self.fallback = fallback
7688 self.ignore_consistency = ignore_consistency
7689 self.shutdown_timeout = shutdown_timeout
7690 self.ignore_ipolicy = ignore_ipolicy
7692 def CheckPrereq(self):
7693 """Check prerequisites.
7695 This checks that the instance is in the cluster.
7698 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7699 instance = self.cfg.GetInstanceInfo(instance_name)
7700 assert instance is not None
7701 self.instance = instance
7702 cluster = self.cfg.GetClusterInfo()
7704 if (not self.cleanup and
7705 not instance.admin_state == constants.ADMINST_UP and
7706 not self.failover and self.fallback):
7707 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7708 " switching to failover")
7709 self.failover = True
7711 if instance.disk_template not in constants.DTS_MIRRORED:
7716 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7717 " %s" % (instance.disk_template, text),
7720 if instance.disk_template in constants.DTS_EXT_MIRROR:
7721 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7723 if self.lu.op.iallocator:
7724 self._RunAllocator()
7726 # We set self.target_node as it is required by
7728 self.target_node = self.lu.op.target_node
7730 # Check that the target node is correct in terms of instance policy
7731 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7732 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7733 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7734 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7735 ignore=self.ignore_ipolicy)
7737 # self.target_node is already populated, either directly or by the
7739 target_node = self.target_node
7740 if self.target_node == instance.primary_node:
7741 raise errors.OpPrereqError("Cannot migrate instance %s"
7742 " to its primary (%s)" %
7743 (instance.name, instance.primary_node))
7745 if len(self.lu.tasklets) == 1:
7746 # It is safe to release locks only when we're the only tasklet
7748 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7749 keep=[instance.primary_node, self.target_node])
7752 secondary_nodes = instance.secondary_nodes
7753 if not secondary_nodes:
7754 raise errors.ConfigurationError("No secondary node but using"
7755 " %s disk template" %
7756 instance.disk_template)
7757 target_node = secondary_nodes[0]
7758 if self.lu.op.iallocator or (self.lu.op.target_node and
7759 self.lu.op.target_node != target_node):
7761 text = "failed over"
7764 raise errors.OpPrereqError("Instances with disk template %s cannot"
7765 " be %s to arbitrary nodes"
7766 " (neither an iallocator nor a target"
7767 " node can be passed)" %
7768 (instance.disk_template, text),
7770 nodeinfo = self.cfg.GetNodeInfo(target_node)
7771 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7772 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7773 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7774 ignore=self.ignore_ipolicy)
7776 i_be = cluster.FillBE(instance)
7778 # check memory requirements on the secondary node
7779 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7780 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7781 instance.name, i_be[constants.BE_MAXMEM],
7782 instance.hypervisor)
7784 self.lu.LogInfo("Not checking memory on the secondary node as"
7785 " instance will not be started")
7787 # check if failover must be forced instead of migration
7788 if (not self.cleanup and not self.failover and
7789 i_be[constants.BE_ALWAYS_FAILOVER]):
7791 self.lu.LogInfo("Instance configured to always failover; fallback"
7793 self.failover = True
7795 raise errors.OpPrereqError("This instance has been configured to"
7796 " always failover, please allow failover",
7799 # check bridge existence
7800 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7802 if not self.cleanup:
7803 _CheckNodeNotDrained(self.lu, target_node)
7804 if not self.failover:
7805 result = self.rpc.call_instance_migratable(instance.primary_node,
7807 if result.fail_msg and self.fallback:
7808 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7810 self.failover = True
7812 result.Raise("Can't migrate, please use failover",
7813 prereq=True, ecode=errors.ECODE_STATE)
7815 assert not (self.failover and self.cleanup)
7817 if not self.failover:
7818 if self.lu.op.live is not None and self.lu.op.mode is not None:
7819 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7820 " parameters are accepted",
7822 if self.lu.op.live is not None:
7824 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7826 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7827 # reset the 'live' parameter to None so that repeated
7828 # invocations of CheckPrereq do not raise an exception
7829 self.lu.op.live = None
7830 elif self.lu.op.mode is None:
7831 # read the default value from the hypervisor
7832 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7833 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7835 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7837 # Failover is never live
7840 def _RunAllocator(self):
7841 """Run the allocator based on input opcode.
7844 # FIXME: add a self.ignore_ipolicy option
7845 ial = IAllocator(self.cfg, self.rpc,
7846 mode=constants.IALLOCATOR_MODE_RELOC,
7847 name=self.instance_name,
7848 # TODO See why hail breaks with a single node below
7849 relocate_from=[self.instance.primary_node,
7850 self.instance.primary_node],
7853 ial.Run(self.lu.op.iallocator)
7856 raise errors.OpPrereqError("Can't compute nodes using"
7857 " iallocator '%s': %s" %
7858 (self.lu.op.iallocator, ial.info),
7860 if len(ial.result) != ial.required_nodes:
7861 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7862 " of nodes (%s), required %s" %
7863 (self.lu.op.iallocator, len(ial.result),
7864 ial.required_nodes), errors.ECODE_FAULT)
7865 self.target_node = ial.result[0]
7866 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7867 self.instance_name, self.lu.op.iallocator,
7868 utils.CommaJoin(ial.result))
7870 def _WaitUntilSync(self):
7871 """Poll with custom rpc for disk sync.
7873 This uses our own step-based rpc call.
7876 self.feedback_fn("* wait until resync is done")
7880 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7882 self.instance.disks)
7884 for node, nres in result.items():
7885 nres.Raise("Cannot resync disks on node %s" % node)
7886 node_done, node_percent = nres.payload
7887 all_done = all_done and node_done
7888 if node_percent is not None:
7889 min_percent = min(min_percent, node_percent)
7891 if min_percent < 100:
7892 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7895 def _EnsureSecondary(self, node):
7896 """Demote a node to secondary.
7899 self.feedback_fn("* switching node %s to secondary mode" % node)
7901 for dev in self.instance.disks:
7902 self.cfg.SetDiskID(dev, node)
7904 result = self.rpc.call_blockdev_close(node, self.instance.name,
7905 self.instance.disks)
7906 result.Raise("Cannot change disk to secondary on node %s" % node)
7908 def _GoStandalone(self):
7909 """Disconnect from the network.
7912 self.feedback_fn("* changing into standalone mode")
7913 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7914 self.instance.disks)
7915 for node, nres in result.items():
7916 nres.Raise("Cannot disconnect disks on node %s" % node)
7918 def _GoReconnect(self, multimaster):
7919 """Reconnect to the network.
7925 msg = "single-master"
7926 self.feedback_fn("* changing disks into %s mode" % msg)
7927 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7928 self.instance.disks,
7929 self.instance.name, multimaster)
7930 for node, nres in result.items():
7931 nres.Raise("Cannot change disks config on node %s" % node)
7933 def _ExecCleanup(self):
7934 """Try to cleanup after a failed migration.
7936 The cleanup is done by:
7937 - check that the instance is running only on one node
7938 (and update the config if needed)
7939 - change disks on its secondary node to secondary
7940 - wait until disks are fully synchronized
7941 - disconnect from the network
7942 - change disks into single-master mode
7943 - wait again until disks are fully synchronized
7946 instance = self.instance
7947 target_node = self.target_node
7948 source_node = self.source_node
7950 # check running on only one node
7951 self.feedback_fn("* checking where the instance actually runs"
7952 " (if this hangs, the hypervisor might be in"
7954 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7955 for node, result in ins_l.items():
7956 result.Raise("Can't contact node %s" % node)
7958 runningon_source = instance.name in ins_l[source_node].payload
7959 runningon_target = instance.name in ins_l[target_node].payload
7961 if runningon_source and runningon_target:
7962 raise errors.OpExecError("Instance seems to be running on two nodes,"
7963 " or the hypervisor is confused; you will have"
7964 " to ensure manually that it runs only on one"
7965 " and restart this operation")
7967 if not (runningon_source or runningon_target):
7968 raise errors.OpExecError("Instance does not seem to be running at all;"
7969 " in this case it's safer to repair by"
7970 " running 'gnt-instance stop' to ensure disk"
7971 " shutdown, and then restarting it")
7973 if runningon_target:
7974 # the migration has actually succeeded, we need to update the config
7975 self.feedback_fn("* instance running on secondary node (%s),"
7976 " updating config" % target_node)
7977 instance.primary_node = target_node
7978 self.cfg.Update(instance, self.feedback_fn)
7979 demoted_node = source_node
7981 self.feedback_fn("* instance confirmed to be running on its"
7982 " primary node (%s)" % source_node)
7983 demoted_node = target_node
7985 if instance.disk_template in constants.DTS_INT_MIRROR:
7986 self._EnsureSecondary(demoted_node)
7988 self._WaitUntilSync()
7989 except errors.OpExecError:
7990 # we ignore errors here, since if the device is standalone, it
7991 # won't be able to sync
7993 self._GoStandalone()
7994 self._GoReconnect(False)
7995 self._WaitUntilSync()
7997 self.feedback_fn("* done")
7999 def _RevertDiskStatus(self):
8000 """Try to revert the disk status after a failed migration.
8003 target_node = self.target_node
8004 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8008 self._EnsureSecondary(target_node)
8009 self._GoStandalone()
8010 self._GoReconnect(False)
8011 self._WaitUntilSync()
8012 except errors.OpExecError, err:
8013 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8014 " please try to recover the instance manually;"
8015 " error '%s'" % str(err))
8017 def _AbortMigration(self):
8018 """Call the hypervisor code to abort a started migration.
8021 instance = self.instance
8022 target_node = self.target_node
8023 source_node = self.source_node
8024 migration_info = self.migration_info
8026 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8030 abort_msg = abort_result.fail_msg
8032 logging.error("Aborting migration failed on target node %s: %s",
8033 target_node, abort_msg)
8034 # Don't raise an exception here, as we still have to try to revert the
8035 # disk status, even if this step failed.
8037 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8038 instance, False, self.live)
8039 abort_msg = abort_result.fail_msg
8041 logging.error("Aborting migration failed on source node %s: %s",
8042 source_node, abort_msg)
8044 def _ExecMigration(self):
8045 """Migrate an instance.
8047 The migrate is done by:
8048 - change the disks into dual-master mode
8049 - wait until disks are fully synchronized again
8050 - migrate the instance
8051 - change disks on the new secondary node (the old primary) to secondary
8052 - wait until disks are fully synchronized
8053 - change disks into single-master mode
8056 instance = self.instance
8057 target_node = self.target_node
8058 source_node = self.source_node
8060 # Check for hypervisor version mismatch and warn the user.
8061 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8062 None, [self.instance.hypervisor])
8063 for ninfo in nodeinfo.values():
8064 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8066 (_, _, (src_info, )) = nodeinfo[source_node].payload
8067 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8069 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8070 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8071 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8072 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8073 if src_version != dst_version:
8074 self.feedback_fn("* warning: hypervisor version mismatch between"
8075 " source (%s) and target (%s) node" %
8076 (src_version, dst_version))
8078 self.feedback_fn("* checking disk consistency between source and target")
8079 for dev in instance.disks:
8080 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8081 raise errors.OpExecError("Disk %s is degraded or not fully"
8082 " synchronized on target node,"
8083 " aborting migration" % dev.iv_name)
8085 # First get the migration information from the remote node
8086 result = self.rpc.call_migration_info(source_node, instance)
8087 msg = result.fail_msg
8089 log_err = ("Failed fetching source migration information from %s: %s" %
8091 logging.error(log_err)
8092 raise errors.OpExecError(log_err)
8094 self.migration_info = migration_info = result.payload
8096 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8097 # Then switch the disks to master/master mode
8098 self._EnsureSecondary(target_node)
8099 self._GoStandalone()
8100 self._GoReconnect(True)
8101 self._WaitUntilSync()
8103 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8104 result = self.rpc.call_accept_instance(target_node,
8107 self.nodes_ip[target_node])
8109 msg = result.fail_msg
8111 logging.error("Instance pre-migration failed, trying to revert"
8112 " disk status: %s", msg)
8113 self.feedback_fn("Pre-migration failed, aborting")
8114 self._AbortMigration()
8115 self._RevertDiskStatus()
8116 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8117 (instance.name, msg))
8119 self.feedback_fn("* migrating instance to %s" % target_node)
8120 result = self.rpc.call_instance_migrate(source_node, instance,
8121 self.nodes_ip[target_node],
8123 msg = result.fail_msg
8125 logging.error("Instance migration failed, trying to revert"
8126 " disk status: %s", msg)
8127 self.feedback_fn("Migration failed, aborting")
8128 self._AbortMigration()
8129 self._RevertDiskStatus()
8130 raise errors.OpExecError("Could not migrate instance %s: %s" %
8131 (instance.name, msg))
8133 self.feedback_fn("* starting memory transfer")
8134 last_feedback = time.time()
8136 result = self.rpc.call_instance_get_migration_status(source_node,
8138 msg = result.fail_msg
8139 ms = result.payload # MigrationStatus instance
8140 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8141 logging.error("Instance migration failed, trying to revert"
8142 " disk status: %s", msg)
8143 self.feedback_fn("Migration failed, aborting")
8144 self._AbortMigration()
8145 self._RevertDiskStatus()
8146 raise errors.OpExecError("Could not migrate instance %s: %s" %
8147 (instance.name, msg))
8149 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8150 self.feedback_fn("* memory transfer complete")
8153 if (utils.TimeoutExpired(last_feedback,
8154 self._MIGRATION_FEEDBACK_INTERVAL) and
8155 ms.transferred_ram is not None):
8156 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8157 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8158 last_feedback = time.time()
8160 time.sleep(self._MIGRATION_POLL_INTERVAL)
8162 result = self.rpc.call_instance_finalize_migration_src(source_node,
8166 msg = result.fail_msg
8168 logging.error("Instance migration succeeded, but finalization failed"
8169 " on the source node: %s", msg)
8170 raise errors.OpExecError("Could not finalize instance migration: %s" %
8173 instance.primary_node = target_node
8175 # distribute new instance config to the other nodes
8176 self.cfg.Update(instance, self.feedback_fn)
8178 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8182 msg = result.fail_msg
8184 logging.error("Instance migration succeeded, but finalization failed"
8185 " on the target node: %s", msg)
8186 raise errors.OpExecError("Could not finalize instance migration: %s" %
8189 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8190 self._EnsureSecondary(source_node)
8191 self._WaitUntilSync()
8192 self._GoStandalone()
8193 self._GoReconnect(False)
8194 self._WaitUntilSync()
8196 self.feedback_fn("* done")
8198 def _ExecFailover(self):
8199 """Failover an instance.
8201 The failover is done by shutting it down on its present node and
8202 starting it on the secondary.
8205 instance = self.instance
8206 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8208 source_node = instance.primary_node
8209 target_node = self.target_node
8211 if instance.admin_state == constants.ADMINST_UP:
8212 self.feedback_fn("* checking disk consistency between source and target")
8213 for dev in instance.disks:
8214 # for drbd, these are drbd over lvm
8215 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8216 if primary_node.offline:
8217 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8219 (primary_node.name, dev.iv_name, target_node))
8220 elif not self.ignore_consistency:
8221 raise errors.OpExecError("Disk %s is degraded on target node,"
8222 " aborting failover" % dev.iv_name)
8224 self.feedback_fn("* not checking disk consistency as instance is not"
8227 self.feedback_fn("* shutting down instance on source node")
8228 logging.info("Shutting down instance %s on node %s",
8229 instance.name, source_node)
8231 result = self.rpc.call_instance_shutdown(source_node, instance,
8232 self.shutdown_timeout)
8233 msg = result.fail_msg
8235 if self.ignore_consistency or primary_node.offline:
8236 self.lu.LogWarning("Could not shut down instance %s on node %s,"
8237 " proceeding anyway; please make sure node"
8238 " %s is down; error details: %s",
8239 instance.name, source_node, source_node, msg)
8241 raise errors.OpExecError("Could not shut down instance %s on"
8243 (instance.name, source_node, msg))
8245 self.feedback_fn("* deactivating the instance's disks on source node")
8246 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8247 raise errors.OpExecError("Can't shut down the instance's disks")
8249 instance.primary_node = target_node
8250 # distribute new instance config to the other nodes
8251 self.cfg.Update(instance, self.feedback_fn)
8253 # Only start the instance if it's marked as up
8254 if instance.admin_state == constants.ADMINST_UP:
8255 self.feedback_fn("* activating the instance's disks on target node %s" %
8257 logging.info("Starting instance %s on node %s",
8258 instance.name, target_node)
8260 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8261 ignore_secondaries=True)
8263 _ShutdownInstanceDisks(self.lu, instance)
8264 raise errors.OpExecError("Can't activate the instance's disks")
8266 self.feedback_fn("* starting the instance on the target node %s" %
8268 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8270 msg = result.fail_msg
8272 _ShutdownInstanceDisks(self.lu, instance)
8273 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8274 (instance.name, target_node, msg))
8276 def Exec(self, feedback_fn):
8277 """Perform the migration.
8280 self.feedback_fn = feedback_fn
8281 self.source_node = self.instance.primary_node
8283 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8284 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8285 self.target_node = self.instance.secondary_nodes[0]
8286 # Otherwise self.target_node has been populated either
8287 # directly, or through an iallocator.
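# Both nodes take part in the disk and migration RPCs below; nodes_ip maps
# each of the two node names to its secondary IP, the address used for the
# DRBD/migration traffic.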
8289 self.all_nodes = [self.source_node, self.target_node]
8290 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8291 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8294 feedback_fn("Failover instance %s" % self.instance.name)
8295 self._ExecFailover()
8297 feedback_fn("Migrating instance %s" % self.instance.name)
8300 return self._ExecCleanup()
8302 return self._ExecMigration()
8305 def _CreateBlockDev(lu, node, instance, device, force_create,
8307 """Create a tree of block devices on a given node.
8309 If this device type has to be created on secondaries, create it and
8312 If not, just recurse to children keeping the same 'force' value.
8314 @param lu: the lu on whose behalf we execute
8315 @param node: the node on which to create the device
8316 @type instance: L{objects.Instance}
8317 @param instance: the instance which owns the device
8318 @type device: L{objects.Disk}
8319 @param device: the device to create
8320 @type force_create: boolean
8321 @param force_create: whether to force creation of this device; this
8322 will be changed to True whenever we find a device which has
8323 the CreateOnSecondary() attribute
8324 @param info: the extra 'metadata' we should attach to the device
8325 (this will be represented as an LVM tag)
8326 @type force_open: boolean
8327 @param force_open: this parameter will be passed to the
8328 L{backend.BlockdevCreate} function where it specifies
8329 whether we run on primary or not, and it affects both
8330 the child assembly and the device's own Open() execution
8333 if device.CreateOnSecondary():
8337 for child in device.children:
8338 _CreateBlockDev(lu, node, instance, child, force_create,
8341 if not force_create:
8344 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8347 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8348 """Create a single block device on a given node.
8350 This will not recurse over children of the device, so they must be
8353 @param lu: the lu on whose behalf we execute
8354 @param node: the node on which to create the device
8355 @type instance: L{objects.Instance}
8356 @param instance: the instance which owns the device
8357 @type device: L{objects.Disk}
8358 @param device: the device to create
8359 @param info: the extra 'metadata' we should attach to the device
8360 (this will be represented as an LVM tag)
8361 @type force_open: boolean
8362 @param force_open: this parameter will be passed to the
8363 L{backend.BlockdevCreate} function where it specifies
8364 whether we run on primary or not, and it affects both
8365 the child assembly and the device's own Open() execution
8368 lu.cfg.SetDiskID(device, node)
8369 result = lu.rpc.call_blockdev_create(node, device, device.size,
8370 instance.name, force_open, info)
8371 result.Raise("Can't create block device %s on"
8372 " node %s for instance %s" % (device, node, instance.name))
8373 if device.physical_id is None:
8374 device.physical_id = result.payload
8377 def _GenerateUniqueNames(lu, exts):
8378 """Generate a suitable LV name.
8380 This will generate a logical volume name for the given instance.
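For example (illustrative), the extension ".disk0" produces a name of the form
"<unique-id>.disk0", where the unique part comes from the configuration's ID
generator.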
8385 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8386 results.append("%s%s" % (new_id, val))
8390 def _ComputeLDParams(disk_template, disk_params):
8391 """Computes Logical Disk parameters from Disk Template parameters.
8393 @type disk_template: string
8394 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8395 @type disk_params: dict
8396 @param disk_params: disk template parameters; dict(template_name -> parameters)
8398 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8399 contains the LD parameters of the node. The tree is flattened in-order.
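For example, for DT_DRBD8 the returned list is [drbd_params, data_params,
meta_params] (the DRBD device plus its two LV children), while DT_PLAIN
yields a single-element list.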
8402 if disk_template not in constants.DISK_TEMPLATES:
8403 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8406 dt_params = disk_params[disk_template]
8407 if disk_template == constants.DT_DRBD8:
8409 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8410 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8411 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8412 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8413 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8414 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8415 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8416 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8417 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8418 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8419 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8420 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8424 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8427 result.append(drbd_params)
8431 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8434 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8436 result.append(data_params)
8440 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8443 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8445 result.append(meta_params)
8447 elif (disk_template == constants.DT_FILE or
8448 disk_template == constants.DT_SHARED_FILE):
8449 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8451 elif disk_template == constants.DT_PLAIN:
8453 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8456 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8458 result.append(params)
8460 elif disk_template == constants.DT_BLOCK:
8461 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8466 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8467 iv_name, p_minor, s_minor, drbd_params, data_params,
8469 """Generate a drbd8 device complete with its children.
8472 assert len(vgnames) == len(names) == 2
8473 port = lu.cfg.AllocatePort()
8474 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8476 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8477 logical_id=(vgnames[0], names[0]),
8479 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8480 logical_id=(vgnames[1], names[1]),
8482 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8483 logical_id=(primary, secondary, port,
8486 children=[dev_data, dev_meta],
8487 iv_name=iv_name, params=drbd_params)
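# The resulting device is an LD_DRBD8 disk whose children are the data and
# metadata LVs built above; the whole branch is handed back as a single
# Disk object.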
8491 def _GenerateDiskTemplate(lu, template_name,
8492 instance_name, primary_node,
8493 secondary_nodes, disk_info,
8494 file_storage_dir, file_driver,
8495 base_index, feedback_fn, disk_params):
8496 """Generate the entire disk layout for a given template type.
8499 #TODO: compute space requirements
8501 vgname = lu.cfg.GetVGName()
8502 disk_count = len(disk_info)
8504 ld_params = _ComputeLDParams(template_name, disk_params)
8505 if template_name == constants.DT_DISKLESS:
8507 elif template_name == constants.DT_PLAIN:
8508 if len(secondary_nodes) != 0:
8509 raise errors.ProgrammerError("Wrong template configuration")
8511 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8512 for i in range(disk_count)])
8513 for idx, disk in enumerate(disk_info):
8514 disk_index = idx + base_index
8515 vg = disk.get(constants.IDISK_VG, vgname)
8516 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8517 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8518 size=disk[constants.IDISK_SIZE],
8519 logical_id=(vg, names[idx]),
8520 iv_name="disk/%d" % disk_index,
8521 mode=disk[constants.IDISK_MODE],
8522 params=ld_params[0])
8523 disks.append(disk_dev)
8524 elif template_name == constants.DT_DRBD8:
8525 drbd_params, data_params, meta_params = ld_params
8526 if len(secondary_nodes) != 1:
8527 raise errors.ProgrammerError("Wrong template configuration")
8528 remote_node = secondary_nodes[0]
8529 minors = lu.cfg.AllocateDRBDMinor(
8530 [primary_node, remote_node] * len(disk_info), instance_name)
8533 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8534 for i in range(disk_count)]):
8535 names.append(lv_prefix + "_data")
8536 names.append(lv_prefix + "_meta")
8537 for idx, disk in enumerate(disk_info):
8538 disk_index = idx + base_index
8539 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8540 data_vg = disk.get(constants.IDISK_VG, vgname)
8541 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8542 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8543 disk[constants.IDISK_SIZE],
8545 names[idx * 2:idx * 2 + 2],
8546 "disk/%d" % disk_index,
8547 minors[idx * 2], minors[idx * 2 + 1],
8548 drbd_params, data_params, meta_params)
8549 disk_dev.mode = disk[constants.IDISK_MODE]
8550 disks.append(disk_dev)
8551 elif template_name == constants.DT_FILE:
8552 if len(secondary_nodes) != 0:
8553 raise errors.ProgrammerError("Wrong template configuration")
8555 opcodes.RequireFileStorage()
8557 for idx, disk in enumerate(disk_info):
8558 disk_index = idx + base_index
8559 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8560 size=disk[constants.IDISK_SIZE],
8561 iv_name="disk/%d" % disk_index,
8562 logical_id=(file_driver,
8563 "%s/disk%d" % (file_storage_dir,
8565 mode=disk[constants.IDISK_MODE],
8566 params=ld_params[0])
8567 disks.append(disk_dev)
8568 elif template_name == constants.DT_SHARED_FILE:
8569 if len(secondary_nodes) != 0:
8570 raise errors.ProgrammerError("Wrong template configuration")
8572 opcodes.RequireSharedFileStorage()
8574 for idx, disk in enumerate(disk_info):
8575 disk_index = idx + base_index
8576 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8577 size=disk[constants.IDISK_SIZE],
8578 iv_name="disk/%d" % disk_index,
8579 logical_id=(file_driver,
8580 "%s/disk%d" % (file_storage_dir,
8582 mode=disk[constants.IDISK_MODE],
8583 params=ld_params[0])
8584 disks.append(disk_dev)
8585 elif template_name == constants.DT_BLOCK:
8586 if len(secondary_nodes) != 0:
8587 raise errors.ProgrammerError("Wrong template configuration")
8589 for idx, disk in enumerate(disk_info):
8590 disk_index = idx + base_index
8591 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8592 size=disk[constants.IDISK_SIZE],
8593 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8594 disk[constants.IDISK_ADOPT]),
8595 iv_name="disk/%d" % disk_index,
8596 mode=disk[constants.IDISK_MODE],
8597 params=ld_params[0])
8598 disks.append(disk_dev)
8601 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8605 def _GetInstanceInfoText(instance):
8606 """Compute that text that should be added to the disk's metadata.
8609 return "originstname+%s" % instance.name
8612 def _CalcEta(time_taken, written, total_size):
8613 """Calculates the ETA based on size written and total size.
8615 @param time_taken: The time taken so far
8616 @param written: amount written so far
8617 @param total_size: The total size of data to be written
8618 @return: The remaining time in seconds
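Example: with 100 MiB written out of 500 MiB in 20 seconds, the average is
0.2 s/MiB, so the estimate is (500 - 100) * 0.2 = 80 seconds remaining.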
8621 avg_time = time_taken / float(written)
8622 return (total_size - written) * avg_time
8625 def _WipeDisks(lu, instance):
8626 """Wipes instance disks.
8628 @type lu: L{LogicalUnit}
8629 @param lu: the logical unit on whose behalf we execute
8630 @type instance: L{objects.Instance}
8631 @param instance: the instance whose disks we should wipe
8632 @return: the success of the wipe
8635 node = instance.primary_node
8637 for device in instance.disks:
8638 lu.cfg.SetDiskID(device, node)
8640 logging.info("Pause sync of instance %s disks", instance.name)
8641 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8643 for idx, success in enumerate(result.payload):
8645 logging.warn("pause-sync of instance %s for disk %d failed",
8649 for idx, device in enumerate(instance.disks):
8650 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8651 # MAX_WIPE_CHUNK at max
8652 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8653 constants.MIN_WIPE_CHUNK_PERCENT)
8654 # we _must_ make this an int, otherwise rounding errors will
8656 wipe_chunk_size = int(wipe_chunk_size)
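# Illustration (assuming, e.g., a MIN_WIPE_CHUNK_PERCENT of 10 and a
# MAX_WIPE_CHUNK of 2048 MB): a 10240 MB disk is wiped in 1024 MB chunks,
# while a 102400 MB disk would be capped at 2048 MB chunks.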
8658 lu.LogInfo("* Wiping disk %d", idx)
8659 logging.info("Wiping disk %d for instance %s, node %s using"
8660 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8665 start_time = time.time()
8667 while offset < size:
8668 wipe_size = min(wipe_chunk_size, size - offset)
8669 logging.debug("Wiping disk %d, offset %s, chunk %s",
8670 idx, offset, wipe_size)
8671 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8672 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8673 (idx, offset, wipe_size))
8676 if now - last_output >= 60:
8677 eta = _CalcEta(now - start_time, offset, size)
8678 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8679 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8682 logging.info("Resume sync of instance %s disks", instance.name)
8684 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8686 for idx, success in enumerate(result.payload):
8688 lu.LogWarning("Resume sync of disk %d failed, please have a"
8689 " look at the status and troubleshoot the issue", idx)
8690 logging.warn("resume-sync of instance %s for disk %d failed",
8694 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8695 """Create all disks for an instance.
8697 This abstracts away some work from AddInstance.
8699 @type lu: L{LogicalUnit}
8700 @param lu: the logical unit on whose behalf we execute
8701 @type instance: L{objects.Instance}
8702 @param instance: the instance whose disks we should create
8704 @param to_skip: list of indices to skip
8705 @type target_node: string
8706 @param target_node: if passed, overrides the target node for creation
8708 @return: the success of the creation
8711 info = _GetInstanceInfoText(instance)
8712 if target_node is None:
8713 pnode = instance.primary_node
8714 all_nodes = instance.all_nodes
8719 if instance.disk_template in constants.DTS_FILEBASED:
8720 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8721 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8723 result.Raise("Failed to create directory '%s' on"
8724 " node %s" % (file_storage_dir, pnode))
8726 # Note: this needs to be kept in sync with adding of disks in
8727 # LUInstanceSetParams
8728 for idx, device in enumerate(instance.disks):
8729 if to_skip and idx in to_skip:
8731 logging.info("Creating volume %s for instance %s",
8732 device.iv_name, instance.name)
8734 for node in all_nodes:
8735 f_create = node == pnode
8736 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8739 def _RemoveDisks(lu, instance, target_node=None):
8740 """Remove all disks for an instance.
8742 This abstracts away some work from `AddInstance()` and
8743 `RemoveInstance()`. Note that in case some of the devices couldn't
8744 be removed, the removal will continue with the other ones (compare
8745 with `_CreateDisks()`).
8747 @type lu: L{LogicalUnit}
8748 @param lu: the logical unit on whose behalf we execute
8749 @type instance: L{objects.Instance}
8750 @param instance: the instance whose disks we should remove
8751 @type target_node: string
8752 @param target_node: used to override the node on which to remove the disks
8754 @return: the success of the removal
8757 logging.info("Removing block devices for instance %s", instance.name)
8760 for device in instance.disks:
8762 edata = [(target_node, device)]
8764 edata = device.ComputeNodeTree(instance.primary_node)
8765 for node, disk in edata:
8766 lu.cfg.SetDiskID(disk, node)
8767 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8769 lu.LogWarning("Could not remove block device %s on node %s,"
8770 " continuing anyway: %s", device.iv_name, node, msg)
8773 # if this is a DRBD disk, return its port to the pool
8774 if device.dev_type in constants.LDS_DRBD:
8775 tcp_port = device.logical_id[2]
8776 lu.cfg.AddTcpUdpPort(tcp_port)
8778 if instance.disk_template == constants.DT_FILE:
8779 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8783 tgt = instance.primary_node
8784 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8786 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8787 file_storage_dir, instance.primary_node, result.fail_msg)
8793 def _ComputeDiskSizePerVG(disk_template, disks):
8794 """Compute disk size requirements in the volume group
8797 def _compute(disks, payload):
8798 """Universal algorithm.
8803 vgs[disk[constants.IDISK_VG]] = \
8804 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8808 # Required free disk space as a function of disk and swap space
8810 constants.DT_DISKLESS: {},
8811 constants.DT_PLAIN: _compute(disks, 0),
8812 # 128 MB are added for drbd metadata for each disk
8813 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8814 constants.DT_FILE: {},
8815 constants.DT_SHARED_FILE: {},
8818 if disk_template not in req_size_dict:
8819 raise errors.ProgrammerError("Disk template '%s' size requirement"
8820 " is unknown" % disk_template)
8822 return req_size_dict[disk_template]
8825 def _ComputeDiskSize(disk_template, disks):
8826 """Compute disk size requirements in the volume group
8829 # Required free disk space as a function of disk and swap space
8831 constants.DT_DISKLESS: None,
8832 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8833 # 128 MB are added for drbd metadata for each disk
8835 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8836 constants.DT_FILE: None,
8837 constants.DT_SHARED_FILE: 0,
8838 constants.DT_BLOCK: 0,
8841 if disk_template not in req_size_dict:
8842 raise errors.ProgrammerError("Disk template '%s' size requirement"
8843 " is unknown" % disk_template)
8845 return req_size_dict[disk_template]
8848 def _FilterVmNodes(lu, nodenames):
8849 """Filters out non-vm_capable nodes from a list.
8851 @type lu: L{LogicalUnit}
8852 @param lu: the logical unit for which we check
8853 @type nodenames: list
8854 @param nodenames: the list of nodes on which we should check
8856 @return: the list of vm-capable nodes
8859 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8860 return [name for name in nodenames if name not in vm_nodes]
8863 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8864 """Hypervisor parameter validation.
8866 This function abstracts the hypervisor parameter validation to be
8867 used in both instance create and instance modify.
8869 @type lu: L{LogicalUnit}
8870 @param lu: the logical unit for which we check
8871 @type nodenames: list
8872 @param nodenames: the list of nodes on which we should check
8873 @type hvname: string
8874 @param hvname: the name of the hypervisor we should use
8875 @type hvparams: dict
8876 @param hvparams: the parameters which we need to check
8877 @raise errors.OpPrereqError: if the parameters are not valid
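A typical (illustrative) call is _CheckHVParams(self, nodenames,
self.op.hypervisor, self.op.hvparams); the helper merges the given parameters
with the cluster-level defaults before asking each node's hypervisor to
validate them.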
8880 nodenames = _FilterVmNodes(lu, nodenames)
8882 cluster = lu.cfg.GetClusterInfo()
8883 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8885 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8886 for node in nodenames:
8890 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8893 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8894 """OS parameters validation.
8896 @type lu: L{LogicalUnit}
8897 @param lu: the logical unit for which we check
8898 @type required: boolean
8899 @param required: whether the validation should fail if the OS is not
8901 @type nodenames: list
8902 @param nodenames: the list of nodes on which we should check
8903 @type osname: string
8904 @param osname: the name of the OS we should use
8905 @type osparams: dict
8906 @param osparams: the parameters which we need to check
8907 @raise errors.OpPrereqError: if the parameters are not valid
8910 nodenames = _FilterVmNodes(lu, nodenames)
8911 result = lu.rpc.call_os_validate(nodenames, required, osname,
8912 [constants.OS_VALIDATE_PARAMETERS],
8914 for node, nres in result.items():
8915 # we don't check for offline cases since this should be run only
8916 # against the master node and/or an instance's nodes
8917 nres.Raise("OS Parameters validation failed on node %s" % node)
8918 if not nres.payload:
8919 lu.LogInfo("OS %s not found on node %s, validation skipped",
8923 class LUInstanceCreate(LogicalUnit):
8924 """Create an instance.
8927 HPATH = "instance-add"
8928 HTYPE = constants.HTYPE_INSTANCE
8931 def CheckArguments(self):
8935 # do not require name_check to ease forward/backward compatibility
8937 if self.op.no_install and self.op.start:
8938 self.LogInfo("No-installation mode selected, disabling startup")
8939 self.op.start = False
8940 # validate/normalize the instance name
8941 self.op.instance_name = \
8942 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8944 if self.op.ip_check and not self.op.name_check:
8945 # TODO: make the ip check more flexible and not depend on the name check
8946 raise errors.OpPrereqError("Cannot do IP address check without a name"
8947 " check", errors.ECODE_INVAL)
8949 # check nics' parameter names
8950 for nic in self.op.nics:
8951 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8953 # check disks' parameter names and consistent adopt/no-adopt strategy
8954 has_adopt = has_no_adopt = False
8955 for disk in self.op.disks:
8956 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8957 if constants.IDISK_ADOPT in disk:
8961 if has_adopt and has_no_adopt:
8962 raise errors.OpPrereqError("Either all disks are adopted or none is",
8965 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8966 raise errors.OpPrereqError("Disk adoption is not supported for the"
8967 " '%s' disk template" %
8968 self.op.disk_template,
8970 if self.op.iallocator is not None:
8971 raise errors.OpPrereqError("Disk adoption not allowed with an"
8972 " iallocator script", errors.ECODE_INVAL)
8973 if self.op.mode == constants.INSTANCE_IMPORT:
8974 raise errors.OpPrereqError("Disk adoption not allowed for"
8975 " instance import", errors.ECODE_INVAL)
8977 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8978 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8979 " but no 'adopt' parameter given" %
8980 self.op.disk_template,
8983 self.adopt_disks = has_adopt
8985 # instance name verification
8986 if self.op.name_check:
8987 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8988 self.op.instance_name = self.hostname1.name
8989 # used in CheckPrereq for ip ping check
8990 self.check_ip = self.hostname1.ip
8992 self.check_ip = None
8994 # file storage checks
8995 if (self.op.file_driver and
8996 self.op.file_driver not in constants.FILE_DRIVER):
8997 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8998 self.op.file_driver, errors.ECODE_INVAL)
9000 if self.op.disk_template == constants.DT_FILE:
9001 opcodes.RequireFileStorage()
9002 elif self.op.disk_template == constants.DT_SHARED_FILE:
9003 opcodes.RequireSharedFileStorage()
9005 ### Node/iallocator related checks
9006 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9008 if self.op.pnode is not None:
9009 if self.op.disk_template in constants.DTS_INT_MIRROR:
9010 if self.op.snode is None:
9011 raise errors.OpPrereqError("The networked disk templates need"
9012 " a mirror node", errors.ECODE_INVAL)
9014 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9016 self.op.snode = None
9018 self._cds = _GetClusterDomainSecret()
9020 if self.op.mode == constants.INSTANCE_IMPORT:
9021 # On import force_variant must be True, because if we forced it at
9022 # initial install, our only chance when importing it back is that it
9024 self.op.force_variant = True
9026 if self.op.no_install:
9027 self.LogInfo("No-installation mode has no effect during import")
9029 elif self.op.mode == constants.INSTANCE_CREATE:
9030 if self.op.os_type is None:
9031 raise errors.OpPrereqError("No guest OS specified",
9033 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9034 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9035 " installation" % self.op.os_type,
9037 if self.op.disk_template is None:
9038 raise errors.OpPrereqError("No disk template specified",
9041 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9042 # Check handshake to ensure both clusters have the same domain secret
9043 src_handshake = self.op.source_handshake
9044 if not src_handshake:
9045 raise errors.OpPrereqError("Missing source handshake",
9048 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9051 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9054 # Load and check source CA
9055 self.source_x509_ca_pem = self.op.source_x509_ca
9056 if not self.source_x509_ca_pem:
9057 raise errors.OpPrereqError("Missing source X509 CA",
9061 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9063 except OpenSSL.crypto.Error, err:
9064 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9065 (err, ), errors.ECODE_INVAL)
9067 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9068 if errcode is not None:
9069 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9072 self.source_x509_ca = cert
9074 src_instance_name = self.op.source_instance_name
9075 if not src_instance_name:
9076 raise errors.OpPrereqError("Missing source instance name",
9079 self.source_instance_name = \
9080 netutils.GetHostname(name=src_instance_name).name
9083 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9084 self.op.mode, errors.ECODE_INVAL)
9086 def ExpandNames(self):
9087 """ExpandNames for CreateInstance.
9089 Figure out the right locks for instance creation.
9092 self.needed_locks = {}
9094 instance_name = self.op.instance_name
9095 # this is just a preventive check, but someone might still add this
9096 # instance in the meantime, and creation will fail at lock-add time
9097 if instance_name in self.cfg.GetInstanceList():
9098 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9099 instance_name, errors.ECODE_EXISTS)
9101 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9103 if self.op.iallocator:
9104 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9105 # specifying a group on instance creation and then selecting nodes from
9107 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9108 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9110 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9111 nodelist = [self.op.pnode]
9112 if self.op.snode is not None:
9113 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9114 nodelist.append(self.op.snode)
9115 self.needed_locks[locking.LEVEL_NODE] = nodelist
9116 # Lock resources of instance's primary and secondary nodes (copy to
9117 # prevent accidental modification)
9118 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9120 # in case of import lock the source node too
9121 if self.op.mode == constants.INSTANCE_IMPORT:
9122 src_node = self.op.src_node
9123 src_path = self.op.src_path
9125 if src_path is None:
9126 self.op.src_path = src_path = self.op.instance_name
9128 if src_node is None:
9129 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9130 self.op.src_node = None
9131 if os.path.isabs(src_path):
9132 raise errors.OpPrereqError("Importing an instance from a path"
9133 " requires a source node option",
9136 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9137 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9138 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9139 if not os.path.isabs(src_path):
9140 self.op.src_path = src_path = \
9141 utils.PathJoin(constants.EXPORT_DIR, src_path)
9143 def _RunAllocator(self):
9144 """Run the allocator based on input opcode.
9147 nics = [n.ToDict() for n in self.nics]
9148 ial = IAllocator(self.cfg, self.rpc,
9149 mode=constants.IALLOCATOR_MODE_ALLOC,
9150 name=self.op.instance_name,
9151 disk_template=self.op.disk_template,
9154 vcpus=self.be_full[constants.BE_VCPUS],
9155 memory=self.be_full[constants.BE_MAXMEM],
9158 hypervisor=self.op.hypervisor,
9161 ial.Run(self.op.iallocator)
9164 raise errors.OpPrereqError("Can't compute nodes using"
9165 " iallocator '%s': %s" %
9166 (self.op.iallocator, ial.info),
9168 if len(ial.result) != ial.required_nodes:
9169 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9170 " of nodes (%s), required %s" %
9171 (self.op.iallocator, len(ial.result),
9172 ial.required_nodes), errors.ECODE_FAULT)
9173 self.op.pnode = ial.result[0]
9174 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9175 self.op.instance_name, self.op.iallocator,
9176 utils.CommaJoin(ial.result))
9177 if ial.required_nodes == 2:
9178 self.op.snode = ial.result[1]
9180 def BuildHooksEnv(self):
9183 This runs on master, primary and secondary nodes of the instance.
9187 "ADD_MODE": self.op.mode,
9189 if self.op.mode == constants.INSTANCE_IMPORT:
9190 env["SRC_NODE"] = self.op.src_node
9191 env["SRC_PATH"] = self.op.src_path
9192 env["SRC_IMAGES"] = self.src_images
9194 env.update(_BuildInstanceHookEnv(
9195 name=self.op.instance_name,
9196 primary_node=self.op.pnode,
9197 secondary_nodes=self.secondaries,
9198 status=self.op.start,
9199 os_type=self.op.os_type,
9200 minmem=self.be_full[constants.BE_MINMEM],
9201 maxmem=self.be_full[constants.BE_MAXMEM],
9202 vcpus=self.be_full[constants.BE_VCPUS],
9203 nics=_NICListToTuple(self, self.nics),
9204 disk_template=self.op.disk_template,
9205 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9206 for d in self.disks],
9209 hypervisor_name=self.op.hypervisor,
9215 def BuildHooksNodes(self):
9216 """Build hooks nodes.
9219 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9222 def _ReadExportInfo(self):
9223 """Reads the export information from disk.
9225 It will override the opcode source node and path with the actual
9226 information, if these two were not specified before.
9228 @return: the export information
9231 assert self.op.mode == constants.INSTANCE_IMPORT
9233 src_node = self.op.src_node
9234 src_path = self.op.src_path
9236 if src_node is None:
9237 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9238 exp_list = self.rpc.call_export_list(locked_nodes)
9240 for node in exp_list:
9241 if exp_list[node].fail_msg:
9243 if src_path in exp_list[node].payload:
9245 self.op.src_node = src_node = node
9246 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9250 raise errors.OpPrereqError("No export found for relative path %s" %
9251 src_path, errors.ECODE_INVAL)
9253 _CheckNodeOnline(self, src_node)
9254 result = self.rpc.call_export_info(src_node, src_path)
9255 result.Raise("No export or invalid export found in dir %s" % src_path)
9257 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9258 if not export_info.has_section(constants.INISECT_EXP):
9259 raise errors.ProgrammerError("Corrupted export config",
9260 errors.ECODE_ENVIRON)
9262 ei_version = export_info.get(constants.INISECT_EXP, "version")
9263 if (int(ei_version) != constants.EXPORT_VERSION):
9264 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9265 (ei_version, constants.EXPORT_VERSION),
9266 errors.ECODE_ENVIRON)
9269 def _ReadExportParams(self, einfo):
9270 """Use export parameters as defaults.
9272 In case the opcode doesn't specify (as in override) some instance
9273 parameters, then try to use them from the export information, if
9277 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9279 if self.op.disk_template is None:
9280 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9281 self.op.disk_template = einfo.get(constants.INISECT_INS,
9283 if self.op.disk_template not in constants.DISK_TEMPLATES:
9284 raise errors.OpPrereqError("Disk template specified in configuration"
9285 " file is not one of the allowed values:"
9286 " %s" % " ".join(constants.DISK_TEMPLATES))
9288 raise errors.OpPrereqError("No disk template specified and the export"
9289 " is missing the disk_template information",
9292 if not self.op.disks:
9294 # TODO: import the disk iv_name too
9295 for idx in range(constants.MAX_DISKS):
9296 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9297 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9298 disks.append({constants.IDISK_SIZE: disk_sz})
9299 self.op.disks = disks
9300 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9301 raise errors.OpPrereqError("No disk info specified and the export"
9302 " is missing the disk information",
9305 if not self.op.nics:
9307 for idx in range(constants.MAX_NICS):
9308 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9310 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9311 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9318 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9319 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9321 if (self.op.hypervisor is None and
9322 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9323 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9325 if einfo.has_section(constants.INISECT_HYP):
9326 # use the export parameters but do not override the ones
9327 # specified by the user
9328 for name, value in einfo.items(constants.INISECT_HYP):
9329 if name not in self.op.hvparams:
9330 self.op.hvparams[name] = value
9332 if einfo.has_section(constants.INISECT_BEP):
9333 # use the parameters, without overriding
9334 for name, value in einfo.items(constants.INISECT_BEP):
9335 if name not in self.op.beparams:
9336 self.op.beparams[name] = value
9337 # Compatibility for the old "memory" be param
9338 if name == constants.BE_MEMORY:
9339 if constants.BE_MAXMEM not in self.op.beparams:
9340 self.op.beparams[constants.BE_MAXMEM] = value
9341 if constants.BE_MINMEM not in self.op.beparams:
9342 self.op.beparams[constants.BE_MINMEM] = value
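          # For example, an old export that only carries "memory = 512" ends
          # up with maxmem = minmem = 512 here, unless the opcode already set
          # those values explicitly.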
9344 # try to read the parameters old style, from the main section
9345 for name in constants.BES_PARAMETERS:
9346 if (name not in self.op.beparams and
9347 einfo.has_option(constants.INISECT_INS, name)):
9348 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9350 if einfo.has_section(constants.INISECT_OSP):
9351 # use the parameters, without overriding
9352 for name, value in einfo.items(constants.INISECT_OSP):
9353 if name not in self.op.osparams:
9354 self.op.osparams[name] = value
9356 def _RevertToDefaults(self, cluster):
9357 """Revert the instance parameters to the default values.
9361 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9362 for name in self.op.hvparams.keys():
9363 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9364 del self.op.hvparams[name]
9366 be_defs = cluster.SimpleFillBE({})
9367 for name in self.op.beparams.keys():
9368 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9369 del self.op.beparams[name]
9371 nic_defs = cluster.SimpleFillNIC({})
9372 for nic in self.op.nics:
9373 for name in constants.NICS_PARAMETERS:
9374 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9377 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9378 for name in self.op.osparams.keys():
9379 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9380 del self.op.osparams[name]
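    # After this only the parameters that actually differ from the current
    # cluster defaults remain in the opcode; everything else is dropped so the
    # new instance keeps following future changes to the cluster-level
    # defaults (this is what op.identify_defaults is for, see CheckPrereq).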
9382 def _CalculateFileStorageDir(self):
9383 """Calculate final instance file storage dir.
9386 # file storage dir calculation/check
9387 self.instance_file_storage_dir = None
9388 if self.op.disk_template in constants.DTS_FILEBASED:
9389 # build the full file storage dir path
9392 if self.op.disk_template == constants.DT_SHARED_FILE:
9393 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9395 get_fsd_fn = self.cfg.GetFileStorageDir
9397 cfg_storagedir = get_fsd_fn()
9398 if not cfg_storagedir:
9399 raise errors.OpPrereqError("Cluster file storage dir not defined")
9400 joinargs.append(cfg_storagedir)
9402 if self.op.file_storage_dir is not None:
9403 joinargs.append(self.op.file_storage_dir)
9405 joinargs.append(self.op.instance_name)
9407 # pylint: disable=W0142
9408 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
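    # The resulting path is
    #   <cluster storage dir>[/<op.file_storage_dir>]/<instance name>
    # e.g. something like /srv/ganeti/file-storage/web/inst1.example.com
    # (the middle component is only present when given in the opcode).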
9410 def CheckPrereq(self): # pylint: disable=R0914
9411 """Check prerequisites.
9414 self._CalculateFileStorageDir()
9416 if self.op.mode == constants.INSTANCE_IMPORT:
9417 export_info = self._ReadExportInfo()
9418 self._ReadExportParams(export_info)
9420 if (not self.cfg.GetVGName() and
9421 self.op.disk_template not in constants.DTS_NOT_LVM):
9422 raise errors.OpPrereqError("Cluster does not support lvm-based"
9423 " instances", errors.ECODE_STATE)
9425 if (self.op.hypervisor is None or
9426 self.op.hypervisor == constants.VALUE_AUTO):
9427 self.op.hypervisor = self.cfg.GetHypervisorType()
9429 cluster = self.cfg.GetClusterInfo()
9430 enabled_hvs = cluster.enabled_hypervisors
9431 if self.op.hypervisor not in enabled_hvs:
9432 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9433 " cluster (%s)" % (self.op.hypervisor,
9434 ",".join(enabled_hvs)),
9437 # Check tag validity
9438 for tag in self.op.tags:
9439 objects.TaggableObject.ValidateTag(tag)
9441 # check hypervisor parameter syntax (locally)
9442 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9443 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9445 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9446 hv_type.CheckParameterSyntax(filled_hvp)
9447 self.hv_full = filled_hvp
9448 # check that we don't specify global parameters on an instance
9449 _CheckGlobalHvParams(self.op.hvparams)
9451 # fill and remember the beparams dict
9452 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9453 for param, value in self.op.beparams.iteritems():
9454 if value == constants.VALUE_AUTO:
9455 self.op.beparams[param] = default_beparams[param]
9456 objects.UpgradeBeParams(self.op.beparams)
9457 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9458 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9460 # build os parameters
9461 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9463     # now that hvp/bep are in final format, let's reset to defaults, if requested
9465 if self.op.identify_defaults:
9466 self._RevertToDefaults(cluster)
9470 for idx, nic in enumerate(self.op.nics):
9471 nic_mode_req = nic.get(constants.INIC_MODE, None)
9472 nic_mode = nic_mode_req
9473 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9474 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9476 # in routed mode, for the first nic, the default ip is 'auto'
9477 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9478 default_ip_mode = constants.VALUE_AUTO
9480 default_ip_mode = constants.VALUE_NONE
9482 # ip validity checks
9483 ip = nic.get(constants.INIC_IP, default_ip_mode)
9484 if ip is None or ip.lower() == constants.VALUE_NONE:
9486 elif ip.lower() == constants.VALUE_AUTO:
9487 if not self.op.name_check:
9488 raise errors.OpPrereqError("IP address set to auto but name checks"
9489 " have been skipped",
9491 nic_ip = self.hostname1.ip
9493 if not netutils.IPAddress.IsValid(ip):
9494 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9498 # TODO: check the ip address for uniqueness
9499 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9500 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9503 # MAC address verification
9504 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9505 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9506 mac = utils.NormalizeAndValidateMac(mac)
9509 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9510 except errors.ReservationError:
9511 raise errors.OpPrereqError("MAC address %s already in use"
9512 " in cluster" % mac,
9513 errors.ECODE_NOTUNIQUE)
9515 # Build nic parameters
9516 link = nic.get(constants.INIC_LINK, None)
9517 if link == constants.VALUE_AUTO:
9518 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9521 nicparams[constants.NIC_MODE] = nic_mode
9523 nicparams[constants.NIC_LINK] = link
9525 check_params = cluster.SimpleFillNIC(nicparams)
9526 objects.NIC.CheckParameterSyntax(check_params)
9527 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
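      # At this point every NIC has concrete mode/link values in nicparams and
      # either a validated IP, the name-resolved IP or None; "auto"/"generate"
      # MACs are still placeholders and get replaced further down, after the
      # import-specific handling.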
9529 # disk checks/pre-build
9530 default_vg = self.cfg.GetVGName()
9532 for disk in self.op.disks:
9533 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9534 if mode not in constants.DISK_ACCESS_SET:
9535 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9536 mode, errors.ECODE_INVAL)
9537 size = disk.get(constants.IDISK_SIZE, None)
9539 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9542 except (TypeError, ValueError):
9543 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9546 data_vg = disk.get(constants.IDISK_VG, default_vg)
9548 constants.IDISK_SIZE: size,
9549 constants.IDISK_MODE: mode,
9550 constants.IDISK_VG: data_vg,
9552 if constants.IDISK_METAVG in disk:
9553 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9554 if constants.IDISK_ADOPT in disk:
9555 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9556 self.disks.append(new_disk)
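      # Each normalized entry now looks roughly like (values are an example):
      #   {IDISK_SIZE: 10240,    # MiB
      #    IDISK_MODE: "rw",
      #    IDISK_VG: "xenvg",
      #    IDISK_METAVG: ...,    # optional
      #    IDISK_ADOPT: ...}     # optional, only for adoption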
9558 if self.op.mode == constants.INSTANCE_IMPORT:
9560 for idx in range(len(self.disks)):
9561 option = "disk%d_dump" % idx
9562 if export_info.has_option(constants.INISECT_INS, option):
9563 # FIXME: are the old os-es, disk sizes, etc. useful?
9564 export_name = export_info.get(constants.INISECT_INS, option)
9565 image = utils.PathJoin(self.op.src_path, export_name)
9566 disk_images.append(image)
9568 disk_images.append(False)
9570 self.src_images = disk_images
9572 old_name = export_info.get(constants.INISECT_INS, "name")
9573 if self.op.instance_name == old_name:
9574 for idx, nic in enumerate(self.nics):
9575 if nic.mac == constants.VALUE_AUTO:
9576 nic_mac_ini = "nic%d_mac" % idx
9577 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9579 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9581 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9582 if self.op.ip_check:
9583 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9584 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9585 (self.check_ip, self.op.instance_name),
9586 errors.ECODE_NOTUNIQUE)
9588 #### mac address generation
9589 # By generating here the mac address both the allocator and the hooks get
9590 # the real final mac address rather than the 'auto' or 'generate' value.
9591 # There is a race condition between the generation and the instance object
9592 # creation, which means that we know the mac is valid now, but we're not
9593 # sure it will be when we actually add the instance. If things go bad
9594 # adding the instance will abort because of a duplicate mac, and the
9595 # creation job will fail.
9596 for nic in self.nics:
9597 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9598 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9602 if self.op.iallocator is not None:
9603 self._RunAllocator()
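    # Once the allocator has (possibly) chosen the nodes, only the explicitly
    # named nodes (primary, secondary and, for imports, the source node) need
    # to stay locked; everything else was only held so the allocator could
    # consider it, so it is released below.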
9605 # Release all unneeded node locks
9606 _ReleaseLocks(self, locking.LEVEL_NODE,
9607 keep=filter(None, [self.op.pnode, self.op.snode,
9609 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9610 keep=filter(None, [self.op.pnode, self.op.snode,
9613 #### node related checks
9615 # check primary node
9616 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9617 assert self.pnode is not None, \
9618 "Cannot retrieve locked node %s" % self.op.pnode
9620 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9621 pnode.name, errors.ECODE_STATE)
9623 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9624 pnode.name, errors.ECODE_STATE)
9625 if not pnode.vm_capable:
9626 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9627 " '%s'" % pnode.name, errors.ECODE_STATE)
9629 self.secondaries = []
9631 # mirror node verification
9632 if self.op.disk_template in constants.DTS_INT_MIRROR:
9633 if self.op.snode == pnode.name:
9634 raise errors.OpPrereqError("The secondary node cannot be the"
9635 " primary node", errors.ECODE_INVAL)
9636 _CheckNodeOnline(self, self.op.snode)
9637 _CheckNodeNotDrained(self, self.op.snode)
9638 _CheckNodeVmCapable(self, self.op.snode)
9639 self.secondaries.append(self.op.snode)
9641 snode = self.cfg.GetNodeInfo(self.op.snode)
9642 if pnode.group != snode.group:
9643 self.LogWarning("The primary and secondary nodes are in two"
9644 " different node groups; the disk parameters"
9645 " from the first disk's node group will be"
9648 nodenames = [pnode.name] + self.secondaries
9650 # Verify instance specs
9652 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9653 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9654 constants.ISPEC_DISK_COUNT: len(self.disks),
9655 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9656 constants.ISPEC_NIC_COUNT: len(self.nics),
9659 group_info = self.cfg.GetNodeGroup(pnode.group)
9660 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9661 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9662 if not self.op.ignore_ipolicy and res:
9663 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9664 " policy: %s") % (pnode.group,
9665 utils.CommaJoin(res)),
9668 # disk parameters (not customizable at instance or node level)
9669 # just use the primary node parameters, ignoring the secondary.
9670 self.diskparams = group_info.diskparams
9672 if not self.adopt_disks:
9673 # Check lv size requirements, if not adopting
9674 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9675 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9677 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9678 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9679 disk[constants.IDISK_ADOPT])
9680 for disk in self.disks])
9681 if len(all_lvs) != len(self.disks):
9682 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9684 for lv_name in all_lvs:
9686           # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9687           # to ReserveLV use the same syntax
9688 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9689 except errors.ReservationError:
9690 raise errors.OpPrereqError("LV named %s used by another instance" %
9691 lv_name, errors.ECODE_NOTUNIQUE)
9693 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9694 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9696 node_lvs = self.rpc.call_lv_list([pnode.name],
9697 vg_names.payload.keys())[pnode.name]
9698 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9699 node_lvs = node_lvs.payload
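      # node_lvs maps "vg/lv" names to per-LV data from the node daemon; the
      # code below relies on the first field being the size in MiB and the
      # third one being an "online/in use" flag -- the exact tuple layout is
      # defined by the lv_list RPC on the node side.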
9701 delta = all_lvs.difference(node_lvs.keys())
9703 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9704 utils.CommaJoin(delta),
9706 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9708 raise errors.OpPrereqError("Online logical volumes found, cannot"
9709 " adopt: %s" % utils.CommaJoin(online_lvs),
9711       # update the disk sizes based on what was found
9712 for dsk in self.disks:
9713 dsk[constants.IDISK_SIZE] = \
9714 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9715 dsk[constants.IDISK_ADOPT])][0]))
9717 elif self.op.disk_template == constants.DT_BLOCK:
9718 # Normalize and de-duplicate device paths
9719 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9720 for disk in self.disks])
9721 if len(all_disks) != len(self.disks):
9722 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9724 baddisks = [d for d in all_disks
9725 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9727 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9728 " cannot be adopted" %
9729 (", ".join(baddisks),
9730 constants.ADOPTABLE_BLOCKDEV_ROOT),
9733 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9734 list(all_disks))[pnode.name]
9735 node_disks.Raise("Cannot get block device information from node %s" %
9737 node_disks = node_disks.payload
9738 delta = all_disks.difference(node_disks.keys())
9740 raise errors.OpPrereqError("Missing block device(s): %s" %
9741 utils.CommaJoin(delta),
9743 for dsk in self.disks:
9744 dsk[constants.IDISK_SIZE] = \
9745 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9747 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9749 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9750 # check OS parameters (remotely)
9751 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9753 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9755 # memory check on primary node
9756     # TODO(dynmem): use MINMEM for checking
9758 _CheckNodeFreeMemory(self, self.pnode.name,
9759 "creating instance %s" % self.op.instance_name,
9760 self.be_full[constants.BE_MAXMEM],
9763 self.dry_run_result = list(nodenames)
9765 def Exec(self, feedback_fn):
9766 """Create and add the instance to the cluster.
9769 instance = self.op.instance_name
9770 pnode_name = self.pnode.name
9772 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9773 self.owned_locks(locking.LEVEL_NODE)), \
9774 "Node locks differ from node resource locks"
9776 ht_kind = self.op.hypervisor
9777 if ht_kind in constants.HTS_REQ_PORT:
9778 network_port = self.cfg.AllocatePort()
9782 disks = _GenerateDiskTemplate(self,
9783 self.op.disk_template,
9784 instance, pnode_name,
9787 self.instance_file_storage_dir,
9788 self.op.file_driver,
9793 iobj = objects.Instance(name=instance, os=self.op.os_type,
9794 primary_node=pnode_name,
9795 nics=self.nics, disks=disks,
9796 disk_template=self.op.disk_template,
9797 admin_state=constants.ADMINST_DOWN,
9798 network_port=network_port,
9799 beparams=self.op.beparams,
9800 hvparams=self.op.hvparams,
9801 hypervisor=self.op.hypervisor,
9802 osparams=self.op.osparams,
9806 for tag in self.op.tags:
9809 if self.adopt_disks:
9810 if self.op.disk_template == constants.DT_PLAIN:
9811 # rename LVs to the newly-generated names; we need to construct
9812 # 'fake' LV disks with the old data, plus the new unique_id
9813 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9815 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9816 rename_to.append(t_dsk.logical_id)
9817 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9818 self.cfg.SetDiskID(t_dsk, pnode_name)
9819 result = self.rpc.call_blockdev_rename(pnode_name,
9820 zip(tmp_disks, rename_to))
9821         result.Raise("Failed to rename adopted LVs")
9823 feedback_fn("* creating instance disks...")
9825 _CreateDisks(self, iobj)
9826 except errors.OpExecError:
9827 self.LogWarning("Device creation failed, reverting...")
9829 _RemoveDisks(self, iobj)
9831 self.cfg.ReleaseDRBDMinors(instance)
9834 feedback_fn("adding instance %s to cluster config" % instance)
9836 self.cfg.AddInstance(iobj, self.proc.GetECId())
9838 # Declare that we don't want to remove the instance lock anymore, as we've
9839 # added the instance to the config
9840 del self.remove_locks[locking.LEVEL_INSTANCE]
9842 if self.op.mode == constants.INSTANCE_IMPORT:
9843 # Release unused nodes
9844 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9847 _ReleaseLocks(self, locking.LEVEL_NODE)
9850 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9851 feedback_fn("* wiping instance disks...")
9853 _WipeDisks(self, iobj)
9854 except errors.OpExecError, err:
9855 logging.exception("Wiping disks failed")
9856 self.LogWarning("Wiping instance disks failed (%s)", err)
9860 # Something is already wrong with the disks, don't do anything else
9862 elif self.op.wait_for_sync:
9863 disk_abort = not _WaitForSync(self, iobj)
9864 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9865 # make sure the disks are not degraded (still sync-ing is ok)
9866 feedback_fn("* checking mirrors status")
9867 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9872 _RemoveDisks(self, iobj)
9873 self.cfg.RemoveInstance(iobj.name)
9874 # Make sure the instance lock gets removed
9875 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9876       raise errors.OpExecError("There are some degraded disks for"
9877                                " this instance")
9879 # Release all node resource locks
9880 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9882 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9883 if self.op.mode == constants.INSTANCE_CREATE:
9884 if not self.op.no_install:
9885 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9886 not self.op.wait_for_sync)
9888 feedback_fn("* pausing disk sync to install instance OS")
9889 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9891 for idx, success in enumerate(result.payload):
9893 logging.warn("pause-sync of instance %s for disk %d failed",
9896 feedback_fn("* running the instance OS create scripts...")
9897 # FIXME: pass debug option from opcode to backend
9898           os_add_result = \
9899             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9900 self.op.debug_level)
9902 feedback_fn("* resuming disk sync")
9903 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9905 for idx, success in enumerate(result.payload):
9907 logging.warn("resume-sync of instance %s for disk %d failed",
9910 os_add_result.Raise("Could not add os for instance %s"
9911 " on node %s" % (instance, pnode_name))
9913 elif self.op.mode == constants.INSTANCE_IMPORT:
9914 feedback_fn("* running the instance OS import scripts...")
9918 for idx, image in enumerate(self.src_images):
9922 # FIXME: pass debug option from opcode to backend
9923 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9924 constants.IEIO_FILE, (image, ),
9925 constants.IEIO_SCRIPT,
9926 (iobj.disks[idx], idx),
9928 transfers.append(dt)
9930         import_result = \
9931           masterd.instance.TransferInstanceData(self, feedback_fn,
9932 self.op.src_node, pnode_name,
9933 self.pnode.secondary_ip,
9935 if not compat.all(import_result):
9936 self.LogWarning("Some disks for instance %s on node %s were not"
9937 " imported successfully" % (instance, pnode_name))
9939 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9940 feedback_fn("* preparing remote import...")
9941 # The source cluster will stop the instance before attempting to make a
9942 # connection. In some cases stopping an instance can take a long time,
9943 # hence the shutdown timeout is added to the connection timeout.
9944 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9945 self.op.source_shutdown_timeout)
9946 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9948 assert iobj.primary_node == self.pnode.name
9949         disk_results = \
9950           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9951 self.source_x509_ca,
9952 self._cds, timeouts)
9953 if not compat.all(disk_results):
9954 # TODO: Should the instance still be started, even if some disks
9955 # failed to import (valid for local imports, too)?
9956 self.LogWarning("Some disks for instance %s on node %s were not"
9957 " imported successfully" % (instance, pnode_name))
9959 # Run rename script on newly imported instance
9960 assert iobj.name == instance
9961 feedback_fn("Running rename script for %s" % instance)
9962 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9963 self.source_instance_name,
9964 self.op.debug_level)
9966 self.LogWarning("Failed to run rename script for %s on node"
9967 " %s: %s" % (instance, pnode_name, result.fail_msg))
9970 # also checked in the prereq part
9971 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9974 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9977 iobj.admin_state = constants.ADMINST_UP
9978 self.cfg.Update(iobj, feedback_fn)
9979 logging.info("Starting instance %s on node %s", instance, pnode_name)
9980 feedback_fn("* starting instance...")
9981 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9983 result.Raise("Could not start instance")
9985 return list(iobj.all_nodes)
9988 class LUInstanceConsole(NoHooksLU):
9989 """Connect to an instance's console.
9991 This is somewhat special in that it returns the command line that
9992   you need to run on the master node in order to connect to the console.
9998 def ExpandNames(self):
9999 self.share_locks = _ShareAll()
10000 self._ExpandAndLockInstance()
10002 def CheckPrereq(self):
10003 """Check prerequisites.
10005 This checks that the instance is in the cluster.
10008 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10009 assert self.instance is not None, \
10010 "Cannot retrieve locked instance %s" % self.op.instance_name
10011 _CheckNodeOnline(self, self.instance.primary_node)
10013 def Exec(self, feedback_fn):
10014 """Connect to the console of an instance
10017 instance = self.instance
10018 node = instance.primary_node
10020 node_insts = self.rpc.call_instance_list([node],
10021 [instance.hypervisor])[node]
10022 node_insts.Raise("Can't get node information from %s" % node)
10024 if instance.name not in node_insts.payload:
10025 if instance.admin_state == constants.ADMINST_UP:
10026 state = constants.INSTST_ERRORDOWN
10027 elif instance.admin_state == constants.ADMINST_DOWN:
10028 state = constants.INSTST_ADMINDOWN
10030 state = constants.INSTST_ADMINOFFLINE
10031 raise errors.OpExecError("Instance %s is not running (state %s)" %
10032 (instance.name, state))
10034 logging.debug("Connecting to console of %s on %s", instance.name, node)
10036 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10039 def _GetInstanceConsole(cluster, instance):
10040 """Returns console information for an instance.
10042 @type cluster: L{objects.Cluster}
10043 @type instance: L{objects.Instance}
10047 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10048 # beparams and hvparams are passed separately, to avoid editing the
10049 # instance and then saving the defaults in the instance itself.
10050 hvparams = cluster.FillHV(instance)
10051 beparams = cluster.FillBE(instance)
10052 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10054 assert console.instance == instance.name
10055 assert console.Validate()
10057 return console.ToDict()
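# The returned dict describes how to reach the console; depending on the
# hypervisor it typically carries the console kind (ssh/vnc/spice/message)
# plus a host/port or a command to execute -- see objects.InstanceConsole for
# the authoritative field list.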
10060 class LUInstanceReplaceDisks(LogicalUnit):
10061 """Replace the disks of an instance.
10064 HPATH = "mirrors-replace"
10065 HTYPE = constants.HTYPE_INSTANCE
10068 def CheckArguments(self):
10069 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10070 self.op.iallocator)
10072 def ExpandNames(self):
10073 self._ExpandAndLockInstance()
10075 assert locking.LEVEL_NODE not in self.needed_locks
10076 assert locking.LEVEL_NODE_RES not in self.needed_locks
10077 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10079 assert self.op.iallocator is None or self.op.remote_node is None, \
10080 "Conflicting options"
10082 if self.op.remote_node is not None:
10083 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10085 # Warning: do not remove the locking of the new secondary here
10086 # unless DRBD8.AddChildren is changed to work in parallel;
10087 # currently it doesn't since parallel invocations of
10088 # FindUnusedMinor will conflict
10089 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10090 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10092 self.needed_locks[locking.LEVEL_NODE] = []
10093 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10095 if self.op.iallocator is not None:
10096 # iallocator will select a new node in the same group
10097 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10099 self.needed_locks[locking.LEVEL_NODE_RES] = []
10101 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10102 self.op.iallocator, self.op.remote_node,
10103 self.op.disks, False, self.op.early_release,
10104 self.op.ignore_ipolicy)
10106 self.tasklets = [self.replacer]
10108 def DeclareLocks(self, level):
10109 if level == locking.LEVEL_NODEGROUP:
10110 assert self.op.remote_node is None
10111 assert self.op.iallocator is not None
10112 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10114 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10115 # Lock all groups used by instance optimistically; this requires going
10116 # via the node before it's locked, requiring verification later on
10117 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10118 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10120 elif level == locking.LEVEL_NODE:
10121 if self.op.iallocator is not None:
10122 assert self.op.remote_node is None
10123 assert not self.needed_locks[locking.LEVEL_NODE]
10125 # Lock member nodes of all locked groups
10126 self.needed_locks[locking.LEVEL_NODE] = [node_name
10127 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10128 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10130 self._LockInstancesNodes()
10131 elif level == locking.LEVEL_NODE_RES:
10133 self.needed_locks[locking.LEVEL_NODE_RES] = \
10134 self.needed_locks[locking.LEVEL_NODE]
10136 def BuildHooksEnv(self):
10137 """Build hooks env.
10139 This runs on the master, the primary and all the secondaries.
10142 instance = self.replacer.instance
10144 "MODE": self.op.mode,
10145 "NEW_SECONDARY": self.op.remote_node,
10146 "OLD_SECONDARY": instance.secondary_nodes[0],
10148 env.update(_BuildInstanceHookEnvByObject(self, instance))
10151 def BuildHooksNodes(self):
10152 """Build hooks nodes.
10155 instance = self.replacer.instance
10157 self.cfg.GetMasterNode(),
10158 instance.primary_node,
10160 if self.op.remote_node is not None:
10161 nl.append(self.op.remote_node)
10164 def CheckPrereq(self):
10165 """Check prerequisites.
10168 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10169 self.op.iallocator is None)
10171 # Verify if node group locks are still correct
10172 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10174 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10176 return LogicalUnit.CheckPrereq(self)
10179 class TLReplaceDisks(Tasklet):
10180 """Replaces disks for an instance.
10182 Note: Locking is not within the scope of this class.
10185 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10186 disks, delay_iallocator, early_release, ignore_ipolicy):
10187 """Initializes this class.
10190 Tasklet.__init__(self, lu)
10193 self.instance_name = instance_name
10195 self.iallocator_name = iallocator_name
10196 self.remote_node = remote_node
10198 self.delay_iallocator = delay_iallocator
10199 self.early_release = early_release
10200 self.ignore_ipolicy = ignore_ipolicy
10203 self.instance = None
10204 self.new_node = None
10205 self.target_node = None
10206 self.other_node = None
10207 self.remote_node_info = None
10208 self.node_secondary_ip = None
10211 def CheckArguments(mode, remote_node, iallocator):
10212 """Helper function for users of this class.
10215 # check for valid parameter combination
10216 if mode == constants.REPLACE_DISK_CHG:
10217 if remote_node is None and iallocator is None:
10218 raise errors.OpPrereqError("When changing the secondary either an"
10219 " iallocator script must be used or the"
10220 " new node given", errors.ECODE_INVAL)
10222 if remote_node is not None and iallocator is not None:
10223 raise errors.OpPrereqError("Give either the iallocator or the new"
10224 " secondary, not both", errors.ECODE_INVAL)
10226 elif remote_node is not None or iallocator is not None:
10227 # Not replacing the secondary
10228 raise errors.OpPrereqError("The iallocator and new node options can"
10229 " only be used when changing the"
10230 " secondary node", errors.ECODE_INVAL)
10233 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10234 """Compute a new secondary node using an IAllocator.
10237 ial = IAllocator(lu.cfg, lu.rpc,
10238 mode=constants.IALLOCATOR_MODE_RELOC,
10239 name=instance_name,
10240 relocate_from=list(relocate_from))
10242 ial.Run(iallocator_name)
10244 if not ial.success:
10245 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10246 " %s" % (iallocator_name, ial.info),
10247 errors.ECODE_NORES)
10249 if len(ial.result) != ial.required_nodes:
10250 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10251 " of nodes (%s), required %s" %
10253 len(ial.result), ial.required_nodes),
10254 errors.ECODE_FAULT)
10256 remote_node_name = ial.result[0]
10258 lu.LogInfo("Selected new secondary for instance '%s': %s",
10259 instance_name, remote_node_name)
10261 return remote_node_name
10263 def _FindFaultyDisks(self, node_name):
10264 """Wrapper for L{_FindFaultyInstanceDisks}.
10267 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10270 def _CheckDisksActivated(self, instance):
10271 """Checks if the instance disks are activated.
10273     @param instance: The instance whose disks to check
10274 @return: True if they are activated, False otherwise
10277 nodes = instance.all_nodes
10279 for idx, dev in enumerate(instance.disks):
10281 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10282 self.cfg.SetDiskID(dev, node)
10284 result = self.rpc.call_blockdev_find(node, dev)
10288 elif result.fail_msg or not result.payload:
10293 def CheckPrereq(self):
10294 """Check prerequisites.
10296 This checks that the instance is in the cluster.
10299 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10300 assert instance is not None, \
10301 "Cannot retrieve locked instance %s" % self.instance_name
10303 if instance.disk_template != constants.DT_DRBD8:
10304 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10305 " instances", errors.ECODE_INVAL)
10307 if len(instance.secondary_nodes) != 1:
10308 raise errors.OpPrereqError("The instance has a strange layout,"
10309 " expected one secondary but found %d" %
10310 len(instance.secondary_nodes),
10311 errors.ECODE_FAULT)
10313 if not self.delay_iallocator:
10314 self._CheckPrereq2()
10316 def _CheckPrereq2(self):
10317 """Check prerequisites, second part.
10319 This function should always be part of CheckPrereq. It was separated and is
10320 now called from Exec because during node evacuation iallocator was only
10321     called with an unmodified cluster model, not taking planned changes into account.
10325 instance = self.instance
10326 secondary_node = instance.secondary_nodes[0]
10328 if self.iallocator_name is None:
10329 remote_node = self.remote_node
10331 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10332 instance.name, instance.secondary_nodes)
10334 if remote_node is None:
10335 self.remote_node_info = None
10337 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10338 "Remote node '%s' is not locked" % remote_node
10340 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10341 assert self.remote_node_info is not None, \
10342 "Cannot retrieve locked node %s" % remote_node
10344 if remote_node == self.instance.primary_node:
10345 raise errors.OpPrereqError("The specified node is the primary node of"
10346 " the instance", errors.ECODE_INVAL)
10348 if remote_node == secondary_node:
10349 raise errors.OpPrereqError("The specified node is already the"
10350 " secondary node of the instance",
10351 errors.ECODE_INVAL)
10353 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10354 constants.REPLACE_DISK_CHG):
10355 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10356 errors.ECODE_INVAL)
10358 if self.mode == constants.REPLACE_DISK_AUTO:
10359 if not self._CheckDisksActivated(instance):
10360 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10361 " first" % self.instance_name,
10362 errors.ECODE_STATE)
10363 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10364 faulty_secondary = self._FindFaultyDisks(secondary_node)
10366 if faulty_primary and faulty_secondary:
10367 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10368 " one node and can not be repaired"
10369 " automatically" % self.instance_name,
10370 errors.ECODE_STATE)
10373 self.disks = faulty_primary
10374 self.target_node = instance.primary_node
10375 self.other_node = secondary_node
10376 check_nodes = [self.target_node, self.other_node]
10377 elif faulty_secondary:
10378 self.disks = faulty_secondary
10379 self.target_node = secondary_node
10380 self.other_node = instance.primary_node
10381 check_nodes = [self.target_node, self.other_node]
10387 # Non-automatic modes
10388 if self.mode == constants.REPLACE_DISK_PRI:
10389 self.target_node = instance.primary_node
10390 self.other_node = secondary_node
10391 check_nodes = [self.target_node, self.other_node]
10393 elif self.mode == constants.REPLACE_DISK_SEC:
10394 self.target_node = secondary_node
10395 self.other_node = instance.primary_node
10396 check_nodes = [self.target_node, self.other_node]
10398 elif self.mode == constants.REPLACE_DISK_CHG:
10399 self.new_node = remote_node
10400 self.other_node = instance.primary_node
10401 self.target_node = secondary_node
10402 check_nodes = [self.new_node, self.other_node]
10404 _CheckNodeNotDrained(self.lu, remote_node)
10405 _CheckNodeVmCapable(self.lu, remote_node)
10407 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10408 assert old_node_info is not None
10409 if old_node_info.offline and not self.early_release:
10410 # doesn't make sense to delay the release
10411 self.early_release = True
10412 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10413 " early-release mode", secondary_node)
10416 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10419 # If not specified all disks should be replaced
10421 self.disks = range(len(self.instance.disks))
10423     # TODO: This is ugly, but right now we can't distinguish between an
10424     # internally submitted opcode and an external one. We should fix that.
10425 if self.remote_node_info:
10426 # We change the node, lets verify it still meets instance policy
10427 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10428 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10430 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10431 ignore=self.ignore_ipolicy)
10433 # TODO: compute disk parameters
10434 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10435 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10436 if primary_node_info.group != secondary_node_info.group:
10437 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10438 " different node groups; the disk parameters of the"
10439 " primary node's group will be applied.")
10441 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10443 for node in check_nodes:
10444 _CheckNodeOnline(self.lu, node)
10446 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10449 if node_name is not None)
10451 # Release unneeded node and node resource locks
10452 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10453 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10455 # Release any owned node group
10456 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10457 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10459 # Check whether disks are valid
10460 for disk_idx in self.disks:
10461 instance.FindDisk(disk_idx)
10463 # Get secondary node IP addresses
10464 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10465 in self.cfg.GetMultiNodeInfo(touched_nodes))
10467 def Exec(self, feedback_fn):
10468 """Execute disk replacement.
10470 This dispatches the disk replacement to the appropriate handler.
10473 if self.delay_iallocator:
10474 self._CheckPrereq2()
10477 # Verify owned locks before starting operation
10478 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10479 assert set(owned_nodes) == set(self.node_secondary_ip), \
10480 ("Incorrect node locks, owning %s, expected %s" %
10481 (owned_nodes, self.node_secondary_ip.keys()))
10482 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10483 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10485 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10486 assert list(owned_instances) == [self.instance_name], \
10487 "Instance '%s' not locked" % self.instance_name
10489 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10490 "Should not own any node group lock at this point"
10493 feedback_fn("No disks need replacement")
10496 feedback_fn("Replacing disk(s) %s for %s" %
10497 (utils.CommaJoin(self.disks), self.instance.name))
10499 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10501 # Activate the instance disks if we're replacing them on a down instance
10503 _StartInstanceDisks(self.lu, self.instance, True)
10506 # Should we replace the secondary node?
10507 if self.new_node is not None:
10508 fn = self._ExecDrbd8Secondary
10510 fn = self._ExecDrbd8DiskOnly
10512 result = fn(feedback_fn)
10514 # Deactivate the instance disks if we're replacing them on a
10517 _SafeShutdownInstanceDisks(self.lu, self.instance)
10519 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10522 # Verify owned locks
10523 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10524 nodes = frozenset(self.node_secondary_ip)
10525 assert ((self.early_release and not owned_nodes) or
10526 (not self.early_release and not (set(owned_nodes) - nodes))), \
10527 ("Not owning the correct locks, early_release=%s, owned=%r,"
10528 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10532 def _CheckVolumeGroup(self, nodes):
10533 self.lu.LogInfo("Checking volume groups")
10535 vgname = self.cfg.GetVGName()
10537 # Make sure volume group exists on all involved nodes
10538 results = self.rpc.call_vg_list(nodes)
10540 raise errors.OpExecError("Can't list volume groups on the nodes")
10543 res = results[node]
10544 res.Raise("Error checking node %s" % node)
10545 if vgname not in res.payload:
10546 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10549 def _CheckDisksExistence(self, nodes):
10550 # Check disk existence
10551 for idx, dev in enumerate(self.instance.disks):
10552 if idx not in self.disks:
10556 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10557 self.cfg.SetDiskID(dev, node)
10559 result = self.rpc.call_blockdev_find(node, dev)
10561 msg = result.fail_msg
10562 if msg or not result.payload:
10564 msg = "disk not found"
10565 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10568 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10569 for idx, dev in enumerate(self.instance.disks):
10570 if idx not in self.disks:
10573 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10576 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10578 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10579 " replace disks for instance %s" %
10580 (node_name, self.instance.name))
10582 def _CreateNewStorage(self, node_name):
10583 """Create new storage on the primary or secondary node.
10585 This is only used for same-node replaces, not for changing the
10586 secondary node, hence we don't want to modify the existing disk.
10591 for idx, dev in enumerate(self.instance.disks):
10592 if idx not in self.disks:
10595 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10597 self.cfg.SetDiskID(dev, node_name)
10599 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10600 names = _GenerateUniqueNames(self.lu, lv_names)
10602 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10604 vg_data = dev.children[0].logical_id[0]
10605 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10606 logical_id=(vg_data, names[0]), params=data_p)
10607 vg_meta = dev.children[1].logical_id[0]
10608 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10609 logical_id=(vg_meta, names[1]), params=meta_p)
10611 new_lvs = [lv_data, lv_meta]
10612 old_lvs = [child.Copy() for child in dev.children]
10613 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
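      # iv_names maps the DRBD's iv_name (e.g. "disk/0") to a tuple of
      # (drbd disk object, LVs currently attached, freshly created LVs); it
      # drives the detach/rename/attach sequence in _ExecDrbd8DiskOnly and the
      # final cleanup in _RemoveOldStorage.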
10615 # we pass force_create=True to force the LVM creation
10616 for new_lv in new_lvs:
10617 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10618 _GetInstanceInfoText(self.instance), False)
10622 def _CheckDevices(self, node_name, iv_names):
10623 for name, (dev, _, _) in iv_names.iteritems():
10624 self.cfg.SetDiskID(dev, node_name)
10626 result = self.rpc.call_blockdev_find(node_name, dev)
10628 msg = result.fail_msg
10629 if msg or not result.payload:
10631 msg = "disk not found"
10632 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10635 if result.payload.is_degraded:
10636 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10638 def _RemoveOldStorage(self, node_name, iv_names):
10639 for name, (_, old_lvs, _) in iv_names.iteritems():
10640 self.lu.LogInfo("Remove logical volumes for %s" % name)
10643 self.cfg.SetDiskID(lv, node_name)
10645 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10647 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10648 hint="remove unused LVs manually")
10650 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10651 """Replace a disk on the primary or secondary for DRBD 8.
10653 The algorithm for replace is quite complicated:
10655 1. for each disk to be replaced:
10657 1. create new LVs on the target node with unique names
10658 1. detach old LVs from the drbd device
10659 1. rename old LVs to name_replaced.<time_t>
10660 1. rename new LVs to old LVs
10661 1. attach the new LVs (with the old names now) to the drbd device
10663 1. wait for sync across all devices
10665 1. for each modified disk:
10667       1. remove old LVs (which have the name name_replaced.<time_t>)
10669 Failures are not very well handled.
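    # In LV terms the per-disk sequence below is roughly:
    #   old data/meta LVs  -> renamed to <name>_replaced-<timestamp>
    #   newly created LVs  -> renamed to the old names
    # so the DRBD device ends up pointing at fresh storage under the original
    # logical ids.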
10674 # Step: check device activation
10675 self.lu.LogStep(1, steps_total, "Check device existence")
10676 self._CheckDisksExistence([self.other_node, self.target_node])
10677 self._CheckVolumeGroup([self.target_node, self.other_node])
10679 # Step: check other node consistency
10680 self.lu.LogStep(2, steps_total, "Check peer consistency")
10681 self._CheckDisksConsistency(self.other_node,
10682 self.other_node == self.instance.primary_node,
10685 # Step: create new storage
10686 self.lu.LogStep(3, steps_total, "Allocate new storage")
10687 iv_names = self._CreateNewStorage(self.target_node)
10689 # Step: for each lv, detach+rename*2+attach
10690 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10691 for dev, old_lvs, new_lvs in iv_names.itervalues():
10692 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10694 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10696 result.Raise("Can't detach drbd from local storage on node"
10697 " %s for device %s" % (self.target_node, dev.iv_name))
10699 #cfg.Update(instance)
10701 # ok, we created the new LVs, so now we know we have the needed
10702 # storage; as such, we proceed on the target node to rename
10703 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10704 # using the assumption that logical_id == physical_id (which in
10705 # turn is the unique_id on that node)
10707 # FIXME(iustin): use a better name for the replaced LVs
10708 temp_suffix = int(time.time())
10709 ren_fn = lambda d, suff: (d.physical_id[0],
10710 d.physical_id[1] + "_replaced-%s" % suff)
10712 # Build the rename list based on what LVs exist on the node
10713 rename_old_to_new = []
10714 for to_ren in old_lvs:
10715 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10716 if not result.fail_msg and result.payload:
10718 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10720 self.lu.LogInfo("Renaming the old LVs on the target node")
10721 result = self.rpc.call_blockdev_rename(self.target_node,
10723 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10725 # Now we rename the new LVs to the old LVs
10726 self.lu.LogInfo("Renaming the new LVs on the target node")
10727 rename_new_to_old = [(new, old.physical_id)
10728 for old, new in zip(old_lvs, new_lvs)]
10729 result = self.rpc.call_blockdev_rename(self.target_node,
10731 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10733 # Intermediate steps of in memory modifications
10734 for old, new in zip(old_lvs, new_lvs):
10735 new.logical_id = old.logical_id
10736 self.cfg.SetDiskID(new, self.target_node)
10738 # We need to modify old_lvs so that removal later removes the
10739 # right LVs, not the newly added ones; note that old_lvs is a
10741 for disk in old_lvs:
10742 disk.logical_id = ren_fn(disk, temp_suffix)
10743 self.cfg.SetDiskID(disk, self.target_node)
10745 # Now that the new lvs have the old name, we can add them to the device
10746 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10747 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10749 msg = result.fail_msg
10751 for new_lv in new_lvs:
10752 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10755 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10756 hint=("cleanup manually the unused logical"
10758 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10760 cstep = itertools.count(5)
10762 if self.early_release:
10763 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10764 self._RemoveOldStorage(self.target_node, iv_names)
10765 # TODO: Check if releasing locks early still makes sense
10766 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10768 # Release all resource locks except those used by the instance
10769 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10770 keep=self.node_secondary_ip.keys())
10772 # Release all node locks while waiting for sync
10773 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10775 # TODO: Can the instance lock be downgraded here? Take the optional disk
10776 # shutdown in the caller into consideration.
10779 # This can fail as the old devices are degraded and _WaitForSync
10780 # does a combined result over all disks, so we don't check its return value
10781 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10782 _WaitForSync(self.lu, self.instance)
10784 # Check all devices manually
10785 self._CheckDevices(self.instance.primary_node, iv_names)
10787 # Step: remove old storage
10788 if not self.early_release:
10789 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10790 self._RemoveOldStorage(self.target_node, iv_names)
10792 def _ExecDrbd8Secondary(self, feedback_fn):
10793 """Replace the secondary node for DRBD 8.
10795 The algorithm for replace is quite complicated:
10796 - for all disks of the instance:
10797 - create new LVs on the new node with same names
10798 - shutdown the drbd device on the old secondary
10799 - disconnect the drbd network on the primary
10800 - create the drbd device on the new secondary
10801 - network attach the drbd on the primary, using an artifice:
10802 the drbd code for Attach() will connect to the network if it
10803 finds a device which is connected to the good local disks but
10804 not network enabled
10805 - wait for sync across all devices
10806 - remove all disks from the old secondary
10808 Failures are not very well handled.
10813 pnode = self.instance.primary_node
10815 # Step: check device activation
10816 self.lu.LogStep(1, steps_total, "Check device existence")
10817 self._CheckDisksExistence([self.instance.primary_node])
10818 self._CheckVolumeGroup([self.instance.primary_node])
10820 # Step: check other node consistency
10821 self.lu.LogStep(2, steps_total, "Check peer consistency")
10822 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10824 # Step: create new storage
10825 self.lu.LogStep(3, steps_total, "Allocate new storage")
10826 for idx, dev in enumerate(self.instance.disks):
10827 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10828 (self.new_node, idx))
10829 # we pass force_create=True to force LVM creation
10830 for new_lv in dev.children:
10831 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10832 _GetInstanceInfoText(self.instance), False)
10834     # Step 4: drbd minors and drbd setup changes
10835 # after this, we must manually remove the drbd minors on both the
10836 # error and the success paths
10837 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10838 minors = self.cfg.AllocateDRBDMinor([self.new_node
10839 for dev in self.instance.disks],
10840 self.instance.name)
10841 logging.debug("Allocated minors %r", minors)
10844 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10845       self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
10846 (self.new_node, idx))
10847 # create new devices on new_node; note that we create two IDs:
10848 # one without port, so the drbd will be activated without
10849 # networking information on the new node at this stage, and one
10850 # with network, for the latter activation in step 4
10851 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
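      # A DRBD8 logical_id is (nodeA, nodeB, port, minorA, minorB, secret);
      # below we build one id without the port (for the initial, network-less
      # activation on the new node) and one with it (for the later attach
      # from the primary).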
10852 if self.instance.primary_node == o_node1:
10855 assert self.instance.primary_node == o_node2, "Three-node instance?"
10858 new_alone_id = (self.instance.primary_node, self.new_node, None,
10859 p_minor, new_minor, o_secret)
10860 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10861 p_minor, new_minor, o_secret)
10863 iv_names[idx] = (dev, dev.children, new_net_id)
10864 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10866 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10867 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10868 logical_id=new_alone_id,
10869 children=dev.children,
10871 params=drbd_params)
10873 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10874 _GetInstanceInfoText(self.instance), False)
10875 except errors.GenericError:
10876 self.cfg.ReleaseDRBDMinors(self.instance.name)
10879 # We have new devices, shutdown the drbd on the old secondary
10880 for idx, dev in enumerate(self.instance.disks):
10881 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10882 self.cfg.SetDiskID(dev, self.target_node)
10883 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10885 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10886 "node: %s" % (idx, msg),
10887 hint=("Please cleanup this device manually as"
10888 " soon as possible"))
10890 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10891 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10892 self.instance.disks)[pnode]
10894 msg = result.fail_msg
10896 # detaches didn't succeed (unlikely)
10897 self.cfg.ReleaseDRBDMinors(self.instance.name)
10898 raise errors.OpExecError("Can't detach the disks from the network on"
10899 " old node: %s" % (msg,))
10901 # if we managed to detach at least one, we update all the disks of
10902 # the instance to point to the new secondary
10903 self.lu.LogInfo("Updating instance configuration")
10904 for dev, _, new_logical_id in iv_names.itervalues():
10905 dev.logical_id = new_logical_id
10906 self.cfg.SetDiskID(dev, self.instance.primary_node)
10908 self.cfg.Update(self.instance, feedback_fn)
10910 # Release all node locks (the configuration has been updated)
10911 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10913 # and now perform the drbd attach
10914 self.lu.LogInfo("Attaching primary drbds to new secondary"
10915 " (standalone => connected)")
10916 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10918 self.node_secondary_ip,
10919 self.instance.disks,
10920 self.instance.name,
10922 for to_node, to_result in result.items():
10923 msg = to_result.fail_msg
10925 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10927 hint=("please do a gnt-instance info to see the"
10928 " status of disks"))
10930 cstep = itertools.count(5)
10932 if self.early_release:
10933 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10934 self._RemoveOldStorage(self.target_node, iv_names)
10935 # TODO: Check if releasing locks early still makes sense
10936 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10938 # Release all resource locks except those used by the instance
10939 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10940 keep=self.node_secondary_ip.keys())
10942 # TODO: Can the instance lock be downgraded here? Take the optional disk
10943 # shutdown in the caller into consideration.
10946 # This can fail as the old devices are degraded and _WaitForSync
10947 # does a combined result over all disks, so we don't check its return value
10948 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10949 _WaitForSync(self.lu, self.instance)
10951 # Check all devices manually
10952 self._CheckDevices(self.instance.primary_node, iv_names)
10954 # Step: remove old storage
10955 if not self.early_release:
10956 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10957 self._RemoveOldStorage(self.target_node, iv_names)
10960 class LURepairNodeStorage(NoHooksLU):
10961 """Repairs the volume group on a node.
10966 def CheckArguments(self):
10967 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10969 storage_type = self.op.storage_type
10971 if (constants.SO_FIX_CONSISTENCY not in
10972 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10973 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10974 " repaired" % storage_type,
10975 errors.ECODE_INVAL)
10977 def ExpandNames(self):
10978 self.needed_locks = {
10979 locking.LEVEL_NODE: [self.op.node_name],
10982 def _CheckFaultyDisks(self, instance, node_name):
10983 """Ensure faulty disks abort the opcode or at least warn."""
10985 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10987 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10988 " node '%s'" % (instance.name, node_name),
10989 errors.ECODE_STATE)
10990 except errors.OpPrereqError, err:
10991 if self.op.ignore_consistency:
10992 self.proc.LogWarning(str(err.args[0]))
10996 def CheckPrereq(self):
10997 """Check prerequisites.
11000 # Check whether any instance on this node has faulty disks
11001 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11002 if inst.admin_state != constants.ADMINST_UP:
11004 check_nodes = set(inst.all_nodes)
11005 check_nodes.discard(self.op.node_name)
11006 for inst_node_name in check_nodes:
11007 self._CheckFaultyDisks(inst, inst_node_name)
11009 def Exec(self, feedback_fn):
11010 feedback_fn("Repairing storage unit '%s' on %s ..." %
11011 (self.op.name, self.op.node_name))
11013 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11014 result = self.rpc.call_storage_execute(self.op.node_name,
11015 self.op.storage_type, st_args,
11017 constants.SO_FIX_CONSISTENCY)
11018 result.Raise("Failed to repair storage unit '%s' on %s" %
11019 (self.op.name, self.op.node_name))
11022 class LUNodeEvacuate(NoHooksLU):
11023 """Evacuates instances off a list of nodes.
11028 _MODE2IALLOCATOR = {
11029 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11030 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11031 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11033 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11034 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11035 constants.IALLOCATOR_NEVAC_MODES)
11037 def CheckArguments(self):
11038 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11040 def ExpandNames(self):
11041 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11043 if self.op.remote_node is not None:
11044 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11045 assert self.op.remote_node
11047 if self.op.remote_node == self.op.node_name:
11048 raise errors.OpPrereqError("Can not use evacuated node as a new"
11049 " secondary node", errors.ECODE_INVAL)
11051 if self.op.mode != constants.NODE_EVAC_SEC:
11052 raise errors.OpPrereqError("Without the use of an iallocator only"
11053 " secondary instances can be evacuated",
11054 errors.ECODE_INVAL)
11057 self.share_locks = _ShareAll()
11058 self.needed_locks = {
11059 locking.LEVEL_INSTANCE: [],
11060 locking.LEVEL_NODEGROUP: [],
11061 locking.LEVEL_NODE: [],
11064 # Determine nodes (via group) optimistically, needs verification once locks
11065 # have been acquired
11066 self.lock_nodes = self._DetermineNodes()
11068 def _DetermineNodes(self):
11069 """Gets the list of nodes to operate on.
11072 if self.op.remote_node is None:
11073 # Iallocator will choose any node(s) in the same group
11074 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11076 group_nodes = frozenset([self.op.remote_node])
11078 # Determine nodes to be locked
11079 return set([self.op.node_name]) | group_nodes
11081 def _DetermineInstances(self):
11082 """Builds list of instances to operate on.
11085 assert self.op.mode in constants.NODE_EVAC_MODES
11087 if self.op.mode == constants.NODE_EVAC_PRI:
11088 # Primary instances only
11089 inst_fn = _GetNodePrimaryInstances
11090 assert self.op.remote_node is None, \
11091 "Evacuating primary instances requires iallocator"
11092 elif self.op.mode == constants.NODE_EVAC_SEC:
11093 # Secondary instances only
11094 inst_fn = _GetNodeSecondaryInstances
11097 assert self.op.mode == constants.NODE_EVAC_ALL
11098 inst_fn = _GetNodeInstances
11099 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11101 raise errors.OpPrereqError("Due to an issue with the iallocator"
11102 " interface it is not possible to evacuate"
11103 " all instances at once; specify explicitly"
11104 " whether to evacuate primary or secondary"
11105 " instances",
11106 errors.ECODE_INVAL)
11108 return inst_fn(self.cfg, self.op.node_name)
11110 def DeclareLocks(self, level):
11111 if level == locking.LEVEL_INSTANCE:
11112 # Lock instances optimistically, needs verification once node and group
11113 # locks have been acquired
11114 self.needed_locks[locking.LEVEL_INSTANCE] = \
11115 set(i.name for i in self._DetermineInstances())
11117 elif level == locking.LEVEL_NODEGROUP:
11118 # Lock node groups for all potential target nodes optimistically, needs
11119 # verification once nodes have been acquired
11120 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11121 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11123 elif level == locking.LEVEL_NODE:
11124 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11126 def CheckPrereq(self):
11128 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11129 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11130 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11132 need_nodes = self._DetermineNodes()
11134 if not owned_nodes.issuperset(need_nodes):
11135 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11136 " locks were acquired, current nodes are"
11137 " are '%s', used to be '%s'; retry the"
11139 (self.op.node_name,
11140 utils.CommaJoin(need_nodes),
11141 utils.CommaJoin(owned_nodes)),
11142 errors.ECODE_STATE)
11144 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11145 if owned_groups != wanted_groups:
11146 raise errors.OpExecError("Node groups changed since locks were acquired,"
11147 " current groups are '%s', used to be '%s';"
11148 " retry the operation" %
11149 (utils.CommaJoin(wanted_groups),
11150 utils.CommaJoin(owned_groups)))
11152 # Determine affected instances
11153 self.instances = self._DetermineInstances()
11154 self.instance_names = [i.name for i in self.instances]
11156 if set(self.instance_names) != owned_instances:
11157 raise errors.OpExecError("Instances on node '%s' changed since locks"
11158 " were acquired, current instances are '%s',"
11159 " used to be '%s'; retry the operation" %
11160 (self.op.node_name,
11161 utils.CommaJoin(self.instance_names),
11162 utils.CommaJoin(owned_instances)))
11164 if self.instance_names:
11165 self.LogInfo("Evacuating instances from node '%s': %s",
11167 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11169 self.LogInfo("No instances to evacuate from node '%s'",
11172 if self.op.remote_node is not None:
11173 for i in self.instances:
11174 if i.primary_node == self.op.remote_node:
11175 raise errors.OpPrereqError("Node %s is the primary node of"
11176 " instance %s, cannot use it as"
11178 (self.op.remote_node, i.name),
11179 errors.ECODE_INVAL)
11181 def Exec(self, feedback_fn):
11182 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11184 if not self.instance_names:
11185 # No instances to evacuate
11188 elif self.op.iallocator is not None:
11189 # TODO: Implement relocation to other group
11190 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11191 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11192 instances=list(self.instance_names))
11194 ial.Run(self.op.iallocator)
11196 if not ial.success:
11197 raise errors.OpPrereqError("Can't compute node evacuation using"
11198 " iallocator '%s': %s" %
11199 (self.op.iallocator, ial.info),
11200 errors.ECODE_NORES)
11202 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11204 elif self.op.remote_node is not None:
11205 assert self.op.mode == constants.NODE_EVAC_SEC
11207 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11208 remote_node=self.op.remote_node,
11210 mode=constants.REPLACE_DISK_CHG,
11211 early_release=self.op.early_release)]
11212 for instance_name in self.instance_names
11216 raise errors.ProgrammerError("No iallocator or remote node")
11218 return ResultWithJobs(jobs)
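# The jobs wrapped in the result above are one opcode list per evacuated
# instance: with an explicit remote_node each job is a single
# OpInstanceReplaceDisks in REPLACE_DISK_CHG mode, while with an iallocator
# the jobs are whatever _LoadNodeEvacResult unpacked from the allocator's
# answer.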
11221 def _SetOpEarlyRelease(early_release, op):
11222 """Sets C{early_release} flag on opcodes if available.
11226 op.early_release = early_release
11227 except AttributeError:
11228 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11230 return op
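# Usage sketch (illustrative only): applied over a list of freshly loaded
# opcodes, e.g. map(compat.partial(_SetOpEarlyRelease, True), ops); opcodes
# without an early_release slot pass through unchanged.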
11233 def _NodeEvacDest(use_nodes, group, nodes):
11234 """Returns group or nodes depending on caller's choice.
11238 return utils.CommaJoin(nodes)
11243 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11244 """Unpacks the result of change-group and node-evacuate iallocator requests.
11246 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11247 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11249 @type lu: L{LogicalUnit}
11250 @param lu: Logical unit instance
11251 @type alloc_result: tuple/list
11252 @param alloc_result: Result from iallocator
11253 @type early_release: bool
11254 @param early_release: Whether to release locks early if possible
11255 @type use_nodes: bool
11256 @param use_nodes: Whether to display node names instead of groups
11259 (moved, failed, jobs) = alloc_result
11262 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11263 for (name, reason) in failed)
11264 lu.LogWarning("Unable to evacuate instances %s", failreason)
11265 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11268 lu.LogInfo("Instances to be moved: %s",
11269 utils.CommaJoin("%s (to %s)" %
11270 (name, _NodeEvacDest(use_nodes, group, nodes))
11271 for (name, group, nodes) in moved))
11273 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11274 map(opcodes.OpCode.LoadOpCode, ops))
11275 for ops in jobs]
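# Illustrative shape of alloc_result (example values only):
#   ([("inst1", "group-uuid", ["nodeB"])],      # moved: (name, group, nodes)
#    [("inst2", "some failure reason")],        # failed: (name, reason)
#    [[op1_as_dict, op2_as_dict]])              # jobs: serialized opcodes
# Each inner "jobs" list is deserialized with opcodes.OpCode.LoadOpCode and
# becomes one job to submit.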
11278 class LUInstanceGrowDisk(LogicalUnit):
11279 """Grow a disk of an instance.
11282 HPATH = "disk-grow"
11283 HTYPE = constants.HTYPE_INSTANCE
11286 def ExpandNames(self):
11287 self._ExpandAndLockInstance()
11288 self.needed_locks[locking.LEVEL_NODE] = []
11289 self.needed_locks[locking.LEVEL_NODE_RES] = []
11290 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11291 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11293 def DeclareLocks(self, level):
11294 if level == locking.LEVEL_NODE:
11295 self._LockInstancesNodes()
11296 elif level == locking.LEVEL_NODE_RES:
11298 self.needed_locks[locking.LEVEL_NODE_RES] = \
11299 self.needed_locks[locking.LEVEL_NODE][:]
11301 def BuildHooksEnv(self):
11302 """Build hooks env.
11304 This runs on the master, the primary and all the secondaries.
11308 "DISK": self.op.disk,
11309 "AMOUNT": self.op.amount,
11311 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11314 def BuildHooksNodes(self):
11315 """Build hooks nodes.
11318 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11321 def CheckPrereq(self):
11322 """Check prerequisites.
11324 This checks that the instance is in the cluster.
11327 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11328 assert instance is not None, \
11329 "Cannot retrieve locked instance %s" % self.op.instance_name
11330 nodenames = list(instance.all_nodes)
11331 for node in nodenames:
11332 _CheckNodeOnline(self, node)
11334 self.instance = instance
11336 if instance.disk_template not in constants.DTS_GROWABLE:
11337 raise errors.OpPrereqError("Instance's disk layout does not support"
11338 " growing", errors.ECODE_INVAL)
11340 self.disk = instance.FindDisk(self.op.disk)
11342 if instance.disk_template not in (constants.DT_FILE,
11343 constants.DT_SHARED_FILE):
11344 # TODO: check the free disk space for file, when that feature will be
11346 _CheckNodesFreeDiskPerVG(self, nodenames,
11347 self.disk.ComputeGrowth(self.op.amount))
11349 def Exec(self, feedback_fn):
11350 """Execute disk grow.
11353 instance = self.instance
11356 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11357 assert (self.owned_locks(locking.LEVEL_NODE) ==
11358 self.owned_locks(locking.LEVEL_NODE_RES))
11360 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11362 raise errors.OpExecError("Cannot activate block device to grow")
11364 feedback_fn("Growing disk %s of instance '%s' by %s" %
11365 (self.op.disk, instance.name,
11366 utils.FormatUnit(self.op.amount, "h")))
11368 # First run all grow ops in dry-run mode
11369 for node in instance.all_nodes:
11370 self.cfg.SetDiskID(disk, node)
11371 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11372 result.Raise("Grow request failed to node %s" % node)
11374 # We know that (as far as we can test) operations across different
11375 # nodes will succeed, time to run it for real
11376 for node in instance.all_nodes:
11377 self.cfg.SetDiskID(disk, node)
11378 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11379 result.Raise("Grow request failed to node %s" % node)
11381 # TODO: Rewrite code to work properly
11382 # DRBD goes into sync mode for a short amount of time after executing the
11383 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11384 # calling "resize" in sync mode fails. Sleeping for a short amount of
11385 # time is a work-around.
11386 time.sleep(5)
11388 disk.RecordGrow(self.op.amount)
11389 self.cfg.Update(instance, feedback_fn)
11391 # Changes have been recorded, release node lock
11392 _ReleaseLocks(self, locking.LEVEL_NODE)
11394 # Downgrade lock while waiting for sync
11395 self.glm.downgrade(locking.LEVEL_INSTANCE)
11397 if self.op.wait_for_sync:
11398 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11400 self.proc.LogWarning("Disk sync-ing has not returned a good"
11401 " status; please check the instance")
11402 if instance.admin_state != constants.ADMINST_UP:
11403 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11404 elif instance.admin_state != constants.ADMINST_UP:
11405 self.proc.LogWarning("Not shutting down the disk even though the instance"
11406 " is not supposed to be running, because no wait"
11407 " for sync mode was requested")
11409 assert self.owned_locks(locking.LEVEL_NODE_RES)
11410 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
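# The grow operation above is deliberately two-phased: every node first runs
# the grow in dry-run mode, and only if all dry-runs succeed is the real grow
# executed, so a failure on one node cannot leave the disks with inconsistent
# sizes. The new size is then recorded in the configuration and, if
# wait_for_sync was requested, the LU waits for the resync before returning.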
11413 class LUInstanceQueryData(NoHooksLU):
11414 """Query runtime instance data.
11419 def ExpandNames(self):
11420 self.needed_locks = {}
11422 # Use locking if requested or when non-static information is wanted
11423 if not (self.op.static or self.op.use_locking):
11424 self.LogWarning("Non-static data requested, locks need to be acquired")
11425 self.op.use_locking = True
11427 if self.op.instances or not self.op.use_locking:
11428 # Expand instance names right here
11429 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11431 # Will use acquired locks
11432 self.wanted_names = None
11434 if self.op.use_locking:
11435 self.share_locks = _ShareAll()
11437 if self.wanted_names is None:
11438 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11440 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11442 self.needed_locks[locking.LEVEL_NODE] = []
11443 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11445 def DeclareLocks(self, level):
11446 if self.op.use_locking and level == locking.LEVEL_NODE:
11447 self._LockInstancesNodes()
11449 def CheckPrereq(self):
11450 """Check prerequisites.
11452 This only checks the optional instance list against the existing names.
11455 if self.wanted_names is None:
11456 assert self.op.use_locking, "Locking was not used"
11457 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11459 self.wanted_instances = \
11460 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11462 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11463 """Returns the status of a block device
11466 if self.op.static or not node:
11469 self.cfg.SetDiskID(dev, node)
11471 result = self.rpc.call_blockdev_find(node, dev)
11475 result.Raise("Can't compute disk status for %s" % instance_name)
11477 status = result.payload
11481 return (status.dev_path, status.major, status.minor,
11482 status.sync_percent, status.estimated_time,
11483 status.is_degraded, status.ldisk_status)
11485 def _ComputeDiskStatus(self, instance, snode, dev):
11486 """Compute block device status.
11489 if dev.dev_type in constants.LDS_DRBD:
11490 # we change the snode then (otherwise we use the one passed in)
11491 if dev.logical_id[0] == instance.primary_node:
11492 snode = dev.logical_id[1]
11494 snode = dev.logical_id[0]
11496 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11497 instance.name, dev)
11498 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11501 dev_children = map(compat.partial(self._ComputeDiskStatus,
11508 "iv_name": dev.iv_name,
11509 "dev_type": dev.dev_type,
11510 "logical_id": dev.logical_id,
11511 "physical_id": dev.physical_id,
11512 "pstatus": dev_pstatus,
11513 "sstatus": dev_sstatus,
11514 "children": dev_children,
11519 def Exec(self, feedback_fn):
11520 """Gather and return data"""
11523 cluster = self.cfg.GetClusterInfo()
11525 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11526 for i in self.wanted_instances)
11527 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11528 if self.op.static or pnode.offline:
11529 remote_state = None
11531 self.LogWarning("Primary node %s is marked offline, returning static"
11532 " information only for instance %s" %
11533 (pnode.name, instance.name))
11535 remote_info = self.rpc.call_instance_info(instance.primary_node,
11537 instance.hypervisor)
11538 remote_info.Raise("Error checking node %s" % instance.primary_node)
11539 remote_info = remote_info.payload
11540 if remote_info and "state" in remote_info:
11541 remote_state = "up"
11543 if instance.admin_state == constants.ADMINST_UP:
11544 remote_state = "down"
11546 remote_state = instance.admin_state
11548 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11551 result[instance.name] = {
11552 "name": instance.name,
11553 "config_state": instance.admin_state,
11554 "run_state": remote_state,
11555 "pnode": instance.primary_node,
11556 "snodes": instance.secondary_nodes,
11558 # this happens to be the same format used for hooks
11559 "nics": _NICListToTuple(self, instance.nics),
11560 "disk_template": instance.disk_template,
11562 "hypervisor": instance.hypervisor,
11563 "network_port": instance.network_port,
11564 "hv_instance": instance.hvparams,
11565 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11566 "be_instance": instance.beparams,
11567 "be_actual": cluster.FillBE(instance),
11568 "os_instance": instance.osparams,
11569 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11570 "serial_no": instance.serial_no,
11571 "mtime": instance.mtime,
11572 "ctime": instance.ctime,
11573 "uuid": instance.uuid,
11579 class LUInstanceSetParams(LogicalUnit):
11580 """Modifies an instances's parameters.
11583 HPATH = "instance-modify"
11584 HTYPE = constants.HTYPE_INSTANCE
11587 def CheckArguments(self):
11588 if not (self.op.nics or self.op.disks or self.op.disk_template or
11589 self.op.hvparams or self.op.beparams or self.op.os_name or
11590 self.op.online_inst or self.op.offline_inst):
11591 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11593 if self.op.hvparams:
11594 _CheckGlobalHvParams(self.op.hvparams)
11598 for disk_op, disk_dict in self.op.disks:
11599 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11600 if disk_op == constants.DDM_REMOVE:
11601 disk_addremove += 1
11603 elif disk_op == constants.DDM_ADD:
11604 disk_addremove += 1
11606 if not isinstance(disk_op, int):
11607 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11608 if not isinstance(disk_dict, dict):
11609 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11610 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11612 if disk_op == constants.DDM_ADD:
11613 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11614 if mode not in constants.DISK_ACCESS_SET:
11615 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11616 errors.ECODE_INVAL)
11617 size = disk_dict.get(constants.IDISK_SIZE, None)
11619 raise errors.OpPrereqError("Required disk parameter size missing",
11620 errors.ECODE_INVAL)
11623 except (TypeError, ValueError), err:
11624 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11625 str(err), errors.ECODE_INVAL)
11626 disk_dict[constants.IDISK_SIZE] = size
11628 # modification of disk
11629 if constants.IDISK_SIZE in disk_dict:
11630 raise errors.OpPrereqError("Disk size change not possible, use"
11631 " grow-disk", errors.ECODE_INVAL)
11633 if disk_addremove > 1:
11634 raise errors.OpPrereqError("Only one disk add or remove operation"
11635 " supported at a time", errors.ECODE_INVAL)
11637 if self.op.disks and self.op.disk_template is not None:
11638 raise errors.OpPrereqError("Disk template conversion and other disk"
11639 " changes not supported at the same time",
11640 errors.ECODE_INVAL)
11642 if (self.op.disk_template and
11643 self.op.disk_template in constants.DTS_INT_MIRROR and
11644 self.op.remote_node is None):
11645 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11646 " one requires specifying a secondary node",
11647 errors.ECODE_INVAL)
11651 for nic_op, nic_dict in self.op.nics:
11652 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11653 if nic_op == constants.DDM_REMOVE:
11656 elif nic_op == constants.DDM_ADD:
11659 if not isinstance(nic_op, int):
11660 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11661 if not isinstance(nic_dict, dict):
11662 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11663 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11665 # nic_dict should be a dict
11666 nic_ip = nic_dict.get(constants.INIC_IP, None)
11667 if nic_ip is not None:
11668 if nic_ip.lower() == constants.VALUE_NONE:
11669 nic_dict[constants.INIC_IP] = None
11671 if not netutils.IPAddress.IsValid(nic_ip):
11672 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11673 errors.ECODE_INVAL)
11675 nic_bridge = nic_dict.get("bridge", None)
11676 nic_link = nic_dict.get(constants.INIC_LINK, None)
11677 if nic_bridge and nic_link:
11678 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11679 " at the same time", errors.ECODE_INVAL)
11680 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11681 nic_dict["bridge"] = None
11682 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11683 nic_dict[constants.INIC_LINK] = None
11685 if nic_op == constants.DDM_ADD:
11686 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11687 if nic_mac is None:
11688 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11690 if constants.INIC_MAC in nic_dict:
11691 nic_mac = nic_dict[constants.INIC_MAC]
11692 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11693 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11695 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11696 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11697 " modifying an existing nic",
11698 errors.ECODE_INVAL)
11700 if nic_addremove > 1:
11701 raise errors.OpPrereqError("Only one NIC add or remove operation"
11702 " supported at a time", errors.ECODE_INVAL)
11704 def ExpandNames(self):
11705 self._ExpandAndLockInstance()
11706 # Can't even acquire node locks in shared mode as upcoming changes in
11707 # Ganeti 2.6 will start to modify the node object on disk conversion
11708 self.needed_locks[locking.LEVEL_NODE] = []
11709 self.needed_locks[locking.LEVEL_NODE_RES] = []
11710 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11712 def DeclareLocks(self, level):
11713 if level == locking.LEVEL_NODE:
11714 self._LockInstancesNodes()
11715 if self.op.disk_template and self.op.remote_node:
11716 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11717 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11718 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11720 self.needed_locks[locking.LEVEL_NODE_RES] = \
11721 self.needed_locks[locking.LEVEL_NODE][:]
11723 def BuildHooksEnv(self):
11724 """Build hooks env.
11726 This runs on the master, primary and secondaries.
11730 if constants.BE_MINMEM in self.be_new:
11731 args["minmem"] = self.be_new[constants.BE_MINMEM]
11732 if constants.BE_MAXMEM in self.be_new:
11733 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11734 if constants.BE_VCPUS in self.be_new:
11735 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11736 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11737 # information at all.
11740 nic_override = dict(self.op.nics)
11741 for idx, nic in enumerate(self.instance.nics):
11742 if idx in nic_override:
11743 this_nic_override = nic_override[idx]
11745 this_nic_override = {}
11746 if constants.INIC_IP in this_nic_override:
11747 ip = this_nic_override[constants.INIC_IP]
11750 if constants.INIC_MAC in this_nic_override:
11751 mac = this_nic_override[constants.INIC_MAC]
11754 if idx in self.nic_pnew:
11755 nicparams = self.nic_pnew[idx]
11757 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11758 mode = nicparams[constants.NIC_MODE]
11759 link = nicparams[constants.NIC_LINK]
11760 args["nics"].append((ip, mac, mode, link))
11761 if constants.DDM_ADD in nic_override:
11762 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11763 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11764 nicparams = self.nic_pnew[constants.DDM_ADD]
11765 mode = nicparams[constants.NIC_MODE]
11766 link = nicparams[constants.NIC_LINK]
11767 args["nics"].append((ip, mac, mode, link))
11768 elif constants.DDM_REMOVE in nic_override:
11769 del args["nics"][-1]
11771 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11772 if self.op.disk_template:
11773 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11777 def BuildHooksNodes(self):
11778 """Build hooks nodes.
11781 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11784 def CheckPrereq(self):
11785 """Check prerequisites.
11787 This only checks the instance list against the existing names.
11790 # checking the new params on the primary/secondary nodes
11792 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11793 cluster = self.cluster = self.cfg.GetClusterInfo()
11794 assert self.instance is not None, \
11795 "Cannot retrieve locked instance %s" % self.op.instance_name
11796 pnode = instance.primary_node
11797 nodelist = list(instance.all_nodes)
11798 pnode_info = self.cfg.GetNodeInfo(pnode)
11799 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11802 if self.op.os_name and not self.op.force:
11803 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11804 self.op.force_variant)
11805 instance_os = self.op.os_name
11807 instance_os = instance.os
11809 if self.op.disk_template:
11810 if instance.disk_template == self.op.disk_template:
11811 raise errors.OpPrereqError("Instance already has disk template %s" %
11812 instance.disk_template, errors.ECODE_INVAL)
11814 if (instance.disk_template,
11815 self.op.disk_template) not in self._DISK_CONVERSIONS:
11816 raise errors.OpPrereqError("Unsupported disk template conversion from"
11817 " %s to %s" % (instance.disk_template,
11818 self.op.disk_template),
11819 errors.ECODE_INVAL)
11820 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11821 msg="cannot change disk template")
11822 if self.op.disk_template in constants.DTS_INT_MIRROR:
11823 if self.op.remote_node == pnode:
11824 raise errors.OpPrereqError("Given new secondary node %s is the same"
11825 " as the primary node of the instance" %
11826 self.op.remote_node, errors.ECODE_STATE)
11827 _CheckNodeOnline(self, self.op.remote_node)
11828 _CheckNodeNotDrained(self, self.op.remote_node)
11829 # FIXME: here we assume that the old instance type is DT_PLAIN
11830 assert instance.disk_template == constants.DT_PLAIN
11831 disks = [{constants.IDISK_SIZE: d.size,
11832 constants.IDISK_VG: d.logical_id[0]}
11833 for d in instance.disks]
11834 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11835 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11837 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11838 snode_group = self.cfg.GetNodeGroup(snode_info.group)
11839 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
11840 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
11841 ignore=self.op.ignore_ipolicy)
11842 if pnode_info.group != snode_info.group:
11843 self.LogWarning("The primary and secondary nodes are in two"
11844 " different node groups; the disk parameters"
11845 " from the first disk's node group will be"
11848 # hvparams processing
11849 if self.op.hvparams:
11850 hv_type = instance.hypervisor
11851 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11852 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11853 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11856 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11857 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11858 self.hv_proposed = self.hv_new = hv_new # the new actual values
11859 self.hv_inst = i_hvdict # the new dict (without defaults)
11861 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11863 self.hv_new = self.hv_inst = {}
11865 # beparams processing
11866 if self.op.beparams:
11867 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11869 objects.UpgradeBeParams(i_bedict)
11870 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11871 be_new = cluster.SimpleFillBE(i_bedict)
11872 self.be_proposed = self.be_new = be_new # the new actual values
11873 self.be_inst = i_bedict # the new dict (without defaults)
11875 self.be_new = self.be_inst = {}
11876 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11877 be_old = cluster.FillBE(instance)
11879 # CPU param validation -- checking every time a parameter is
11880 # changed to cover all cases where either CPU mask or vcpus have
11881 # been changed
11882 if (constants.BE_VCPUS in self.be_proposed and
11883 constants.HV_CPU_MASK in self.hv_proposed):
11885 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11886 # Verify mask is consistent with number of vCPUs. Can skip this
11887 # test if only 1 entry in the CPU mask, which means same mask
11888 # is applied to all vCPUs.
11889 if (len(cpu_list) > 1 and
11890 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11891 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11892 " CPU mask [%s]" %
11893 (self.be_proposed[constants.BE_VCPUS],
11894 self.hv_proposed[constants.HV_CPU_MASK]),
11895 errors.ECODE_INVAL)
11897 # Only perform this test if a new CPU mask is given
11898 if constants.HV_CPU_MASK in self.hv_new:
11899 # Calculate the largest CPU number requested
11900 max_requested_cpu = max(map(max, cpu_list))
11901 # Check that all of the instance's nodes have enough physical CPUs to
11902 # satisfy the requested CPU mask
11903 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11904 max_requested_cpu + 1, instance.hypervisor)
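# Example (illustrative, assuming the usual ":"-separated multi-mask syntax
# of utils.ParseMultiCpuMask): a mask like "0-1:2-3" yields one entry per
# vCPU and is therefore only accepted with BE_VCPUS=2, while a single mask
# such as "0-3" applies to all vCPUs and skips the length check; in both
# cases every node needs at least max(mask)+1 physical CPUs.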
11906 # osparams processing
11907 if self.op.osparams:
11908 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11909 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11910 self.os_inst = i_osdict # the new dict (without defaults)
11916 #TODO(dynmem): do the appropriate check involving MINMEM
11917 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11918 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11919 mem_check_list = [pnode]
11920 if be_new[constants.BE_AUTO_BALANCE]:
11921 # either we changed auto_balance to yes or it was from before
11922 mem_check_list.extend(instance.secondary_nodes)
11923 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11924 instance.hypervisor)
11925 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11926 [instance.hypervisor])
11927 pninfo = nodeinfo[pnode]
11928 msg = pninfo.fail_msg
11930 # Assume the primary node is unreachable and go ahead
11931 self.warn.append("Can't get info from primary node %s: %s" %
11934 (_, _, (pnhvinfo, )) = pninfo.payload
11935 if not isinstance(pnhvinfo.get("memory_free", None), int):
11936 self.warn.append("Node data from primary node %s doesn't contain"
11937 " free memory information" % pnode)
11938 elif instance_info.fail_msg:
11939 self.warn.append("Can't get instance runtime information: %s" %
11940 instance_info.fail_msg)
11942 if instance_info.payload:
11943 current_mem = int(instance_info.payload["memory"])
11945 # Assume instance not running
11946 # (there is a slight race condition here, but it's not very
11947 # probable, and we have no other way to check)
11948 # TODO: Describe race condition
11950 #TODO(dynmem): do the appropriate check involving MINMEM
11951 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11952 pnhvinfo["memory_free"])
11954 raise errors.OpPrereqError("This change will prevent the instance"
11955 " from starting, due to %d MB of memory"
11956 " missing on its primary node" %
11958 errors.ECODE_NORES)
11960 if be_new[constants.BE_AUTO_BALANCE]:
11961 for node, nres in nodeinfo.items():
11962 if node not in instance.secondary_nodes:
11964 nres.Raise("Can't get info from secondary node %s" % node,
11965 prereq=True, ecode=errors.ECODE_STATE)
11966 (_, _, (nhvinfo, )) = nres.payload
11967 if not isinstance(nhvinfo.get("memory_free", None), int):
11968 raise errors.OpPrereqError("Secondary node %s didn't return free"
11969 " memory information" % node,
11970 errors.ECODE_STATE)
11971 #TODO(dynmem): do the appropriate check involving MINMEM
11972 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11973 raise errors.OpPrereqError("This change will prevent the instance"
11974 " from failover to its secondary node"
11975 " %s, due to not enough memory" % node,
11976 errors.ECODE_STATE)
11980 self.nic_pinst = {}
11981 for nic_op, nic_dict in self.op.nics:
11982 if nic_op == constants.DDM_REMOVE:
11983 if not instance.nics:
11984 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11985 errors.ECODE_INVAL)
11987 if nic_op != constants.DDM_ADD:
11989 if not instance.nics:
11990 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11991 " no NICs" % nic_op,
11992 errors.ECODE_INVAL)
11993 if nic_op < 0 or nic_op >= len(instance.nics):
11994 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11996 (nic_op, len(instance.nics) - 1),
11997 errors.ECODE_INVAL)
11998 old_nic_params = instance.nics[nic_op].nicparams
11999 old_nic_ip = instance.nics[nic_op].ip
12001 old_nic_params = {}
12004 update_params_dict = dict([(key, nic_dict[key])
12005 for key in constants.NICS_PARAMETERS
12006 if key in nic_dict])
12008 if "bridge" in nic_dict:
12009 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12011 new_nic_params = _GetUpdatedParams(old_nic_params,
12012 update_params_dict)
12013 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12014 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12015 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12016 self.nic_pinst[nic_op] = new_nic_params
12017 self.nic_pnew[nic_op] = new_filled_nic_params
12018 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12020 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12021 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12022 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12024 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12026 self.warn.append(msg)
12028 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12029 if new_nic_mode == constants.NIC_MODE_ROUTED:
12030 if constants.INIC_IP in nic_dict:
12031 nic_ip = nic_dict[constants.INIC_IP]
12033 nic_ip = old_nic_ip
12035 raise errors.OpPrereqError("Cannot set the nic ip to None"
12036 " on a routed nic", errors.ECODE_INVAL)
12037 if constants.INIC_MAC in nic_dict:
12038 nic_mac = nic_dict[constants.INIC_MAC]
12039 if nic_mac is None:
12040 raise errors.OpPrereqError("Cannot set the nic mac to None",
12041 errors.ECODE_INVAL)
12042 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12043 # otherwise generate the mac
12044 nic_dict[constants.INIC_MAC] = \
12045 self.cfg.GenerateMAC(self.proc.GetECId())
12047 # or validate/reserve the current one
12049 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12050 except errors.ReservationError:
12051 raise errors.OpPrereqError("MAC address %s already in use"
12052 " in cluster" % nic_mac,
12053 errors.ECODE_NOTUNIQUE)
12056 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12057 raise errors.OpPrereqError("Disk operations not supported for"
12058 " diskless instances",
12059 errors.ECODE_INVAL)
12060 for disk_op, _ in self.op.disks:
12061 if disk_op == constants.DDM_REMOVE:
12062 if len(instance.disks) == 1:
12063 raise errors.OpPrereqError("Cannot remove the last disk of"
12064 " an instance", errors.ECODE_INVAL)
12065 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12066 msg="cannot remove disks")
12068 if (disk_op == constants.DDM_ADD and
12069 len(instance.disks) >= constants.MAX_DISKS):
12070 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12071 " add more" % constants.MAX_DISKS,
12072 errors.ECODE_STATE)
12073 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12075 if disk_op < 0 or disk_op >= len(instance.disks):
12076 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12078 (disk_op, len(instance.disks)),
12079 errors.ECODE_INVAL)
12081 # disabling the instance
12082 if self.op.offline_inst:
12083 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12084 msg="cannot change instance state to offline")
12086 # enabling the instance
12087 if self.op.online_inst:
12088 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12089 msg="cannot make instance go online")
12091 def _ConvertPlainToDrbd(self, feedback_fn):
12092 """Converts an instance from plain to drbd.
12095 feedback_fn("Converting template to drbd")
12096 instance = self.instance
12097 pnode = instance.primary_node
12098 snode = self.op.remote_node
12100 assert instance.disk_template == constants.DT_PLAIN
12102 # create a fake disk info for _GenerateDiskTemplate
12103 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12104 constants.IDISK_VG: d.logical_id[0]}
12105 for d in instance.disks]
12106 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12107 instance.name, pnode, [snode],
12108 disk_info, None, None, 0, feedback_fn,
12110 info = _GetInstanceInfoText(instance)
12111 feedback_fn("Creating aditional volumes...")
12112 # first, create the missing data and meta devices
12113 for disk in new_disks:
12114 # unfortunately this is... not too nice
12115 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12117 for child in disk.children:
12118 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12119 # at this stage, all new LVs have been created, we can rename the
12121 feedback_fn("Renaming original volumes...")
12122 rename_list = [(o, n.children[0].logical_id)
12123 for (o, n) in zip(instance.disks, new_disks)]
12124 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12125 result.Raise("Failed to rename original LVs")
12127 feedback_fn("Initializing DRBD devices...")
12128 # all child devices are in place, we can now create the DRBD devices
12129 for disk in new_disks:
12130 for node in [pnode, snode]:
12131 f_create = node == pnode
12132 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12134 # at this point, the instance has been modified
12135 instance.disk_template = constants.DT_DRBD8
12136 instance.disks = new_disks
12137 self.cfg.Update(instance, feedback_fn)
12139 # Release node locks while waiting for sync
12140 _ReleaseLocks(self, locking.LEVEL_NODE)
12142 # disks are created, waiting for sync
12143 disk_abort = not _WaitForSync(self, instance,
12144 oneshot=not self.op.wait_for_sync)
12146 raise errors.OpExecError("There are some degraded disks for"
12147 " this instance, please cleanup manually")
12149 # Node resource locks will be released by caller
12151 def _ConvertDrbdToPlain(self, feedback_fn):
12152 """Converts an instance from drbd to plain.
12155 instance = self.instance
12157 assert len(instance.secondary_nodes) == 1
12158 assert instance.disk_template == constants.DT_DRBD8
12160 pnode = instance.primary_node
12161 snode = instance.secondary_nodes[0]
12162 feedback_fn("Converting template to plain")
12164 old_disks = instance.disks
12165 new_disks = [d.children[0] for d in old_disks]
12167 # copy over size and mode
12168 for parent, child in zip(old_disks, new_disks):
12169 child.size = parent.size
12170 child.mode = parent.mode
12172 # update instance structure
12173 instance.disks = new_disks
12174 instance.disk_template = constants.DT_PLAIN
12175 self.cfg.Update(instance, feedback_fn)
12177 # Release locks in case removing disks takes a while
12178 _ReleaseLocks(self, locking.LEVEL_NODE)
12180 feedback_fn("Removing volumes on the secondary node...")
12181 for disk in old_disks:
12182 self.cfg.SetDiskID(disk, snode)
12183 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12185 self.LogWarning("Could not remove block device %s on node %s,"
12186 " continuing anyway: %s", disk.iv_name, snode, msg)
12188 feedback_fn("Removing unneeded volumes on the primary node...")
12189 for idx, disk in enumerate(old_disks):
12190 meta = disk.children[1]
12191 self.cfg.SetDiskID(meta, pnode)
12192 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12194 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12195 " continuing anyway: %s", idx, pnode, msg)
12197 # this is a DRBD disk, return its port to the pool
12198 for disk in old_disks:
12199 tcp_port = disk.logical_id[2]
12200 self.cfg.AddTcpUdpPort(tcp_port)
12202 # Node resource locks will be released by caller
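# Note on the two conversion helpers: _ConvertPlainToDrbd creates the missing
# meta/data volumes, renames the original LVs into place as DRBD children and
# then builds the DRBD devices on both nodes, while _ConvertDrbdToPlain goes
# the other way by promoting each DRBD device's data child to a top-level
# disk, removing the DRBD and meta volumes and returning the freed DRBD TCP
# ports to the pool.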
12204 def Exec(self, feedback_fn):
12205 """Modifies an instance.
12207 All parameters take effect only at the next restart of the instance.
12210 # Process here the warnings from CheckPrereq, as we don't have a
12211 # feedback_fn there.
12212 for warn in self.warn:
12213 feedback_fn("WARNING: %s" % warn)
12215 assert ((self.op.disk_template is None) ^
12216 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12217 "Not owning any node resource locks"
12220 instance = self.instance
12222 for disk_op, disk_dict in self.op.disks:
12223 if disk_op == constants.DDM_REMOVE:
12224 # remove the last disk
12225 device = instance.disks.pop()
12226 device_idx = len(instance.disks)
12227 for node, disk in device.ComputeNodeTree(instance.primary_node):
12228 self.cfg.SetDiskID(disk, node)
12229 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12231 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12232 " continuing anyway", device_idx, node, msg)
12233 result.append(("disk/%d" % device_idx, "remove"))
12235 # if this is a DRBD disk, return its port to the pool
12236 if device.dev_type in constants.LDS_DRBD:
12237 tcp_port = device.logical_id[2]
12238 self.cfg.AddTcpUdpPort(tcp_port)
12239 elif disk_op == constants.DDM_ADD:
12241 if instance.disk_template in (constants.DT_FILE,
12242 constants.DT_SHARED_FILE):
12243 file_driver, file_path = instance.disks[0].logical_id
12244 file_path = os.path.dirname(file_path)
12246 file_driver = file_path = None
12247 disk_idx_base = len(instance.disks)
12248 new_disk = _GenerateDiskTemplate(self,
12249 instance.disk_template,
12250 instance.name, instance.primary_node,
12251 instance.secondary_nodes,
12257 self.diskparams)[0]
12258 instance.disks.append(new_disk)
12259 info = _GetInstanceInfoText(instance)
12261 logging.info("Creating volume %s for instance %s",
12262 new_disk.iv_name, instance.name)
12263 # Note: this needs to be kept in sync with _CreateDisks
12265 for node in instance.all_nodes:
12266 f_create = node == instance.primary_node
12268 _CreateBlockDev(self, node, instance, new_disk,
12269 f_create, info, f_create)
12270 except errors.OpExecError, err:
12271 self.LogWarning("Failed to create volume %s (%s) on"
12273 new_disk.iv_name, new_disk, node, err)
12274 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12275 (new_disk.size, new_disk.mode)))
12277 # change a given disk
12278 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12279 result.append(("disk.mode/%d" % disk_op,
12280 disk_dict[constants.IDISK_MODE]))
12282 if self.op.disk_template:
12284 check_nodes = set(instance.all_nodes)
12285 if self.op.remote_node:
12286 check_nodes.add(self.op.remote_node)
12287 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12288 owned = self.owned_locks(level)
12289 assert not (check_nodes - owned), \
12290 ("Not owning the correct locks, owning %r, expected at least %r" %
12291 (owned, check_nodes))
12293 r_shut = _ShutdownInstanceDisks(self, instance)
12295 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12296 " proceed with disk template conversion")
12297 mode = (instance.disk_template, self.op.disk_template)
12299 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12301 self.cfg.ReleaseDRBDMinors(instance.name)
12303 result.append(("disk_template", self.op.disk_template))
12305 assert instance.disk_template == self.op.disk_template, \
12306 ("Expected disk template '%s', found '%s'" %
12307 (self.op.disk_template, instance.disk_template))
12309 # Release node and resource locks if there are any (they might already have
12310 # been released during disk conversion)
12311 _ReleaseLocks(self, locking.LEVEL_NODE)
12312 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12315 for nic_op, nic_dict in self.op.nics:
12316 if nic_op == constants.DDM_REMOVE:
12317 # remove the last nic
12318 del instance.nics[-1]
12319 result.append(("nic.%d" % len(instance.nics), "remove"))
12320 elif nic_op == constants.DDM_ADD:
12321 # mac and bridge should be set, by now
12322 mac = nic_dict[constants.INIC_MAC]
12323 ip = nic_dict.get(constants.INIC_IP, None)
12324 nicparams = self.nic_pinst[constants.DDM_ADD]
12325 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12326 instance.nics.append(new_nic)
12327 result.append(("nic.%d" % (len(instance.nics) - 1),
12328 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12329 (new_nic.mac, new_nic.ip,
12330 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12331 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12334 for key in (constants.INIC_MAC, constants.INIC_IP):
12335 if key in nic_dict:
12336 setattr(instance.nics[nic_op], key, nic_dict[key])
12337 if nic_op in self.nic_pinst:
12338 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12339 for key, val in nic_dict.iteritems():
12340 result.append(("nic.%s/%d" % (key, nic_op), val))
12343 if self.op.hvparams:
12344 instance.hvparams = self.hv_inst
12345 for key, val in self.op.hvparams.iteritems():
12346 result.append(("hv/%s" % key, val))
12349 if self.op.beparams:
12350 instance.beparams = self.be_inst
12351 for key, val in self.op.beparams.iteritems():
12352 result.append(("be/%s" % key, val))
12355 if self.op.os_name:
12356 instance.os = self.op.os_name
12359 if self.op.osparams:
12360 instance.osparams = self.os_inst
12361 for key, val in self.op.osparams.iteritems():
12362 result.append(("os/%s" % key, val))
12364 # online/offline instance
12365 if self.op.online_inst:
12366 self.cfg.MarkInstanceDown(instance.name)
12367 result.append(("admin_state", constants.ADMINST_DOWN))
12368 if self.op.offline_inst:
12369 self.cfg.MarkInstanceOffline(instance.name)
12370 result.append(("admin_state", constants.ADMINST_OFFLINE))
12372 self.cfg.Update(instance, feedback_fn)
12374 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12375 self.owned_locks(locking.LEVEL_NODE)), \
12376 "All node locks should have been released by now"
12380 _DISK_CONVERSIONS = {
12381 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12382 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
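# _DISK_CONVERSIONS acts as a small dispatch table: Exec looks up the
# (current_template, requested_template) pair and calls the matching helper,
# e.g. self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)](self,
# feedback_fn); any pair not listed here is rejected in CheckPrereq.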
12386 class LUInstanceChangeGroup(LogicalUnit):
12387 HPATH = "instance-change-group"
12388 HTYPE = constants.HTYPE_INSTANCE
12391 def ExpandNames(self):
12392 self.share_locks = _ShareAll()
12393 self.needed_locks = {
12394 locking.LEVEL_NODEGROUP: [],
12395 locking.LEVEL_NODE: [],
12398 self._ExpandAndLockInstance()
12400 if self.op.target_groups:
12401 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12402 self.op.target_groups)
12404 self.req_target_uuids = None
12406 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12408 def DeclareLocks(self, level):
12409 if level == locking.LEVEL_NODEGROUP:
12410 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12412 if self.req_target_uuids:
12413 lock_groups = set(self.req_target_uuids)
12415 # Lock all groups used by instance optimistically; this requires going
12416 # via the node before it's locked, requiring verification later on
12417 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12418 lock_groups.update(instance_groups)
12420 # No target groups, need to lock all of them
12421 lock_groups = locking.ALL_SET
12423 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12425 elif level == locking.LEVEL_NODE:
12426 if self.req_target_uuids:
12427 # Lock all nodes used by instances
12428 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12429 self._LockInstancesNodes()
12431 # Lock all nodes in all potential target groups
12432 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12433 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12434 member_nodes = [node_name
12435 for group in lock_groups
12436 for node_name in self.cfg.GetNodeGroup(group).members]
12437 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12439 # Lock all nodes as all groups are potential targets
12440 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12442 def CheckPrereq(self):
12443 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12444 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12445 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12447 assert (self.req_target_uuids is None or
12448 owned_groups.issuperset(self.req_target_uuids))
12449 assert owned_instances == set([self.op.instance_name])
12451 # Get instance information
12452 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12454 # Check if node groups for locked instance are still correct
12455 assert owned_nodes.issuperset(self.instance.all_nodes), \
12456 ("Instance %s's nodes changed while we kept the lock" %
12457 self.op.instance_name)
12459 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12462 if self.req_target_uuids:
12463 # User requested specific target groups
12464 self.target_uuids = self.req_target_uuids
12466 # All groups except those used by the instance are potential targets
12467 self.target_uuids = owned_groups - inst_groups
12469 conflicting_groups = self.target_uuids & inst_groups
12470 if conflicting_groups:
12471 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12472 " used by the instance '%s'" %
12473 (utils.CommaJoin(conflicting_groups),
12474 self.op.instance_name),
12475 errors.ECODE_INVAL)
12477 if not self.target_uuids:
12478 raise errors.OpPrereqError("There are no possible target groups",
12479 errors.ECODE_INVAL)
12481 def BuildHooksEnv(self):
12482 """Build hooks env.
12485 assert self.target_uuids
12488 "TARGET_GROUPS": " ".join(self.target_uuids),
12491 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12495 def BuildHooksNodes(self):
12496 """Build hooks nodes.
12499 mn = self.cfg.GetMasterNode()
12500 return ([mn], [mn])
12502 def Exec(self, feedback_fn):
12503 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12505 assert instances == [self.op.instance_name], "Instance not locked"
12507 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12508 instances=instances, target_groups=list(self.target_uuids))
12510 ial.Run(self.op.iallocator)
12512 if not ial.success:
12513 raise errors.OpPrereqError("Can't compute solution for changing group of"
12514 " instance '%s' using iallocator '%s': %s" %
12515 (self.op.instance_name, self.op.iallocator,
12517 errors.ECODE_NORES)
12519 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12521 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12522 " instance '%s'", len(jobs), self.op.instance_name)
12524 return ResultWithJobs(jobs)
12527 class LUBackupQuery(NoHooksLU):
12528 """Query the exports list
12533 def ExpandNames(self):
12534 self.needed_locks = {}
12535 self.share_locks[locking.LEVEL_NODE] = 1
12536 if not self.op.nodes:
12537 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12539 self.needed_locks[locking.LEVEL_NODE] = \
12540 _GetWantedNodes(self, self.op.nodes)
12542 def Exec(self, feedback_fn):
12543 """Compute the list of all the exported system images.
12546 @return: a dictionary with the structure node->(export-list)
12547 where export-list is a list of the instances exported on
12551 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12552 rpcresult = self.rpc.call_export_list(self.nodes)
12554 for node in rpcresult:
12555 if rpcresult[node].fail_msg:
12556 result[node] = False
12557 else:
12558 result[node] = rpcresult[node].payload
12560 return result
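# Illustrative result shape (example values): {"node1.example.com": ["inst1",
# "inst2"], "node2.example.com": False} -- False marks a node whose export
# list could not be retrieved.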
12563 class LUBackupPrepare(NoHooksLU):
12564 """Prepares an instance for an export and returns useful information.
12569 def ExpandNames(self):
12570 self._ExpandAndLockInstance()
12572 def CheckPrereq(self):
12573 """Check prerequisites.
12576 instance_name = self.op.instance_name
12578 self.instance = self.cfg.GetInstanceInfo(instance_name)
12579 assert self.instance is not None, \
12580 "Cannot retrieve locked instance %s" % self.op.instance_name
12581 _CheckNodeOnline(self, self.instance.primary_node)
12583 self._cds = _GetClusterDomainSecret()
12585 def Exec(self, feedback_fn):
12586 """Prepares an instance for an export.
12589 instance = self.instance
12591 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12592 salt = utils.GenerateSecret(8)
12594 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12595 result = self.rpc.call_x509_cert_create(instance.primary_node,
12596 constants.RIE_CERT_VALIDITY)
12597 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12599 (name, cert_pem) = result.payload
12601 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12605 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12606 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12608 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12614 class LUBackupExport(LogicalUnit):
12615 """Export an instance to an image in the cluster.
12618 HPATH = "instance-export"
12619 HTYPE = constants.HTYPE_INSTANCE
12622 def CheckArguments(self):
12623 """Check the arguments.
12626 self.x509_key_name = self.op.x509_key_name
12627 self.dest_x509_ca_pem = self.op.destination_x509_ca
12629 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12630 if not self.x509_key_name:
12631 raise errors.OpPrereqError("Missing X509 key name for encryption",
12632 errors.ECODE_INVAL)
12634 if not self.dest_x509_ca_pem:
12635 raise errors.OpPrereqError("Missing destination X509 CA",
12636 errors.ECODE_INVAL)
12638 def ExpandNames(self):
12639 self._ExpandAndLockInstance()
12641 # Lock all nodes for local exports
12642 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12643 # FIXME: lock only instance primary and destination node
12645 # Sad but true, for now we have to lock all nodes, as we don't know where
12646 # the previous export might be, and in this LU we search for it and
12647 # remove it from its current node. In the future we could fix this by:
12648 # - making a tasklet to search (share-lock all), then create the
12649 # new one, then one to remove, after
12650 # - removing the removal operation altogether
12651 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12653 def DeclareLocks(self, level):
12654 """Last minute lock declaration."""
12655 # All nodes are locked anyway, so nothing to do here.
12657 def BuildHooksEnv(self):
12658 """Build hooks env.
12660 This will run on the master, primary node and target node.
12664 "EXPORT_MODE": self.op.mode,
12665 "EXPORT_NODE": self.op.target_node,
12666 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12667 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12668 # TODO: Generic function for boolean env variables
12669 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12672 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
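# Illustrative example of the resulting hooks environment (values depend on
# the opcode): EXPORT_MODE=local, EXPORT_NODE=node3.example.com,
# EXPORT_DO_SHUTDOWN=True, SHUTDOWN_TIMEOUT=120, REMOVE_INSTANCE=False,
# plus the generic instance variables added by _BuildInstanceHookEnvByObject.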
12676 def BuildHooksNodes(self):
12677 """Build hooks nodes.
12680 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12682 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12683 nl.append(self.op.target_node)
12687 def CheckPrereq(self):
12688 """Check prerequisites.
12690 This checks that the instance and node names are valid.
12693 instance_name = self.op.instance_name
12695 self.instance = self.cfg.GetInstanceInfo(instance_name)
12696 assert self.instance is not None, \
12697 "Cannot retrieve locked instance %s" % self.op.instance_name
12698 _CheckNodeOnline(self, self.instance.primary_node)
12700 if (self.op.remove_instance and
12701 self.instance.admin_state == constants.ADMINST_UP and
12702 not self.op.shutdown):
12703 raise errors.OpPrereqError("Can not remove instance without shutting it"
12706 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12707 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12708 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12709 assert self.dst_node is not None
12711 _CheckNodeOnline(self, self.dst_node.name)
12712 _CheckNodeNotDrained(self, self.dst_node.name)
12715 self.dest_disk_info = None
12716 self.dest_x509_ca = None
12718 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12719 self.dst_node = None
12721 if len(self.op.target_node) != len(self.instance.disks):
12722 raise errors.OpPrereqError(("Received destination information for %s"
12723 " disks, but instance %s has %s disks") %
12724 (len(self.op.target_node), instance_name,
12725 len(self.instance.disks)),
12726 errors.ECODE_INVAL)
12728 cds = _GetClusterDomainSecret()
12730 # Check X509 key name
12732 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12733 except (TypeError, ValueError), err:
12734 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12736 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12737 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12738 errors.ECODE_INVAL)
12740 # Load and verify CA
12742 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12743 except OpenSSL.crypto.Error, err:
12744 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12745 (err, ), errors.ECODE_INVAL)
12747 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12748 if errcode is not None:
12749 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12750 (msg, ), errors.ECODE_INVAL)
12752 self.dest_x509_ca = cert
12754 # Verify target information
12756 for idx, disk_data in enumerate(self.op.target_node):
12758 (host, port, magic) = \
12759 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12760 except errors.GenericError, err:
12761 raise errors.OpPrereqError("Target info for disk %s: %s" %
12762 (idx, err), errors.ECODE_INVAL)
12764 disk_info.append((host, port, magic))
12766 assert len(disk_info) == len(self.op.target_node)
12767 self.dest_disk_info = disk_info
12770 raise errors.ProgrammerError("Unhandled export mode %r" %
12773 # instance disk type verification
12774 # TODO: Implement export support for file-based disks
12775 for disk in self.instance.disks:
12776 if disk.dev_type == constants.LD_FILE:
12777 raise errors.OpPrereqError("Export not supported for instances with"
12778 " file-based disks", errors.ECODE_INVAL)
12780 def _CleanupExports(self, feedback_fn):
12781 """Removes exports of current instance from all other nodes.
12783 If an instance in a cluster with nodes A..D was exported to node C, its
12784 exports will be removed from the nodes A, B and D.
12787 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12789 nodelist = self.cfg.GetNodeList()
12790 nodelist.remove(self.dst_node.name)
12792 # on one-node clusters nodelist will be empty after the removal
12793 # if we proceed, the backup would be removed because OpBackupQuery
12794 # substitutes an empty list with the full cluster node list.
12795 iname = self.instance.name
12797 feedback_fn("Removing old exports for instance %s" % iname)
12798 exportlist = self.rpc.call_export_list(nodelist)
12799 for node in exportlist:
12800 if exportlist[node].fail_msg:
12802 if iname in exportlist[node].payload:
12803 msg = self.rpc.call_export_remove(node, iname).fail_msg
12805 self.LogWarning("Could not remove older export for instance %s"
12806 " on node %s: %s", iname, node, msg)
12808 def Exec(self, feedback_fn):
12809 """Export an instance to an image in the cluster.
12812 assert self.op.mode in constants.EXPORT_MODES
12814 instance = self.instance
12815 src_node = instance.primary_node
12817 if self.op.shutdown:
12818 # shutdown the instance, but not the disks
12819 feedback_fn("Shutting down instance %s" % instance.name)
12820 result = self.rpc.call_instance_shutdown(src_node, instance,
12821 self.op.shutdown_timeout)
12822 # TODO: Maybe ignore failures if ignore_remove_failures is set
12823 result.Raise("Could not shutdown instance %s on"
12824 " node %s" % (instance.name, src_node))
12826 # set the disk IDs correctly since call_instance_start needs the
12827 # correct drbd minor to create the symlinks
12828 for disk in instance.disks:
12829 self.cfg.SetDiskID(disk, src_node)
12831 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12834 # Activate the instance disks if we're exporting a stopped instance
12835 feedback_fn("Activating disks for %s" % instance.name)
12836 _StartInstanceDisks(self, instance, None)
12839 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12842 helper.CreateSnapshots()
12844 if (self.op.shutdown and
12845 instance.admin_state == constants.ADMINST_UP and
12846 not self.op.remove_instance):
12847 assert not activate_disks
12848 feedback_fn("Starting instance %s" % instance.name)
12849 result = self.rpc.call_instance_start(src_node,
12850 (instance, None, None), False)
12851 msg = result.fail_msg
12853 feedback_fn("Failed to start instance: %s" % msg)
12854 _ShutdownInstanceDisks(self, instance)
12855 raise errors.OpExecError("Could not start instance: %s" % msg)
12857 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12858 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12859 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12860 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12861 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12863 (key_name, _, _) = self.x509_key_name
12866 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12869 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12870 key_name, dest_ca_pem,
12875 # Check for backwards compatibility
12876 assert len(dresults) == len(instance.disks)
12877 assert compat.all(isinstance(i, bool) for i in dresults), \
12878 "Not all results are boolean: %r" % dresults
12882 feedback_fn("Deactivating disks for %s" % instance.name)
12883 _ShutdownInstanceDisks(self, instance)
12885 if not (compat.all(dresults) and fin_resu):
12888 failures.append("export finalization")
12889 if not compat.all(dresults):
12890 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12892 failures.append("disk export: disk(s) %s" % fdsk)
12894 raise errors.OpExecError("Export failed, errors in %s" %
12895 utils.CommaJoin(failures))
12897 # At this point, the export was successful, we can cleanup/finish
12899 # Remove instance if requested
12900 if self.op.remove_instance:
12901 feedback_fn("Removing instance %s" % instance.name)
12902 _RemoveInstance(self, feedback_fn, instance,
12903 self.op.ignore_remove_failures)
12905 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12906 self._CleanupExports(feedback_fn)
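# fin_resu is the overall finalization status and dresults holds one boolean
# per instance disk, so a successful export of a two-disk instance returns
# e.g. (True, [True, True]) (illustrative).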
12908 return fin_resu, dresults
12911 class LUBackupRemove(NoHooksLU):
12912 """Remove exports related to the named instance.
12917 def ExpandNames(self):
12918 self.needed_locks = {}
12919 # We need all nodes to be locked in order for RemoveExport to work, but we
12920 # don't need to lock the instance itself, as nothing will happen to it (and
12921 # we can also remove exports for a removed instance)
12922 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12924 def Exec(self, feedback_fn):
12925 """Remove any export.
12928 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12929 # If the instance was not found we'll try with the name that was passed in.
12930 # This will only work if it was an FQDN, though.
12932 if not instance_name:
12934 instance_name = self.op.instance_name
12936 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12937 exportlist = self.rpc.call_export_list(locked_nodes)
12939 for node in exportlist:
12940 msg = exportlist[node].fail_msg
12942 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12944 if instance_name in exportlist[node].payload:
12946 result = self.rpc.call_export_remove(node, instance_name)
12947 msg = result.fail_msg
12949 logging.error("Could not remove export for instance %s"
12950 " on node %s: %s", instance_name, node, msg)
12952 if fqdn_warn and not found:
12953 feedback_fn("Export not found. If trying to remove an export belonging"
12954 " to a deleted instance please use its Fully Qualified"
12958 class LUGroupAdd(LogicalUnit):
12959 """Logical unit for creating node groups.
12962 HPATH = "group-add"
12963 HTYPE = constants.HTYPE_GROUP
12966 def ExpandNames(self):
12967 # We need the new group's UUID here so that we can create and acquire the
12968 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12969 # that it should not check whether the UUID exists in the configuration.
12970 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12971 self.needed_locks = {}
12972 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12974 def CheckPrereq(self):
12975 """Check prerequisites.
12977 This checks that the given group name is not an existing node group
12982 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12983 except errors.OpPrereqError:
12986 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12987 " node group (UUID: %s)" %
12988 (self.op.group_name, existing_uuid),
12989 errors.ECODE_EXISTS)
12991 if self.op.ndparams:
12992 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12994 if self.op.hv_state:
12995 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
12997 self.new_hv_state = None
12999 if self.op.disk_state:
13000 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13002 self.new_disk_state = None
13004 if self.op.diskparams:
13005 for templ in constants.DISK_TEMPLATES:
13006 if templ not in self.op.diskparams:
13007 self.op.diskparams[templ] = {}
13008 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13010 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
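# Without explicit disk parameters the new group simply inherits the
# cluster-wide defaults; otherwise every known disk template gets an entry
# (empty if unspecified) and is type-checked above.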
13012 if self.op.ipolicy:
13013 cluster = self.cfg.GetClusterInfo()
13014 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13016 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13017 except errors.ConfigurationError, err:
13018 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13019 errors.ECODE_INVAL)
13021 def BuildHooksEnv(self):
13022 """Build hooks env.
13026 "GROUP_NAME": self.op.group_name,
13029 def BuildHooksNodes(self):
13030 """Build hooks nodes.
13033 mn = self.cfg.GetMasterNode()
13034 return ([mn], [mn])
13036 def Exec(self, feedback_fn):
13037 """Add the node group to the cluster.
13040 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13041 uuid=self.group_uuid,
13042 alloc_policy=self.op.alloc_policy,
13043 ndparams=self.op.ndparams,
13044 diskparams=self.op.diskparams,
13045 ipolicy=self.op.ipolicy,
13046 hv_state_static=self.new_hv_state,
13047 disk_state_static=self.new_disk_state)
13049 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13050 del self.remove_locks[locking.LEVEL_NODEGROUP]
13053 class LUGroupAssignNodes(NoHooksLU):
13054 """Logical unit for assigning nodes to groups.
13059 def ExpandNames(self):
13060 # These raise errors.OpPrereqError on their own:
13061 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13062 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13064 # We want to lock all the affected nodes and groups. We have readily
13065 # available the list of nodes, and the *destination* group. To gather the
13066 # list of "source" groups, we need to fetch node information later on.
13067 self.needed_locks = {
13068 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13069 locking.LEVEL_NODE: self.op.nodes,
13072 def DeclareLocks(self, level):
13073 if level == locking.LEVEL_NODEGROUP:
13074 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13076 # Try to get all affected nodes' groups without having the group or node
13077 # lock yet. Needs verification later in the code flow.
13078 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13080 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13082 def CheckPrereq(self):
13083 """Check prerequisites.
13086 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13087 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13088 frozenset(self.op.nodes))
13090 expected_locks = (set([self.group_uuid]) |
13091 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13092 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13093 if actual_locks != expected_locks:
13094 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13095 " current groups are '%s', used to be '%s'" %
13096 (utils.CommaJoin(expected_locks),
13097 utils.CommaJoin(actual_locks)))
13099 self.node_data = self.cfg.GetAllNodesInfo()
13100 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13101 instance_data = self.cfg.GetAllInstancesInfo()
13103 if self.group is None:
13104 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13105 (self.op.group_name, self.group_uuid))
13107 (new_splits, previous_splits) = \
13108 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13109 for node in self.op.nodes],
13110 self.node_data, instance_data)
13113 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13115 if not self.op.force:
13116 raise errors.OpExecError("The following instances get split by this"
13117 " change and --force was not given: %s" %
13120 self.LogWarning("This operation will split the following instances: %s",
13123 if previous_splits:
13124 self.LogWarning("In addition, these already-split instances continue"
13125 " to be split across groups: %s",
13126 utils.CommaJoin(utils.NiceSort(previous_splits)))
13128 def Exec(self, feedback_fn):
13129 """Assign nodes to a new group.
13132 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13134 self.cfg.AssignGroupNodes(mods)
13137 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13138 """Check for split instances after a node assignment.
13140 This method considers a series of node assignments as an atomic operation,
13141 and returns information about split instances after applying the set of changes.
13144 In particular, it returns information about newly split instances, and
13145 instances that were already split and remain so after the change.
13147 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
13150 @type changes: list of (node_name, new_group_uuid) pairs.
13151 @param changes: list of node assignments to consider.
13152 @param node_data: a dict with data for all nodes
13153 @param instance_data: a dict with all instances to consider
13154 @rtype: a two-tuple
13155 @return: a list of instances that were previously okay and become split as a
13156 consequence of this change, and a list of instances that were previously
13157 split and this change does not fix.
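As an illustrative example: with nodes n1 and n2 both in group A and a DRBD
instance on (n1, n2), the change [("n2", "B")] reports that instance as newly
split; had n2 already been in group B, the instance would instead be reported
as previously (and still) split.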
13160 changed_nodes = dict((node, group) for node, group in changes
13161 if node_data[node].group != group)
13163 all_split_instances = set()
13164 previously_split_instances = set()
13166 def InstanceNodes(instance):
13167 return [instance.primary_node] + list(instance.secondary_nodes)
13169 for inst in instance_data.values():
13170 if inst.disk_template not in constants.DTS_INT_MIRROR:
13173 instance_nodes = InstanceNodes(inst)
13175 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13176 previously_split_instances.add(inst.name)
13178 if len(set(changed_nodes.get(node, node_data[node].group)
13179 for node in instance_nodes)) > 1:
13180 all_split_instances.add(inst.name)
13182 return (list(all_split_instances - previously_split_instances),
13183 list(previously_split_instances & all_split_instances))
13186 class _GroupQuery(_QueryBase):
13187 FIELDS = query.GROUP_FIELDS
13189 def ExpandNames(self, lu):
13190 lu.needed_locks = {}
13192 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13193 self._cluster = lu.cfg.GetClusterInfo()
13194 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13197 self.wanted = [name_to_uuid[name]
13198 for name in utils.NiceSort(name_to_uuid.keys())]
13200 # Accept the names to be either group names or UUIDs.
13203 all_uuid = frozenset(self._all_groups.keys())
13205 for name in self.names:
13206 if name in all_uuid:
13207 self.wanted.append(name)
13208 elif name in name_to_uuid:
13209 self.wanted.append(name_to_uuid[name])
13211 missing.append(name)
13214 raise errors.OpPrereqError("Some groups do not exist: %s" %
13215 utils.CommaJoin(missing),
13216 errors.ECODE_NOENT)
13218 def DeclareLocks(self, lu, level):
13221 def _GetQueryData(self, lu):
13222 """Computes the list of node groups and their attributes.
13225 do_nodes = query.GQ_NODE in self.requested_data
13226 do_instances = query.GQ_INST in self.requested_data
13228 group_to_nodes = None
13229 group_to_instances = None
13231 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13232 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13233 # latter GetAllInstancesInfo() is not enough, for we have to go through
13234 # instance->node. Hence, we will need to process nodes even if we only need
13235 # instance information.
13236 if do_nodes or do_instances:
13237 all_nodes = lu.cfg.GetAllNodesInfo()
13238 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13241 for node in all_nodes.values():
13242 if node.group in group_to_nodes:
13243 group_to_nodes[node.group].append(node.name)
13244 node_to_group[node.name] = node.group
13247 all_instances = lu.cfg.GetAllInstancesInfo()
13248 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13250 for instance in all_instances.values():
13251 node = instance.primary_node
13252 if node in node_to_group:
13253 group_to_instances[node_to_group[node]].append(instance.name)
13256 # Do not pass on node information if it was not requested.
13257 group_to_nodes = None
13259 return query.GroupQueryData(self._cluster,
13260 [self._all_groups[uuid]
13261 for uuid in self.wanted],
13262 group_to_nodes, group_to_instances)
13265 class LUGroupQuery(NoHooksLU):
13266 """Logical unit for querying node groups.
13271 def CheckArguments(self):
13272 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13273 self.op.output_fields, False)
13275 def ExpandNames(self):
13276 self.gq.ExpandNames(self)
13278 def DeclareLocks(self, level):
13279 self.gq.DeclareLocks(self, level)
13281 def Exec(self, feedback_fn):
13282 return self.gq.OldStyleQuery(self)
13285 class LUGroupSetParams(LogicalUnit):
13286 """Modifies the parameters of a node group.
13289 HPATH = "group-modify"
13290 HTYPE = constants.HTYPE_GROUP
13293 def CheckArguments(self):
13296 self.op.diskparams,
13297 self.op.alloc_policy,
13299 self.op.disk_state,
13303 if all_changes.count(None) == len(all_changes):
13304 raise errors.OpPrereqError("Please pass at least one modification",
13305 errors.ECODE_INVAL)
13307 def ExpandNames(self):
13308 # This raises errors.OpPrereqError on its own:
13309 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13311 self.needed_locks = {
13312 locking.LEVEL_NODEGROUP: [self.group_uuid],
13315 def CheckPrereq(self):
13316 """Check prerequisites.
13319 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13321 if self.group is None:
13322 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13323 (self.op.group_name, self.group_uuid))
13325 if self.op.ndparams:
13326 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13327 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13328 self.new_ndparams = new_ndparams
13330 if self.op.diskparams:
13331 self.new_diskparams = dict()
13332 for templ in constants.DISK_TEMPLATES:
13333 if templ not in self.op.diskparams:
13334 self.op.diskparams[templ] = {}
13335 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13336 self.op.diskparams[templ])
13337 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13338 self.new_diskparams[templ] = new_templ_params
13340 if self.op.hv_state:
13341 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13342 self.group.hv_state_static)
13344 if self.op.disk_state:
13345 self.new_disk_state = \
13346 _MergeAndVerifyDiskState(self.op.disk_state,
13347 self.group.disk_state_static)
13349 if self.op.ipolicy:
13350 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13354 def BuildHooksEnv(self):
13355 """Build hooks env.
13359 "GROUP_NAME": self.op.group_name,
13360 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13363 def BuildHooksNodes(self):
13364 """Build hooks nodes.
13367 mn = self.cfg.GetMasterNode()
13368 return ([mn], [mn])
13370 def Exec(self, feedback_fn):
13371 """Modifies the node group.
13376 if self.op.ndparams:
13377 self.group.ndparams = self.new_ndparams
13378 result.append(("ndparams", str(self.group.ndparams)))
13380 if self.op.diskparams:
13381 self.group.diskparams = self.new_diskparams
13382 result.append(("diskparams", str(self.group.diskparams)))
13384 if self.op.alloc_policy:
13385 self.group.alloc_policy = self.op.alloc_policy
13387 if self.op.hv_state:
13388 self.group.hv_state_static = self.new_hv_state
13390 if self.op.disk_state:
13391 self.group.disk_state_static = self.new_disk_state
13393 if self.op.ipolicy:
13394 self.group.ipolicy = self.new_ipolicy
13396 self.cfg.Update(self.group, feedback_fn)
13400 class LUGroupRemove(LogicalUnit):
13401 HPATH = "group-remove"
13402 HTYPE = constants.HTYPE_GROUP
13405 def ExpandNames(self):
13406 # This raises errors.OpPrereqError on its own:
13407 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13408 self.needed_locks = {
13409 locking.LEVEL_NODEGROUP: [self.group_uuid],
13412 def CheckPrereq(self):
13413 """Check prerequisites.
13415 This checks that the given group name exists as a node group, that it is
13416 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
13420 # Verify that the group is empty.
13421 group_nodes = [node.name
13422 for node in self.cfg.GetAllNodesInfo().values()
13423 if node.group == self.group_uuid]
13426 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13428 (self.op.group_name,
13429 utils.CommaJoin(utils.NiceSort(group_nodes))),
13430 errors.ECODE_STATE)
13432 # Verify the cluster would not be left group-less.
13433 if len(self.cfg.GetNodeGroupList()) == 1:
13434 raise errors.OpPrereqError("Group '%s' is the only group,"
13435 " cannot be removed" %
13436 self.op.group_name,
13437 errors.ECODE_STATE)
13439 def BuildHooksEnv(self):
13440 """Build hooks env.
13444 "GROUP_NAME": self.op.group_name,
13447 def BuildHooksNodes(self):
13448 """Build hooks nodes.
13451 mn = self.cfg.GetMasterNode()
13452 return ([mn], [mn])
13454 def Exec(self, feedback_fn):
13455 """Remove the node group.
13459 self.cfg.RemoveNodeGroup(self.group_uuid)
13460 except errors.ConfigurationError:
13461 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13462 (self.op.group_name, self.group_uuid))
13464 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13467 class LUGroupRename(LogicalUnit):
13468 HPATH = "group-rename"
13469 HTYPE = constants.HTYPE_GROUP
13472 def ExpandNames(self):
13473 # This raises errors.OpPrereqError on its own:
13474 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13476 self.needed_locks = {
13477 locking.LEVEL_NODEGROUP: [self.group_uuid],
13480 def CheckPrereq(self):
13481 """Check prerequisites.
13483 Ensures requested new name is not yet used.
13487 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13488 except errors.OpPrereqError:
13491 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13492 " node group (UUID: %s)" %
13493 (self.op.new_name, new_name_uuid),
13494 errors.ECODE_EXISTS)
13496 def BuildHooksEnv(self):
13497 """Build hooks env.
13501 "OLD_NAME": self.op.group_name,
13502 "NEW_NAME": self.op.new_name,
13505 def BuildHooksNodes(self):
13506 """Build hooks nodes.
13509 mn = self.cfg.GetMasterNode()
13511 all_nodes = self.cfg.GetAllNodesInfo()
13512 all_nodes.pop(mn, None)
13515 run_nodes.extend(node.name for node in all_nodes.values()
13516 if node.group == self.group_uuid)
13518 return (run_nodes, run_nodes)
13520 def Exec(self, feedback_fn):
13521 """Rename the node group.
13524 group = self.cfg.GetNodeGroup(self.group_uuid)
13527 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13528 (self.op.group_name, self.group_uuid))
13530 group.name = self.op.new_name
13531 self.cfg.Update(group, feedback_fn)
13533 return self.op.new_name
13536 class LUGroupEvacuate(LogicalUnit):
13537 HPATH = "group-evacuate"
13538 HTYPE = constants.HTYPE_GROUP
13541 def ExpandNames(self):
13542 # This raises errors.OpPrereqError on its own:
13543 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13545 if self.op.target_groups:
13546 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13547 self.op.target_groups)
13549 self.req_target_uuids = []
13551 if self.group_uuid in self.req_target_uuids:
13552 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13553 " as a target group (targets are %s)" %
13555 utils.CommaJoin(self.req_target_uuids)),
13556 errors.ECODE_INVAL)
13558 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13560 self.share_locks = _ShareAll()
13561 self.needed_locks = {
13562 locking.LEVEL_INSTANCE: [],
13563 locking.LEVEL_NODEGROUP: [],
13564 locking.LEVEL_NODE: [],
13567 def DeclareLocks(self, level):
13568 if level == locking.LEVEL_INSTANCE:
13569 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13571 # Lock instances optimistically, needs verification once node and group
13572 # locks have been acquired
13573 self.needed_locks[locking.LEVEL_INSTANCE] = \
13574 self.cfg.GetNodeGroupInstances(self.group_uuid)
13576 elif level == locking.LEVEL_NODEGROUP:
13577 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13579 if self.req_target_uuids:
13580 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13582 # Lock all groups used by instances optimistically; this requires going
13583 # via the node before it's locked, requiring verification later on
13584 lock_groups.update(group_uuid
13585 for instance_name in
13586 self.owned_locks(locking.LEVEL_INSTANCE)
13588 self.cfg.GetInstanceNodeGroups(instance_name))
13590 # No target groups, need to lock all of them
13591 lock_groups = locking.ALL_SET
13593 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13595 elif level == locking.LEVEL_NODE:
13596 # This will only lock the nodes in the group to be evacuated which
13597 # contain actual instances
13598 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13599 self._LockInstancesNodes()
13601 # Lock all nodes in group to be evacuated and target groups
13602 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13603 assert self.group_uuid in owned_groups
13604 member_nodes = [node_name
13605 for group in owned_groups
13606 for node_name in self.cfg.GetNodeGroup(group).members]
13607 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13609 def CheckPrereq(self):
13610 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13611 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13612 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13614 assert owned_groups.issuperset(self.req_target_uuids)
13615 assert self.group_uuid in owned_groups
13617 # Check if locked instances are still correct
13618 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13620 # Get instance information
13621 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13623 # Check if node groups for locked instances are still correct
13624 for instance_name in owned_instances:
13625 inst = self.instances[instance_name]
13626 assert owned_nodes.issuperset(inst.all_nodes), \
13627 "Instance %s's nodes changed while we kept the lock" % instance_name
13629 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13632 assert self.group_uuid in inst_groups, \
13633 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13635 if self.req_target_uuids:
13636 # User requested specific target groups
13637 self.target_uuids = self.req_target_uuids
13639 # All groups except the one to be evacuated are potential targets
13640 self.target_uuids = [group_uuid for group_uuid in owned_groups
13641 if group_uuid != self.group_uuid]
13643 if not self.target_uuids:
13644 raise errors.OpPrereqError("There are no possible target groups",
13645 errors.ECODE_INVAL)
13647 def BuildHooksEnv(self):
13648 """Build hooks env.
13652 "GROUP_NAME": self.op.group_name,
13653 "TARGET_GROUPS": " ".join(self.target_uuids),
13656 def BuildHooksNodes(self):
13657 """Build hooks nodes.
13660 mn = self.cfg.GetMasterNode()
13662 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13664 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13666 return (run_nodes, run_nodes)
13668 def Exec(self, feedback_fn):
13669 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13671 assert self.group_uuid not in self.target_uuids
13673 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13674 instances=instances, target_groups=self.target_uuids)
13676 ial.Run(self.op.iallocator)
13678 if not ial.success:
13679 raise errors.OpPrereqError("Can't compute group evacuation using"
13680 " iallocator '%s': %s" %
13681 (self.op.iallocator, ial.info),
13682 errors.ECODE_NORES)
13684 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
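# "jobs" is a list of opcode lists; each inner list is submitted as a
# separate job when the processor handles the ResultWithJobs value below.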
13686 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13687 len(jobs), self.op.group_name)
13689 return ResultWithJobs(jobs)
13692 class TagsLU(NoHooksLU): # pylint: disable=W0223
13693 """Generic tags LU.
13695 This is an abstract class which is the parent of all the other tags LUs.
13698 def ExpandNames(self):
13699 self.group_uuid = None
13700 self.needed_locks = {}
13701 if self.op.kind == constants.TAG_NODE:
13702 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13703 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13704 elif self.op.kind == constants.TAG_INSTANCE:
13705 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13706 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13707 elif self.op.kind == constants.TAG_NODEGROUP:
13708 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13710 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13711 # not possible to acquire the BGL based on opcode parameters)
13713 def CheckPrereq(self):
13714 """Check prerequisites.
13717 if self.op.kind == constants.TAG_CLUSTER:
13718 self.target = self.cfg.GetClusterInfo()
13719 elif self.op.kind == constants.TAG_NODE:
13720 self.target = self.cfg.GetNodeInfo(self.op.name)
13721 elif self.op.kind == constants.TAG_INSTANCE:
13722 self.target = self.cfg.GetInstanceInfo(self.op.name)
13723 elif self.op.kind == constants.TAG_NODEGROUP:
13724 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13726 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13727 str(self.op.kind), errors.ECODE_INVAL)
13730 class LUTagsGet(TagsLU):
13731 """Returns the tags of a given object.
13736 def ExpandNames(self):
13737 TagsLU.ExpandNames(self)
13739 # Share locks as this is only a read operation
13740 self.share_locks = _ShareAll()
13742 def Exec(self, feedback_fn):
13743 """Returns the tag list.
13746 return list(self.target.GetTags())
13749 class LUTagsSearch(NoHooksLU):
13750 """Searches the tags for a given pattern.
13755 def ExpandNames(self):
13756 self.needed_locks = {}
13758 def CheckPrereq(self):
13759 """Check prerequisites.
13761 This checks the passed pattern for validity by compiling it.
13765 self.re = re.compile(self.op.pattern)
13766 except re.error, err:
13767 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13768 (self.op.pattern, err), errors.ECODE_INVAL)
13770 def Exec(self, feedback_fn):
13771 """Returns the tag list.
13775 tgts = [("/cluster", cfg.GetClusterInfo())]
13776 ilist = cfg.GetAllInstancesInfo().values()
13777 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13778 nlist = cfg.GetAllNodesInfo().values()
13779 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13780 tgts.extend(("/nodegroup/%s" % n.name, n)
13781 for n in cfg.GetAllNodeGroupsInfo().values())
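# Each match is collected as a (path, tag) pair, e.g. (illustrative)
# ("/instances/inst1.example.com", "web-frontend").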
13783 for path, target in tgts:
13784 for tag in target.GetTags():
13785 if self.re.search(tag):
13786 results.append((path, tag))
13790 class LUTagsSet(TagsLU):
13791 """Sets a tag on a given object.
13796 def CheckPrereq(self):
13797 """Check prerequisites.
13799 This checks the type and length of the tag name and value.
13802 TagsLU.CheckPrereq(self)
13803 for tag in self.op.tags:
13804 objects.TaggableObject.ValidateTag(tag)
13806 def Exec(self, feedback_fn):
13811 for tag in self.op.tags:
13812 self.target.AddTag(tag)
13813 except errors.TagError, err:
13814 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13815 self.cfg.Update(self.target, feedback_fn)
13818 class LUTagsDel(TagsLU):
13819 """Delete a list of tags from a given object.
13824 def CheckPrereq(self):
13825 """Check prerequisites.
13827 This checks that we have the given tag.
13830 TagsLU.CheckPrereq(self)
13831 for tag in self.op.tags:
13832 objects.TaggableObject.ValidateTag(tag)
13833 del_tags = frozenset(self.op.tags)
13834 cur_tags = self.target.GetTags()
13836 diff_tags = del_tags - cur_tags
13838 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13839 raise errors.OpPrereqError("Tag(s) %s not found" %
13840 (utils.CommaJoin(diff_names), ),
13841 errors.ECODE_NOENT)
13843 def Exec(self, feedback_fn):
13844 """Remove the tag from the object.
13847 for tag in self.op.tags:
13848 self.target.RemoveTag(tag)
13849 self.cfg.Update(self.target, feedback_fn)
13852 class LUTestDelay(NoHooksLU):
13853 """Sleep for a specified amount of time.
13855 This LU sleeps on the master and/or nodes for a specified amount of time.
13861 def ExpandNames(self):
13862 """Expand names and set required locks.
13864 This expands the node list, if any.
13867 self.needed_locks = {}
13868 if self.op.on_nodes:
13869 # _GetWantedNodes can be used here, but is not always appropriate to use
13870 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13871 # more information.
13872 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13873 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13875 def _TestDelay(self):
13876 """Do the actual sleep.
13879 if self.op.on_master:
13880 if not utils.TestDelay(self.op.duration):
13881 raise errors.OpExecError("Error during master delay test")
13882 if self.op.on_nodes:
13883 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13884 for node, node_result in result.items():
13885 node_result.Raise("Failure during rpc call to node %s" % node)
13887 def Exec(self, feedback_fn):
13888 """Execute the test delay opcode, with the wanted repetitions.
13891 if self.op.repeat == 0:
13894 top_value = self.op.repeat - 1
13895 for i in range(self.op.repeat):
13896 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13900 class LUTestJqueue(NoHooksLU):
13901 """Utility LU to test some aspects of the job queue.
13906 # Must be lower than default timeout for WaitForJobChange to see whether it
13907 # notices changed jobs
13908 _CLIENT_CONNECT_TIMEOUT = 20.0
13909 _CLIENT_CONFIRM_TIMEOUT = 60.0
13912 def _NotifyUsingSocket(cls, cb, errcls):
13913 """Opens a Unix socket and waits for another program to connect.
13916 @param cb: Callback to send socket name to client
13917 @type errcls: class
13918 @param errcls: Exception class to use for errors
13921 # Using a temporary directory as there's no easy way to create temporary
13922 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
13924 tmpdir = tempfile.mkdtemp()
13926 tmpsock = utils.PathJoin(tmpdir, "sock")
13928 logging.debug("Creating temporary socket at %s", tmpsock)
13929 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13934 # Send details to client
13937 # Wait for client to connect before continuing
13938 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13940 (conn, _) = sock.accept()
13941 except socket.error, err:
13942 raise errcls("Client didn't connect in time (%s)" % err)
13946 # Remove as soon as client is connected
13947 shutil.rmtree(tmpdir)
13949 # Wait for client to close
13952 # pylint: disable=E1101
13953 # Instance of '_socketobject' has no ... member
13954 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13956 except socket.error, err:
13957 raise errcls("Client failed to confirm notification (%s)" % err)
13961 def _SendNotification(self, test, arg, sockname):
13962 """Sends a notification to the client.
13965 @param test: Test name
13966 @param arg: Test argument (depends on test)
13967 @type sockname: string
13968 @param sockname: Socket path
13971 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13973 def _Notify(self, prereq, test, arg):
13974 """Notifies the client of a test.
13977 @param prereq: Whether this is a prereq-phase test
13979 @param test: Test name
13980 @param arg: Test argument (depends on test)
13984 errcls = errors.OpPrereqError
13986 errcls = errors.OpExecError
13988 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13992 def CheckArguments(self):
13993 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13994 self.expandnames_calls = 0
13996 def ExpandNames(self):
13997 checkargs_calls = getattr(self, "checkargs_calls", 0)
13998 if checkargs_calls < 1:
13999 raise errors.ProgrammerError("CheckArguments was not called")
14001 self.expandnames_calls += 1
14003 if self.op.notify_waitlock:
14004 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14006 self.LogInfo("Expanding names")
14008 # Get lock on master node (just to get a lock, not for a particular reason)
14009 self.needed_locks = {
14010 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14013 def Exec(self, feedback_fn):
14014 if self.expandnames_calls < 1:
14015 raise errors.ProgrammerError("ExpandNames was not called")
14017 if self.op.notify_exec:
14018 self._Notify(False, constants.JQT_EXEC, None)
14020 self.LogInfo("Executing")
14022 if self.op.log_messages:
14023 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14024 for idx, msg in enumerate(self.op.log_messages):
14025 self.LogInfo("Sending log message %s", idx + 1)
14026 feedback_fn(constants.JQT_MSGPREFIX + msg)
14027 # Report how many test messages have been sent
14028 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14031 raise errors.OpExecError("Opcode failure was requested")
14036 class IAllocator(object):
14037 """IAllocator framework.
14039 An IAllocator instance has four sets of attributes:
14040 - cfg that is needed to query the cluster
14041 - input data (all members of the _KEYS class attribute are required)
14042 - four buffer attributes (in_text, in_data, out_text, out_data) that represent the
14043 input (to the external script) in text and data structure format,
14044 and the output from it, again in two formats
14045 - the result variables from the script (success, info, result) for easy usage
14049 # pylint: disable=R0902
14050 # lots of instance attributes
14052 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14054 self.rpc = rpc_runner
14055 # init buffer variables
14056 self.in_text = self.out_text = self.in_data = self.out_data = None
14057 # init all input fields so that pylint is happy
14059 self.memory = self.disks = self.disk_template = None
14060 self.os = self.tags = self.nics = self.vcpus = None
14061 self.hypervisor = None
14062 self.relocate_from = None
14064 self.instances = None
14065 self.evac_mode = None
14066 self.target_groups = []
14068 self.required_nodes = None
14069 # init result fields
14070 self.success = self.info = self.result = None
14073 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14075 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14076 " IAllocator" % self.mode)
14078 keyset = [n for (n, _) in keydata]
14081 if key not in keyset:
14082 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14083 " IAllocator" % key)
14084 setattr(self, key, kwargs[key])
14087 if key not in kwargs:
14088 raise errors.ProgrammerError("Missing input parameter '%s' to"
14089 " IAllocator" % key)
14090 self._BuildInputData(compat.partial(fn, self), keydata)
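# Typical usage from a logical unit (illustrative sketch, mirroring e.g.
# LUGroupEvacuate.Exec):
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=instances, target_groups=target_uuids)
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpPrereqError(...)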
14092 def _ComputeClusterData(self):
14093 """Compute the generic allocator input data.
14095 This is the data that is independent of the actual operation.
14099 cluster_info = cfg.GetClusterInfo()
14102 "version": constants.IALLOCATOR_VERSION,
14103 "cluster_name": cfg.GetClusterName(),
14104 "cluster_tags": list(cluster_info.GetTags()),
14105 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14106 # we don't have job IDs
14108 ninfo = cfg.GetAllNodesInfo()
14109 iinfo = cfg.GetAllInstancesInfo().values()
14110 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14113 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14115 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14116 hypervisor_name = self.hypervisor
14117 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14118 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14120 hypervisor_name = cluster_info.primary_hypervisor
14122 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14125 self.rpc.call_all_instances_info(node_list,
14126 cluster_info.enabled_hypervisors)
14128 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14130 config_ndata = self._ComputeBasicNodeData(ninfo)
14131 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14132 i_list, config_ndata)
14133 assert len(data["nodes"]) == len(ninfo), \
14134 "Incomplete node data computed"
14136 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14138 self.in_data = data
14141 def _ComputeNodeGroupData(cfg):
14142 """Compute node groups data.
14145 ng = dict((guuid, {
14146 "name": gdata.name,
14147 "alloc_policy": gdata.alloc_policy,
14149 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
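# Illustrative shape of the computed mapping:
#   {"group-uuid-1": {"name": "default", "alloc_policy": "preferred", ...}}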
14154 def _ComputeBasicNodeData(node_cfg):
14155 """Compute global node data.
14158 @returns: a dict of name: (node dict, node config)
14161 # fill in static (config-based) values
14162 node_results = dict((ninfo.name, {
14163 "tags": list(ninfo.GetTags()),
14164 "primary_ip": ninfo.primary_ip,
14165 "secondary_ip": ninfo.secondary_ip,
14166 "offline": ninfo.offline,
14167 "drained": ninfo.drained,
14168 "master_candidate": ninfo.master_candidate,
14169 "group": ninfo.group,
14170 "master_capable": ninfo.master_capable,
14171 "vm_capable": ninfo.vm_capable,
14173 for ninfo in node_cfg.values())
14175 return node_results
14178 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14180 """Compute global node data.
14182 @param node_results: the basic node structures as filled from the config
14185 # TODO(dynmem): compute the right data on MAX and MIN memory
14186 # make a copy of the current dict
14187 node_results = dict(node_results)
14188 for nname, nresult in node_data.items():
14189 assert nname in node_results, "Missing basic data for node %s" % nname
14190 ninfo = node_cfg[nname]
14192 if not (ninfo.offline or ninfo.drained):
14193 nresult.Raise("Can't get data for node %s" % nname)
14194 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14196 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14198 for attr in ["memory_total", "memory_free", "memory_dom0",
14199 "vg_size", "vg_free", "cpu_total"]:
14200 if attr not in remote_info:
14201 raise errors.OpExecError("Node '%s' didn't return attribute"
14202 " '%s'" % (nname, attr))
14203 if not isinstance(remote_info[attr], int):
14204 raise errors.OpExecError("Node '%s' returned invalid value"
14206 (nname, attr, remote_info[attr]))
14207 # compute memory used by primary instances
14208 i_p_mem = i_p_up_mem = 0
14209 for iinfo, beinfo in i_list:
14210 if iinfo.primary_node == nname:
14211 i_p_mem += beinfo[constants.BE_MAXMEM]
14212 if iinfo.name not in node_iinfo[nname].payload:
14215 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14216 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14217 remote_info["memory_free"] -= max(0, i_mem_diff)
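# The adjustment above treats memory an instance could still grow into (its
# configured maximum minus what it currently uses) as unavailable, so the
# allocator does not over-commit the node.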
14219 if iinfo.admin_state == constants.ADMINST_UP:
14220 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14222 # compute memory used by instances
14224 "total_memory": remote_info["memory_total"],
14225 "reserved_memory": remote_info["memory_dom0"],
14226 "free_memory": remote_info["memory_free"],
14227 "total_disk": remote_info["vg_size"],
14228 "free_disk": remote_info["vg_free"],
14229 "total_cpus": remote_info["cpu_total"],
14230 "i_pri_memory": i_p_mem,
14231 "i_pri_up_memory": i_p_up_mem,
14233 pnr_dyn.update(node_results[nname])
14234 node_results[nname] = pnr_dyn
14236 return node_results
14239 def _ComputeInstanceData(cluster_info, i_list):
14240 """Compute global instance data.
14244 for iinfo, beinfo in i_list:
14246 for nic in iinfo.nics:
14247 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14251 "mode": filled_params[constants.NIC_MODE],
14252 "link": filled_params[constants.NIC_LINK],
14254 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14255 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14256 nic_data.append(nic_dict)
14258 "tags": list(iinfo.GetTags()),
14259 "admin_state": iinfo.admin_state,
14260 "vcpus": beinfo[constants.BE_VCPUS],
14261 "memory": beinfo[constants.BE_MAXMEM],
14263 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14265 "disks": [{constants.IDISK_SIZE: dsk.size,
14266 constants.IDISK_MODE: dsk.mode}
14267 for dsk in iinfo.disks],
14268 "disk_template": iinfo.disk_template,
14269 "hypervisor": iinfo.hypervisor,
14271 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14273 instance_data[iinfo.name] = pir
14275 return instance_data
14277 def _AddNewInstance(self):
14278 """Add new instance data to allocator structure.
14280 This, in combination with _ComputeClusterData, will create the
14281 correct structure needed as input for the allocator.
14283 The checks for the completeness of the opcode must have already been
14287 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14289 if self.disk_template in constants.DTS_INT_MIRROR:
14290 self.required_nodes = 2
14292 self.required_nodes = 1
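# Internally mirrored templates (e.g. DRBD) need both a primary and a
# secondary node from the allocator; all other templates only need a primary.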
14296 "disk_template": self.disk_template,
14299 "vcpus": self.vcpus,
14300 "memory": self.memory,
14301 "disks": self.disks,
14302 "disk_space_total": disk_space,
14304 "required_nodes": self.required_nodes,
14305 "hypervisor": self.hypervisor,
14310 def _AddRelocateInstance(self):
14311 """Add relocate instance data to allocator structure.
14313 This, in combination with _ComputeClusterData, will create the
14314 correct structure needed as input for the allocator.
14316 The checks for the completeness of the opcode must have already been
14320 instance = self.cfg.GetInstanceInfo(self.name)
14321 if instance is None:
14322 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14323 " IAllocator" % self.name)
14325 if instance.disk_template not in constants.DTS_MIRRORED:
14326 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14327 errors.ECODE_INVAL)
14329 if instance.disk_template in constants.DTS_INT_MIRROR and \
14330 len(instance.secondary_nodes) != 1:
14331 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14332 errors.ECODE_STATE)
14334 self.required_nodes = 1
14335 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14336 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14340 "disk_space_total": disk_space,
14341 "required_nodes": self.required_nodes,
14342 "relocate_from": self.relocate_from,
14346 def _AddNodeEvacuate(self):
14347 """Get data for node-evacuate requests.
14351 "instances": self.instances,
14352 "evac_mode": self.evac_mode,
14355 def _AddChangeGroup(self):
14356 """Get data for node-evacuate requests.
14360 "instances": self.instances,
14361 "target_groups": self.target_groups,
14364 def _BuildInputData(self, fn, keydata):
14365 """Build input data structures.
14368 self._ComputeClusterData()
14371 request["type"] = self.mode
14372 for keyname, keytype in keydata:
14373 if keyname not in request:
14374 raise errors.ProgrammerError("Request parameter %s is missing" %
14376 val = request[keyname]
14377 if not keytype(val):
14378 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14379 " validation, value %s, expected"
14380 " type %s" % (keyname, val, keytype))
14381 self.in_data["request"] = request
14383 self.in_text = serializer.Dump(self.in_data)
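# Illustrative fragment of the serialized request for an allocation:
#   "request": {"type": "allocate", "name": "inst1.example.com",
#               "memory": 1024, "vcpus": 1, "disk_template": "drbd",
#               "disks": [{"size": 1024, "mode": "rw"}], ...}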
14385 _STRING_LIST = ht.TListOf(ht.TString)
14386 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14387 # pylint: disable=E1101
14388 # Class '...' has no 'OP_ID' member
14389 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14390 opcodes.OpInstanceMigrate.OP_ID,
14391 opcodes.OpInstanceReplaceDisks.OP_ID])
14395 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14396 ht.TItems([ht.TNonEmptyString,
14397 ht.TNonEmptyString,
14398 ht.TListOf(ht.TNonEmptyString),
14401 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14402 ht.TItems([ht.TNonEmptyString,
14405 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14406 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14409 constants.IALLOCATOR_MODE_ALLOC:
14412 ("name", ht.TString),
14413 ("memory", ht.TInt),
14414 ("disks", ht.TListOf(ht.TDict)),
14415 ("disk_template", ht.TString),
14416 ("os", ht.TString),
14417 ("tags", _STRING_LIST),
14418 ("nics", ht.TListOf(ht.TDict)),
14419 ("vcpus", ht.TInt),
14420 ("hypervisor", ht.TString),
14422 constants.IALLOCATOR_MODE_RELOC:
14423 (_AddRelocateInstance,
14424 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14426 constants.IALLOCATOR_MODE_NODE_EVAC:
14427 (_AddNodeEvacuate, [
14428 ("instances", _STRING_LIST),
14429 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14431 constants.IALLOCATOR_MODE_CHG_GROUP:
14432 (_AddChangeGroup, [
14433 ("instances", _STRING_LIST),
14434 ("target_groups", _STRING_LIST),
14438 def Run(self, name, validate=True, call_fn=None):
14439 """Run an instance allocator and return the results.
14442 if call_fn is None:
14443 call_fn = self.rpc.call_iallocator_runner
14445 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14446 result.Raise("Failure while running the iallocator script")
14448 self.out_text = result.payload
14450 self._ValidateResult()
14452 def _ValidateResult(self):
14453 """Process the allocator results.
14455 This will process and, if successful, save the result in
14456 self.out_data and the other result attributes.
14460 rdict = serializer.Load(self.out_text)
14461 except Exception, err:
14462 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14464 if not isinstance(rdict, dict):
14465 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14467 # TODO: remove backwards compatibility in later versions
14468 if "nodes" in rdict and "result" not in rdict:
14469 rdict["result"] = rdict["nodes"]
14472 for key in "success", "info", "result":
14473 if key not in rdict:
14474 raise errors.OpExecError("Can't parse iallocator results:"
14475 " missing key '%s'" % key)
14476 setattr(self, key, rdict[key])
14478 if not self._result_check(self.result):
14479 raise errors.OpExecError("Iallocator returned invalid result,"
14480 " expected %s, got %s" %
14481 (self._result_check, self.result),
14482 errors.ECODE_INVAL)
14484 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14485 assert self.relocate_from is not None
14486 assert self.required_nodes == 1
14488 node2group = dict((name, ndata["group"])
14489 for (name, ndata) in self.in_data["nodes"].items())
14491 fn = compat.partial(self._NodesToGroups, node2group,
14492 self.in_data["nodegroups"])
14494 instance = self.cfg.GetInstanceInfo(self.name)
14495 request_groups = fn(self.relocate_from + [instance.primary_node])
14496 result_groups = fn(rdict["result"] + [instance.primary_node])
14498 if self.success and not set(result_groups).issubset(request_groups):
14499 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14500 " differ from original groups (%s)" %
14501 (utils.CommaJoin(result_groups),
14502 utils.CommaJoin(request_groups)))
14504 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14505 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14507 self.out_data = rdict
14510 def _NodesToGroups(node2group, groups, nodes):
14511 """Returns a list of unique group names for a list of nodes.
14513 @type node2group: dict
14514 @param node2group: Map from node name to group UUID
14516 @param groups: Group information
14518 @param nodes: Node names
14525 group_uuid = node2group[node]
14527 # Ignore unknown node
14531 group = groups[group_uuid]
14533 # Can't find group, let's use UUID
14534 group_name = group_uuid
14536 group_name = group["name"]
14538 result.add(group_name)
14540 return sorted(result)
14543 class LUTestAllocator(NoHooksLU):
14544 """Run allocator tests.
14546 This LU runs the allocator tests
14549 def CheckPrereq(self):
14550 """Check prerequisites.
14552 This checks the opcode parameters depending on the test direction and mode.
14555 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14556 for attr in ["memory", "disks", "disk_template",
14557 "os", "tags", "nics", "vcpus"]:
14558 if not hasattr(self.op, attr):
14559 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14560 attr, errors.ECODE_INVAL)
14561 iname = self.cfg.ExpandInstanceName(self.op.name)
14562 if iname is not None:
14563 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14564 iname, errors.ECODE_EXISTS)
14565 if not isinstance(self.op.nics, list):
14566 raise errors.OpPrereqError("Invalid parameter 'nics'",
14567 errors.ECODE_INVAL)
14568 if not isinstance(self.op.disks, list):
14569 raise errors.OpPrereqError("Invalid parameter 'disks'",
14570 errors.ECODE_INVAL)
14571 for row in self.op.disks:
14572 if (not isinstance(row, dict) or
14573 constants.IDISK_SIZE not in row or
14574 not isinstance(row[constants.IDISK_SIZE], int) or
14575 constants.IDISK_MODE not in row or
14576 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14577 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14578 " parameter", errors.ECODE_INVAL)
14579 if self.op.hypervisor is None:
14580 self.op.hypervisor = self.cfg.GetHypervisorType()
14581 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14582 fname = _ExpandInstanceName(self.cfg, self.op.name)
14583 self.op.name = fname
14584 self.relocate_from = \
14585 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14586 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14587 constants.IALLOCATOR_MODE_NODE_EVAC):
14588 if not self.op.instances:
14589 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14590 self.op.instances = _GetWantedInstances(self, self.op.instances)
14592 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14593 self.op.mode, errors.ECODE_INVAL)
14595 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14596 if self.op.allocator is None:
14597 raise errors.OpPrereqError("Missing allocator name",
14598 errors.ECODE_INVAL)
14599 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14600 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14601 self.op.direction, errors.ECODE_INVAL)
14603 def Exec(self, feedback_fn):
14604 """Run the allocator test.
14607 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14608 ial = IAllocator(self.cfg, self.rpc,
14611 memory=self.op.memory,
14612 disks=self.op.disks,
14613 disk_template=self.op.disk_template,
14617 vcpus=self.op.vcpus,
14618 hypervisor=self.op.hypervisor,
14620 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14621 ial = IAllocator(self.cfg, self.rpc,
14624 relocate_from=list(self.relocate_from),
14626 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14627 ial = IAllocator(self.cfg, self.rpc,
14629 instances=self.op.instances,
14630 target_groups=self.op.target_groups)
14631 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14632 ial = IAllocator(self.cfg, self.rpc,
14634 instances=self.op.instances,
14635 evac_mode=self.op.evac_mode)
14637 raise errors.ProgrammerError("Uncatched mode %s in"
14638 " LUTestAllocator.Exec", self.op.mode)
14640 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14641 result = ial.in_text
14643 ial.Run(self.op.allocator, validate=False)
14644 result = ial.out_text
14648 #: Query type implementations
14650 constants.QR_INSTANCE: _InstanceQuery,
14651 constants.QR_NODE: _NodeQuery,
14652 constants.QR_GROUP: _GroupQuery,
14653 constants.QR_OS: _OsQuery,
14656 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14659 def _GetQueryImplementation(name):
14660 """Returns the implemtnation for a query type.
14662 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14666 return _QUERY_IMPL[name]
14668 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14669 errors.ECODE_INVAL)