4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80     @type jobs: list of lists of L{opcodes.OpCode}
81 @param jobs: A list of lists of opcode objects
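# Illustrative sketch (not part of the original module): an LU's Exec method
# could hand follow-up work to the job queue roughly like this, assuming an
# opcode such as opcodes.OpInstanceStartup taking an instance_name parameter:
#
#   def Exec(self, feedback_fn):
#     ops = [opcodes.OpInstanceStartup(instance_name=self.op.instance_name)]
#     return ResultWithJobs([ops], warning="instance started asynchronously")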
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
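  # A minimal sketch of a concrete LU following the rules above (the class
  # name and hook path are illustrative, not taken from this module):
  #
  #   class LUExampleNoop(LogicalUnit):
  #     HPATH = "example-noop"
  #     HTYPE = constants.HTYPE_CLUSTER
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {}
  #
  #     def CheckPrereq(self):
  #       pass
  #
  #     def BuildHooksEnv(self):
  #       return {"OP_TARGET": self.cfg.GetClusterName()}
  #
  #     def BuildHooksNodes(self):
  #       return ([], [self.cfg.GetMasterNode()])
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("nothing to do")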
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable=C0103
137 self.LogStep = processor.LogStep # pylint: disable=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
156     This method is for doing a simple syntactic check and ensuring
157     validity of opcode parameters, without any cluster-related
158     checks. While the same can be accomplished in ExpandNames and/or
159     CheckPrereq, doing these separately is better because:
161       - ExpandNames is left as a purely lock-related function
162 - CheckPrereq is run after we have acquired locks (and possible
165 The function is allowed to change the self.op attribute so that
166     later methods need not worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
211 # The implementation of this method is mandatory only if the new LU is
212 # concurrent, so that old LUs don't need to be changed all at the same
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
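  # Sketch of a shared-lock declaration (illustrative): a read-only LU can ask
  # for all node locks in shared mode so it does not serialise on other LUs:
  #
  #   def ExpandNames(self):
  #     self.share_locks = _ShareAll()
  #     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}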
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
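  # Typical usage sketch (mirroring the _LockInstancesNodes example further
  # down): an LU that locked instances in ExpandNames can compute its node
  # locks once the instance locks are held:
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()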
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
242 it should be idempotent - no cluster or system changes are
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
264 errors.OpExecError for failures that are somewhat dealt with in
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
296       hook should run after the execution. If no nodes are needed, return an
297       empty list (and not None).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
316     @param feedback_fn: function used to send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
323     # API must be kept, thus we ignore the unused-argument and
324     # "could be a function" warnings
325 # pylint: disable=W0613,R0201
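  # Sketch of an LU overriding this callback (illustrative): summarise the
  # post-phase hook results for the caller and pass the previous Exec result
  # through unchanged:
  #
  #   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  #     if phase == constants.HOOKS_PHASE_POST:
  #       feedback_fn("post hooks ran on %d node(s)" % len(hook_results))
  #     return lu_result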
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
334 name. It also initializes needed_locks as a dict, if this hasn't been done
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to just lock some instance's nodes, or
359 to just lock primaries or secondary nodes, if needed.
361     It should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
373     # TODO: check if we've really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
403 def BuildHooksEnv(self):
404     """Empty BuildHooksEnv for NoHooksLU.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422   they can mix legacy code with tasklets. Locking needs to be done in the LU;
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438     """Check prerequisites for this tasklet.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
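# Minimal tasklet sketch (illustrative only), combining the rules above with
# the tasklet list mentioned in LogicalUnit.ExpandNames:
#
#   class _ExampleTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("tasklet work happens here")
#
#   # ...and inside some LU's ExpandNames:
#   #   self.tasklets = [_ExampleTasklet(self)]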
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561 """Checks if the owned node groups are still correct for an instance.
563 @type cfg: L{config.ConfigWriter}
564 @param cfg: The cluster configuration
565 @type instance_name: string
566 @param instance_name: Instance name
567 @type owned_groups: set or frozenset
568 @param owned_groups: List of currently owned node groups
571 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
573 if not owned_groups.issuperset(inst_groups):
574 raise errors.OpPrereqError("Instance %s's node groups changed since"
575 " locks were acquired, current groups are"
576                                " '%s', owning groups '%s'; retry the"
579 utils.CommaJoin(inst_groups),
580 utils.CommaJoin(owned_groups)),
586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587 """Checks if the instances in a node group are still correct.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type group_uuid: string
592 @param group_uuid: Node group UUID
593 @type owned_instances: set or frozenset
594 @param owned_instances: List of currently owned instances
597 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598 if owned_instances != wanted_instances:
599 raise errors.OpPrereqError("Instances in node group '%s' changed since"
600 " locks were acquired, wanted '%s', have '%s';"
601 " retry the operation" %
603 utils.CommaJoin(wanted_instances),
604 utils.CommaJoin(owned_instances)),
607 return wanted_instances
610 def _SupportsOob(cfg, node):
611 """Tells if node supports OOB.
613 @type cfg: L{config.ConfigWriter}
614 @param cfg: The cluster configuration
615 @type node: L{objects.Node}
616 @param node: The node
617 @return: The OOB script if supported or an empty string otherwise
620 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
623 def _GetWantedNodes(lu, nodes):
624 """Returns list of checked and expanded node names.
626 @type lu: L{LogicalUnit}
627 @param lu: the logical unit on whose behalf we execute
629 @param nodes: list of node names or None for all nodes
631 @return: the list of nodes, sorted
632   @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
636 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
638 return utils.NiceSort(lu.cfg.GetNodeList())
641 def _GetWantedInstances(lu, instances):
642 """Returns list of checked and expanded instance names.
644 @type lu: L{LogicalUnit}
645 @param lu: the logical unit on whose behalf we execute
646 @type instances: list
647 @param instances: list of instance names or None for all instances
649 @return: the list of instances, sorted
650   @raise errors.OpPrereqError: if the instances parameter is of the wrong type
651 @raise errors.OpPrereqError: if any of the passed instances is not found
655 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
657 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
661 def _GetUpdatedParams(old_params, update_dict,
662 use_default=True, use_none=False):
663 """Return the new version of a parameter dictionary.
665 @type old_params: dict
666 @param old_params: old parameters
667 @type update_dict: dict
668 @param update_dict: dict containing new parameter values, or
669 constants.VALUE_DEFAULT to reset the parameter to its default
671   @type use_default: boolean
672   @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673       values as 'to be deleted' values
674   @type use_none: boolean
675   @param use_none: whether to recognise C{None} values as 'to be
678 @return: the new parameter dictionary
681 params_copy = copy.deepcopy(old_params)
682 for key, val in update_dict.iteritems():
683 if ((use_default and val == constants.VALUE_DEFAULT) or
684 (use_none and val is None)):
690 params_copy[key] = val
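# Usage sketch with illustrative values: resetting one parameter to its
# default (it is removed from the result) while updating another:
#
#   old = {"kernel_path": "/boot/vmlinuz", "serial_console": True}
#   new = _GetUpdatedParams(old, {"kernel_path": constants.VALUE_DEFAULT,
#                                 "serial_console": False})
#   # new == {"serial_console": False}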
694 def _ReleaseLocks(lu, level, names=None, keep=None):
695 """Releases locks owned by an LU.
697 @type lu: L{LogicalUnit}
698 @param level: Lock level
699 @type names: list or None
700 @param names: Names of locks to release
701 @type keep: list or None
702 @param keep: Names of locks to retain
705 assert not (keep is not None and names is not None), \
706 "Only one of the 'names' and the 'keep' parameters can be given"
708 if names is not None:
709 should_release = names.__contains__
711 should_release = lambda name: name not in keep
713 should_release = None
719 # Determine which locks to release
720 for name in lu.owned_locks(level):
721 if should_release(name):
726 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
728 # Release just some locks
729 lu.glm.release(level, names=release)
731 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
734 lu.glm.release(level)
736 assert not lu.glm.is_owned(level), "No locks should be owned"
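# Usage sketch (illustrative): keep only the node locks still needed for one
# instance and release every other node-level lock held by the LU:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=self.cfg.GetInstanceInfo(instance_name).all_nodes)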
739 def _MapInstanceDisksToNodes(instances):
740 """Creates a map from (node, volume) to instance name.
742 @type instances: list of L{objects.Instance}
743 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
746 return dict(((node, vol), inst.name)
747 for inst in instances
748 for (node, vols) in inst.MapLVsByNode().items()
752 def _RunPostHook(lu, node_name):
753 """Runs the post-hook for an opcode on a single node.
756 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
758 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
760 # pylint: disable=W0702
761 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
764 def _CheckOutputFields(static, dynamic, selected):
765 """Checks whether all selected fields are valid.
767 @type static: L{utils.FieldSet}
768 @param static: static fields set
769 @type dynamic: L{utils.FieldSet}
770 @param dynamic: dynamic fields set
777 delta = f.NonMatching(selected)
779 raise errors.OpPrereqError("Unknown output fields selected: %s"
780 % ",".join(delta), errors.ECODE_INVAL)
783 def _CheckGlobalHvParams(params):
784 """Validates that given hypervisor params are not global ones.
786 This will ensure that instances don't get customised versions of
790 used_globals = constants.HVC_GLOBALS.intersection(params)
792 msg = ("The following hypervisor parameters are global and cannot"
793 " be customized at instance level, please modify them at"
794 " cluster level: %s" % utils.CommaJoin(used_globals))
795 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
798 def _CheckNodeOnline(lu, node, msg=None):
799 """Ensure that a given node is online.
801 @param lu: the LU on behalf of which we make the check
802 @param node: the node to check
803 @param msg: if passed, should be a message to replace the default one
804 @raise errors.OpPrereqError: if the node is offline
808 msg = "Can't use offline node"
809 if lu.cfg.GetNodeInfo(node).offline:
810 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
813 def _CheckNodeNotDrained(lu, node):
814 """Ensure that a given node is not drained.
816 @param lu: the LU on behalf of which we make the check
817 @param node: the node to check
818 @raise errors.OpPrereqError: if the node is drained
821 if lu.cfg.GetNodeInfo(node).drained:
822 raise errors.OpPrereqError("Can't use drained node %s" % node,
826 def _CheckNodeVmCapable(lu, node):
827 """Ensure that a given node is vm capable.
829 @param lu: the LU on behalf of which we make the check
830 @param node: the node to check
831 @raise errors.OpPrereqError: if the node is not vm capable
834 if not lu.cfg.GetNodeInfo(node).vm_capable:
835 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
840 """Ensure that a node supports a given OS.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @param os_name: the OS to query about
845 @param force_variant: whether to ignore variant errors
846   @raise errors.OpPrereqError: if the node does not support the OS
849 result = lu.rpc.call_os_get(node, os_name)
850 result.Raise("OS '%s' not in supported OS list for node %s" %
852 prereq=True, ecode=errors.ECODE_INVAL)
853 if not force_variant:
854 _CheckOSVariant(result.payload, os_name)
857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858 """Ensure that a node has the given secondary ip.
860 @type lu: L{LogicalUnit}
861 @param lu: the LU on behalf of which we make the check
863 @param node: the node to check
864 @type secondary_ip: string
865 @param secondary_ip: the ip to check
866 @type prereq: boolean
867 @param prereq: whether to throw a prerequisite or an execute error
868 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
872 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873 result.Raise("Failure checking secondary ip on node %s" % node,
874 prereq=prereq, ecode=errors.ECODE_ENVIRON)
875 if not result.payload:
876 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877 " please fix and re-run this command" % secondary_ip)
879 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
881 raise errors.OpExecError(msg)
884 def _GetClusterDomainSecret():
885 """Reads the cluster domain secret.
888 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
892 def _CheckInstanceDown(lu, instance, reason):
893 """Ensure that an instance is not running."""
894 if instance.admin_up:
895 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896 (instance.name, reason), errors.ECODE_STATE)
898 pnode = instance.primary_node
899 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900 ins_l.Raise("Can't contact node %s for instance information" % pnode,
901 prereq=True, ecode=errors.ECODE_ENVIRON)
903 if instance.name in ins_l.payload:
904 raise errors.OpPrereqError("Instance %s is running, %s" %
905 (instance.name, reason), errors.ECODE_STATE)
908 def _ExpandItemName(fn, name, kind):
909 """Expand an item name.
911 @param fn: the function to use for expansion
912 @param name: requested item name
913 @param kind: text description ('Node' or 'Instance')
914 @return: the resolved (full) name
915 @raise errors.OpPrereqError: if the item is not found
919 if full_name is None:
920 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925 def _ExpandNodeName(cfg, name):
926 """Wrapper over L{_ExpandItemName} for nodes."""
927 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
930 def _ExpandInstanceName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for instance."""
932 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936 memory, vcpus, nics, disk_template, disks,
937 bep, hvp, hypervisor_name, tags):
938   """Builds instance-related env variables for hooks.
940 This builds the hook environment from individual variables.
943 @param name: the name of the instance
944 @type primary_node: string
945 @param primary_node: the name of the instance's primary node
946 @type secondary_nodes: list
947 @param secondary_nodes: list of secondary nodes as strings
948 @type os_type: string
949 @param os_type: the name of the instance's OS
950 @type status: boolean
951 @param status: the should_run status of the instance
953 @param memory: the memory size of the instance
955 @param vcpus: the count of VCPUs the instance has
957 @param nics: list of tuples (ip, mac, mode, link) representing
958 the NICs the instance has
959 @type disk_template: string
960 @param disk_template: the disk template of the instance
962 @param disks: the list of (size, mode) pairs
964 @param bep: the backend parameters for the instance
966 @param hvp: the hypervisor parameters for the instance
967 @type hypervisor_name: string
968 @param hypervisor_name: the hypervisor for the instance
970 @param tags: list of instance tags as strings
972 @return: the hook environment for this instance
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
1030 def _NICListToTuple(lu, nics):
1031 """Build a list of nic information tuples.
1033 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034 value in LUInstanceQueryData.
1036 @type lu: L{LogicalUnit}
1037 @param lu: the logical unit on whose behalf we execute
1038 @type nics: list of L{objects.NIC}
1039 @param nics: list of nics to convert to hooks tuples
1043 cluster = lu.cfg.GetClusterInfo()
1047 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048 mode = filled_params[constants.NIC_MODE]
1049 link = filled_params[constants.NIC_LINK]
1050 hooks_nics.append((ip, mac, mode, link))
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055   """Builds instance-related env variables for hooks from an object.
1057 @type lu: L{LogicalUnit}
1058 @param lu: the logical unit on whose behalf we execute
1059 @type instance: L{objects.Instance}
1060 @param instance: the instance for which we should build the
1062 @type override: dict
1063 @param override: dictionary with key/values that will override
1066 @return: the hook environment dictionary
1069 cluster = lu.cfg.GetClusterInfo()
1070 bep = cluster.FillBE(instance)
1071 hvp = cluster.FillHV(instance)
1073 "name": instance.name,
1074 "primary_node": instance.primary_node,
1075 "secondary_nodes": instance.secondary_nodes,
1076 "os_type": instance.os,
1077 "status": instance.admin_up,
1078 "memory": bep[constants.BE_MEMORY],
1079 "vcpus": bep[constants.BE_VCPUS],
1080 "nics": _NICListToTuple(lu, instance.nics),
1081 "disk_template": instance.disk_template,
1082 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1085 "hypervisor_name": instance.hypervisor,
1086 "tags": instance.tags,
1089 args.update(override)
1090 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1093 def _AdjustCandidatePool(lu, exceptions):
1094 """Adjust the candidate pool after node operations.
1097 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1099 lu.LogInfo("Promoted nodes to master candidate role: %s",
1100 utils.CommaJoin(node.name for node in mod_list))
1101 for name in mod_list:
1102 lu.context.ReaddNode(name)
1103 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1105 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110 """Decide whether I should promote myself as a master candidate.
1113 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115   # the new node will increase mc_should by one, so:
1116 mc_should = min(mc_should + 1, cp_size)
1117 return mc_now < mc_should
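# Worked example (illustrative numbers): with candidate_pool_size=10, 7
# current candidates and 8 desired, adding this node gives
# mc_should = min(8 + 1, 10) = 9, and since 7 < 9 the node promotes itself.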
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121   """Check that the bridges needed by a list of nics exist.
1124 cluster = lu.cfg.GetClusterInfo()
1125 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126 brlist = [params[constants.NIC_LINK] for params in paramslist
1127 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1129 result = lu.rpc.call_bridges_exist(target_node, brlist)
1130 result.Raise("Error checking bridges on destination node '%s'" %
1131 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135   """Check that the bridges needed by an instance exist.
1139 node = instance.primary_node
1140 _CheckNicsBridgesExist(lu, instance.nics, node)
1143 def _CheckOSVariant(os_obj, name):
1144 """Check whether an OS name conforms to the os variants specification.
1146 @type os_obj: L{objects.OS}
1147 @param os_obj: OS object to check
1149 @param name: OS name passed by the user, to check for validity
1152 variant = objects.OS.GetVariant(name)
1153 if not os_obj.supported_variants:
1155 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156 " passed)" % (os_obj.name, variant),
1160 raise errors.OpPrereqError("OS name must include a variant",
1163 if variant not in os_obj.supported_variants:
1164 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1167 def _GetNodeInstancesInner(cfg, fn):
1168 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1171 def _GetNodeInstances(cfg, node_name):
1172 """Returns a list of all primary and secondary instances on a node.
1176 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180 """Returns primary instances on a node.
1183 return _GetNodeInstancesInner(cfg,
1184 lambda inst: node_name == inst.primary_node)
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188 """Returns secondary instances on a node.
1191 return _GetNodeInstancesInner(cfg,
1192 lambda inst: node_name in inst.secondary_nodes)
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196 """Returns the arguments for a storage type.
1199 # Special case for file storage
1200 if storage_type == constants.ST_FILE:
1201 # storage.FileStorage wants a list of storage directories
1202 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1210 for dev in instance.disks:
1211 cfg.SetDiskID(dev, node_name)
1213 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214 result.Raise("Failed to get disk status from node %s" % node_name,
1215 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1217 for idx, bdev_status in enumerate(result.payload):
1218 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225 """Check the sanity of iallocator and node arguments and use the
1226 cluster-wide iallocator if appropriate.
1228 Check that at most one of (iallocator, node) is specified. If none is
1229 specified, then the LU's opcode's iallocator slot is filled with the
1230 cluster-wide default iallocator.
1232 @type iallocator_slot: string
1233 @param iallocator_slot: the name of the opcode iallocator slot
1234 @type node_slot: string
1235 @param node_slot: the name of the opcode target node slot
1238 node = getattr(lu.op, node_slot, None)
1239 iallocator = getattr(lu.op, iallocator_slot, None)
1241 if node is not None and iallocator is not None:
1242     raise errors.OpPrereqError("Do not specify both iallocator and node",
1244 elif node is None and iallocator is None:
1245 default_iallocator = lu.cfg.GetDefaultIAllocator()
1246 if default_iallocator:
1247 setattr(lu.op, iallocator_slot, default_iallocator)
1249 raise errors.OpPrereqError("No iallocator or node given and no"
1250 " cluster-wide default iallocator found;"
1251 " please specify either an iallocator or a"
1252 " node, or set a cluster-wide default"
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257 """Decides on which iallocator to use.
1259 @type cfg: L{config.ConfigWriter}
1260 @param cfg: Cluster configuration object
1261 @type iallocator: string or None
1262 @param iallocator: Iallocator specified in opcode
1264 @return: Iallocator name
1268 # Use default iallocator
1269 iallocator = cfg.GetDefaultIAllocator()
1272 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273 " opcode nor as a cluster-wide default",
1279 class LUClusterPostInit(LogicalUnit):
1280 """Logical unit for running hooks after cluster initialization.
1283 HPATH = "cluster-init"
1284 HTYPE = constants.HTYPE_CLUSTER
1286 def BuildHooksEnv(self):
1291 "OP_TARGET": self.cfg.GetClusterName(),
1294 def BuildHooksNodes(self):
1295 """Build hooks nodes.
1298 return ([], [self.cfg.GetMasterNode()])
1300 def Exec(self, feedback_fn):
1307 class LUClusterDestroy(LogicalUnit):
1308 """Logical unit for destroying the cluster.
1311 HPATH = "cluster-destroy"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
1319 "OP_TARGET": self.cfg.GetClusterName(),
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1328 def CheckPrereq(self):
1329 """Check prerequisites.
1331 This checks whether the cluster is empty.
1333 Any errors are signaled by raising errors.OpPrereqError.
1336 master = self.cfg.GetMasterNode()
1338 nodelist = self.cfg.GetNodeList()
1339 if len(nodelist) != 1 or nodelist[0] != master:
1340 raise errors.OpPrereqError("There are still %d node(s) in"
1341 " this cluster." % (len(nodelist) - 1),
1343 instancelist = self.cfg.GetInstanceList()
1345 raise errors.OpPrereqError("There are still %d instance(s) in"
1346 " this cluster." % len(instancelist),
1349 def Exec(self, feedback_fn):
1350 """Destroys the cluster.
1353 master = self.cfg.GetMasterNode()
1355 # Run post hooks on master node before it's removed
1356 _RunPostHook(self, master)
1358 result = self.rpc.call_node_deactivate_master_ip(master)
1359 result.Raise("Could not disable the master role")
1364 def _VerifyCertificate(filename):
1365 """Verifies a certificate for L{LUClusterVerifyConfig}.
1367 @type filename: string
1368 @param filename: Path to PEM file
1372 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373 utils.ReadFile(filename))
1374 except Exception, err: # pylint: disable=W0703
1375 return (LUClusterVerifyConfig.ETYPE_ERROR,
1376 "Failed to load X509 certificate %s: %s" % (filename, err))
1379 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380 constants.SSL_CERT_EXPIRATION_ERROR)
1383 fnamemsg = "While verifying %s: %s" % (filename, msg)
1388 return (None, fnamemsg)
1389 elif errcode == utils.CERT_WARNING:
1390 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391 elif errcode == utils.CERT_ERROR:
1392 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1394 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1397 def _GetAllHypervisorParameters(cluster, instances):
1398 """Compute the set of all hypervisor parameters.
1400 @type cluster: L{objects.Cluster}
1401 @param cluster: the cluster object
1402   @type instances: list of L{objects.Instance}
1403 @param instances: additional instances from which to obtain parameters
1404 @rtype: list of (origin, hypervisor, parameters)
1405 @return: a list with all parameters found, indicating the hypervisor they
1406 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1411 for hv_name in cluster.enabled_hypervisors:
1412 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1414 for os_name, os_hvp in cluster.os_hvp.items():
1415 for hv_name, hv_params in os_hvp.items():
1417 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1420 # TODO: collapse identical parameter values in a single one
1421 for instance in instances:
1422 if instance.hvparams:
1423 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424 cluster.FillHV(instance)))
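# Example of returned entries (names and hypervisor are illustrative):
#   [("cluster", "xen-pvm", {...}),
#    ("os debootstrap", "xen-pvm", {...}),
#    ("instance inst1.example.com", "xen-pvm", {...})]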
1429 class _VerifyErrors(object):
1430 """Mix-in for cluster/group verify LUs.
1432 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433 self.op and self._feedback_fn to be available.)
1436 TCLUSTER = "cluster"
1438 TINSTANCE = "instance"
1440 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452 ENODEDRBD = (TNODE, "ENODEDRBD")
1453 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456 ENODEHV = (TNODE, "ENODEHV")
1457 ENODELVM = (TNODE, "ENODELVM")
1458 ENODEN1 = (TNODE, "ENODEN1")
1459 ENODENET = (TNODE, "ENODENET")
1460 ENODEOS = (TNODE, "ENODEOS")
1461 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463 ENODERPC = (TNODE, "ENODERPC")
1464 ENODESSH = (TNODE, "ENODESSH")
1465 ENODEVERSION = (TNODE, "ENODEVERSION")
1466 ENODESETUP = (TNODE, "ENODESETUP")
1467 ENODETIME = (TNODE, "ENODETIME")
1468 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1470 ETYPE_FIELD = "code"
1471 ETYPE_ERROR = "ERROR"
1472 ETYPE_WARNING = "WARNING"
1474 def _Error(self, ecode, item, msg, *args, **kwargs):
1475 """Format an error message.
1477 Based on the opcode's error_codes parameter, either format a
1478 parseable error code, or a simpler error string.
1480 This must be called only from Exec and functions called from Exec.
1483 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1485 # first complete the msg
1488 # then format the whole message
1489 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1496 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497 # and finally report it via the feedback_fn
1498 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
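  # Example of the two output forms (node name and message are illustrative):
  #   parseable: "ERROR:ENODELVM:node:node1.example.com:Can't get PV list from node"
  #   plain:     "ERROR: node node1.example.com: Can't get PV list from node"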
1500 def _ErrorIf(self, cond, *args, **kwargs):
1501 """Log an error message if the passed condition is True.
1505 or self.op.debug_simulate_errors) # pylint: disable=E1101
1507 self._Error(*args, **kwargs)
1508 # do not mark the operation as failed for WARN cases only
1509 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510 self.bad = self.bad or cond
1513 class LUClusterVerify(NoHooksLU):
1514 """Submits all jobs necessary to verify the cluster.
1519 def ExpandNames(self):
1520 self.needed_locks = {}
1522 def Exec(self, feedback_fn):
1525 if self.op.group_name:
1526 groups = [self.op.group_name]
1527 depends_fn = lambda: None
1529 groups = self.cfg.GetNodeGroupList()
1531 # Verify global configuration
1532 jobs.append([opcodes.OpClusterVerifyConfig()])
1534 # Always depend on global verification
1535 depends_fn = lambda: [(-len(jobs), [])]
1537 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538 depends=depends_fn())]
1539 for group in groups)
1541 # Fix up all parameters
1542 for op in itertools.chain(*jobs): # pylint: disable=W0142
1543 op.debug_simulate_errors = self.op.debug_simulate_errors
1544 op.verbose = self.op.verbose
1545 op.error_codes = self.op.error_codes
1547 op.skip_checks = self.op.skip_checks
1548 except AttributeError:
1549 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1551 return ResultWithJobs(jobs)
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555 """Verifies the cluster config.
1560 def _VerifyHVP(self, hvp_data):
1561 """Verifies locally the syntax of the hypervisor parameters.
1564 for item, hv_name, hv_params in hvp_data:
1565 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1568 hv_class = hypervisor.GetHypervisor(hv_name)
1569 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570 hv_class.CheckParameterSyntax(hv_params)
1571 except errors.GenericError, err:
1572 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1574 def ExpandNames(self):
1575 # Information can be safely retrieved as the BGL is acquired in exclusive
1577 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579 self.all_node_info = self.cfg.GetAllNodesInfo()
1580 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581 self.needed_locks = {}
1583 def Exec(self, feedback_fn):
1584     """Verify integrity of cluster, performing various tests on nodes.
1588 self._feedback_fn = feedback_fn
1590 feedback_fn("* Verifying cluster config")
1592 for msg in self.cfg.VerifyConfig():
1593 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1595 feedback_fn("* Verifying cluster certificate files")
1597 for cert_filename in constants.ALL_CERT_FILES:
1598 (errcode, msg) = _VerifyCertificate(cert_filename)
1599 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1601 feedback_fn("* Verifying hypervisor parameters")
1603 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604 self.all_inst_info.values()))
1606 feedback_fn("* Verifying all nodes belong to an existing group")
1608 # We do this verification here because, should this bogus circumstance
1609 # occur, it would never be caught by VerifyGroup, which only acts on
1610 # nodes/instances reachable from existing node groups.
1612 dangling_nodes = set(node.name for node in self.all_node_info.values()
1613 if node.group not in self.all_group_info)
1615 dangling_instances = {}
1616 no_node_instances = []
1618 for inst in self.all_inst_info.values():
1619 if inst.primary_node in dangling_nodes:
1620 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621 elif inst.primary_node not in self.all_node_info:
1622 no_node_instances.append(inst.name)
1627 utils.CommaJoin(dangling_instances.get(node.name,
1629 for node in dangling_nodes]
1631 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632                   "the following nodes (and their instances) belong to a"
1633                   " non-existing group: %s", utils.CommaJoin(pretty_dangling))
1635 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636 "the following instances have a non-existing primary-node:"
1637 " %s", utils.CommaJoin(no_node_instances))
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643 """Verifies the status of a node group.
1646 HPATH = "cluster-verify"
1647 HTYPE = constants.HTYPE_CLUSTER
1650 _HOOKS_INDENT_RE = re.compile("^", re.M)
1652 class NodeImage(object):
1653 """A class representing the logical and physical status of a node.
1656 @ivar name: the node name to which this object refers
1657 @ivar volumes: a structure as returned from
1658 L{ganeti.backend.GetVolumeList} (runtime)
1659 @ivar instances: a list of running instances (runtime)
1660 @ivar pinst: list of configured primary instances (config)
1661 @ivar sinst: list of configured secondary instances (config)
1662 @ivar sbp: dictionary of {primary-node: list of instances} for all
1663 instances for which this node is secondary (config)
1664 @ivar mfree: free memory, as reported by hypervisor (runtime)
1665 @ivar dfree: free disk, as reported by the node (runtime)
1666 @ivar offline: the offline status (config)
1667 @type rpc_fail: boolean
1668     @ivar rpc_fail: whether the RPC verify call was successful (overall,
1669 not whether the individual keys were correct) (runtime)
1670 @type lvm_fail: boolean
1671 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672 @type hyp_fail: boolean
1673 @ivar hyp_fail: whether the RPC call didn't return the instance list
1674 @type ghost: boolean
1675 @ivar ghost: whether this is a known node or not (config)
1676 @type os_fail: boolean
1677 @ivar os_fail: whether the RPC call didn't return valid OS data
1679 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680 @type vm_capable: boolean
1681 @ivar vm_capable: whether the node can host instances
1684 def __init__(self, offline=False, name=None, vm_capable=True):
1693 self.offline = offline
1694 self.vm_capable = vm_capable
1695 self.rpc_fail = False
1696 self.lvm_fail = False
1697 self.hyp_fail = False
1699 self.os_fail = False
1702 def ExpandNames(self):
1703 # This raises errors.OpPrereqError on its own:
1704 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1706 # Get instances in node group; this is unsafe and needs verification later
1707 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1709 self.needed_locks = {
1710 locking.LEVEL_INSTANCE: inst_names,
1711 locking.LEVEL_NODEGROUP: [self.group_uuid],
1712 locking.LEVEL_NODE: [],
1715 self.share_locks = _ShareAll()
1717 def DeclareLocks(self, level):
1718 if level == locking.LEVEL_NODE:
1719 # Get members of node group; this is unsafe and needs verification later
1720 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1722 all_inst_info = self.cfg.GetAllInstancesInfo()
1724 # In Exec(), we warn about mirrored instances that have primary and
1725 # secondary living in separate node groups. To fully verify that
1726 # volumes for these instances are healthy, we will need to do an
1727 # extra call to their secondaries. We ensure here those nodes will
1729 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730 # Important: access only the instances whose lock is owned
1731 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732 nodes.update(all_inst_info[inst].secondary_nodes)
1734 self.needed_locks[locking.LEVEL_NODE] = nodes
1736 def CheckPrereq(self):
1737 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1740 group_nodes = set(self.group_info.members)
1741 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1744 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1746 unlocked_instances = \
1747 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1750 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751 utils.CommaJoin(unlocked_nodes))
1753 if unlocked_instances:
1754 raise errors.OpPrereqError("Missing lock for instances: %s" %
1755 utils.CommaJoin(unlocked_instances))
1757 self.all_node_info = self.cfg.GetAllNodesInfo()
1758 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1760 self.my_node_names = utils.NiceSort(group_nodes)
1761 self.my_inst_names = utils.NiceSort(group_instances)
1763 self.my_node_info = dict((name, self.all_node_info[name])
1764 for name in self.my_node_names)
1766 self.my_inst_info = dict((name, self.all_inst_info[name])
1767 for name in self.my_inst_names)
1769 # We detect here the nodes that will need the extra RPC calls for verifying
1770 # split LV volumes; they should be locked.
1771 extra_lv_nodes = set()
1773 for inst in self.my_inst_info.values():
1774 if inst.disk_template in constants.DTS_INT_MIRROR:
1775 group = self.my_node_info[inst.primary_node].group
1776 for nname in inst.secondary_nodes:
1777 if self.all_node_info[nname].group != group:
1778 extra_lv_nodes.add(nname)
1780 unlocked_lv_nodes = \
1781 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1783 if unlocked_lv_nodes:
1784       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1785 utils.CommaJoin(unlocked_lv_nodes))
1786 self.extra_lv_nodes = list(extra_lv_nodes)
1788 def _VerifyNode(self, ninfo, nresult):
1789 """Perform some basic validation on data returned from a node.
1791 - check the result data structure is well formed and has all the
1793 - check ganeti version
1795 @type ninfo: L{objects.Node}
1796 @param ninfo: the node to check
1797 @param nresult: the results from the node
1799 @return: whether overall this call was successful (and we can expect
1800          reasonable values in the response)
1804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1806 # main result, nresult should be a non-empty dict
1807 test = not nresult or not isinstance(nresult, dict)
1808 _ErrorIf(test, self.ENODERPC, node,
1809 "unable to verify node: no data returned")
1813 # compares ganeti version
1814 local_version = constants.PROTOCOL_VERSION
1815 remote_version = nresult.get("version", None)
1816 test = not (remote_version and
1817 isinstance(remote_version, (list, tuple)) and
1818 len(remote_version) == 2)
1819 _ErrorIf(test, self.ENODERPC, node,
1820 "connection to node returned invalid data")
1824 test = local_version != remote_version[0]
1825 _ErrorIf(test, self.ENODEVERSION, node,
1826 "incompatible protocol versions: master %s,"
1827 " node %s", local_version, remote_version[0])
1831 # node seems compatible, we can actually try to look into its results
1833 # full package version
1834 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835 self.ENODEVERSION, node,
1836 "software version mismatch: master %s, node %s",
1837 constants.RELEASE_VERSION, remote_version[1],
1838 code=self.ETYPE_WARNING)
1840 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841 if ninfo.vm_capable and isinstance(hyp_result, dict):
1842 for hv_name, hv_result in hyp_result.iteritems():
1843 test = hv_result is not None
1844 _ErrorIf(test, self.ENODEHV, node,
1845 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1847 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848 if ninfo.vm_capable and isinstance(hvp_result, list):
1849 for item, hv_name, hv_result in hvp_result:
1850 _ErrorIf(True, self.ENODEHV, node,
1851 "hypervisor %s parameter verify failure (source %s): %s",
1852 hv_name, item, hv_result)
1854 test = nresult.get(constants.NV_NODESETUP,
1855 ["Missing NODESETUP results"])
1856 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1861 def _VerifyNodeTime(self, ninfo, nresult,
1862 nvinfo_starttime, nvinfo_endtime):
1863 """Check the node time.
1865 @type ninfo: L{objects.Node}
1866 @param ninfo: the node to check
1867 @param nresult: the remote results for the node
1868 @param nvinfo_starttime: the start time of the RPC call
1869 @param nvinfo_endtime: the end time of the RPC call
1873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1875 ntime = nresult.get(constants.NV_TIME, None)
1877 ntime_merged = utils.MergeTime(ntime)
1878 except (ValueError, TypeError):
1879 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1882 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1889 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890 "Node time diverges by at least %s from master node time",
1893 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894 """Check the node LVM results.
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param vg_name: the configured VG name
1906 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1908 # checks vg existence and size > 20G
1909 vglist = nresult.get(constants.NV_VGLIST, None)
1911 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1913 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914 constants.MIN_VG_SIZE)
1915 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1918 pvlist = nresult.get(constants.NV_PVLIST, None)
1919 test = pvlist is None
1920 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1922 # check that ':' is not present in PV names, since it's a
1923 # special character for lvcreate (denotes the range of PEs to
1925 for _, pvname, owner_vg in pvlist:
1926 test = ":" in pvname
1927 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928 " '%s' of VG '%s'", pvname, owner_vg)
1930 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931 """Check the node bridges.
1933 @type ninfo: L{objects.Node}
1934 @param ninfo: the node to check
1935 @param nresult: the remote results for the node
1936 @param bridges: the expected list of bridges
1943 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1945 missing = nresult.get(constants.NV_BRIDGES, None)
1946 test = not isinstance(missing, list)
1947 _ErrorIf(test, self.ENODENET, node,
1948 "did not return valid bridge information")
1950 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951 utils.CommaJoin(sorted(missing)))
1953 def _VerifyNodeNetwork(self, ninfo, nresult):
1954 """Check the node network connectivity results.
1956 @type ninfo: L{objects.Node}
1957 @param ninfo: the node to check
1958 @param nresult: the remote results for the node
1962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1964 test = constants.NV_NODELIST not in nresult
1965 _ErrorIf(test, self.ENODESSH, node,
1966 "node hasn't returned node ssh connectivity data")
1968 if nresult[constants.NV_NODELIST]:
1969 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970 _ErrorIf(True, self.ENODESSH, node,
1971 "ssh communication with node '%s': %s", a_node, a_msg)
1973 test = constants.NV_NODENETTEST not in nresult
1974 _ErrorIf(test, self.ENODENET, node,
1975 "node hasn't returned node tcp connectivity data")
1977 if nresult[constants.NV_NODENETTEST]:
1978 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1980 _ErrorIf(True, self.ENODENET, node,
1981 "tcp communication with node '%s': %s",
1982 anode, nresult[constants.NV_NODENETTEST][anode])
1984 test = constants.NV_MASTERIP not in nresult
1985 _ErrorIf(test, self.ENODENET, node,
1986 "node hasn't returned node master IP reachability data")
1988 if not nresult[constants.NV_MASTERIP]:
1989 if node == self.master_node:
1990 msg = "the master node cannot reach the master IP (not configured?)"
1992 msg = "cannot reach the master IP"
1993 _ErrorIf(True, self.ENODENET, node, msg)
1995 def _VerifyInstance(self, instance, instanceconfig, node_image,
1997 """Verify an instance.
1999 This function checks to see if the required block devices are
2000 available on the instance's node.
2003 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004 node_current = instanceconfig.primary_node
2006 node_vol_should = {}
2007 instanceconfig.MapLVsByNode(node_vol_should)
2009 for node in node_vol_should:
2010 n_img = node_image[node]
2011 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012 # ignore missing volumes on offline or broken nodes
2014 for volume in node_vol_should[node]:
2015 test = volume not in n_img.volumes
2016 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017 "volume %s missing on node %s", volume, node)
2019 if instanceconfig.admin_up:
2020 pri_img = node_image[node_current]
2021 test = instance not in pri_img.instances and not pri_img.offline
2022 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023 "instance not running on its primary node %s",
2026 diskdata = [(nname, success, status, idx)
2027 for (nname, disks) in diskstatus.items()
2028 for idx, (success, status) in enumerate(disks)]
2030 for nname, success, bdev_status, idx in diskdata:
2031 # the 'ghost node' construction in Exec() ensures that we have a
2033 snode = node_image[nname]
2034 bad_snode = snode.ghost or snode.offline
2035 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036 self.EINSTANCEFAULTYDISK, instance,
2037 "couldn't retrieve status for disk/%s on %s: %s",
2038 idx, nname, bdev_status)
2039 _ErrorIf((instanceconfig.admin_up and success and
2040 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041 self.EINSTANCEFAULTYDISK, instance,
2042 "disk/%s on %s is faulty", idx, nname)
2044 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045 """Verify if there are any unknown volumes in the cluster.
2047 The .os, .swap and backup volumes are ignored. All other volumes are
2048 reported as unknown.
2050 @type reserved: L{ganeti.utils.FieldSet}
2051 @param reserved: a FieldSet of reserved volume names
2054 for node, n_img in node_image.items():
2055 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056 # skip non-healthy nodes
2058 for volume in n_img.volumes:
2059 test = ((node not in node_vol_should or
2060 volume not in node_vol_should[node]) and
2061 not reserved.Matches(volume))
2062 self._ErrorIf(test, self.ENODEORPHANLV, node,
2063 "volume %s is unknown", volume)
2065 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066 """Verify N+1 Memory Resilience.
2068 Check that if any single node dies we can still start all the
2069 instances it was primary for.
2072 cluster_info = self.cfg.GetClusterInfo()
2073 for node, n_img in node_image.items():
2074 # This code checks that every node which is now listed as
2075 # secondary has enough memory to host all the instances it is
2076 # expected to take over, should a single other node in the cluster fail.
2077 # FIXME: not ready for failover to an arbitrary node
2078 # FIXME: does not support file-backed instances
2079 # WARNING: we currently take into account down instances as well
2080 # as up ones, considering that even if they're down someone
2081 # might want to start them even in the event of a node failure.
2083 # we're skipping offline nodes from the N+1 warning, since
2084 # most likely we don't have good memory information from them;
2085 # we already list instances living on such nodes, and that's
2088 for prinode, instances in n_img.sbp.items():
2090 for instance in instances:
2091 bep = cluster_info.FillBE(instance_cfg[instance])
2092 if bep[constants.BE_AUTO_BALANCE]:
2093 needed_mem += bep[constants.BE_MEMORY]
2094 test = n_img.mfree < needed_mem
2095 self._ErrorIf(test, self.ENODEN1, node,
2096 "not enough memory to accomodate instance failovers"
2097 " should node %s fail (%dMiB needed, %dMiB available)",
2098 prinode, needed_mem, n_img.mfree)
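# Worked example (illustrative only): if this node is secondary for two
# auto-balanced instances whose primary is prinode, with BE_MEMORY values of
# 2048 and 1024 MiB, then needed_mem is 3072 MiB; with n_img.mfree at
# 2500 MiB the ENODEN1 error above fires for that (node, prinode) pair.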
2101 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102 (files_all, files_all_opt, files_mc, files_vm)):
2103 """Verifies file checksums collected from all nodes.
2105 @param errorif: Callback for reporting errors
2106 @param nodeinfo: List of L{objects.Node} objects
2107 @param master_node: Name of master node
2108 @param all_nvinfo: RPC results
2111 node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2113 assert master_node in node_names
2114 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2115 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2116 "Found file listed in more than one file list"
2118 # Define functions determining which nodes to consider for a file
2119 file2nodefn = dict([(filename, fn)
2120 for (files, fn) in [(files_all, None),
2121 (files_all_opt, None),
2122 (files_mc, lambda node: (node.master_candidate or
2123 node.name == master_node)),
2124 (files_vm, lambda node: node.vm_capable)]
2125 for filename in files])
2127 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2129 for node in nodeinfo:
2133 nresult = all_nvinfo[node.name]
2135 if nresult.fail_msg or not nresult.payload:
2138 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2140 test = not (node_files and isinstance(node_files, dict))
2141 errorif(test, cls.ENODEFILECHECK, node.name,
2142 "Node did not return file checksum data")
2146 for (filename, checksum) in node_files.items():
2147 # Check if the file should be considered for a node
2148 fn = file2nodefn[filename]
2149 if fn is None or fn(node):
2150 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2152 for (filename, checksums) in fileinfo.items():
2153 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2155 # Nodes having the file
2156 with_file = frozenset(node_name
2157 for nodes in fileinfo[filename].values()
2158 for node_name in nodes)
2160 # Nodes missing file
2161 missing_file = node_names - with_file
2163 if filename in files_all_opt:
2165 errorif(missing_file and missing_file != node_names,
2166 cls.ECLUSTERFILECHECK, None,
2167 "File %s is optional, but it must exist on all or no"
2168 " nodes (not found on %s)",
2169 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2171 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2172 "File %s is missing from node(s) %s", filename,
2173 utils.CommaJoin(utils.NiceSort(missing_file)))
2175 # See if there are multiple versions of the file
2176 test = len(checksums) > 1
2178 variants = ["variant %s on %s" %
2179 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2180 for (idx, (checksum, nodes)) in
2181 enumerate(sorted(checksums.items()))]
2185 errorif(test, cls.ECLUSTERFILECHECK, None,
2186 "File %s found with %s different checksums (%s)",
2187 filename, len(checksums), "; ".join(variants))
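# Illustrative note: fileinfo ends up as a nested mapping of
# filename -> checksum -> set of node names, e.g.
#   {"/etc/hosts": {"abc...": set(["node1", "node2"]),
#                   "def...": set(["node3"])}}
# which is what drives the missing-file and multiple-checksum reports above.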
2189 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2191 """Verifies and the node DRBD status.
2193 @type ninfo: L{objects.Node}
2194 @param ninfo: the node to check
2195 @param nresult: the remote results for the node
2196 @param instanceinfo: the dict of instances
2197 @param drbd_helper: the configured DRBD usermode helper
2198 @param drbd_map: the DRBD map as returned by
2199 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2203 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2206 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2207 test = (helper_result is None)
2208 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2209 "no drbd usermode helper returned")
2211 status, payload = helper_result
2213 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2214 "drbd usermode helper check unsuccessful: %s", payload)
2215 test = status and (payload != drbd_helper)
2216 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2217 "wrong drbd usermode helper: %s", payload)
2219 # compute the DRBD minors
2221 for minor, instance in drbd_map[node].items():
2222 test = instance not in instanceinfo
2223 _ErrorIf(test, self.ECLUSTERCFG, None,
2224 "ghost instance '%s' in temporary DRBD map", instance)
2225 # ghost instance should not be running, but otherwise we
2226 # don't give double warnings (both ghost instance and
2227 # unallocated minor in use)
2229 node_drbd[minor] = (instance, False)
2231 instance = instanceinfo[instance]
2232 node_drbd[minor] = (instance.name, instance.admin_up)
2234 # and now check them
2235 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2236 test = not isinstance(used_minors, (tuple, list))
2237 _ErrorIf(test, self.ENODEDRBD, node,
2238 "cannot parse drbd status file: %s", str(used_minors))
2240 # we cannot check drbd status
2243 for minor, (iname, must_exist) in node_drbd.items():
2244 test = minor not in used_minors and must_exist
2245 _ErrorIf(test, self.ENODEDRBD, node,
2246 "drbd minor %d of instance %s is not active", minor, iname)
2247 for minor in used_minors:
2248 test = minor not in node_drbd
2249 _ErrorIf(test, self.ENODEDRBD, node,
2250 "unallocated drbd minor %d is in use", minor)
2252 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2253 """Builds the node OS structures.
2255 @type ninfo: L{objects.Node}
2256 @param ninfo: the node to check
2257 @param nresult: the remote results for the node
2258 @param nimg: the node image object
2262 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2264 remote_os = nresult.get(constants.NV_OSLIST, None)
2265 test = (not isinstance(remote_os, list) or
2266 not compat.all(isinstance(v, list) and len(v) == 7
2267 for v in remote_os))
2269 _ErrorIf(test, self.ENODEOS, node,
2270 "node hasn't returned valid OS data")
2279 for (name, os_path, status, diagnose,
2280 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2282 if name not in os_dict:
2285 # parameters is a list of lists instead of list of tuples due to
2286 # JSON lacking a real tuple type, fix it:
2287 parameters = [tuple(v) for v in parameters]
2288 os_dict[name].append((os_path, status, diagnose,
2289 set(variants), set(parameters), set(api_ver)))
2291 nimg.oslist = os_dict
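# Illustrative note: nimg.oslist maps an OS name to a list of
# (path, status, diagnose, variants, parameters, api_versions) tuples, one
# per location where the OS was found on the node; normally the list has a
# single entry, and _VerifyNodeOS below warns when it does not.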
2293 def _VerifyNodeOS(self, ninfo, nimg, base):
2294 """Verifies the node OS list.
2296 @type ninfo: L{objects.Node}
2297 @param ninfo: the node to check
2298 @param nimg: the node image object
2299 @param base: the 'template' node we match against (e.g. from the master)
2303 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2305 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2307 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2308 for os_name, os_data in nimg.oslist.items():
2309 assert os_data, "Empty OS status for OS %s?!" % os_name
2310 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2311 _ErrorIf(not f_status, self.ENODEOS, node,
2312 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2313 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2314 "OS '%s' has multiple entries (first one shadows the rest): %s",
2315 os_name, utils.CommaJoin([v[0] for v in os_data]))
2316 # comparisons with the 'base' image
2317 test = os_name not in base.oslist
2318 _ErrorIf(test, self.ENODEOS, node,
2319 "Extra OS %s not present on reference node (%s)",
2323 assert base.oslist[os_name], "Base node has empty OS status?"
2324 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2326 # base OS is invalid, skipping
2328 for kind, a, b in [("API version", f_api, b_api),
2329 ("variants list", f_var, b_var),
2330 ("parameters", beautify_params(f_param),
2331 beautify_params(b_param))]:
2332 _ErrorIf(a != b, self.ENODEOS, node,
2333 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2334 kind, os_name, base.name,
2335 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2337 # check any missing OSes
2338 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2339 _ErrorIf(missing, self.ENODEOS, node,
2340 "OSes present on reference node %s but missing on this node: %s",
2341 base.name, utils.CommaJoin(missing))
2343 def _VerifyOob(self, ninfo, nresult):
2344 """Verifies out of band functionality of a node.
2346 @type ninfo: L{objects.Node}
2347 @param ninfo: the node to check
2348 @param nresult: the remote results for the node
2352 # We just have to verify the paths on master and/or master candidates
2353 # as the oob helper is invoked on the master
2354 if ((ninfo.master_candidate or ninfo.master_capable) and
2355 constants.NV_OOB_PATHS in nresult):
2356 for path_result in nresult[constants.NV_OOB_PATHS]:
2357 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2359 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2360 """Verifies and updates the node volume data.
2362 This function will update a L{NodeImage}'s internal structures
2363 with data from the remote call.
2365 @type ninfo: L{objects.Node}
2366 @param ninfo: the node to check
2367 @param nresult: the remote results for the node
2368 @param nimg: the node image object
2369 @param vg_name: the configured VG name
2373 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375 nimg.lvm_fail = True
2376 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2379 elif isinstance(lvdata, basestring):
2380 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2381 utils.SafeEncode(lvdata))
2382 elif not isinstance(lvdata, dict):
2383 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2385 nimg.volumes = lvdata
2386 nimg.lvm_fail = False
2388 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2389 """Verifies and updates the node instance list.
2391 If the listing was successful, then updates this node's instance
2392 list. Otherwise, it marks the RPC call as failed for the instance
2395 @type ninfo: L{objects.Node}
2396 @param ninfo: the node to check
2397 @param nresult: the remote results for the node
2398 @param nimg: the node image object
2401 idata = nresult.get(constants.NV_INSTANCELIST, None)
2402 test = not isinstance(idata, list)
2403 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2404 " (instancelist): %s", utils.SafeEncode(str(idata)))
2406 nimg.hyp_fail = True
2408 nimg.instances = idata
2410 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2411 """Verifies and computes a node information map
2413 @type ninfo: L{objects.Node}
2414 @param ninfo: the node to check
2415 @param nresult: the remote results for the node
2416 @param nimg: the node image object
2417 @param vg_name: the configured VG name
2421 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2423 # try to read free memory (from the hypervisor)
2424 hv_info = nresult.get(constants.NV_HVINFO, None)
2425 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2426 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2429 nimg.mfree = int(hv_info["memory_free"])
2430 except (ValueError, TypeError):
2431 _ErrorIf(True, self.ENODERPC, node,
2432 "node returned invalid nodeinfo, check hypervisor")
2434 # FIXME: devise a free space model for file based instances as well
2435 if vg_name is not None:
2436 test = (constants.NV_VGLIST not in nresult or
2437 vg_name not in nresult[constants.NV_VGLIST])
2438 _ErrorIf(test, self.ENODELVM, node,
2439 "node didn't return data for the volume group '%s'"
2440 " - it is either missing or broken", vg_name)
2443 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2444 except (ValueError, TypeError):
2445 _ErrorIf(True, self.ENODERPC, node,
2446 "node returned invalid LVM info, check LVM status")
2448 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2449 """Gets per-disk status information for all instances.
2451 @type nodelist: list of strings
2452 @param nodelist: Node names
2453 @type node_image: dict of (name, L{objects.Node})
2454 @param node_image: Node objects
2455 @type instanceinfo: dict of (name, L{objects.Instance})
2456 @param instanceinfo: Instance objects
2457 @rtype: {instance: {node: [(success, payload)]}}
2458 @return: a dictionary of per-instance dictionaries with nodes as
2459 keys and disk information as values; the disk information is a
2460 list of tuples (success, payload)
2463 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2466 node_disks_devonly = {}
2467 diskless_instances = set()
2468 diskless = constants.DT_DISKLESS
2470 for nname in nodelist:
2471 node_instances = list(itertools.chain(node_image[nname].pinst,
2472 node_image[nname].sinst))
2473 diskless_instances.update(inst for inst in node_instances
2474 if instanceinfo[inst].disk_template == diskless)
2475 disks = [(inst, disk)
2476 for inst in node_instances
2477 for disk in instanceinfo[inst].disks]
2480 # No need to collect data
2483 node_disks[nname] = disks
2485 # Creating copies as SetDiskID below will modify the objects and that can
2486 # lead to incorrect data returned from nodes
2487 devonly = [dev.Copy() for (_, dev) in disks]
2490 self.cfg.SetDiskID(dev, nname)
2492 node_disks_devonly[nname] = devonly
2494 assert len(node_disks) == len(node_disks_devonly)
2496 # Collect data from all nodes with disks
2497 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2500 assert len(result) == len(node_disks)
2504 for (nname, nres) in result.items():
2505 disks = node_disks[nname]
2508 # No data from this node
2509 data = len(disks) * [(False, "node offline")]
2512 _ErrorIf(msg, self.ENODERPC, nname,
2513 "while getting disk information: %s", msg)
2515 # No data from this node
2516 data = len(disks) * [(False, msg)]
2519 for idx, i in enumerate(nres.payload):
2520 if isinstance(i, (tuple, list)) and len(i) == 2:
2523 logging.warning("Invalid result from node %s, entry %d: %s",
2525 data.append((False, "Invalid result from the remote node"))
2527 for ((inst, _), status) in zip(disks, data):
2528 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2530 # Add empty entries for diskless instances.
2531 for inst in diskless_instances:
2532 assert inst not in instdisk
2535 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2536 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2537 compat.all(isinstance(s, (tuple, list)) and
2538 len(s) == 2 for s in statuses)
2539 for inst, nnames in instdisk.items()
2540 for nname, statuses in nnames.items())
2541 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
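# Illustrative note (shape implied by the assertions above): instdisk looks
# roughly like
#   {"inst1": {"node1": [(True, status0), (True, status1)]},
#    "inst2": {"node2": [(False, "node offline")]}}
# i.e. per instance, per node, one (success, payload) pair per disk.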
2545 def BuildHooksEnv(self):
2548 Cluster-Verify hooks are run only in the post phase; their failure causes
2549 the output to be logged in the verify output and the verification to fail.
2553 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2556 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2557 for node in self.my_node_info.values())
2561 def BuildHooksNodes(self):
2562 """Build hooks nodes.
2565 return ([], self.my_node_names)
2567 def Exec(self, feedback_fn):
2568 """Verify integrity of the node group, performing various test on nodes.
2571 # This method has too many local variables. pylint: disable=R0914
2572 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2574 if not self.my_node_names:
2576 feedback_fn("* Empty node group, skipping verification")
2580 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2581 verbose = self.op.verbose
2582 self._feedback_fn = feedback_fn
2584 vg_name = self.cfg.GetVGName()
2585 drbd_helper = self.cfg.GetDRBDHelper()
2586 cluster = self.cfg.GetClusterInfo()
2587 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2588 hypervisors = cluster.enabled_hypervisors
2589 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2591 i_non_redundant = [] # Non redundant instances
2592 i_non_a_balanced = [] # Non auto-balanced instances
2593 n_offline = 0 # Count of offline nodes
2594 n_drained = 0 # Count of nodes being drained
2595 node_vol_should = {}
2597 # FIXME: verify OS list
2600 filemap = _ComputeAncillaryFiles(cluster, False)
2602 # do local checksums
2603 master_node = self.master_node = self.cfg.GetMasterNode()
2604 master_ip = self.cfg.GetMasterIP()
2606 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2608 # We will make nodes contact all nodes in their group, and one node from
2609 # every other group.
2610 # TODO: should it be a *random* node, different every time?
2611 online_nodes = [node.name for node in node_data_list if not node.offline]
2612 other_group_nodes = {}
2614 for name in sorted(self.all_node_info):
2615 node = self.all_node_info[name]
2616 if (node.group not in other_group_nodes
2617 and node.group != self.group_uuid
2618 and not node.offline):
2619 other_group_nodes[node.group] = node.name
2621 node_verify_param = {
2622 constants.NV_FILELIST:
2623 utils.UniqueSequence(filename
2624 for files in filemap
2625 for filename in files),
2626 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2627 constants.NV_HYPERVISOR: hypervisors,
2628 constants.NV_HVPARAMS:
2629 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2630 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2631 for node in node_data_list
2632 if not node.offline],
2633 constants.NV_INSTANCELIST: hypervisors,
2634 constants.NV_VERSION: None,
2635 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2636 constants.NV_NODESETUP: None,
2637 constants.NV_TIME: None,
2638 constants.NV_MASTERIP: (master_node, master_ip),
2639 constants.NV_OSLIST: None,
2640 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2643 if vg_name is not None:
2644 node_verify_param[constants.NV_VGLIST] = None
2645 node_verify_param[constants.NV_LVLIST] = vg_name
2646 node_verify_param[constants.NV_PVLIST] = [vg_name]
2647 node_verify_param[constants.NV_DRBDLIST] = None
2650 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2653 # FIXME: this needs to be changed per node-group, not cluster-wide
2655 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2656 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2657 bridges.add(default_nicpp[constants.NIC_LINK])
2658 for instance in self.my_inst_info.values():
2659 for nic in instance.nics:
2660 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2661 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2662 bridges.add(full_nic[constants.NIC_LINK])
2665 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2667 # Build our expected cluster state
2668 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2670 vm_capable=node.vm_capable))
2671 for node in node_data_list)
2675 for node in self.all_node_info.values():
2676 path = _SupportsOob(self.cfg, node)
2677 if path and path not in oob_paths:
2678 oob_paths.append(path)
2681 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2683 for instance in self.my_inst_names:
2684 inst_config = self.my_inst_info[instance]
2686 for nname in inst_config.all_nodes:
2687 if nname not in node_image:
2688 gnode = self.NodeImage(name=nname)
2689 gnode.ghost = (nname not in self.all_node_info)
2690 node_image[nname] = gnode
2692 inst_config.MapLVsByNode(node_vol_should)
2694 pnode = inst_config.primary_node
2695 node_image[pnode].pinst.append(instance)
2697 for snode in inst_config.secondary_nodes:
2698 nimg = node_image[snode]
2699 nimg.sinst.append(instance)
2700 if pnode not in nimg.sbp:
2701 nimg.sbp[pnode] = []
2702 nimg.sbp[pnode].append(instance)
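# Illustrative note: nimg.sbp ("secondaries by primary") maps a primary node
# name to the instances for which the current node acts as secondary, e.g.
#   node_image["node2"].sbp == {"node1": ["inst1", "inst2"]}
# This is the structure _VerifyNPlusOneMemory iterates over later.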
2704 # At this point, we have the in-memory data structures complete,
2705 # except for the runtime information, which we'll gather next
2707 # Due to the way our RPC system works, exact response times cannot be
2708 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2709 # time before and after executing the request, we can at least have a time
2711 nvinfo_starttime = time.time()
2712 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2714 self.cfg.GetClusterName())
2715 nvinfo_endtime = time.time()
2717 if self.extra_lv_nodes and vg_name is not None:
2719 self.rpc.call_node_verify(self.extra_lv_nodes,
2720 {constants.NV_LVLIST: vg_name},
2721 self.cfg.GetClusterName())
2723 extra_lv_nvinfo = {}
2725 all_drbd_map = self.cfg.ComputeDRBDMap()
2727 feedback_fn("* Gathering disk information (%s nodes)" %
2728 len(self.my_node_names))
2729 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2732 feedback_fn("* Verifying configuration file consistency")
2734 # If not all nodes are being checked, we need to make sure the master node
2735 # and a non-checked vm_capable node are in the list.
2736 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2738 vf_nvinfo = all_nvinfo.copy()
2739 vf_node_info = list(self.my_node_info.values())
2740 additional_nodes = []
2741 if master_node not in self.my_node_info:
2742 additional_nodes.append(master_node)
2743 vf_node_info.append(self.all_node_info[master_node])
2744 # Add the first vm_capable node we find which is not included
2745 for node in absent_nodes:
2746 nodeinfo = self.all_node_info[node]
2747 if nodeinfo.vm_capable and not nodeinfo.offline:
2748 additional_nodes.append(node)
2749 vf_node_info.append(self.all_node_info[node])
2751 key = constants.NV_FILELIST
2752 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2753 {key: node_verify_param[key]},
2754 self.cfg.GetClusterName()))
2756 vf_nvinfo = all_nvinfo
2757 vf_node_info = self.my_node_info.values()
2759 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2761 feedback_fn("* Verifying node status")
2765 for node_i in node_data_list:
2767 nimg = node_image[node]
2771 feedback_fn("* Skipping offline node %s" % (node,))
2775 if node == master_node:
2777 elif node_i.master_candidate:
2778 ntype = "master candidate"
2779 elif node_i.drained:
2785 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2787 msg = all_nvinfo[node].fail_msg
2788 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2790 nimg.rpc_fail = True
2793 nresult = all_nvinfo[node].payload
2795 nimg.call_ok = self._VerifyNode(node_i, nresult)
2796 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2797 self._VerifyNodeNetwork(node_i, nresult)
2798 self._VerifyOob(node_i, nresult)
2801 self._VerifyNodeLVM(node_i, nresult, vg_name)
2802 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2805 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2806 self._UpdateNodeInstances(node_i, nresult, nimg)
2807 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2808 self._UpdateNodeOS(node_i, nresult, nimg)
2810 if not nimg.os_fail:
2811 if refos_img is None:
2813 self._VerifyNodeOS(node_i, nimg, refos_img)
2814 self._VerifyNodeBridges(node_i, nresult, bridges)
2816 # Check whether all running instances are primary for the node. (This
2817 # can no longer be done from _VerifyInstance below, since some of the
2818 # wrong instances could be from other node groups.)
2819 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2821 for inst in non_primary_inst:
2822 test = inst in self.all_inst_info
2823 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2824 "instance should not run on node %s", node_i.name)
2825 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2826 "node is running unknown instance %s", inst)
2828 for node, result in extra_lv_nvinfo.items():
2829 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2830 node_image[node], vg_name)
2832 feedback_fn("* Verifying instance status")
2833 for instance in self.my_inst_names:
2835 feedback_fn("* Verifying instance %s" % instance)
2836 inst_config = self.my_inst_info[instance]
2837 self._VerifyInstance(instance, inst_config, node_image,
2839 inst_nodes_offline = []
2841 pnode = inst_config.primary_node
2842 pnode_img = node_image[pnode]
2843 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2844 self.ENODERPC, pnode, "instance %s, connection to"
2845 " primary node failed", instance)
2847 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2848 self.EINSTANCEBADNODE, instance,
2849 "instance is marked as running and lives on offline node %s",
2850 inst_config.primary_node)
2852 # If the instance is non-redundant we cannot survive losing its primary
2853 # node, so we are not N+1 compliant. On the other hand we have no disk
2854 # templates with more than one secondary so that situation is not well
2856 # FIXME: does not support file-backed instances
2857 if not inst_config.secondary_nodes:
2858 i_non_redundant.append(instance)
2860 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2861 instance, "instance has multiple secondary nodes: %s",
2862 utils.CommaJoin(inst_config.secondary_nodes),
2863 code=self.ETYPE_WARNING)
2865 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2866 pnode = inst_config.primary_node
2867 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2868 instance_groups = {}
2870 for node in instance_nodes:
2871 instance_groups.setdefault(self.all_node_info[node].group,
2875 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2876 # Sort so that we always list the primary node first.
2877 for group, nodes in sorted(instance_groups.items(),
2878 key=lambda (_, nodes): pnode in nodes,
2881 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2882 instance, "instance has primary and secondary nodes in"
2883 " different groups: %s", utils.CommaJoin(pretty_list),
2884 code=self.ETYPE_WARNING)
2886 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2887 i_non_a_balanced.append(instance)
2889 for snode in inst_config.secondary_nodes:
2890 s_img = node_image[snode]
2891 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2892 "instance %s, connection to secondary node failed", instance)
2895 inst_nodes_offline.append(snode)
2897 # warn that the instance lives on offline nodes
2898 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2899 "instance has offline secondary node(s) %s",
2900 utils.CommaJoin(inst_nodes_offline))
2901 # ... or ghost/non-vm_capable nodes
2902 for node in inst_config.all_nodes:
2903 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2904 "instance lives on ghost node %s", node)
2905 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2906 instance, "instance lives on non-vm_capable node %s", node)
2908 feedback_fn("* Verifying orphan volumes")
2909 reserved = utils.FieldSet(*cluster.reserved_lvs)
2911 # We will get spurious "unknown volume" warnings if any node of this group
2912 # is secondary for an instance whose primary is in another group. To avoid
2913 # them, we find these instances and add their volumes to node_vol_should.
2914 for inst in self.all_inst_info.values():
2915 for secondary in inst.secondary_nodes:
2916 if (secondary in self.my_node_info
2917 and inst.name not in self.my_inst_info):
2918 inst.MapLVsByNode(node_vol_should)
2921 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2923 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2924 feedback_fn("* Verifying N+1 Memory redundancy")
2925 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2927 feedback_fn("* Other Notes")
2929 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2930 % len(i_non_redundant))
2932 if i_non_a_balanced:
2933 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2934 % len(i_non_a_balanced))
2937 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2940 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2944 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2945 """Analyze the post-hooks' result
2947 This method analyses the hook result, handles it, and sends some
2948 nicely-formatted feedback back to the user.
2950 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2951 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2952 @param hooks_results: the results of the multi-node hooks rpc call
2953 @param feedback_fn: function used to send feedback back to the caller
2954 @param lu_result: previous Exec result
2955 @return: the new Exec result, based on the previous result
2959 # We only really run POST phase hooks, only for non-empty groups,
2960 # and are only interested in their results
2961 if not self.my_node_names:
2964 elif phase == constants.HOOKS_PHASE_POST:
2965 # Used to change hooks' output to proper indentation
2966 feedback_fn("* Hooks Results")
2967 assert hooks_results, "invalid result from hooks"
2969 for node_name in hooks_results:
2970 res = hooks_results[node_name]
2972 test = msg and not res.offline
2973 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2974 "Communication failure in hooks execution: %s", msg)
2975 if res.offline or msg:
2976 # No need to investigate payload if node is offline or gave an error.
2977 # manually override lu_result here, as _ErrorIf only
2978 # overrides self.bad
2981 for script, hkr, output in res.payload:
2982 test = hkr == constants.HKR_FAIL
2983 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2984 "Script %s failed, output:", script)
2986 output = self._HOOKS_INDENT_RE.sub(" ", output)
2987 feedback_fn("%s" % output)
2993 class LUClusterVerifyDisks(NoHooksLU):
2994 """Verifies the cluster disks status.
2999 def ExpandNames(self):
3000 self.share_locks = _ShareAll()
3001 self.needed_locks = {
3002 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3005 def Exec(self, feedback_fn):
3006 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3008 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3009 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3010 for group in group_names])
3013 class LUGroupVerifyDisks(NoHooksLU):
3014 """Verifies the status of all disks in a node group.
3019 def ExpandNames(self):
3020 # Raises errors.OpPrereqError on its own if group can't be found
3021 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3023 self.share_locks = _ShareAll()
3024 self.needed_locks = {
3025 locking.LEVEL_INSTANCE: [],
3026 locking.LEVEL_NODEGROUP: [],
3027 locking.LEVEL_NODE: [],
3030 def DeclareLocks(self, level):
3031 if level == locking.LEVEL_INSTANCE:
3032 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3034 # Lock instances optimistically, needs verification once node and group
3035 # locks have been acquired
3036 self.needed_locks[locking.LEVEL_INSTANCE] = \
3037 self.cfg.GetNodeGroupInstances(self.group_uuid)
3039 elif level == locking.LEVEL_NODEGROUP:
3040 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3042 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3043 set([self.group_uuid] +
3044 # Lock all groups used by instances optimistically; this requires
3045 # going via the node before it's locked, requiring verification
3048 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3049 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3051 elif level == locking.LEVEL_NODE:
3052 # This will only lock the nodes in the group to be verified which contain
3054 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3055 self._LockInstancesNodes()
3057 # Lock all nodes in group to be verified
3058 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3059 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3060 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3062 def CheckPrereq(self):
3063 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3064 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3065 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3067 assert self.group_uuid in owned_groups
3069 # Check if locked instances are still correct
3070 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3072 # Get instance information
3073 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3075 # Check if node groups for locked instances are still correct
3076 for (instance_name, inst) in self.instances.items():
3077 assert owned_nodes.issuperset(inst.all_nodes), \
3078 "Instance %s's nodes changed while we kept the lock" % instance_name
3080 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3083 assert self.group_uuid in inst_groups, \
3084 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3086 def Exec(self, feedback_fn):
3087 """Verify integrity of cluster disks.
3089 @rtype: tuple of three items
3090 @return: a tuple of (dict of node-to-node_error, list of instances
3091 which need activate-disks, dict of instance: (node, volume) for
3096 res_instances = set()
3099 nv_dict = _MapInstanceDisksToNodes([inst
3100 for inst in self.instances.values()
3104 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3105 set(self.cfg.GetVmCapableNodeList()))
3107 node_lvs = self.rpc.call_lv_list(nodes, [])
3109 for (node, node_res) in node_lvs.items():
3110 if node_res.offline:
3113 msg = node_res.fail_msg
3115 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3116 res_nodes[node] = msg
3119 for lv_name, (_, _, lv_online) in node_res.payload.items():
3120 inst = nv_dict.pop((node, lv_name), None)
3121 if not (lv_online or inst is None):
3122 res_instances.add(inst)
3124 # any leftover items in nv_dict are missing LVs, let's arrange the data
3126 for key, inst in nv_dict.iteritems():
3127 res_missing.setdefault(inst, []).append(key)
3129 return (res_nodes, list(res_instances), res_missing)
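# Illustrative example of the return value (shape per the docstring above):
#   ({"node1": "rpc error ..."},           # node -> error message
#    ["inst1"],                            # instances needing activate-disks
#    {"inst2": [("node3", "lv_name")]})    # instance -> missing (node, volume)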
3132 class LUClusterRepairDiskSizes(NoHooksLU):
3133 """Verifies the cluster disks sizes.
3138 def ExpandNames(self):
3139 if self.op.instances:
3140 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3141 self.needed_locks = {
3142 locking.LEVEL_NODE: [],
3143 locking.LEVEL_INSTANCE: self.wanted_names,
3145 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3147 self.wanted_names = None
3148 self.needed_locks = {
3149 locking.LEVEL_NODE: locking.ALL_SET,
3150 locking.LEVEL_INSTANCE: locking.ALL_SET,
3152 self.share_locks = _ShareAll()
3154 def DeclareLocks(self, level):
3155 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3156 self._LockInstancesNodes(primary_only=True)
3158 def CheckPrereq(self):
3159 """Check prerequisites.
3161 This only checks the optional instance list against the existing names.
3164 if self.wanted_names is None:
3165 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3167 self.wanted_instances = \
3168 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3170 def _EnsureChildSizes(self, disk):
3171 """Ensure children of the disk have the needed disk size.
3173 This is valid mainly for DRBD8 and fixes an issue where the
3174 children have a smaller disk size than the parent.
3176 @param disk: an L{ganeti.objects.Disk} object
3179 if disk.dev_type == constants.LD_DRBD8:
3180 assert disk.children, "Empty children for DRBD8?"
3181 fchild = disk.children[0]
3182 mismatch = fchild.size < disk.size
3184 self.LogInfo("Child disk has size %d, parent %d, fixing",
3185 fchild.size, disk.size)
3186 fchild.size = disk.size
3188 # and we recurse on this child only, not on the metadev
3189 return self._EnsureChildSizes(fchild) or mismatch
3193 def Exec(self, feedback_fn):
3194 """Verify the size of cluster disks.
3197 # TODO: check child disks too
3198 # TODO: check differences in size between primary/secondary nodes
3200 for instance in self.wanted_instances:
3201 pnode = instance.primary_node
3202 if pnode not in per_node_disks:
3203 per_node_disks[pnode] = []
3204 for idx, disk in enumerate(instance.disks):
3205 per_node_disks[pnode].append((instance, idx, disk))
3208 for node, dskl in per_node_disks.items():
3209 newl = [v[2].Copy() for v in dskl]
3211 self.cfg.SetDiskID(dsk, node)
3212 result = self.rpc.call_blockdev_getsize(node, newl)
3214 self.LogWarning("Failure in blockdev_getsize call to node"
3215 " %s, ignoring", node)
3217 if len(result.payload) != len(dskl):
3218 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3219 " result.payload=%s", node, len(dskl), result.payload)
3220 self.LogWarning("Invalid result from node %s, ignoring node results",
3223 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3225 self.LogWarning("Disk %d of instance %s did not return size"
3226 " information, ignoring", idx, instance.name)
3228 if not isinstance(size, (int, long)):
3229 self.LogWarning("Disk %d of instance %s did not return valid"
3230 " size information, ignoring", idx, instance.name)
3233 if size != disk.size:
3234 self.LogInfo("Disk %d of instance %s has mismatched size,"
3235 " correcting: recorded %d, actual %d", idx,
3236 instance.name, disk.size, size)
3238 self.cfg.Update(instance, feedback_fn)
3239 changed.append((instance.name, idx, size))
3240 if self._EnsureChildSizes(disk):
3241 self.cfg.Update(instance, feedback_fn)
3242 changed.append((instance.name, idx, disk.size))
3246 class LUClusterRename(LogicalUnit):
3247 """Rename the cluster.
3250 HPATH = "cluster-rename"
3251 HTYPE = constants.HTYPE_CLUSTER
3253 def BuildHooksEnv(self):
3258 "OP_TARGET": self.cfg.GetClusterName(),
3259 "NEW_NAME": self.op.name,
3262 def BuildHooksNodes(self):
3263 """Build hooks nodes.
3266 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3268 def CheckPrereq(self):
3269 """Verify that the passed name is a valid one.
3272 hostname = netutils.GetHostname(name=self.op.name,
3273 family=self.cfg.GetPrimaryIPFamily())
3275 new_name = hostname.name
3276 self.ip = new_ip = hostname.ip
3277 old_name = self.cfg.GetClusterName()
3278 old_ip = self.cfg.GetMasterIP()
3279 if new_name == old_name and new_ip == old_ip:
3280 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3281 " cluster has changed",
3283 if new_ip != old_ip:
3284 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3285 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3286 " reachable on the network" %
3287 new_ip, errors.ECODE_NOTUNIQUE)
3289 self.op.name = new_name
3291 def Exec(self, feedback_fn):
3292 """Rename the cluster.
3295 clustername = self.op.name
3298 # shut down the master IP
3299 master = self.cfg.GetMasterNode()
3300 result = self.rpc.call_node_deactivate_master_ip(master)
3301 result.Raise("Could not disable the master role")
3304 cluster = self.cfg.GetClusterInfo()
3305 cluster.cluster_name = clustername
3306 cluster.master_ip = ip
3307 self.cfg.Update(cluster, feedback_fn)
3309 # update the known hosts file
3310 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3311 node_list = self.cfg.GetOnlineNodeList()
3313 node_list.remove(master)
3316 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3318 result = self.rpc.call_node_activate_master_ip(master)
3319 msg = result.fail_msg
3321 self.LogWarning("Could not re-enable the master role on"
3322 " the master, please restart manually: %s", msg)
3327 class LUClusterSetParams(LogicalUnit):
3328 """Change the parameters of the cluster.
3331 HPATH = "cluster-modify"
3332 HTYPE = constants.HTYPE_CLUSTER
3335 def CheckArguments(self):
3339 if self.op.uid_pool:
3340 uidpool.CheckUidPool(self.op.uid_pool)
3342 if self.op.add_uids:
3343 uidpool.CheckUidPool(self.op.add_uids)
3345 if self.op.remove_uids:
3346 uidpool.CheckUidPool(self.op.remove_uids)
3348 def ExpandNames(self):
3349 # FIXME: in the future maybe other cluster params won't require checking on
3350 # all nodes to be modified.
3351 self.needed_locks = {
3352 locking.LEVEL_NODE: locking.ALL_SET,
3354 self.share_locks[locking.LEVEL_NODE] = 1
3356 def BuildHooksEnv(self):
3361 "OP_TARGET": self.cfg.GetClusterName(),
3362 "NEW_VG_NAME": self.op.vg_name,
3365 def BuildHooksNodes(self):
3366 """Build hooks nodes.
3369 mn = self.cfg.GetMasterNode()
3372 def CheckPrereq(self):
3373 """Check prerequisites.
3375 This checks that the given parameters don't conflict and
3376 that the given volume group is valid.
3379 if self.op.vg_name is not None and not self.op.vg_name:
3380 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3381 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3382 " instances exist", errors.ECODE_INVAL)
3384 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3385 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3386 raise errors.OpPrereqError("Cannot disable drbd helper while"
3387 " drbd-based instances exist",
3390 node_list = self.owned_locks(locking.LEVEL_NODE)
3392 # if vg_name is not None, check the given volume group on all nodes
3394 vglist = self.rpc.call_vg_list(node_list)
3395 for node in node_list:
3396 msg = vglist[node].fail_msg
3398 # ignoring down node
3399 self.LogWarning("Error while gathering data on node %s"
3400 " (ignoring node): %s", node, msg)
3402 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3404 constants.MIN_VG_SIZE)
3406 raise errors.OpPrereqError("Error on node '%s': %s" %
3407 (node, vgstatus), errors.ECODE_ENVIRON)
3409 if self.op.drbd_helper:
3410 # checks given drbd helper on all nodes
3411 helpers = self.rpc.call_drbd_helper(node_list)
3412 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3414 self.LogInfo("Not checking drbd helper on offline node %s", node)
3416 msg = helpers[node].fail_msg
3418 raise errors.OpPrereqError("Error checking drbd helper on node"
3419 " '%s': %s" % (node, msg),
3420 errors.ECODE_ENVIRON)
3421 node_helper = helpers[node].payload
3422 if node_helper != self.op.drbd_helper:
3423 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3424 (node, node_helper), errors.ECODE_ENVIRON)
3426 self.cluster = cluster = self.cfg.GetClusterInfo()
3427 # validate params changes
3428 if self.op.beparams:
3429 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3430 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3432 if self.op.ndparams:
3433 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3434 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3436 # TODO: we need a more general way to handle resetting
3437 # cluster-level parameters to default values
3438 if self.new_ndparams["oob_program"] == "":
3439 self.new_ndparams["oob_program"] = \
3440 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3442 if self.op.nicparams:
3443 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3444 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3445 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3448 # check all instances for consistency
3449 for instance in self.cfg.GetAllInstancesInfo().values():
3450 for nic_idx, nic in enumerate(instance.nics):
3451 params_copy = copy.deepcopy(nic.nicparams)
3452 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3454 # check parameter syntax
3456 objects.NIC.CheckParameterSyntax(params_filled)
3457 except errors.ConfigurationError, err:
3458 nic_errors.append("Instance %s, nic/%d: %s" %
3459 (instance.name, nic_idx, err))
3461 # if we're moving instances to routed, check that they have an ip
3462 target_mode = params_filled[constants.NIC_MODE]
3463 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3464 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3465 " address" % (instance.name, nic_idx))
3467 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3468 "\n".join(nic_errors))
3470 # hypervisor list/parameters
3471 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3472 if self.op.hvparams:
3473 for hv_name, hv_dict in self.op.hvparams.items():
3474 if hv_name not in self.new_hvparams:
3475 self.new_hvparams[hv_name] = hv_dict
3477 self.new_hvparams[hv_name].update(hv_dict)
3479 # os hypervisor parameters
3480 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3482 for os_name, hvs in self.op.os_hvp.items():
3483 if os_name not in self.new_os_hvp:
3484 self.new_os_hvp[os_name] = hvs
3486 for hv_name, hv_dict in hvs.items():
3487 if hv_name not in self.new_os_hvp[os_name]:
3488 self.new_os_hvp[os_name][hv_name] = hv_dict
3490 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3493 self.new_osp = objects.FillDict(cluster.osparams, {})
3494 if self.op.osparams:
3495 for os_name, osp in self.op.osparams.items():
3496 if os_name not in self.new_osp:
3497 self.new_osp[os_name] = {}
3499 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3502 if not self.new_osp[os_name]:
3503 # we removed all parameters
3504 del self.new_osp[os_name]
3506 # check the parameter validity (remote check)
3507 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3508 os_name, self.new_osp[os_name])
3510 # changes to the hypervisor list
3511 if self.op.enabled_hypervisors is not None:
3512 self.hv_list = self.op.enabled_hypervisors
3513 for hv in self.hv_list:
3514 # if the hypervisor doesn't already exist in the cluster
3515 # hvparams, we initialize it to empty, and then (in both
3516 # cases) we make sure to fill the defaults, as we might not
3517 # have a complete defaults list if the hypervisor wasn't
3519 if hv not in new_hvp:
3521 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3522 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3524 self.hv_list = cluster.enabled_hypervisors
3526 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3527 # either the enabled list has changed, or the parameters have, validate
3528 for hv_name, hv_params in self.new_hvparams.items():
3529 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3530 (self.op.enabled_hypervisors and
3531 hv_name in self.op.enabled_hypervisors)):
3532 # either this is a new hypervisor, or its parameters have changed
3533 hv_class = hypervisor.GetHypervisor(hv_name)
3534 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3535 hv_class.CheckParameterSyntax(hv_params)
3536 _CheckHVParams(self, node_list, hv_name, hv_params)
3539 # no need to check any newly-enabled hypervisors, since the
3540 # defaults have already been checked in the above code-block
3541 for os_name, os_hvp in self.new_os_hvp.items():
3542 for hv_name, hv_params in os_hvp.items():
3543 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3544 # we need to fill in the new os_hvp on top of the actual hv_p
3545 cluster_defaults = self.new_hvparams.get(hv_name, {})
3546 new_osp = objects.FillDict(cluster_defaults, hv_params)
3547 hv_class = hypervisor.GetHypervisor(hv_name)
3548 hv_class.CheckParameterSyntax(new_osp)
3549 _CheckHVParams(self, node_list, hv_name, new_osp)
3551 if self.op.default_iallocator:
3552 alloc_script = utils.FindFile(self.op.default_iallocator,
3553 constants.IALLOCATOR_SEARCH_PATH,
3555 if alloc_script is None:
3556 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3557 " specified" % self.op.default_iallocator,
3560 def Exec(self, feedback_fn):
3561 """Change the parameters of the cluster.
3564 if self.op.vg_name is not None:
3565 new_volume = self.op.vg_name
3568 if new_volume != self.cfg.GetVGName():
3569 self.cfg.SetVGName(new_volume)
3571 feedback_fn("Cluster LVM configuration already in desired"
3572 " state, not changing")
3573 if self.op.drbd_helper is not None:
3574 new_helper = self.op.drbd_helper
3577 if new_helper != self.cfg.GetDRBDHelper():
3578 self.cfg.SetDRBDHelper(new_helper)
3580 feedback_fn("Cluster DRBD helper already in desired state,"
3582 if self.op.hvparams:
3583 self.cluster.hvparams = self.new_hvparams
3585 self.cluster.os_hvp = self.new_os_hvp
3586 if self.op.enabled_hypervisors is not None:
3587 self.cluster.hvparams = self.new_hvparams
3588 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3589 if self.op.beparams:
3590 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3591 if self.op.nicparams:
3592 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3593 if self.op.osparams:
3594 self.cluster.osparams = self.new_osp
3595 if self.op.ndparams:
3596 self.cluster.ndparams = self.new_ndparams
3598 if self.op.candidate_pool_size is not None:
3599 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3600 # we need to update the pool size here, otherwise the save will fail
3601 _AdjustCandidatePool(self, [])
3603 if self.op.maintain_node_health is not None:
3604 self.cluster.maintain_node_health = self.op.maintain_node_health
3606 if self.op.prealloc_wipe_disks is not None:
3607 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3609 if self.op.add_uids is not None:
3610 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3612 if self.op.remove_uids is not None:
3613 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3615 if self.op.uid_pool is not None:
3616 self.cluster.uid_pool = self.op.uid_pool
3618 if self.op.default_iallocator is not None:
3619 self.cluster.default_iallocator = self.op.default_iallocator
3621 if self.op.reserved_lvs is not None:
3622 self.cluster.reserved_lvs = self.op.reserved_lvs
3624 def helper_os(aname, mods, desc):
3626 lst = getattr(self.cluster, aname)
3627 for key, val in mods:
3628 if key == constants.DDM_ADD:
3630 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3633 elif key == constants.DDM_REMOVE:
3637 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3639 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3641 if self.op.hidden_os:
3642 helper_os("hidden_os", self.op.hidden_os, "hidden")
3644 if self.op.blacklisted_os:
3645 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3647 if self.op.master_netdev:
3648 master = self.cfg.GetMasterNode()
3649 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3650 self.cluster.master_netdev)
3651 result = self.rpc.call_node_deactivate_master_ip(master)
3652 result.Raise("Could not disable the master ip")
3653 feedback_fn("Changing master_netdev from %s to %s" %
3654 (self.cluster.master_netdev, self.op.master_netdev))
3655 self.cluster.master_netdev = self.op.master_netdev
3657 self.cfg.Update(self.cluster, feedback_fn)
3659 if self.op.master_netdev:
3660 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3661 self.op.master_netdev)
3662 result = self.rpc.call_node_activate_master_ip(master)
3664 self.LogWarning("Could not re-enable the master ip on"
3665 " the master, please restart manually: %s",
3669 def _UploadHelper(lu, nodes, fname):
3670 """Helper for uploading a file and showing warnings.
3673 if os.path.exists(fname):
3674 result = lu.rpc.call_upload_file(nodes, fname)
3675 for to_node, to_result in result.items():
3676 msg = to_result.fail_msg
3678 msg = ("Copy of file %s to node %s failed: %s" %
3679 (fname, to_node, msg))
3680 lu.proc.LogWarning(msg)
3683 def _ComputeAncillaryFiles(cluster, redist):
3684 """Compute files external to Ganeti which need to be consistent.
3686 @type redist: boolean
3687 @param redist: Whether to include files which need to be redistributed
3690 # Compute files for all nodes
3692 constants.SSH_KNOWN_HOSTS_FILE,
3693 constants.CONFD_HMAC_KEY,
3694 constants.CLUSTER_DOMAIN_SECRET_FILE,
3698 files_all.update(constants.ALL_CERT_FILES)
3699 files_all.update(ssconf.SimpleStore().GetFileList())
3701 if cluster.modify_etc_hosts:
3702 files_all.add(constants.ETC_HOSTS)
3704 # Files which must either exist on all nodes or on none
3705 files_all_opt = set([
3706 constants.RAPI_USERS_FILE,
3709 # Files which should only be on master candidates
3712 files_mc.add(constants.CLUSTER_CONF_FILE)
3714 # Files which should only be on VM-capable nodes
3715 files_vm = set(filename
3716 for hv_name in cluster.enabled_hypervisors
3717 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3719 # Filenames must be unique
3720 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3721 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3722 "Found file listed in more than one file list"
3724 return (files_all, files_all_opt, files_mc, files_vm)
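# Illustrative sketch (not part of the module's API): callers consume the
# tuple returned above roughly like this; the exact file constants in each
# set depend on the cluster configuration:
#
#   (files_all, files_all_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, redist=True)
#   # files_all      -> distributed to every online node
#   # files_all_opt  -> must exist everywhere or nowhere
#   # files_mc       -> master candidates only (e.g. the cluster config)
#   # files_vm       -> VM-capable nodes only (hypervisor ancillary files)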
3727 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3728 """Distribute additional files which are part of the cluster configuration.
3730 ConfigWriter takes care of distributing the config and ssconf files, but
3731 there are more files which should be distributed to all nodes. This function
3732 makes sure those are copied.
3734 @param lu: calling logical unit
3735 @param additional_nodes: list of nodes not in the config to distribute to
3736 @type additional_vm: boolean
3737 @param additional_vm: whether the additional nodes are vm-capable or not
3740 # Gather target nodes
3741 cluster = lu.cfg.GetClusterInfo()
3742 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3744 online_nodes = lu.cfg.GetOnlineNodeList()
3745 vm_nodes = lu.cfg.GetVmCapableNodeList()
3747 if additional_nodes is not None:
3748 online_nodes.extend(additional_nodes)
3749 if additional_vm:
3750 vm_nodes.extend(additional_nodes)
3752 # Never distribute to master node
3753 for nodelist in [online_nodes, vm_nodes]:
3754 if master_info.name in nodelist:
3755 nodelist.remove(master_info.name)
3758 (files_all, files_all_opt, files_mc, files_vm) = \
3759 _ComputeAncillaryFiles(cluster, True)
3761 # Never re-distribute configuration file from here
3762 assert not (constants.CLUSTER_CONF_FILE in files_all or
3763 constants.CLUSTER_CONF_FILE in files_vm)
3764 assert not files_mc, "Master candidates not handled in this function"
3766 filemap = [
3767 (online_nodes, files_all),
3768 (online_nodes, files_all_opt),
3769 (vm_nodes, files_vm),
3770 ]
3773 for (node_list, files) in filemap:
3774 for fname in files:
3775 _UploadHelper(lu, node_list, fname)
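# Example invocation (as used later in this module by LUNodeAdd.Exec, where a
# freshly added node also needs the ancillary files):
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[node],
#                               additional_vm=self.op.vm_capable)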
3778 class LUClusterRedistConf(NoHooksLU):
3779 """Force the redistribution of cluster configuration.
3781 This is a very simple LU.
3786 def ExpandNames(self):
3787 self.needed_locks = {
3788 locking.LEVEL_NODE: locking.ALL_SET,
3789 }
3790 self.share_locks[locking.LEVEL_NODE] = 1
3792 def Exec(self, feedback_fn):
3793 """Redistribute the configuration.
3796 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3797 _RedistributeAncillaryFiles(self)
3800 class LUClusterActivateMasterIp(NoHooksLU):
3801 """Activate the master IP on the master node.
3804 def Exec(self, feedback_fn):
3805 """Activate the master IP.
3808 master = self.cfg.GetMasterNode()
3809 self.rpc.call_node_activate_master_ip(master)
3812 class LUClusterDeactivateMasterIp(NoHooksLU):
3813 """Deactivate the master IP on the master node.
3816 def Exec(self, feedback_fn):
3817 """Deactivate the master IP.
3820 master = self.cfg.GetMasterNode()
3821 self.rpc.call_node_deactivate_master_ip(master)
3824 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3825 """Sleep and poll for an instance's disk to sync.
3828 if not instance.disks or disks is not None and not disks:
3829 return True
3831 disks = _ExpandCheckDisks(instance, disks)
3833 if not oneshot:
3834 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3836 node = instance.primary_node
3838 for dev in disks:
3839 lu.cfg.SetDiskID(dev, node)
3841 # TODO: Convert to utils.Retry
3843 retries = 0
3844 degr_retries = 10 # in seconds, as we sleep 1 second each time
3845 while True:
3846 max_time = 0
3847 done = True
3848 cumul_degraded = False
3849 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3850 msg = rstats.fail_msg
3851 if msg:
3852 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3853 retries += 1
3854 if retries >= 10:
3855 raise errors.RemoteError("Can't contact node %s for mirror data,"
3856 " aborting." % node)
3857 time.sleep(6)
3858 continue
3859 rstats = rstats.payload
3861 for i, mstat in enumerate(rstats):
3862 if mstat is None:
3863 lu.LogWarning("Can't compute data for node %s/%s",
3864 node, disks[i].iv_name)
3865 continue
3867 cumul_degraded = (cumul_degraded or
3868 (mstat.is_degraded and mstat.sync_percent is None))
3869 if mstat.sync_percent is not None:
3870 done = False
3871 if mstat.estimated_time is not None:
3872 rem_time = ("%s remaining (estimated)" %
3873 utils.FormatSeconds(mstat.estimated_time))
3874 max_time = mstat.estimated_time
3876 rem_time = "no time estimate"
3877 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3878 (disks[i].iv_name, mstat.sync_percent, rem_time))
3880 # if we're done but degraded, let's do a few small retries, to
3881 # make sure we see a stable and not transient situation; therefore
3882 # we force restart of the loop
3883 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3884 logging.info("Degraded disks found, %d retries left", degr_retries)
3885 degr_retries -= 1
3886 time.sleep(1)
3887 continue
3889 if done or oneshot:
3890 break
3892 time.sleep(min(60, max_time))
3894 if done:
3895 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3896 return not cumul_degraded
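# Note on the loop above: degr_retries gives a short grace period (roughly ten
# one-second iterations) during which a "done but degraded" state is re-polled,
# so a transient degradation right after the sync finishes does not make the
# caller report a failure.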
3899 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3900 """Check that mirrors are not degraded.
3902 The ldisk parameter, if True, will change the test from the
3903 is_degraded attribute (which represents overall non-ok status for
3904 the device(s)) to the ldisk (representing the local storage status).
3907 lu.cfg.SetDiskID(dev, node)
3909 result = True
3911 if on_primary or dev.AssembleOnSecondary():
3912 rstats = lu.rpc.call_blockdev_find(node, dev)
3913 msg = rstats.fail_msg
3914 if msg:
3915 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3916 result = False
3917 elif not rstats.payload:
3918 lu.LogWarning("Can't find disk on node %s", node)
3919 result = False
3920 else:
3921 if ldisk:
3922 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3923 else:
3924 result = result and not rstats.payload.is_degraded
3926 if dev.children:
3927 for child in dev.children:
3928 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3930 return result
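# Typical use (illustrative sketch only): callers pass ldisk=True when only
# the local storage status matters, e.g.
#
#   if not _CheckDiskConsistency(self, dev, node, on_primary, ldisk=True):
#     raise errors.OpExecError("Disk %s is degraded" % dev.iv_name)
#
# The exact caller-side handling varies per logical unit.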
3933 class LUOobCommand(NoHooksLU):
3934 """Logical unit for OOB handling.
3938 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3940 def ExpandNames(self):
3941 """Gather locks we need.
3944 if self.op.node_names:
3945 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3946 lock_names = self.op.node_names
3947 else:
3948 lock_names = locking.ALL_SET
3950 self.needed_locks = {
3951 locking.LEVEL_NODE: lock_names,
3952 }
3954 def CheckPrereq(self):
3955 """Check prerequisites.
3958 - the node exists in the configuration
3961 Any errors are signaled by raising errors.OpPrereqError.
3965 self.master_node = self.cfg.GetMasterNode()
3967 assert self.op.power_delay >= 0.0
3969 if self.op.node_names:
3970 if (self.op.command in self._SKIP_MASTER and
3971 self.master_node in self.op.node_names):
3972 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3973 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3975 if master_oob_handler:
3976 additional_text = ("run '%s %s %s' if you want to operate on the"
3977 " master regardless") % (master_oob_handler,
3978 self.op.command,
3979 self.master_node)
3980 else:
3981 additional_text = "it does not support out-of-band operations"
3983 raise errors.OpPrereqError(("Operating on the master node %s is not"
3984 " allowed for %s; %s") %
3985 (self.master_node, self.op.command,
3986 additional_text), errors.ECODE_INVAL)
3987 else:
3988 self.op.node_names = self.cfg.GetNodeList()
3989 if self.op.command in self._SKIP_MASTER:
3990 self.op.node_names.remove(self.master_node)
3992 if self.op.command in self._SKIP_MASTER:
3993 assert self.master_node not in self.op.node_names
3995 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3996 if node is None:
3997 raise errors.OpPrereqError("Node %s not found" % node_name,
3998 errors.ECODE_NOENT)
3999 else:
4000 self.nodes.append(node)
4002 if (not self.op.ignore_status and
4003 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4004 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4005 " not marked offline") % node_name,
4006 errors.ECODE_STATE)
4008 def Exec(self, feedback_fn):
4009 """Execute OOB and return result if we expect any.
4012 master_node = self.master_node
4013 ret = []
4015 for idx, node in enumerate(utils.NiceSort(self.nodes,
4016 key=lambda node: node.name)):
4017 node_entry = [(constants.RS_NORMAL, node.name)]
4018 ret.append(node_entry)
4020 oob_program = _SupportsOob(self.cfg, node)
4022 if oob_program is None:
4023 node_entry.append((constants.RS_UNAVAIL, None))
4024 continue
4026 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4027 self.op.command, oob_program, node.name)
4028 result = self.rpc.call_run_oob(master_node, oob_program,
4029 self.op.command, node.name,
4030 self.op.timeout)
4032 if result.fail_msg:
4033 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4034 node.name, result.fail_msg)
4035 node_entry.append((constants.RS_NODATA, None))
4036 else:
4037 try:
4038 self._CheckPayload(result)
4039 except errors.OpExecError, err:
4040 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4041 node.name, err)
4042 node_entry.append((constants.RS_NODATA, None))
4043 else:
4044 if self.op.command == constants.OOB_HEALTH:
4045 # For health we should log important events
4046 for item, status in result.payload:
4047 if status in [constants.OOB_STATUS_WARNING,
4048 constants.OOB_STATUS_CRITICAL]:
4049 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4050 item, node.name, status)
4052 if self.op.command == constants.OOB_POWER_ON:
4053 node.powered = True
4054 elif self.op.command == constants.OOB_POWER_OFF:
4055 node.powered = False
4056 elif self.op.command == constants.OOB_POWER_STATUS:
4057 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4058 if powered != node.powered:
4059 logging.warning(("Recorded power state (%s) of node '%s' does not"
4060 " match actual power state (%s)"), node.powered,
4061 node.name, powered)
4063 # For configuration changing commands we should update the node
4064 if self.op.command in (constants.OOB_POWER_ON,
4065 constants.OOB_POWER_OFF):
4066 self.cfg.Update(node, feedback_fn)
4068 node_entry.append((constants.RS_NORMAL, result.payload))
4070 if (self.op.command == constants.OOB_POWER_ON and
4071 idx < len(self.nodes) - 1):
4072 time.sleep(self.op.power_delay)
4076 def _CheckPayload(self, result):
4077 """Checks if the payload is valid.
4079 @param result: RPC result
4080 @raises errors.OpExecError: If payload is not valid
4082 """
4083 errs = []
4084 if self.op.command == constants.OOB_HEALTH:
4085 if not isinstance(result.payload, list):
4086 errs.append("command 'health' is expected to return a list but got %s" %
4087 type(result.payload))
4088 else:
4089 for item, status in result.payload:
4090 if status not in constants.OOB_STATUSES:
4091 errs.append("health item '%s' has invalid status '%s'" %
4092 (item, status))
4094 if self.op.command == constants.OOB_POWER_STATUS:
4095 if not isinstance(result.payload, dict):
4096 errs.append("power-status is expected to return a dict but got %s" %
4097 type(result.payload))
4099 if self.op.command in [
4100 constants.OOB_POWER_ON,
4101 constants.OOB_POWER_OFF,
4102 constants.OOB_POWER_CYCLE,
4103 ]:
4104 if result.payload is not None:
4105 errs.append("%s is expected to not return payload but got '%s'" %
4106 (self.op.command, result.payload))
4108 if errs:
4109 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4110 utils.CommaJoin(errs))
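# Sketch of the data returned by LUOobCommand.Exec (one list per node, built
# above as node_entry): the first element names the node, the following ones
# carry the command's status and payload, e.g.
#
#   [[(constants.RS_NORMAL, "node1.example.com"),
#     (constants.RS_NORMAL, {"powered": True})],
#    [(constants.RS_NORMAL, "node2.example.com"),
#     (constants.RS_UNAVAIL, None)]]
#
# The payload shown for power-status is only illustrative; its exact keys come
# from the out-of-band helper program.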
4113 class _OsQuery(_QueryBase):
4114 FIELDS = query.OS_FIELDS
4116 def ExpandNames(self, lu):
4117 # Lock all nodes in shared mode
4118 # Temporary removal of locks, should be reverted later
4119 # TODO: reintroduce locks when they are lighter-weight
4120 lu.needed_locks = {}
4121 #self.share_locks[locking.LEVEL_NODE] = 1
4122 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4124 # The following variables interact with _QueryBase._GetNames
4125 if self.names:
4126 self.wanted = self.names
4127 else:
4128 self.wanted = locking.ALL_SET
4130 self.do_locking = self.use_locking
4132 def DeclareLocks(self, lu, level):
4133 pass
4135 @staticmethod
4136 def _DiagnoseByOS(rlist):
4137 """Remaps a per-node return list into a per-os per-node dictionary
4139 @param rlist: a map with node names as keys and OS objects as values
4142 @return: a dictionary with osnames as keys and as value another
4143 map, with nodes as keys and tuples of (path, status, diagnose,
4144 variants, parameters, api_versions) as values, eg::
4146 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4147 (/srv/..., False, "invalid api")],
4148 "node2": [(/srv/..., True, "", [], [])]}
4151 """
4152 all_os = {}
4153 # we build here the list of nodes that didn't fail the RPC (at RPC
4154 # level), so that nodes with a non-responding node daemon don't
4155 # make all OSes invalid
4156 good_nodes = [node_name for node_name in rlist
4157 if not rlist[node_name].fail_msg]
4158 for node_name, nr in rlist.items():
4159 if nr.fail_msg or not nr.payload:
4160 continue
4161 for (name, path, status, diagnose, variants,
4162 params, api_versions) in nr.payload:
4163 if name not in all_os:
4164 # build a list of nodes for this os containing empty lists
4165 # for each node in node_list
4166 all_os[name] = {}
4167 for nname in good_nodes:
4168 all_os[name][nname] = []
4169 # convert params from [name, help] to (name, help)
4170 params = [tuple(v) for v in params]
4171 all_os[name][node_name].append((path, status, diagnose,
4172 variants, params, api_versions))
4174 return all_os
4175 def _GetQueryData(self, lu):
4176 """Computes the list of nodes and their attributes.
4179 # Locking is not used
4180 assert not (compat.any(lu.glm.is_owned(level)
4181 for level in locking.LEVELS
4182 if level != locking.LEVEL_CLUSTER) or
4183 self.do_locking or self.use_locking)
4185 valid_nodes = [node.name
4186 for node in lu.cfg.GetAllNodesInfo().values()
4187 if not node.offline and node.vm_capable]
4188 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4189 cluster = lu.cfg.GetClusterInfo()
4191 data = {}
4193 for (os_name, os_data) in pol.items():
4194 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4195 hidden=(os_name in cluster.hidden_os),
4196 blacklisted=(os_name in cluster.blacklisted_os))
4198 variants = set()
4199 parameters = set()
4200 api_versions = set()
4202 for idx, osl in enumerate(os_data.values()):
4203 info.valid = bool(info.valid and osl and osl[0][1])
4204 if not info.valid:
4205 break
4207 (node_variants, node_params, node_api) = osl[0][3:6]
4208 if idx == 0:
4209 # First entry
4210 variants.update(node_variants)
4211 parameters.update(node_params)
4212 api_versions.update(node_api)
4213 else:
4214 # Filter out inconsistent values
4215 variants.intersection_update(node_variants)
4216 parameters.intersection_update(node_params)
4217 api_versions.intersection_update(node_api)
4219 info.variants = list(variants)
4220 info.parameters = list(parameters)
4221 info.api_versions = list(api_versions)
4223 data[os_name] = info
4225 # Prepare data in requested order
4226 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4227 if name in data]
4230 class LUOsDiagnose(NoHooksLU):
4231 """Logical unit for OS diagnose/query.
4237 def _BuildFilter(fields, names):
4238 """Builds a filter for querying OSes.
4241 name_filter = qlang.MakeSimpleFilter("name", names)
4243 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4244 # respective field is not requested
4245 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4246 for fname in ["hidden", "blacklisted"]
4247 if fname not in fields]
4248 if "valid" not in fields:
4249 status_filter.append([qlang.OP_TRUE, "valid"])
4251 if status_filter:
4252 status_filter.insert(0, qlang.OP_AND)
4253 else:
4254 status_filter = None
4256 if name_filter and status_filter:
4257 return [qlang.OP_AND, name_filter, status_filter]
4258 elif name_filter:
4259 return name_filter
4260 else:
4261 return status_filter
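# Illustrative example (the field and OS names are assumptions): requesting
# only the "name" field for one OS yields a filter of roughly this shape,
# hiding hidden, blacklisted and invalid OSes because those fields were not
# requested; the exact name-filter term comes from qlang.MakeSimpleFilter:
#
#   [qlang.OP_AND,
#    <name filter built from qlang.MakeSimpleFilter("name", ["debian-8"])>,
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]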
4263 def CheckArguments(self):
4264 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4265 self.op.output_fields, False)
4267 def ExpandNames(self):
4268 self.oq.ExpandNames(self)
4270 def Exec(self, feedback_fn):
4271 return self.oq.OldStyleQuery(self)
4274 class LUNodeRemove(LogicalUnit):
4275 """Logical unit for removing a node.
4278 HPATH = "node-remove"
4279 HTYPE = constants.HTYPE_NODE
4281 def BuildHooksEnv(self):
4284 This doesn't run on the target node in the pre phase as a failed
4285 node would then be impossible to remove.
4288 return {
4289 "OP_TARGET": self.op.node_name,
4290 "NODE_NAME": self.op.node_name,
4291 }
4293 def BuildHooksNodes(self):
4294 """Build hooks nodes.
4297 all_nodes = self.cfg.GetNodeList()
4298 try:
4299 all_nodes.remove(self.op.node_name)
4300 except ValueError:
4301 logging.warning("Node '%s', which is about to be removed, was not found"
4302 " in the list of all nodes", self.op.node_name)
4303 return (all_nodes, all_nodes)
4305 def CheckPrereq(self):
4306 """Check prerequisites.
4309 - the node exists in the configuration
4310 - it does not have primary or secondary instances
4311 - it's not the master
4313 Any errors are signaled by raising errors.OpPrereqError.
4316 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4317 node = self.cfg.GetNodeInfo(self.op.node_name)
4318 assert node is not None
4320 masternode = self.cfg.GetMasterNode()
4321 if node.name == masternode:
4322 raise errors.OpPrereqError("Node is the master node, failover to another"
4323 " node is required", errors.ECODE_INVAL)
4325 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4326 if node.name in instance.all_nodes:
4327 raise errors.OpPrereqError("Instance %s is still running on the node,"
4328 " please remove first" % instance_name,
4329 errors.ECODE_INVAL)
4330 self.op.node_name = node.name
4331 self.node = node
4333 def Exec(self, feedback_fn):
4334 """Removes the node from the cluster.
4337 node = self.node
4338 logging.info("Stopping the node daemon and removing configs from node %s",
4339 node.name)
4341 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4343 # Promote nodes to master candidate as needed
4344 _AdjustCandidatePool(self, exceptions=[node.name])
4345 self.context.RemoveNode(node.name)
4347 # Run post hooks on the node before it's removed
4348 _RunPostHook(self, node.name)
4350 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4351 msg = result.fail_msg
4352 if msg:
4353 self.LogWarning("Errors encountered on the remote node while leaving"
4354 " the cluster: %s", msg)
4354 " the cluster: %s", msg)
4356 # Remove node from our /etc/hosts
4357 if self.cfg.GetClusterInfo().modify_etc_hosts:
4358 master_node = self.cfg.GetMasterNode()
4359 result = self.rpc.call_etc_hosts_modify(master_node,
4360 constants.ETC_HOSTS_REMOVE,
4361 node.name, None)
4362 result.Raise("Can't update hosts file with new host data")
4363 _RedistributeAncillaryFiles(self)
4366 class _NodeQuery(_QueryBase):
4367 FIELDS = query.NODE_FIELDS
4369 def ExpandNames(self, lu):
4370 lu.needed_locks = {}
4371 lu.share_locks = _ShareAll()
4373 if self.names:
4374 self.wanted = _GetWantedNodes(lu, self.names)
4375 else:
4376 self.wanted = locking.ALL_SET
4378 self.do_locking = (self.use_locking and
4379 query.NQ_LIVE in self.requested_data)
4381 if self.do_locking:
4382 # If any non-static field is requested we need to lock the nodes
4383 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4385 def DeclareLocks(self, lu, level):
4386 pass
4388 def _GetQueryData(self, lu):
4389 """Computes the list of nodes and their attributes.
4392 all_info = lu.cfg.GetAllNodesInfo()
4394 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4396 # Gather data as requested
4397 if query.NQ_LIVE in self.requested_data:
4398 # filter out non-vm_capable nodes
4399 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4401 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4402 lu.cfg.GetHypervisorType())
4403 live_data = dict((name, nresult.payload)
4404 for (name, nresult) in node_data.items()
4405 if not nresult.fail_msg and nresult.payload)
4407 else:
4408 live_data = None
4409 if query.NQ_INST in self.requested_data:
4410 node_to_primary = dict([(name, set()) for name in nodenames])
4411 node_to_secondary = dict([(name, set()) for name in nodenames])
4413 inst_data = lu.cfg.GetAllInstancesInfo()
4415 for inst in inst_data.values():
4416 if inst.primary_node in node_to_primary:
4417 node_to_primary[inst.primary_node].add(inst.name)
4418 for secnode in inst.secondary_nodes:
4419 if secnode in node_to_secondary:
4420 node_to_secondary[secnode].add(inst.name)
4421 else:
4422 node_to_primary = None
4423 node_to_secondary = None
4425 if query.NQ_OOB in self.requested_data:
4426 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4427 for name, node in all_info.iteritems())
4429 else:
4430 oob_support = None
4431 if query.NQ_GROUP in self.requested_data:
4432 groups = lu.cfg.GetAllNodeGroupsInfo()
4433 else:
4434 groups = {}
4436 return query.NodeQueryData([all_info[name] for name in nodenames],
4437 live_data, lu.cfg.GetMasterNode(),
4438 node_to_primary, node_to_secondary, groups,
4439 oob_support, lu.cfg.GetClusterInfo())
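# Note: live data is gathered only from vm_capable nodes, and a node whose RPC
# fails simply does not appear in live_data; the dynamic fields for such a
# node are then presumably reported as unavailable by the query layer rather
# than failing the whole query.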
4442 class LUNodeQuery(NoHooksLU):
4443 """Logical unit for querying nodes.
4446 # pylint: disable=W0142
4449 def CheckArguments(self):
4450 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4451 self.op.output_fields, self.op.use_locking)
4453 def ExpandNames(self):
4454 self.nq.ExpandNames(self)
4456 def Exec(self, feedback_fn):
4457 return self.nq.OldStyleQuery(self)
4460 class LUNodeQueryvols(NoHooksLU):
4461 """Logical unit for getting volumes on node(s).
4465 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4466 _FIELDS_STATIC = utils.FieldSet("node")
4468 def CheckArguments(self):
4469 _CheckOutputFields(static=self._FIELDS_STATIC,
4470 dynamic=self._FIELDS_DYNAMIC,
4471 selected=self.op.output_fields)
4473 def ExpandNames(self):
4474 self.needed_locks = {}
4475 self.share_locks[locking.LEVEL_NODE] = 1
4476 if not self.op.nodes:
4477 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4479 self.needed_locks[locking.LEVEL_NODE] = \
4480 _GetWantedNodes(self, self.op.nodes)
4482 def Exec(self, feedback_fn):
4483 """Computes the list of nodes and their attributes.
4486 nodenames = self.owned_locks(locking.LEVEL_NODE)
4487 volumes = self.rpc.call_node_volumes(nodenames)
4489 ilist = self.cfg.GetAllInstancesInfo()
4490 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4492 output = []
4493 for node in nodenames:
4494 nresult = volumes[node]
4495 if nresult.offline:
4496 continue
4497 msg = nresult.fail_msg
4498 if msg:
4499 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4500 continue
4502 node_vols = sorted(nresult.payload,
4503 key=operator.itemgetter("dev"))
4505 for vol in node_vols:
4506 node_output = []
4507 for field in self.op.output_fields:
4508 if field == "node":
4509 val = node
4510 elif field == "phys":
4511 val = vol["dev"]
4512 elif field == "vg":
4513 val = vol["vg"]
4514 elif field == "name":
4515 val = vol["name"]
4516 elif field == "size":
4517 val = int(float(vol["size"]))
4518 elif field == "instance":
4519 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4520 else:
4521 raise errors.ParameterError(field)
4522 node_output.append(str(val))
4524 output.append(node_output)
4526 return output
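# Illustrative result row (all values are made up): with output_fields
# ["node", "phys", "vg", "name", "size", "instance"] each entry of the
# returned list would look roughly like
#
#   ["node1.example.com", "/dev/sda5", "xenvg", "instance1.disk0", "10240",
#    "instance1.example.com"]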
4529 class LUNodeQueryStorage(NoHooksLU):
4530 """Logical unit for getting information on storage units on node(s).
4533 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4536 def CheckArguments(self):
4537 _CheckOutputFields(static=self._FIELDS_STATIC,
4538 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4539 selected=self.op.output_fields)
4541 def ExpandNames(self):
4542 self.needed_locks = {}
4543 self.share_locks[locking.LEVEL_NODE] = 1
4545 if self.op.nodes:
4546 self.needed_locks[locking.LEVEL_NODE] = \
4547 _GetWantedNodes(self, self.op.nodes)
4548 else:
4549 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4551 def Exec(self, feedback_fn):
4552 """Computes the list of nodes and their attributes.
4555 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4557 # Always get name to sort by
4558 if constants.SF_NAME in self.op.output_fields:
4559 fields = self.op.output_fields[:]
4560 else:
4561 fields = [constants.SF_NAME] + self.op.output_fields
4563 # Never ask for node or type as it's only known to the LU
4564 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4565 while extra in fields:
4566 fields.remove(extra)
4568 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4569 name_idx = field_idx[constants.SF_NAME]
4571 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4572 data = self.rpc.call_storage_list(self.nodes,
4573 self.op.storage_type, st_args,
4574 self.op.name, fields)
4576 result = []
4578 for node in utils.NiceSort(self.nodes):
4579 nresult = data[node]
4580 if nresult.offline:
4581 continue
4583 msg = nresult.fail_msg
4584 if msg:
4585 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4586 continue
4588 rows = dict([(row[name_idx], row) for row in nresult.payload])
4590 for name in utils.NiceSort(rows.keys()):
4591 row = rows[name]
4593 out = []
4595 for field in self.op.output_fields:
4596 if field == constants.SF_NODE:
4597 val = node
4598 elif field == constants.SF_TYPE:
4599 val = self.op.storage_type
4600 elif field in field_idx:
4601 val = row[field_idx[field]]
4602 else:
4603 raise errors.ParameterError(field)
4605 out.append(val)
4607 result.append(out)
4609 return result
4612 class _InstanceQuery(_QueryBase):
4613 FIELDS = query.INSTANCE_FIELDS
4615 def ExpandNames(self, lu):
4616 lu.needed_locks = {}
4617 lu.share_locks = _ShareAll()
4619 if self.names:
4620 self.wanted = _GetWantedInstances(lu, self.names)
4621 else:
4622 self.wanted = locking.ALL_SET
4624 self.do_locking = (self.use_locking and
4625 query.IQ_LIVE in self.requested_data)
4627 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4628 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4629 lu.needed_locks[locking.LEVEL_NODE] = []
4630 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4632 self.do_grouplocks = (self.do_locking and
4633 query.IQ_NODES in self.requested_data)
4635 def DeclareLocks(self, lu, level):
4637 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4638 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4640 # Lock all groups used by instances optimistically; this requires going
4641 # via the node before it's locked, requiring verification later on
4642 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4643 set(group_uuid
4644 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4645 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4645 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4646 elif level == locking.LEVEL_NODE:
4647 lu._LockInstancesNodes() # pylint: disable=W0212
4649 @staticmethod
4650 def _CheckGroupLocks(lu):
4651 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4652 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4654 # Check if node groups for locked instances are still correct
4655 for instance_name in owned_instances:
4656 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4658 def _GetQueryData(self, lu):
4659 """Computes the list of instances and their attributes.
4662 if self.do_grouplocks:
4663 self._CheckGroupLocks(lu)
4665 cluster = lu.cfg.GetClusterInfo()
4666 all_info = lu.cfg.GetAllInstancesInfo()
4668 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4670 instance_list = [all_info[name] for name in instance_names]
4671 nodes = frozenset(itertools.chain(*(inst.all_nodes
4672 for inst in instance_list)))
4673 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4674 bad_nodes = []
4675 offline_nodes = []
4676 wrongnode_inst = set()
4678 # Gather data as requested
4679 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4680 live_data = {}
4681 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4682 for name in nodes:
4683 result = node_data[name]
4684 if result.offline:
4685 # offline nodes will be in both lists
4686 assert result.fail_msg
4687 offline_nodes.append(name)
4688 if result.fail_msg:
4689 bad_nodes.append(name)
4690 elif result.payload:
4691 for inst in result.payload:
4692 if inst in all_info:
4693 if all_info[inst].primary_node == name:
4694 live_data.update(result.payload)
4695 else:
4696 wrongnode_inst.add(inst)
4697 else:
4698 # orphan instance; we don't list it here as we don't
4699 # handle this case yet in the output of instance listing
4700 logging.warning("Orphan instance '%s' found on node %s",
4701 inst, name)
4702 # else no instance is alive
4703 else:
4704 live_data = {}
4706 if query.IQ_DISKUSAGE in self.requested_data:
4707 disk_usage = dict((inst.name,
4708 _ComputeDiskSize(inst.disk_template,
4709 [{constants.IDISK_SIZE: disk.size}
4710 for disk in inst.disks]))
4711 for inst in instance_list)
4713 else:
4714 disk_usage = None
4715 if query.IQ_CONSOLE in self.requested_data:
4716 consinfo = {}
4717 for inst in instance_list:
4718 if inst.name in live_data:
4719 # Instance is running
4720 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4721 else:
4722 consinfo[inst.name] = None
4723 assert set(consinfo.keys()) == set(instance_names)
4725 else:
4726 consinfo = None
4727 if query.IQ_NODES in self.requested_data:
4728 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4729 instance_list)))
4730 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4731 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4732 for uuid in set(map(operator.attrgetter("group"),
4733 nodes.values())))
4735 else:
4736 nodes = None
4737 groups = None
4738 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4739 disk_usage, offline_nodes, bad_nodes,
4740 live_data, wrongnode_inst, consinfo,
4741 nodes, groups)
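# The assembled InstanceQueryData bundles static configuration (instance_list,
# cluster), the liveness information gathered above (live_data, offline_nodes,
# bad_nodes, wrongnode_inst), and the optional per-request extras (disk_usage,
# consinfo, nodes/groups), which stay None when the corresponding IQ_* data
# was not requested.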
4744 class LUQuery(NoHooksLU):
4745 """Query for resources/items of a certain kind.
4748 # pylint: disable=W0142
4751 def CheckArguments(self):
4752 qcls = _GetQueryImplementation(self.op.what)
4754 self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4756 def ExpandNames(self):
4757 self.impl.ExpandNames(self)
4759 def DeclareLocks(self, level):
4760 self.impl.DeclareLocks(self, level)
4762 def Exec(self, feedback_fn):
4763 return self.impl.NewStyleQuery(self)
4766 class LUQueryFields(NoHooksLU):
4767 """Query for resources/items of a certain kind.
4770 # pylint: disable=W0142
4773 def CheckArguments(self):
4774 self.qcls = _GetQueryImplementation(self.op.what)
4776 def ExpandNames(self):
4777 self.needed_locks = {}
4779 def Exec(self, feedback_fn):
4780 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4783 class LUNodeModifyStorage(NoHooksLU):
4784 """Logical unit for modifying a storage volume on a node.
4789 def CheckArguments(self):
4790 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4792 storage_type = self.op.storage_type
4794 try:
4795 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4796 except KeyError:
4797 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4798 " modified" % storage_type,
4799 errors.ECODE_INVAL)
4801 diff = set(self.op.changes.keys()) - modifiable
4802 if diff:
4803 raise errors.OpPrereqError("The following fields can not be modified for"
4804 " storage units of type '%s': %r" %
4805 (storage_type, list(diff)),
4806 errors.ECODE_INVAL)
4808 def ExpandNames(self):
4809 self.needed_locks = {
4810 locking.LEVEL_NODE: self.op.node_name,
4811 }
4813 def Exec(self, feedback_fn):
4814 """Computes the list of nodes and their attributes.
4817 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4818 result = self.rpc.call_storage_modify(self.op.node_name,
4819 self.op.storage_type, st_args,
4820 self.op.name, self.op.changes)
4821 result.Raise("Failed to modify storage unit '%s' on %s" %
4822 (self.op.name, self.op.node_name))
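# Hypothetical example of the opcode this LU processes (the storage type,
# device name and field shown here are assumptions for illustration and must
# be valid entries of constants.MODIFIABLE_STORAGE_FIELDS):
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sda5",
#                                    changes={constants.SF_ALLOCATABLE: False})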
4825 class LUNodeAdd(LogicalUnit):
4826 """Logical unit for adding node to the cluster.
4829 HPATH = "node-add"
4830 HTYPE = constants.HTYPE_NODE
4831 _NFLAGS = ["master_capable", "vm_capable"]
4833 def CheckArguments(self):
4834 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4835 # validate/normalize the node name
4836 self.hostname = netutils.GetHostname(name=self.op.node_name,
4837 family=self.primary_ip_family)
4838 self.op.node_name = self.hostname.name
4840 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4841 raise errors.OpPrereqError("Cannot readd the master node",
4844 if self.op.readd and self.op.group:
4845 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4846 " being readded", errors.ECODE_INVAL)
4848 def BuildHooksEnv(self):
4851 This will run on all nodes before, and on all nodes + the new node after.
4854 return {
4855 "OP_TARGET": self.op.node_name,
4856 "NODE_NAME": self.op.node_name,
4857 "NODE_PIP": self.op.primary_ip,
4858 "NODE_SIP": self.op.secondary_ip,
4859 "MASTER_CAPABLE": str(self.op.master_capable),
4860 "VM_CAPABLE": str(self.op.vm_capable),
4861 }
4863 def BuildHooksNodes(self):
4864 """Build hooks nodes.
4867 # Exclude added node
4868 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4869 post_nodes = pre_nodes + [self.op.node_name, ]
4871 return (pre_nodes, post_nodes)
4873 def CheckPrereq(self):
4874 """Check prerequisites.
4877 - the new node is not already in the config
4879 - its parameters (single/dual homed) matches the cluster
4881 Any errors are signaled by raising errors.OpPrereqError.
4884 cfg = self.cfg
4885 hostname = self.hostname
4886 node = hostname.name
4887 primary_ip = self.op.primary_ip = hostname.ip
4888 if self.op.secondary_ip is None:
4889 if self.primary_ip_family == netutils.IP6Address.family:
4890 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4891 " IPv4 address must be given as secondary",
4892 errors.ECODE_INVAL)
4893 self.op.secondary_ip = primary_ip
4895 secondary_ip = self.op.secondary_ip
4896 if not netutils.IP4Address.IsValid(secondary_ip):
4897 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4898 " address" % secondary_ip, errors.ECODE_INVAL)
4900 node_list = cfg.GetNodeList()
4901 if not self.op.readd and node in node_list:
4902 raise errors.OpPrereqError("Node %s is already in the configuration" %
4903 node, errors.ECODE_EXISTS)
4904 elif self.op.readd and node not in node_list:
4905 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4906 errors.ECODE_NOENT)
4908 self.changed_primary_ip = False
4910 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4911 if self.op.readd and node == existing_node_name:
4912 if existing_node.secondary_ip != secondary_ip:
4913 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4914 " address configuration as before",
4915 errors.ECODE_INVAL)
4916 if existing_node.primary_ip != primary_ip:
4917 self.changed_primary_ip = True
4919 continue
4921 if (existing_node.primary_ip == primary_ip or
4922 existing_node.secondary_ip == primary_ip or
4923 existing_node.primary_ip == secondary_ip or
4924 existing_node.secondary_ip == secondary_ip):
4925 raise errors.OpPrereqError("New node ip address(es) conflict with"
4926 " existing node %s" % existing_node.name,
4927 errors.ECODE_NOTUNIQUE)
4929 # After this 'if' block, None is no longer a valid value for the
4930 # _capable op attributes
4931 if self.op.readd:
4932 old_node = self.cfg.GetNodeInfo(node)
4933 assert old_node is not None, "Can't retrieve locked node %s" % node
4934 for attr in self._NFLAGS:
4935 if getattr(self.op, attr) is None:
4936 setattr(self.op, attr, getattr(old_node, attr))
4937 else:
4938 for attr in self._NFLAGS:
4939 if getattr(self.op, attr) is None:
4940 setattr(self.op, attr, True)
4942 if self.op.readd and not self.op.vm_capable:
4943 pri, sec = cfg.GetNodeInstances(node)
4944 if pri or sec:
4945 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4946 " flag set to false, but it already holds"
4947 " instances" % node,
4948 errors.ECODE_STATE)
4950 # check that the type of the node (single versus dual homed) is the
4951 # same as for the master
4952 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4953 master_singlehomed = myself.secondary_ip == myself.primary_ip
4954 newbie_singlehomed = secondary_ip == primary_ip
4955 if master_singlehomed != newbie_singlehomed:
4956 if master_singlehomed:
4957 raise errors.OpPrereqError("The master has no secondary ip but the"
4958 " new node has one",
4959 errors.ECODE_INVAL)
4960 else:
4961 raise errors.OpPrereqError("The master has a secondary ip but the"
4962 " new node doesn't have one",
4963 errors.ECODE_INVAL)
4965 # checks reachability
4966 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4967 raise errors.OpPrereqError("Node not reachable by ping",
4968 errors.ECODE_ENVIRON)
4970 if not newbie_singlehomed:
4971 # check reachability from my secondary ip to newbie's secondary ip
4972 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4973 source=myself.secondary_ip):
4974 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4975 " based ping to node daemon port",
4976 errors.ECODE_ENVIRON)
4978 if self.op.readd:
4979 exceptions = [node]
4980 else:
4981 exceptions = []
4983 if self.op.master_capable:
4984 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4985 else:
4986 self.master_candidate = False
4988 if self.op.readd:
4989 self.new_node = old_node
4990 else:
4991 node_group = cfg.LookupNodeGroup(self.op.group)
4992 self.new_node = objects.Node(name=node,
4993 primary_ip=primary_ip,
4994 secondary_ip=secondary_ip,
4995 master_candidate=self.master_candidate,
4996 offline=False, drained=False,
4997 group=node_group)
4999 if self.op.ndparams:
5000 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5002 def Exec(self, feedback_fn):
5003 """Adds the new node to the cluster.
5006 new_node = self.new_node
5007 node = new_node.name
5009 # We are adding a new node, so we assume it is powered
5010 new_node.powered = True
5012 # for re-adds, reset the offline/drained/master-candidate flags;
5013 # we need to reset here, otherwise offline would prevent RPC calls
5014 # later in the procedure; this also means that if the re-add
5015 # fails, we are left with a non-offlined, broken node
5016 if self.op.readd:
5017 new_node.drained = new_node.offline = False # pylint: disable=W0201
5018 self.LogInfo("Readding a node, the offline/drained flags were reset")
5019 # if we demote the node, we do cleanup later in the procedure
5020 new_node.master_candidate = self.master_candidate
5021 if self.changed_primary_ip:
5022 new_node.primary_ip = self.op.primary_ip
5024 # copy the master/vm_capable flags
5025 for attr in self._NFLAGS:
5026 setattr(new_node, attr, getattr(self.op, attr))
5028 # notify the user about any possible mc promotion
5029 if new_node.master_candidate:
5030 self.LogInfo("Node will be a master candidate")
5032 if self.op.ndparams:
5033 new_node.ndparams = self.op.ndparams
5035 new_node.ndparams = {}
5037 # check connectivity
5038 result = self.rpc.call_version([node])[node]
5039 result.Raise("Can't get version information from node %s" % node)
5040 if constants.PROTOCOL_VERSION == result.payload:
5041 logging.info("Communication to node %s fine, sw version %s match",
5042 node, result.payload)
5044 raise errors.OpExecError("Version mismatch master version %s,"
5045 " node version %s" %
5046 (constants.PROTOCOL_VERSION, result.payload))
5048 # Add node to our /etc/hosts, and add key to known_hosts
5049 if self.cfg.GetClusterInfo().modify_etc_hosts:
5050 master_node = self.cfg.GetMasterNode()
5051 result = self.rpc.call_etc_hosts_modify(master_node,
5052 constants.ETC_HOSTS_ADD,
5053 self.hostname.name,
5054 self.hostname.ip)
5055 result.Raise("Can't update hosts file with new host data")
5057 if new_node.secondary_ip != new_node.primary_ip:
5058 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5059 False)
5061 node_verify_list = [self.cfg.GetMasterNode()]
5062 node_verify_param = {
5063 constants.NV_NODELIST: [node],
5064 # TODO: do a node-net-test as well?
5065 }
5067 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5068 self.cfg.GetClusterName())
5069 for verifier in node_verify_list:
5070 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5071 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5072 if nl_payload:
5073 for failed in nl_payload:
5074 feedback_fn("ssh/hostname verification failed"
5075 " (checking from %s): %s" %
5076 (verifier, nl_payload[failed]))
5077 raise errors.OpExecError("ssh/hostname verification failed")
5079 if self.op.readd:
5080 _RedistributeAncillaryFiles(self)
5081 self.context.ReaddNode(new_node)
5082 # make sure we redistribute the config
5083 self.cfg.Update(new_node, feedback_fn)
5084 # and make sure the new node will not have old files around
5085 if not new_node.master_candidate:
5086 result = self.rpc.call_node_demote_from_mc(new_node.name)
5087 msg = result.fail_msg
5088 if msg:
5089 self.LogWarning("Node failed to demote itself from master"
5090 " candidate status: %s" % msg)
5091 else:
5092 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5093 additional_vm=self.op.vm_capable)
5094 self.context.AddNode(new_node, self.proc.GetECId())
5097 class LUNodeSetParams(LogicalUnit):
5098 """Modifies the parameters of a node.
5100 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5101 to the node role (as _ROLE_*)
5102 @cvar _R2F: a dictionary from node role to tuples of flags
5103 @cvar _FLAGS: a list of attribute names corresponding to the flags
5106 HPATH = "node-modify"
5107 HTYPE = constants.HTYPE_NODE
5109 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5110 _F2R = {
5111 (True, False, False): _ROLE_CANDIDATE,
5112 (False, True, False): _ROLE_DRAINED,
5113 (False, False, True): _ROLE_OFFLINE,
5114 (False, False, False): _ROLE_REGULAR,
5115 }
5116 _R2F = dict((v, k) for k, v in _F2R.items())
5117 _FLAGS = ["master_candidate", "drained", "offline"]
5119 def CheckArguments(self):
5120 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5121 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5122 self.op.master_capable, self.op.vm_capable,
5123 self.op.secondary_ip, self.op.ndparams]
5124 if all_mods.count(None) == len(all_mods):
5125 raise errors.OpPrereqError("Please pass at least one modification",
5126 errors.ECODE_INVAL)
5127 if all_mods.count(True) > 1:
5128 raise errors.OpPrereqError("Can't set the node into more than one"
5129 " state at the same time",
5130 errors.ECODE_INVAL)
5132 # Boolean value that tells us whether we might be demoting from MC
5133 self.might_demote = (self.op.master_candidate == False or
5134 self.op.offline == True or
5135 self.op.drained == True or
5136 self.op.master_capable == False)
5138 if self.op.secondary_ip:
5139 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5140 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5141 " address" % self.op.secondary_ip,
5142 errors.ECODE_INVAL)
5144 self.lock_all = self.op.auto_promote and self.might_demote
5145 self.lock_instances = self.op.secondary_ip is not None
5147 def ExpandNames(self):
5148 if self.lock_all:
5149 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5150 else:
5151 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5153 if self.lock_instances:
5154 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5156 def DeclareLocks(self, level):
5157 # If we have locked all instances, before waiting to lock nodes, release
5158 # all the ones living on nodes unrelated to the current operation.
5159 if level == locking.LEVEL_NODE and self.lock_instances:
5160 self.affected_instances = []
5161 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5162 instances_keep = []
5164 # Build list of instances to release
5165 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5166 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5167 if (instance.disk_template in constants.DTS_INT_MIRROR and
5168 self.op.node_name in instance.all_nodes):
5169 instances_keep.append(instance_name)
5170 self.affected_instances.append(instance)
5172 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5174 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5175 set(instances_keep))
5177 def BuildHooksEnv(self):
5180 This runs on the master node.
5183 return {
5184 "OP_TARGET": self.op.node_name,
5185 "MASTER_CANDIDATE": str(self.op.master_candidate),
5186 "OFFLINE": str(self.op.offline),
5187 "DRAINED": str(self.op.drained),
5188 "MASTER_CAPABLE": str(self.op.master_capable),
5189 "VM_CAPABLE": str(self.op.vm_capable),
5190 }
5192 def BuildHooksNodes(self):
5193 """Build hooks nodes.
5196 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5197 return (nl, nl)
5199 def CheckPrereq(self):
5200 """Check prerequisites.
5202 This only checks the instance list against the existing names.
5205 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5207 if (self.op.master_candidate is not None or
5208 self.op.drained is not None or
5209 self.op.offline is not None):
5210 # we can't change the master's node flags
5211 if self.op.node_name == self.cfg.GetMasterNode():
5212 raise errors.OpPrereqError("The master role can be changed"
5213 " only via master-failover",
5214 errors.ECODE_INVAL)
5216 if self.op.master_candidate and not node.master_capable:
5217 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5218 " it a master candidate" % node.name,
5221 if self.op.vm_capable == False:
5222 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5223 if ipri or isec:
5224 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5225 " the vm_capable flag" % node.name,
5226 errors.ECODE_STATE)
5228 if node.master_candidate and self.might_demote and not self.lock_all:
5229 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5230 # check if after removing the current node, we're missing master
5232 (mc_remaining, mc_should, _) = \
5233 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5234 if mc_remaining < mc_should:
5235 raise errors.OpPrereqError("Not enough master candidates, please"
5236 " pass auto promote option to allow"
5237 " promotion", errors.ECODE_STATE)
5239 self.old_flags = old_flags = (node.master_candidate,
5240 node.drained, node.offline)
5241 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5242 self.old_role = old_role = self._F2R[old_flags]
5244 # Check for ineffective changes
5245 for attr in self._FLAGS:
5246 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5247 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5248 setattr(self.op, attr, None)
5250 # Past this point, any flag change to False means a transition
5251 # away from the respective state, as only real changes are kept
5253 # TODO: We might query the real power state if it supports OOB
5254 if _SupportsOob(self.cfg, node):
5255 if self.op.offline is False and not (node.powered or
5256 self.op.powered == True):
5257 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5258 " offline status can be reset") %
5259 self.op.node_name)
5260 elif self.op.powered is not None:
5261 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5262 " as it does not support out-of-band"
5263 " handling") % self.op.node_name)
5265 # If we're being deofflined/drained, we'll MC ourself if needed
5266 if (self.op.drained == False or self.op.offline == False or
5267 (self.op.master_capable and not node.master_capable)):
5268 if _DecideSelfPromotion(self):
5269 self.op.master_candidate = True
5270 self.LogInfo("Auto-promoting node to master candidate")
5272 # If we're no longer master capable, we'll demote ourselves from MC
5273 if self.op.master_capable == False and node.master_candidate:
5274 self.LogInfo("Demoting from master candidate")
5275 self.op.master_candidate = False
5278 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5279 if self.op.master_candidate:
5280 new_role = self._ROLE_CANDIDATE
5281 elif self.op.drained:
5282 new_role = self._ROLE_DRAINED
5283 elif self.op.offline:
5284 new_role = self._ROLE_OFFLINE
5285 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5286 # False is still in new flags, which means we're un-setting (the
5288 new_role = self._ROLE_REGULAR
5289 else: # no new flags, nothing, keep old role
5290 new_role = old_role
5292 self.new_role = new_role
5294 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5295 # Trying to transition out of offline status
5296 result = self.rpc.call_version([node.name])[node.name]
5297 if result.fail_msg:
5298 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5299 " to report its version: %s" %
5300 (node.name, result.fail_msg),
5301 errors.ECODE_STATE)
5302 else:
5303 self.LogWarning("Transitioning node from offline to online state"
5304 " without using re-add. Please make sure the node"
5305 " is healthy!")
5307 if self.op.secondary_ip:
5308 # Ok even without locking, because this can't be changed by any LU
5309 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5310 master_singlehomed = master.secondary_ip == master.primary_ip
5311 if master_singlehomed and self.op.secondary_ip:
5312 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5313 " homed cluster", errors.ECODE_INVAL)
5315 if node.offline:
5316 if self.affected_instances:
5317 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5318 " node has instances (%s) configured"
5319 " to use it" % self.affected_instances)
5321 # On online nodes, check that no instances are running, and that
5322 # the node has the new ip and we can reach it.
5323 for instance in self.affected_instances:
5324 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5326 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5327 if master.name != node.name:
5328 # check reachability from master secondary ip to new secondary ip
5329 if not netutils.TcpPing(self.op.secondary_ip,
5330 constants.DEFAULT_NODED_PORT,
5331 source=master.secondary_ip):
5332 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5333 " based ping to node daemon port",
5334 errors.ECODE_ENVIRON)
5336 if self.op.ndparams:
5337 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5338 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5339 self.new_ndparams = new_ndparams
5341 def Exec(self, feedback_fn):
5345 node = self.node
5346 old_role = self.old_role
5347 new_role = self.new_role
5349 result = []
5351 if self.op.ndparams:
5352 node.ndparams = self.new_ndparams
5354 if self.op.powered is not None:
5355 node.powered = self.op.powered
5357 for attr in ["master_capable", "vm_capable"]:
5358 val = getattr(self.op, attr)
5359 if val is not None:
5360 setattr(node, attr, val)
5361 result.append((attr, str(val)))
5363 if new_role != old_role:
5364 # Tell the node to demote itself, if no longer MC and not offline
5365 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5366 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5367 if msg:
5368 self.LogWarning("Node failed to demote itself: %s", msg)
5370 new_flags = self._R2F[new_role]
5371 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5372 if of != nf:
5373 result.append((desc, str(nf)))
5374 (node.master_candidate, node.drained, node.offline) = new_flags
5376 # we locked all nodes, we adjust the CP before updating this node
5377 if self.lock_all:
5378 _AdjustCandidatePool(self, [node.name])
5380 if self.op.secondary_ip:
5381 node.secondary_ip = self.op.secondary_ip
5382 result.append(("secondary_ip", self.op.secondary_ip))
5384 # this will trigger configuration file update, if needed
5385 self.cfg.Update(node, feedback_fn)
5387 # this will trigger job queue propagation or cleanup if the mc
5389 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5390 self.context.ReaddNode(node)
5392 return result
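# The value returned above is a list of (parameter, new value) pairs
# describing what was actually changed, e.g. [("master_candidate", "True"),
# ("secondary_ip", "192.0.2.10")]; unchanged flags are not listed. The
# concrete values shown here are only an illustration.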
5395 class LUNodePowercycle(NoHooksLU):
5396 """Powercycles a node.
5401 def CheckArguments(self):
5402 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5403 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5404 raise errors.OpPrereqError("The node is the master and the force"
5405 " parameter was not set",
5406 errors.ECODE_INVAL)
5408 def ExpandNames(self):
5409 """Locking for PowercycleNode.
5411 This is a last-resort option and shouldn't block on other
5412 jobs. Therefore, we grab no locks.
5415 self.needed_locks = {}
5417 def Exec(self, feedback_fn):
5421 result = self.rpc.call_node_powercycle(self.op.node_name,
5422 self.cfg.GetHypervisorType())
5423 result.Raise("Failed to schedule the reboot")
5424 return result.payload
5427 class LUClusterQuery(NoHooksLU):
5428 """Query cluster configuration.
5433 def ExpandNames(self):
5434 self.needed_locks = {}
5436 def Exec(self, feedback_fn):
5437 """Return cluster config.
5440 cluster = self.cfg.GetClusterInfo()
5442 os_hvp = {}
5443 # Filter just for enabled hypervisors
5444 for os_name, hv_dict in cluster.os_hvp.items():
5445 os_hvp[os_name] = {}
5446 for hv_name, hv_params in hv_dict.items():
5447 if hv_name in cluster.enabled_hypervisors:
5448 os_hvp[os_name][hv_name] = hv_params
5450 # Convert ip_family to ip_version
5451 primary_ip_version = constants.IP4_VERSION
5452 if cluster.primary_ip_family == netutils.IP6Address.family:
5453 primary_ip_version = constants.IP6_VERSION
5455 result = {
5456 "software_version": constants.RELEASE_VERSION,
5457 "protocol_version": constants.PROTOCOL_VERSION,
5458 "config_version": constants.CONFIG_VERSION,
5459 "os_api_version": max(constants.OS_API_VERSIONS),
5460 "export_version": constants.EXPORT_VERSION,
5461 "architecture": (platform.architecture()[0], platform.machine()),
5462 "name": cluster.cluster_name,
5463 "master": cluster.master_node,
5464 "default_hypervisor": cluster.enabled_hypervisors[0],
5465 "enabled_hypervisors": cluster.enabled_hypervisors,
5466 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5467 for hypervisor_name in cluster.enabled_hypervisors]),
5468 "os_hvp": os_hvp,
5469 "beparams": cluster.beparams,
5470 "osparams": cluster.osparams,
5471 "nicparams": cluster.nicparams,
5472 "ndparams": cluster.ndparams,
5473 "candidate_pool_size": cluster.candidate_pool_size,
5474 "master_netdev": cluster.master_netdev,
5475 "volume_group_name": cluster.volume_group_name,
5476 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5477 "file_storage_dir": cluster.file_storage_dir,
5478 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5479 "maintain_node_health": cluster.maintain_node_health,
5480 "ctime": cluster.ctime,
5481 "mtime": cluster.mtime,
5482 "uuid": cluster.uuid,
5483 "tags": list(cluster.GetTags()),
5484 "uid_pool": cluster.uid_pool,
5485 "default_iallocator": cluster.default_iallocator,
5486 "reserved_lvs": cluster.reserved_lvs,
5487 "primary_ip_version": primary_ip_version,
5488 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5489 "hidden_os": cluster.hidden_os,
5490 "blacklisted_os": cluster.blacklisted_os,
5492 }
5494 return result
5496 class LUClusterConfigQuery(NoHooksLU):
5497 """Return configuration values.
5501 _FIELDS_DYNAMIC = utils.FieldSet()
5502 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5503 "watcher_pause", "volume_group_name")
5505 def CheckArguments(self):
5506 _CheckOutputFields(static=self._FIELDS_STATIC,
5507 dynamic=self._FIELDS_DYNAMIC,
5508 selected=self.op.output_fields)
5510 def ExpandNames(self):
5511 self.needed_locks = {}
5513 def Exec(self, feedback_fn):
5514 """Dump a representation of the cluster config to the standard output.
5517 values = []
5518 for field in self.op.output_fields:
5519 if field == "cluster_name":
5520 entry = self.cfg.GetClusterName()
5521 elif field == "master_node":
5522 entry = self.cfg.GetMasterNode()
5523 elif field == "drain_flag":
5524 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5525 elif field == "watcher_pause":
5526 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5527 elif field == "volume_group_name":
5528 entry = self.cfg.GetVGName()
5530 raise errors.ParameterError(field)
5531 values.append(entry)
5533 return values
5535 class LUInstanceActivateDisks(NoHooksLU):
5536 """Bring up an instance's disks.
5541 def ExpandNames(self):
5542 self._ExpandAndLockInstance()
5543 self.needed_locks[locking.LEVEL_NODE] = []
5544 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5546 def DeclareLocks(self, level):
5547 if level == locking.LEVEL_NODE:
5548 self._LockInstancesNodes()
5550 def CheckPrereq(self):
5551 """Check prerequisites.
5553 This checks that the instance is in the cluster.
5556 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5557 assert self.instance is not None, \
5558 "Cannot retrieve locked instance %s" % self.op.instance_name
5559 _CheckNodeOnline(self, self.instance.primary_node)
5561 def Exec(self, feedback_fn):
5562 """Activate the disks.
5565 disks_ok, disks_info = \
5566 _AssembleInstanceDisks(self, self.instance,
5567 ignore_size=self.op.ignore_size)
5568 if not disks_ok:
5569 raise errors.OpExecError("Cannot activate block devices")
5571 return disks_info
5574 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5575 ignore_size=False):
5576 """Prepare the block devices for an instance.
5578 This sets up the block devices on all nodes.
5580 @type lu: L{LogicalUnit}
5581 @param lu: the logical unit on whose behalf we execute
5582 @type instance: L{objects.Instance}
5583 @param instance: the instance for whose disks we assemble
5584 @type disks: list of L{objects.Disk} or None
5585 @param disks: which disks to assemble (or all, if None)
5586 @type ignore_secondaries: boolean
5587 @param ignore_secondaries: if true, errors on secondary nodes
5588 won't result in an error return from the function
5589 @type ignore_size: boolean
5590 @param ignore_size: if true, the current known size of the disk
5591 will not be used during the disk activation, useful for cases
5592 when the size is wrong
5593 @return: False if the operation failed, otherwise a list of
5594 (host, instance_visible_name, node_visible_name)
5595 with the mapping from node devices to instance devices
5598 device_info = []
5599 disks_ok = True
5600 iname = instance.name
5601 disks = _ExpandCheckDisks(instance, disks)
5603 # With the two-pass mechanism we try to reduce the window of
5604 # opportunity for the race condition of switching DRBD to primary
5605 # before handshaking occurred, but we do not eliminate it
5607 # The proper fix would be to wait (with some limits) until the
5608 # connection has been made and drbd transitions from WFConnection
5609 # into any other network-connected state (Connected, SyncTarget,
5612 # 1st pass, assemble on all nodes in secondary mode
5613 for idx, inst_disk in enumerate(disks):
5614 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5615 if ignore_size:
5616 node_disk = node_disk.Copy()
5617 node_disk.UnsetSize()
5618 lu.cfg.SetDiskID(node_disk, node)
5619 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5620 msg = result.fail_msg
5621 if msg:
5622 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5623 " (is_primary=False, pass=1): %s",
5624 inst_disk.iv_name, node, msg)
5625 if not ignore_secondaries:
5626 disks_ok = False
5628 # FIXME: race condition on drbd migration to primary
5630 # 2nd pass, do only the primary node
5631 for idx, inst_disk in enumerate(disks):
5632 dev_path = None
5634 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5635 if node != instance.primary_node:
5636 continue
5637 if ignore_size:
5638 node_disk = node_disk.Copy()
5639 node_disk.UnsetSize()
5640 lu.cfg.SetDiskID(node_disk, node)
5641 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5642 msg = result.fail_msg
5643 if msg:
5644 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5645 " (is_primary=True, pass=2): %s",
5646 inst_disk.iv_name, node, msg)
5647 disks_ok = False
5648 else:
5649 dev_path = result.payload
5651 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5653 # leave the disks configured for the primary node
5654 # this is a workaround that would be fixed better by
5655 # improving the logical/physical id handling
5656 for disk in disks:
5657 lu.cfg.SetDiskID(disk, instance.primary_node)
5659 return disks_ok, device_info
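# Editor's note, illustrative example of the returned values (hypothetical
# names and device path):
#   disks_ok == True
#   device_info == [("node1.example.com", "disk/0", "/dev/drbd0")]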
5662 def _StartInstanceDisks(lu, instance, force):
5663 """Start the disks of an instance.
5666 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5667 ignore_secondaries=force)
5668 if not disks_ok:
5669 _ShutdownInstanceDisks(lu, instance)
5670 if force is not None and not force:
5671 lu.proc.LogWarning("", hint="If the message above refers to a"
5672 " secondary node,"
5673 " you can retry the operation using '--force'.")
5674 raise errors.OpExecError("Disk consistency error")
5677 class LUInstanceDeactivateDisks(NoHooksLU):
5678 """Shutdown an instance's disks.
5683 def ExpandNames(self):
5684 self._ExpandAndLockInstance()
5685 self.needed_locks[locking.LEVEL_NODE] = []
5686 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5688 def DeclareLocks(self, level):
5689 if level == locking.LEVEL_NODE:
5690 self._LockInstancesNodes()
5692 def CheckPrereq(self):
5693 """Check prerequisites.
5695 This checks that the instance is in the cluster.
5698 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5699 assert self.instance is not None, \
5700 "Cannot retrieve locked instance %s" % self.op.instance_name
5702 def Exec(self, feedback_fn):
5703 """Deactivate the disks
5706 instance = self.instance
5708 _ShutdownInstanceDisks(self, instance)
5710 _SafeShutdownInstanceDisks(self, instance)
5713 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5714 """Shutdown block devices of an instance.
5716 This function checks if an instance is running, before calling
5717 _ShutdownInstanceDisks.
5720 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5721 _ShutdownInstanceDisks(lu, instance, disks=disks)
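# Editor's note: _SafeShutdownInstanceDisks only differs from
# _ShutdownInstanceDisks by first verifying via _CheckInstanceDown that the
# instance is stopped, so disks are never torn down under a running guest.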
5724 def _ExpandCheckDisks(instance, disks):
5725 """Return the instance disks selected by the disks list
5727 @type disks: list of L{objects.Disk} or None
5728 @param disks: selected disks
5729 @rtype: list of L{objects.Disk}
5730 @return: selected instance disks to act on
5733 if disks is None:
5734 return instance.disks
5736 if not set(disks).issubset(instance.disks):
5737 raise errors.ProgrammerError("Can only act on disks belonging to the"
5742 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5743 """Shutdown block devices of an instance.
5745 This does the shutdown on all nodes of the instance.
5747 If ignore_primary is false, errors on the primary node are not
5748 ignored and cause the shutdown to be reported as failed.
5752 disks = _ExpandCheckDisks(instance, disks)
5754 for disk in disks:
5755 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5756 lu.cfg.SetDiskID(top_disk, node)
5757 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5758 msg = result.fail_msg
5759 if msg:
5760 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5761 disk.iv_name, node, msg)
5762 if ((node == instance.primary_node and not ignore_primary) or
5763 (node != instance.primary_node and not result.offline)):
5768 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5769 """Checks if a node has enough free memory.
5771 This function checks if a given node has the needed amount of free
5772 memory. In case the node has less memory or we cannot get the
5773 information from the node, this function raises an OpPrereqError
5774 exception.
5776 @type lu: C{LogicalUnit}
5777 @param lu: a logical unit from which we get configuration data
5778 @type node: C{str}
5779 @param node: the node to check
5780 @type reason: C{str}
5781 @param reason: string to use in the error message
5782 @type requested: C{int}
5783 @param requested: the amount of memory in MiB to check for
5784 @type hypervisor_name: C{str}
5785 @param hypervisor_name: the hypervisor to ask for memory stats
5786 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5787 we cannot check the node
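# Editor's note, illustrative call with hypothetical values (checking 1024 MiB
# on one node before a start):
#   _CheckNodeFreeMemory(self, "node1.example.com",
#                        "starting instance inst1.example.com", 1024, "xen-pvm")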
5790 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5791 nodeinfo[node].Raise("Can't get data from node %s" % node,
5792 prereq=True, ecode=errors.ECODE_ENVIRON)
5793 free_mem = nodeinfo[node].payload.get("memory_free", None)
5794 if not isinstance(free_mem, int):
5795 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5796 " was '%s'" % (node, free_mem),
5797 errors.ECODE_ENVIRON)
5798 if requested > free_mem:
5799 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5800 " needed %s MiB, available %s MiB" %
5801 (node, reason, requested, free_mem),
5805 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5806 """Checks if nodes have enough free disk space in the all VGs.
5808 This function checks if all given nodes have the needed amount of
5809 free disk. In case any node has less disk or we cannot get the
5810 information from the node, this function raises an OpPrereqError
5811 exception.
5813 @type lu: C{LogicalUnit}
5814 @param lu: a logical unit from which we get configuration data
5815 @type nodenames: C{list}
5816 @param nodenames: the list of node names to check
5817 @type req_sizes: C{dict}
5818 @param req_sizes: the hash of vg and corresponding amount of disk in
5819 MiB that is requested
5820 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5821 or we cannot check the node
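# Editor's note, illustrative shape of req_sizes (hypothetical VG name):
#   {"xenvg": 10240}
# would require 10240 MiB of free space in volume group "xenvg" on every node.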
5824 for vg, req_size in req_sizes.items():
5825 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5828 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5829 """Checks if nodes have enough free disk space in the specified VG.
5831 This function checks if all given nodes have the needed amount of
5832 free disk. In case any node has less disk or we cannot get the
5833 information from the node, this function raises an OpPrereqError
5834 exception.
5836 @type lu: C{LogicalUnit}
5837 @param lu: a logical unit from which we get configuration data
5838 @type nodenames: C{list}
5839 @param nodenames: the list of node names to check
5841 @param vg: the volume group to check
5842 @type requested: C{int}
5843 @param requested: the amount of disk in MiB to check for
5844 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5845 or we cannot check the node
5848 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5849 for node in nodenames:
5850 info = nodeinfo[node]
5851 info.Raise("Cannot get current information from node %s" % node,
5852 prereq=True, ecode=errors.ECODE_ENVIRON)
5853 vg_free = info.payload.get("vg_free", None)
5854 if not isinstance(vg_free, int):
5855 raise errors.OpPrereqError("Can't compute free disk space on node"
5856 " %s for vg %s, result was '%s'" %
5857 (node, vg, vg_free), errors.ECODE_ENVIRON)
5858 if requested > vg_free:
5859 raise errors.OpPrereqError("Not enough disk space on target node %s"
5860 " vg %s: required %d MiB, available %d MiB" %
5861 (node, vg, requested, vg_free),
5865 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5866 """Checks if nodes have enough physical CPUs
5868 This function checks if all given nodes have the needed number of
5869 physical CPUs. In case any node has fewer CPUs or we cannot get the
5870 information from the node, this function raises an OpPrereqError
5871 exception.
5873 @type lu: C{LogicalUnit}
5874 @param lu: a logical unit from which we get configuration data
5875 @type nodenames: C{list}
5876 @param nodenames: the list of node names to check
5877 @type requested: C{int}
5878 @param requested: the minimum acceptable number of physical CPUs
5879 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5880 or we cannot check the node
5883 nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
5884 for node in nodenames:
5885 info = nodeinfo[node]
5886 info.Raise("Cannot get current information from node %s" % node,
5887 prereq=True, ecode=errors.ECODE_ENVIRON)
5888 num_cpus = info.payload.get("cpu_total", None)
5889 if not isinstance(num_cpus, int):
5890 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
5891 " on node %s, result was '%s'" %
5892 (node, num_cpus), errors.ECODE_ENVIRON)
5893 if requested > num_cpus:
5894 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
5895 "required" % (node, num_cpus, requested),
5899 class LUInstanceStartup(LogicalUnit):
5900 """Starts an instance.
5903 HPATH = "instance-start"
5904 HTYPE = constants.HTYPE_INSTANCE
5907 def CheckArguments(self):
5909 if self.op.beparams:
5910 # fill the beparams dict
5911 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5913 def ExpandNames(self):
5914 self._ExpandAndLockInstance()
5916 def BuildHooksEnv(self):
5919 This runs on master, primary and secondary nodes of the instance.
5923 "FORCE": self.op.force,
5926 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5930 def BuildHooksNodes(self):
5931 """Build hooks nodes.
5934 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5937 def CheckPrereq(self):
5938 """Check prerequisites.
5940 This checks that the instance is in the cluster.
5943 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5944 assert self.instance is not None, \
5945 "Cannot retrieve locked instance %s" % self.op.instance_name
5948 if self.op.hvparams:
5949 # check hypervisor parameter syntax (locally)
5950 cluster = self.cfg.GetClusterInfo()
5951 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5952 filled_hvp = cluster.FillHV(instance)
5953 filled_hvp.update(self.op.hvparams)
5954 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5955 hv_type.CheckParameterSyntax(filled_hvp)
5956 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
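# Editor's note: the hypervisor parameters validated above are the cluster's
# view of the instance (FillHV) overlaid with the per-opcode overrides from
# self.op.hvparams, so only the merged result has to be syntactically valid.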
5958 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5960 if self.primary_offline and self.op.ignore_offline_nodes:
5961 self.proc.LogWarning("Ignoring offline primary node")
5963 if self.op.hvparams or self.op.beparams:
5964 self.proc.LogWarning("Overridden parameters are ignored")
5965 else:
5966 _CheckNodeOnline(self, instance.primary_node)
5968 bep = self.cfg.GetClusterInfo().FillBE(instance)
5970 # check bridges existence
5971 _CheckInstanceBridgesExist(self, instance)
5973 remote_info = self.rpc.call_instance_info(instance.primary_node,
5975 instance.hypervisor)
5976 remote_info.Raise("Error checking node %s" % instance.primary_node,
5977 prereq=True, ecode=errors.ECODE_ENVIRON)
5978 if not remote_info.payload: # not running already
5979 _CheckNodeFreeMemory(self, instance.primary_node,
5980 "starting instance %s" % instance.name,
5981 bep[constants.BE_MEMORY], instance.hypervisor)
5983 def Exec(self, feedback_fn):
5984 """Start the instance.
5987 instance = self.instance
5988 force = self.op.force
5990 if not self.op.no_remember:
5991 self.cfg.MarkInstanceUp(instance.name)
5993 if self.primary_offline:
5994 assert self.op.ignore_offline_nodes
5995 self.proc.LogInfo("Primary node offline, marked instance as started")
5996 else:
5997 node_current = instance.primary_node
5999 _StartInstanceDisks(self, instance, force)
6001 result = self.rpc.call_instance_start(node_current, instance,
6002 self.op.hvparams, self.op.beparams,
6003 self.op.startup_paused)
6004 msg = result.fail_msg
6005 if msg:
6006 _ShutdownInstanceDisks(self, instance)
6007 raise errors.OpExecError("Could not start instance: %s" % msg)
6010 class LUInstanceReboot(LogicalUnit):
6011 """Reboot an instance.
6014 HPATH = "instance-reboot"
6015 HTYPE = constants.HTYPE_INSTANCE
6018 def ExpandNames(self):
6019 self._ExpandAndLockInstance()
6021 def BuildHooksEnv(self):
6024 This runs on master, primary and secondary nodes of the instance.
6028 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6029 "REBOOT_TYPE": self.op.reboot_type,
6030 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6033 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6037 def BuildHooksNodes(self):
6038 """Build hooks nodes.
6041 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6044 def CheckPrereq(self):
6045 """Check prerequisites.
6047 This checks that the instance is in the cluster.
6050 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6051 assert self.instance is not None, \
6052 "Cannot retrieve locked instance %s" % self.op.instance_name
6054 _CheckNodeOnline(self, instance.primary_node)
6056 # check bridges existence
6057 _CheckInstanceBridgesExist(self, instance)
6059 def Exec(self, feedback_fn):
6060 """Reboot the instance.
6063 instance = self.instance
6064 ignore_secondaries = self.op.ignore_secondaries
6065 reboot_type = self.op.reboot_type
6067 remote_info = self.rpc.call_instance_info(instance.primary_node,
6069 instance.hypervisor)
6070 remote_info.Raise("Error checking node %s" % instance.primary_node)
6071 instance_running = bool(remote_info.payload)
6073 node_current = instance.primary_node
6075 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6076 constants.INSTANCE_REBOOT_HARD]:
6077 for disk in instance.disks:
6078 self.cfg.SetDiskID(disk, node_current)
6079 result = self.rpc.call_instance_reboot(node_current, instance,
6080 reboot_type,
6081 self.op.shutdown_timeout)
6082 result.Raise("Could not reboot instance")
6083 else:
6084 if instance_running:
6085 result = self.rpc.call_instance_shutdown(node_current, instance,
6086 self.op.shutdown_timeout)
6087 result.Raise("Could not shutdown instance for full reboot")
6088 _ShutdownInstanceDisks(self, instance)
6089 else:
6090 self.LogInfo("Instance %s was already stopped, starting now",
6091 instance.name)
6092 _StartInstanceDisks(self, instance, ignore_secondaries)
6093 result = self.rpc.call_instance_start(node_current, instance,
6095 msg = result.fail_msg
6097 _ShutdownInstanceDisks(self, instance)
6098 raise errors.OpExecError("Could not start instance for"
6099 " full reboot: %s" % msg)
6101 self.cfg.MarkInstanceUp(instance.name)
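# Editor's note: soft/hard reboots are delegated to the node daemon via
# call_instance_reboot, while a full reboot is emulated above by shutting the
# instance down (including its disks) and then starting it again normally.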
6104 class LUInstanceShutdown(LogicalUnit):
6105 """Shutdown an instance.
6108 HPATH = "instance-stop"
6109 HTYPE = constants.HTYPE_INSTANCE
6112 def ExpandNames(self):
6113 self._ExpandAndLockInstance()
6115 def BuildHooksEnv(self):
6118 This runs on master, primary and secondary nodes of the instance.
6121 env = _BuildInstanceHookEnvByObject(self, self.instance)
6122 env["TIMEOUT"] = self.op.timeout
6125 def BuildHooksNodes(self):
6126 """Build hooks nodes.
6129 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6132 def CheckPrereq(self):
6133 """Check prerequisites.
6135 This checks that the instance is in the cluster.
6138 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6139 assert self.instance is not None, \
6140 "Cannot retrieve locked instance %s" % self.op.instance_name
6142 self.primary_offline = \
6143 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6145 if self.primary_offline and self.op.ignore_offline_nodes:
6146 self.proc.LogWarning("Ignoring offline primary node")
6148 _CheckNodeOnline(self, self.instance.primary_node)
6150 def Exec(self, feedback_fn):
6151 """Shutdown the instance.
6154 instance = self.instance
6155 node_current = instance.primary_node
6156 timeout = self.op.timeout
6158 if not self.op.no_remember:
6159 self.cfg.MarkInstanceDown(instance.name)
6161 if self.primary_offline:
6162 assert self.op.ignore_offline_nodes
6163 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6164 else:
6165 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6166 msg = result.fail_msg
6167 if msg:
6168 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6170 _ShutdownInstanceDisks(self, instance)
6173 class LUInstanceReinstall(LogicalUnit):
6174 """Reinstall an instance.
6177 HPATH = "instance-reinstall"
6178 HTYPE = constants.HTYPE_INSTANCE
6181 def ExpandNames(self):
6182 self._ExpandAndLockInstance()
6184 def BuildHooksEnv(self):
6187 This runs on master, primary and secondary nodes of the instance.
6190 return _BuildInstanceHookEnvByObject(self, self.instance)
6192 def BuildHooksNodes(self):
6193 """Build hooks nodes.
6196 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6199 def CheckPrereq(self):
6200 """Check prerequisites.
6202 This checks that the instance is in the cluster and is not running.
6205 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6206 assert instance is not None, \
6207 "Cannot retrieve locked instance %s" % self.op.instance_name
6208 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6209 " offline, cannot reinstall")
6210 for node in instance.secondary_nodes:
6211 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6212 " cannot reinstall")
6214 if instance.disk_template == constants.DT_DISKLESS:
6215 raise errors.OpPrereqError("Instance '%s' has no disks" %
6216 self.op.instance_name,
6217 errors.ECODE_INVAL)
6218 _CheckInstanceDown(self, instance, "cannot reinstall")
6220 if self.op.os_type is not None:
6222 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6223 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6224 instance_os = self.op.os_type
6225 else:
6226 instance_os = instance.os
6228 nodelist = list(instance.all_nodes)
6230 if self.op.osparams:
6231 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6232 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6233 self.os_inst = i_osdict # the new dict (without defaults)
6237 self.instance = instance
6239 def Exec(self, feedback_fn):
6240 """Reinstall the instance.
6243 inst = self.instance
6245 if self.op.os_type is not None:
6246 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6247 inst.os = self.op.os_type
6248 # Write to configuration
6249 self.cfg.Update(inst, feedback_fn)
6251 _StartInstanceDisks(self, inst, None)
6252 try:
6253 feedback_fn("Running the instance OS create scripts...")
6254 # FIXME: pass debug option from opcode to backend
6255 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6256 self.op.debug_level,
6257 osparams=self.os_inst)
6258 result.Raise("Could not install OS for instance %s on node %s" %
6259 (inst.name, inst.primary_node))
6260 finally:
6261 _ShutdownInstanceDisks(self, inst)
6264 class LUInstanceRecreateDisks(LogicalUnit):
6265 """Recreate an instance's missing disks.
6268 HPATH = "instance-recreate-disks"
6269 HTYPE = constants.HTYPE_INSTANCE
6272 def CheckArguments(self):
6273 # normalise the disk list
6274 self.op.disks = sorted(frozenset(self.op.disks))
6276 def ExpandNames(self):
6277 self._ExpandAndLockInstance()
6278 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6279 if self.op.nodes:
6280 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6281 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6282 else:
6283 self.needed_locks[locking.LEVEL_NODE] = []
6285 def DeclareLocks(self, level):
6286 if level == locking.LEVEL_NODE:
6287 # if we replace the nodes, we only need to lock the old primary,
6288 # otherwise we need to lock all nodes for disk re-creation
6289 primary_only = bool(self.op.nodes)
6290 self._LockInstancesNodes(primary_only=primary_only)
6292 def BuildHooksEnv(self):
6295 This runs on master, primary and secondary nodes of the instance.
6298 return _BuildInstanceHookEnvByObject(self, self.instance)
6300 def BuildHooksNodes(self):
6301 """Build hooks nodes.
6304 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6307 def CheckPrereq(self):
6308 """Check prerequisites.
6310 This checks that the instance is in the cluster and is not running.
6313 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6314 assert instance is not None, \
6315 "Cannot retrieve locked instance %s" % self.op.instance_name
6317 if len(self.op.nodes) != len(instance.all_nodes):
6318 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6319 " %d replacement nodes were specified" %
6320 (instance.name, len(instance.all_nodes),
6321 len(self.op.nodes)),
6323 assert instance.disk_template != constants.DT_DRBD8 or \
6324 len(self.op.nodes) == 2
6325 assert instance.disk_template != constants.DT_PLAIN or \
6326 len(self.op.nodes) == 1
6327 primary_node = self.op.nodes[0]
6328 else:
6329 primary_node = instance.primary_node
6330 _CheckNodeOnline(self, primary_node)
6332 if instance.disk_template == constants.DT_DISKLESS:
6333 raise errors.OpPrereqError("Instance '%s' has no disks" %
6334 self.op.instance_name, errors.ECODE_INVAL)
6335 # if we replace nodes *and* the old primary is offline, we don't
6336 # check whether the instance is down
6337 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6338 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6339 if not (self.op.nodes and old_pnode.offline):
6340 _CheckInstanceDown(self, instance, "cannot recreate disks")
6342 if not self.op.disks:
6343 self.op.disks = range(len(instance.disks))
6345 for idx in self.op.disks:
6346 if idx >= len(instance.disks):
6347 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6349 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6350 raise errors.OpPrereqError("Can't recreate disks partially and"
6351 " change the nodes at the same time",
6353 self.instance = instance
6355 def Exec(self, feedback_fn):
6356 """Recreate the disks.
6359 instance = self.instance
6361 to_skip = []
6362 mods = [] # keeps track of needed logical_id changes
6364 for idx, disk in enumerate(instance.disks):
6365 if idx not in self.op.disks: # disk idx has not been passed in
6366 to_skip.append(idx)
6367 continue
6368 # update secondaries for disks, if needed
6369 if self.op.nodes:
6370 if disk.dev_type == constants.LD_DRBD8:
6371 # need to update the nodes and minors
6372 assert len(self.op.nodes) == 2
6373 assert len(disk.logical_id) == 6 # otherwise disk internals
6375 (_, _, old_port, _, _, old_secret) = disk.logical_id
6376 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6377 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6378 new_minors[0], new_minors[1], old_secret)
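# Editor's note: a DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret); only the node names and
# the freshly allocated minors change here, port and secret are preserved.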
6379 assert len(disk.logical_id) == len(new_id)
6380 mods.append((idx, new_id))
6382 # now that we have passed all asserts above, we can apply the mods
6383 # in a single run (to avoid partial changes)
6384 for idx, new_id in mods:
6385 instance.disks[idx].logical_id = new_id
6387 # change primary node, if needed
6388 if self.op.nodes:
6389 instance.primary_node = self.op.nodes[0]
6390 self.LogWarning("Changing the instance's nodes, you will have to"
6391 " remove any disks left on the older nodes manually")
6394 self.cfg.Update(instance, feedback_fn)
6396 _CreateDisks(self, instance, to_skip=to_skip)
6399 class LUInstanceRename(LogicalUnit):
6400 """Rename an instance.
6403 HPATH = "instance-rename"
6404 HTYPE = constants.HTYPE_INSTANCE
6406 def CheckArguments(self):
6410 if self.op.ip_check and not self.op.name_check:
6411 # TODO: make the ip check more flexible and not depend on the name check
6412 raise errors.OpPrereqError("IP address check requires a name check",
6415 def BuildHooksEnv(self):
6418 This runs on master, primary and secondary nodes of the instance.
6421 env = _BuildInstanceHookEnvByObject(self, self.instance)
6422 env["INSTANCE_NEW_NAME"] = self.op.new_name
6425 def BuildHooksNodes(self):
6426 """Build hooks nodes.
6429 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6432 def CheckPrereq(self):
6433 """Check prerequisites.
6435 This checks that the instance is in the cluster and is not running.
6438 self.op.instance_name = _ExpandInstanceName(self.cfg,
6439 self.op.instance_name)
6440 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6441 assert instance is not None
6442 _CheckNodeOnline(self, instance.primary_node)
6443 _CheckInstanceDown(self, instance, "cannot rename")
6444 self.instance = instance
6446 new_name = self.op.new_name
6447 if self.op.name_check:
6448 hostname = netutils.GetHostname(name=new_name)
6449 if hostname != new_name:
6450 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6451 hostname.name)
6452 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6453 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6454 " same as given hostname '%s'") %
6455 (hostname.name, self.op.new_name),
6457 new_name = self.op.new_name = hostname.name
6458 if (self.op.ip_check and
6459 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6460 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6461 (hostname.ip, new_name),
6462 errors.ECODE_NOTUNIQUE)
6464 instance_list = self.cfg.GetInstanceList()
6465 if new_name in instance_list and new_name != instance.name:
6466 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6467 new_name, errors.ECODE_EXISTS)
6469 def Exec(self, feedback_fn):
6470 """Rename the instance.
6473 inst = self.instance
6474 old_name = inst.name
6476 rename_file_storage = False
6477 if (inst.disk_template in constants.DTS_FILEBASED and
6478 self.op.new_name != inst.name):
6479 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6480 rename_file_storage = True
6482 self.cfg.RenameInstance(inst.name, self.op.new_name)
6483 # Change the instance lock. This is definitely safe while we hold the BGL.
6484 # Otherwise the new lock would have to be added in acquired mode.
6486 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6487 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6489 # re-read the instance from the configuration after rename
6490 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6492 if rename_file_storage:
6493 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6494 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6495 old_file_storage_dir,
6496 new_file_storage_dir)
6497 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6498 " (but the instance has been renamed in Ganeti)" %
6499 (inst.primary_node, old_file_storage_dir,
6500 new_file_storage_dir))
6502 _StartInstanceDisks(self, inst, None)
6504 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6505 old_name, self.op.debug_level)
6506 msg = result.fail_msg
6508 msg = ("Could not run OS rename script for instance %s on node %s"
6509 " (but the instance has been renamed in Ganeti): %s" %
6510 (inst.name, inst.primary_node, msg))
6511 self.proc.LogWarning(msg)
6513 _ShutdownInstanceDisks(self, inst)
6518 class LUInstanceRemove(LogicalUnit):
6519 """Remove an instance.
6522 HPATH = "instance-remove"
6523 HTYPE = constants.HTYPE_INSTANCE
6526 def ExpandNames(self):
6527 self._ExpandAndLockInstance()
6528 self.needed_locks[locking.LEVEL_NODE] = []
6529 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6531 def DeclareLocks(self, level):
6532 if level == locking.LEVEL_NODE:
6533 self._LockInstancesNodes()
6535 def BuildHooksEnv(self):
6538 This runs on master, primary and secondary nodes of the instance.
6541 env = _BuildInstanceHookEnvByObject(self, self.instance)
6542 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6545 def BuildHooksNodes(self):
6546 """Build hooks nodes.
6549 nl = [self.cfg.GetMasterNode()]
6550 nl_post = list(self.instance.all_nodes) + nl
6551 return (nl, nl_post)
6553 def CheckPrereq(self):
6554 """Check prerequisites.
6556 This checks that the instance is in the cluster.
6559 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6560 assert self.instance is not None, \
6561 "Cannot retrieve locked instance %s" % self.op.instance_name
6563 def Exec(self, feedback_fn):
6564 """Remove the instance.
6567 instance = self.instance
6568 logging.info("Shutting down instance %s on node %s",
6569 instance.name, instance.primary_node)
6571 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6572 self.op.shutdown_timeout)
6573 msg = result.fail_msg
6574 if msg:
6575 if self.op.ignore_failures:
6576 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6577 else:
6578 raise errors.OpExecError("Could not shutdown instance %s on"
6580 (instance.name, instance.primary_node, msg))
6582 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6585 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6586 """Utility function to remove an instance.
6589 logging.info("Removing block devices for instance %s", instance.name)
6591 if not _RemoveDisks(lu, instance):
6592 if not ignore_failures:
6593 raise errors.OpExecError("Can't remove instance's disks")
6594 feedback_fn("Warning: can't remove instance's disks")
6596 logging.info("Removing instance %s out of cluster config", instance.name)
6598 lu.cfg.RemoveInstance(instance.name)
6600 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6601 "Instance lock removal conflict"
6603 # Remove lock for the instance
6604 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6607 class LUInstanceQuery(NoHooksLU):
6608 """Logical unit for querying instances.
6611 # pylint: disable=W0142
6614 def CheckArguments(self):
6615 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6616 self.op.output_fields, self.op.use_locking)
6618 def ExpandNames(self):
6619 self.iq.ExpandNames(self)
6621 def DeclareLocks(self, level):
6622 self.iq.DeclareLocks(self, level)
6624 def Exec(self, feedback_fn):
6625 return self.iq.OldStyleQuery(self)
6628 class LUInstanceFailover(LogicalUnit):
6629 """Failover an instance.
6632 HPATH = "instance-failover"
6633 HTYPE = constants.HTYPE_INSTANCE
6636 def CheckArguments(self):
6637 """Check the arguments.
6640 self.iallocator = getattr(self.op, "iallocator", None)
6641 self.target_node = getattr(self.op, "target_node", None)
6643 def ExpandNames(self):
6644 self._ExpandAndLockInstance()
6646 if self.op.target_node is not None:
6647 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6649 self.needed_locks[locking.LEVEL_NODE] = []
6650 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6652 ignore_consistency = self.op.ignore_consistency
6653 shutdown_timeout = self.op.shutdown_timeout
6654 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6657 ignore_consistency=ignore_consistency,
6658 shutdown_timeout=shutdown_timeout)
6659 self.tasklets = [self._migrater]
6661 def DeclareLocks(self, level):
6662 if level == locking.LEVEL_NODE:
6663 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6664 if instance.disk_template in constants.DTS_EXT_MIRROR:
6665 if self.op.target_node is None:
6666 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6667 else:
6668 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6669 self.op.target_node]
6670 del self.recalculate_locks[locking.LEVEL_NODE]
6671 else:
6672 self._LockInstancesNodes()
6674 def BuildHooksEnv(self):
6677 This runs on master, primary and secondary nodes of the instance.
6680 instance = self._migrater.instance
6681 source_node = instance.primary_node
6682 target_node = self.op.target_node
6684 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6685 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6686 "OLD_PRIMARY": source_node,
6687 "NEW_PRIMARY": target_node,
6690 if instance.disk_template in constants.DTS_INT_MIRROR:
6691 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6692 env["NEW_SECONDARY"] = source_node
6694 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6696 env.update(_BuildInstanceHookEnvByObject(self, instance))
6700 def BuildHooksNodes(self):
6701 """Build hooks nodes.
6704 instance = self._migrater.instance
6705 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6706 return (nl, nl + [instance.primary_node])
6709 class LUInstanceMigrate(LogicalUnit):
6710 """Migrate an instance.
6712 This is migration without shutting down, compared to the failover,
6713 which is done with shutdown.
6716 HPATH = "instance-migrate"
6717 HTYPE = constants.HTYPE_INSTANCE
6720 def ExpandNames(self):
6721 self._ExpandAndLockInstance()
6723 if self.op.target_node is not None:
6724 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6726 self.needed_locks[locking.LEVEL_NODE] = []
6727 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6729 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6730 cleanup=self.op.cleanup,
6732 fallback=self.op.allow_failover)
6733 self.tasklets = [self._migrater]
6735 def DeclareLocks(self, level):
6736 if level == locking.LEVEL_NODE:
6737 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6738 if instance.disk_template in constants.DTS_EXT_MIRROR:
6739 if self.op.target_node is None:
6740 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6742 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6743 self.op.target_node]
6744 del self.recalculate_locks[locking.LEVEL_NODE]
6746 self._LockInstancesNodes()
6748 def BuildHooksEnv(self):
6751 This runs on master, primary and secondary nodes of the instance.
6754 instance = self._migrater.instance
6755 source_node = instance.primary_node
6756 target_node = self.op.target_node
6757 env = _BuildInstanceHookEnvByObject(self, instance)
6759 "MIGRATE_LIVE": self._migrater.live,
6760 "MIGRATE_CLEANUP": self.op.cleanup,
6761 "OLD_PRIMARY": source_node,
6762 "NEW_PRIMARY": target_node,
6765 if instance.disk_template in constants.DTS_INT_MIRROR:
6766 env["OLD_SECONDARY"] = target_node
6767 env["NEW_SECONDARY"] = source_node
6769 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6773 def BuildHooksNodes(self):
6774 """Build hooks nodes.
6777 instance = self._migrater.instance
6778 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6779 return (nl, nl + [instance.primary_node])
6782 class LUInstanceMove(LogicalUnit):
6783 """Move an instance by data-copying.
6786 HPATH = "instance-move"
6787 HTYPE = constants.HTYPE_INSTANCE
6790 def ExpandNames(self):
6791 self._ExpandAndLockInstance()
6792 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6793 self.op.target_node = target_node
6794 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6795 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6797 def DeclareLocks(self, level):
6798 if level == locking.LEVEL_NODE:
6799 self._LockInstancesNodes(primary_only=True)
6801 def BuildHooksEnv(self):
6804 This runs on master, primary and secondary nodes of the instance.
6808 "TARGET_NODE": self.op.target_node,
6809 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6811 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6814 def BuildHooksNodes(self):
6815 """Build hooks nodes.
6819 self.cfg.GetMasterNode(),
6820 self.instance.primary_node,
6821 self.op.target_node,
6825 def CheckPrereq(self):
6826 """Check prerequisites.
6828 This checks that the instance is in the cluster.
6831 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6832 assert self.instance is not None, \
6833 "Cannot retrieve locked instance %s" % self.op.instance_name
6835 node = self.cfg.GetNodeInfo(self.op.target_node)
6836 assert node is not None, \
6837 "Cannot retrieve locked node %s" % self.op.target_node
6839 self.target_node = target_node = node.name
6841 if target_node == instance.primary_node:
6842 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6843 (instance.name, target_node),
6846 bep = self.cfg.GetClusterInfo().FillBE(instance)
6848 for idx, dsk in enumerate(instance.disks):
6849 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6850 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6851 " cannot copy" % idx, errors.ECODE_STATE)
6853 _CheckNodeOnline(self, target_node)
6854 _CheckNodeNotDrained(self, target_node)
6855 _CheckNodeVmCapable(self, target_node)
6857 if instance.admin_up:
6858 # check memory requirements on the secondary node
6859 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6860 instance.name, bep[constants.BE_MEMORY],
6861 instance.hypervisor)
6862 else:
6863 self.LogInfo("Not checking memory on the secondary node as"
6864 " instance will not be started")
6866 # check bridge existence
6867 _CheckInstanceBridgesExist(self, instance, node=target_node)
6869 def Exec(self, feedback_fn):
6870 """Move an instance.
6872 The move is done by shutting it down on its present node, copying
6873 the data over (slow) and starting it on the new node.
6876 instance = self.instance
6878 source_node = instance.primary_node
6879 target_node = self.target_node
6881 self.LogInfo("Shutting down instance %s on source node %s",
6882 instance.name, source_node)
6884 result = self.rpc.call_instance_shutdown(source_node, instance,
6885 self.op.shutdown_timeout)
6886 msg = result.fail_msg
6887 if msg:
6888 if self.op.ignore_consistency:
6889 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6890 " Proceeding anyway. Please make sure node"
6891 " %s is down. Error details: %s",
6892 instance.name, source_node, source_node, msg)
6893 else:
6894 raise errors.OpExecError("Could not shutdown instance %s on"
6896 (instance.name, source_node, msg))
6898 # create the target disks
6899 try:
6900 _CreateDisks(self, instance, target_node=target_node)
6901 except errors.OpExecError:
6902 self.LogWarning("Device creation failed, reverting...")
6903 try:
6904 _RemoveDisks(self, instance, target_node=target_node)
6905 finally:
6906 self.cfg.ReleaseDRBDMinors(instance.name)
6907 raise
6909 cluster_name = self.cfg.GetClusterInfo().cluster_name
6911 errs = []
6912 # activate, get path, copy the data over
6913 for idx, disk in enumerate(instance.disks):
6914 self.LogInfo("Copying data for disk %d", idx)
6915 result = self.rpc.call_blockdev_assemble(target_node, disk,
6916 instance.name, True, idx)
6917 if result.fail_msg:
6918 self.LogWarning("Can't assemble newly created disk %d: %s",
6919 idx, result.fail_msg)
6920 errs.append(result.fail_msg)
6921 continue
6922 dev_path = result.payload
6923 result = self.rpc.call_blockdev_export(source_node, disk,
6924 target_node, dev_path,
6925 cluster_name)
6926 if result.fail_msg:
6927 self.LogWarning("Can't copy data over for disk %d: %s",
6928 idx, result.fail_msg)
6929 errs.append(result.fail_msg)
6932 if errs:
6933 self.LogWarning("Some disks failed to copy, aborting")
6934 try:
6935 _RemoveDisks(self, instance, target_node=target_node)
6936 finally:
6937 self.cfg.ReleaseDRBDMinors(instance.name)
6938 raise errors.OpExecError("Errors during disk copy: %s" %
6941 instance.primary_node = target_node
6942 self.cfg.Update(instance, feedback_fn)
6944 self.LogInfo("Removing the disks on the original node")
6945 _RemoveDisks(self, instance, target_node=source_node)
6947 # Only start the instance if it's marked as up
6948 if instance.admin_up:
6949 self.LogInfo("Starting instance %s on node %s",
6950 instance.name, target_node)
6952 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6953 ignore_secondaries=True)
6955 _ShutdownInstanceDisks(self, instance)
6956 raise errors.OpExecError("Can't activate the instance's disks")
6958 result = self.rpc.call_instance_start(target_node, instance,
6960 msg = result.fail_msg
6962 _ShutdownInstanceDisks(self, instance)
6963 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6964 (instance.name, target_node, msg))
6967 class LUNodeMigrate(LogicalUnit):
6968 """Migrate all instances from a node.
6971 HPATH = "node-migrate"
6972 HTYPE = constants.HTYPE_NODE
6975 def CheckArguments(self):
6978 def ExpandNames(self):
6979 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6981 self.share_locks = _ShareAll()
6982 self.needed_locks = {
6983 locking.LEVEL_NODE: [self.op.node_name],
6986 def BuildHooksEnv(self):
6989 This runs on the master, the primary and all the secondaries.
6993 "NODE_NAME": self.op.node_name,
6996 def BuildHooksNodes(self):
6997 """Build hooks nodes.
7000 nl = [self.cfg.GetMasterNode()]
7003 def CheckPrereq(self):
7006 def Exec(self, feedback_fn):
7007 # Prepare jobs for migration instances
7008 jobs = [
7009 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7012 iallocator=self.op.iallocator,
7013 target_node=self.op.target_node)]
7014 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7015 ]
7017 # TODO: Run iallocator in this opcode and pass correct placement options to
7018 # OpInstanceMigrate. Since other jobs can modify the cluster between
7019 # running the iallocator and the actual migration, a good consistency model
7020 # will have to be found.
7022 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7023 frozenset([self.op.node_name]))
7025 return ResultWithJobs(jobs)
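# Editor's note: one single-opcode job is submitted per primary instance on
# the node, e.g. a node with three primary instances yields three independent
# OpInstanceMigrate jobs, whose job IDs end up in this opcode's result.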
7028 class TLMigrateInstance(Tasklet):
7029 """Tasklet class for instance migration.
7032 @ivar live: whether the migration will be done live or non-live;
7033 this variable is initialized only after CheckPrereq has run
7034 @type cleanup: boolean
7035 @ivar cleanup: Whether we clean up from a failed migration
7036 @type iallocator: string
7037 @ivar iallocator: The iallocator used to determine target_node
7038 @type target_node: string
7039 @ivar target_node: If given, the target_node to reallocate the instance to
7040 @type failover: boolean
7041 @ivar failover: Whether operation results in failover or migration
7042 @type fallback: boolean
7043 @ivar fallback: Whether fallback to failover is allowed if migration is not
7044 possible
7045 @type ignore_consistency: boolean
7046 @ivar ignore_consistency: Whether we should ignore consistency between source
7047 and target nodes
7048 @type shutdown_timeout: int
7049 @ivar shutdown_timeout: in case of failover, the timeout to use for the shutdown
7054 _MIGRATION_POLL_INTERVAL = 1 # seconds
7055 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7057 def __init__(self, lu, instance_name, cleanup=False,
7058 failover=False, fallback=False,
7059 ignore_consistency=False,
7060 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7061 """Initializes this class.
7064 Tasklet.__init__(self, lu)
7067 self.instance_name = instance_name
7068 self.cleanup = cleanup
7069 self.live = False # will be overridden later
7070 self.failover = failover
7071 self.fallback = fallback
7072 self.ignore_consistency = ignore_consistency
7073 self.shutdown_timeout = shutdown_timeout
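# Editor's note (illustrative): LUInstanceFailover above builds this tasklet
# with ignore_consistency and shutdown_timeout taken from its opcode, while
# LUInstanceMigrate passes cleanup=self.op.cleanup and
# fallback=self.op.allow_failover; flags not passed keep the defaults above.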
7075 def CheckPrereq(self):
7076 """Check prerequisites.
7078 This checks that the instance is in the cluster.
7081 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7082 instance = self.cfg.GetInstanceInfo(instance_name)
7083 assert instance is not None
7084 self.instance = instance
7086 if (not self.cleanup and not instance.admin_up and not self.failover and
7087 self.fallback):
7088 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7089 " to failover")
7090 self.failover = True
7092 if instance.disk_template not in constants.DTS_MIRRORED:
7097 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7098 " %s" % (instance.disk_template, text),
7101 if instance.disk_template in constants.DTS_EXT_MIRROR:
7102 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7104 if self.lu.op.iallocator:
7105 self._RunAllocator()
7106 else:
7107 # We set self.target_node as it is required by later code
7109 self.target_node = self.lu.op.target_node
7111 # self.target_node is already populated, either directly or by the
7112 # iallocator run
7113 target_node = self.target_node
7114 if self.target_node == instance.primary_node:
7115 raise errors.OpPrereqError("Cannot migrate instance %s"
7116 " to its primary (%s)" %
7117 (instance.name, instance.primary_node))
7119 if len(self.lu.tasklets) == 1:
7120 # It is safe to release locks only when we're the only tasklet
7122 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7123 keep=[instance.primary_node, self.target_node])
7125 else:
7126 secondary_nodes = instance.secondary_nodes
7127 if not secondary_nodes:
7128 raise errors.ConfigurationError("No secondary node but using"
7129 " %s disk template" %
7130 instance.disk_template)
7131 target_node = secondary_nodes[0]
7132 if self.lu.op.iallocator or (self.lu.op.target_node and
7133 self.lu.op.target_node != target_node):
7135 text = "failed over"
7138 raise errors.OpPrereqError("Instances with disk template %s cannot"
7139 " be %s to arbitrary nodes"
7140 " (neither an iallocator nor a target"
7141 " node can be passed)" %
7142 (instance.disk_template, text),
7145 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7147 # check memory requirements on the secondary node
7148 if not self.failover or instance.admin_up:
7149 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7150 instance.name, i_be[constants.BE_MEMORY],
7151 instance.hypervisor)
7152 else:
7153 self.lu.LogInfo("Not checking memory on the secondary node as"
7154 " instance will not be started")
7156 # check bridge existance
7157 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7159 if not self.cleanup:
7160 _CheckNodeNotDrained(self.lu, target_node)
7161 if not self.failover:
7162 result = self.rpc.call_instance_migratable(instance.primary_node,
7163 instance)
7164 if result.fail_msg and self.fallback:
7165 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7166 " failover")
7167 self.failover = True
7168 else:
7169 result.Raise("Can't migrate, please use failover",
7170 prereq=True, ecode=errors.ECODE_STATE)
7172 assert not (self.failover and self.cleanup)
7174 if not self.failover:
7175 if self.lu.op.live is not None and self.lu.op.mode is not None:
7176 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7177 " parameters are accepted",
7179 if self.lu.op.live is not None:
7181 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7183 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7184 # reset the 'live' parameter to None so that repeated
7185 # invocations of CheckPrereq do not raise an exception
7186 self.lu.op.live = None
7187 elif self.lu.op.mode is None:
7188 # read the default value from the hypervisor
7189 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7191 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7193 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7194 else:
7195 # Failover is never live
7196 self.live = False
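# Editor's summary of the resolution above: an explicit live=True/False is
# translated into mode HT_MIGRATION_LIVE/HT_MIGRATION_NONLIVE, an unset mode
# falls back to the hypervisor's HV_MIGRATION_MODE default, and a failover is
# always treated as non-live.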
7198 def _RunAllocator(self):
7199 """Run the allocator based on input opcode.
7202 ial = IAllocator(self.cfg, self.rpc,
7203 mode=constants.IALLOCATOR_MODE_RELOC,
7204 name=self.instance_name,
7205 # TODO See why hail breaks with a single node below
7206 relocate_from=[self.instance.primary_node,
7207 self.instance.primary_node],
7210 ial.Run(self.lu.op.iallocator)
7212 if not ial.success:
7213 raise errors.OpPrereqError("Can't compute nodes using"
7214 " iallocator '%s': %s" %
7215 (self.lu.op.iallocator, ial.info),
7217 if len(ial.result) != ial.required_nodes:
7218 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7219 " of nodes (%s), required %s" %
7220 (self.lu.op.iallocator, len(ial.result),
7221 ial.required_nodes), errors.ECODE_FAULT)
7222 self.target_node = ial.result[0]
7223 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7224 self.instance_name, self.lu.op.iallocator,
7225 utils.CommaJoin(ial.result))
7227 def _WaitUntilSync(self):
7228 """Poll with custom rpc for disk sync.
7230 This uses our own step-based rpc call.
7233 self.feedback_fn("* wait until resync is done")
7234 all_done = False
7235 while not all_done:
7236 all_done = True
7237 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7238 self.nodes_ip,
7239 self.instance.disks)
7240 min_percent = 100
7241 for node, nres in result.items():
7242 nres.Raise("Cannot resync disks on node %s" % node)
7243 node_done, node_percent = nres.payload
7244 all_done = all_done and node_done
7245 if node_percent is not None:
7246 min_percent = min(min_percent, node_percent)
7248 if min_percent < 100:
7249 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7252 def _EnsureSecondary(self, node):
7253 """Demote a node to secondary.
7256 self.feedback_fn("* switching node %s to secondary mode" % node)
7258 for dev in self.instance.disks:
7259 self.cfg.SetDiskID(dev, node)
7261 result = self.rpc.call_blockdev_close(node, self.instance.name,
7262 self.instance.disks)
7263 result.Raise("Cannot change disk to secondary on node %s" % node)
7265 def _GoStandalone(self):
7266 """Disconnect from the network.
7269 self.feedback_fn("* changing into standalone mode")
7270 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7271 self.instance.disks)
7272 for node, nres in result.items():
7273 nres.Raise("Cannot disconnect disks node %s" % node)
7275 def _GoReconnect(self, multimaster):
7276 """Reconnect to the network.
7282 msg = "single-master"
7283 self.feedback_fn("* changing disks into %s mode" % msg)
7284 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7285 self.instance.disks,
7286 self.instance.name, multimaster)
7287 for node, nres in result.items():
7288 nres.Raise("Cannot change disks config on node %s" % node)
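# Editor's note: _EnsureSecondary/_GoStandalone/_GoReconnect are the small
# DRBD state helpers used below; a migration chains them roughly as
# secondary -> standalone -> reconnected in multi-master mode, and cleanup or
# failback reconnects in single-master mode once only one node runs the
# instance.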
7290 def _ExecCleanup(self):
7291 """Try to cleanup after a failed migration.
7293 The cleanup is done by:
7294 - check that the instance is running only on one node
7295 (and update the config if needed)
7296 - change disks on its secondary node to secondary
7297 - wait until disks are fully synchronized
7298 - disconnect from the network
7299 - change disks into single-master mode
7300 - wait again until disks are fully synchronized
7303 instance = self.instance
7304 target_node = self.target_node
7305 source_node = self.source_node
7307 # check running on only one node
7308 self.feedback_fn("* checking where the instance actually runs"
7309 " (if this hangs, the hypervisor might be in"
7311 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7312 for node, result in ins_l.items():
7313 result.Raise("Can't contact node %s" % node)
7315 runningon_source = instance.name in ins_l[source_node].payload
7316 runningon_target = instance.name in ins_l[target_node].payload
7318 if runningon_source and runningon_target:
7319 raise errors.OpExecError("Instance seems to be running on two nodes,"
7320 " or the hypervisor is confused; you will have"
7321 " to ensure manually that it runs only on one"
7322 " and restart this operation")
7324 if not (runningon_source or runningon_target):
7325 raise errors.OpExecError("Instance does not seem to be running at all;"
7326 " in this case it's safer to repair by"
7327 " running 'gnt-instance stop' to ensure disk"
7328 " shutdown, and then restarting it")
7330 if runningon_target:
7331 # the migration has actually succeeded, we need to update the config
7332 self.feedback_fn("* instance running on secondary node (%s),"
7333 " updating config" % target_node)
7334 instance.primary_node = target_node
7335 self.cfg.Update(instance, self.feedback_fn)
7336 demoted_node = source_node
7337 else:
7338 self.feedback_fn("* instance confirmed to be running on its"
7339 " primary node (%s)" % source_node)
7340 demoted_node = target_node
7342 if instance.disk_template in constants.DTS_INT_MIRROR:
7343 self._EnsureSecondary(demoted_node)
7344 try:
7345 self._WaitUntilSync()
7346 except errors.OpExecError:
7347 # we ignore here errors, since if the device is standalone, it
7348 # won't be able to sync
7349 pass
7350 self._GoStandalone()
7351 self._GoReconnect(False)
7352 self._WaitUntilSync()
7354 self.feedback_fn("* done")
7356 def _RevertDiskStatus(self):
7357 """Try to revert the disk status after a failed migration.
7360 target_node = self.target_node
7361 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7362 return
7364 try:
7365 self._EnsureSecondary(target_node)
7366 self._GoStandalone()
7367 self._GoReconnect(False)
7368 self._WaitUntilSync()
7369 except errors.OpExecError, err:
7370 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7371 " please try to recover the instance manually;"
7372 " error '%s'" % str(err))
7374 def _AbortMigration(self):
7375 """Call the hypervisor code to abort a started migration.
7378 instance = self.instance
7379 target_node = self.target_node
7380 source_node = self.source_node
7381 migration_info = self.migration_info
7383 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7387 abort_msg = abort_result.fail_msg
7388 if abort_msg:
7389 logging.error("Aborting migration failed on target node %s: %s",
7390 target_node, abort_msg)
7391 # Don't raise an exception here, as we still have to try to revert the
7392 # disk status, even if this step failed.
7394 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7395 instance, False, self.live)
7396 abort_msg = abort_result.fail_msg
7398 logging.error("Aborting migration failed on source node %s: %s",
7399 source_node, abort_msg)
7401 def _ExecMigration(self):
7402 """Migrate an instance.
7404 The migrate is done by:
7405 - change the disks into dual-master mode
7406 - wait until disks are fully synchronized again
7407 - migrate the instance
7408 - change disks on the new secondary node (the old primary) to secondary
7409 - wait until disks are fully synchronized
7410 - change disks into single-master mode
7413 instance = self.instance
7414 target_node = self.target_node
7415 source_node = self.source_node
7417 self.feedback_fn("* checking disk consistency between source and target")
7418 for dev in instance.disks:
7419 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7420 raise errors.OpExecError("Disk %s is degraded or not fully"
7421 " synchronized on target node,"
7422 " aborting migration" % dev.iv_name)
7424 # First get the migration information from the remote node
7425 result = self.rpc.call_migration_info(source_node, instance)
7426 msg = result.fail_msg
7427 if msg:
7428 log_err = ("Failed fetching source migration information from %s: %s" %
7429 (source_node, msg))
7430 logging.error(log_err)
7431 raise errors.OpExecError(log_err)
7433 self.migration_info = migration_info = result.payload
7435 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7436 # Then switch the disks to master/master mode
7437 self._EnsureSecondary(target_node)
7438 self._GoStandalone()
7439 self._GoReconnect(True)
7440 self._WaitUntilSync()
7442 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7443 result = self.rpc.call_accept_instance(target_node,
7446 self.nodes_ip[target_node])
7448 msg = result.fail_msg
7450 logging.error("Instance pre-migration failed, trying to revert"
7451 " disk status: %s", msg)
7452 self.feedback_fn("Pre-migration failed, aborting")
7453 self._AbortMigration()
7454 self._RevertDiskStatus()
7455 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7456 (instance.name, msg))
7458 self.feedback_fn("* migrating instance to %s" % target_node)
7459 result = self.rpc.call_instance_migrate(source_node, instance,
7460 self.nodes_ip[target_node],
7462 msg = result.fail_msg
7464 logging.error("Instance migration failed, trying to revert"
7465 " disk status: %s", msg)
7466 self.feedback_fn("Migration failed, aborting")
7467 self._AbortMigration()
7468 self._RevertDiskStatus()
7469 raise errors.OpExecError("Could not migrate instance %s: %s" %
7470 (instance.name, msg))
7472 self.feedback_fn("* starting memory transfer")
7473 last_feedback = time.time()
7475 result = self.rpc.call_instance_get_migration_status(source_node,
7477 msg = result.fail_msg
7478 ms = result.payload # MigrationStatus instance
7479 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7480 logging.error("Instance migration failed, trying to revert"
7481 " disk status: %s", msg)
7482 self.feedback_fn("Migration failed, aborting")
7483 self._AbortMigration()
7484 self._RevertDiskStatus()
7485 raise errors.OpExecError("Could not migrate instance %s: %s" %
7486 (instance.name, msg))
7488 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7489 self.feedback_fn("* memory transfer complete")
7492 if (utils.TimeoutExpired(last_feedback,
7493 self._MIGRATION_FEEDBACK_INTERVAL) and
7494 ms.transferred_ram is not None):
7495 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7496 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7497 last_feedback = time.time()
7499 time.sleep(self._MIGRATION_POLL_INTERVAL)
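# Editor's note: the loop above polls the source node every
# _MIGRATION_POLL_INTERVAL (1 second) and only emits a progress line roughly
# every _MIGRATION_FEEDBACK_INTERVAL (10 seconds) when transferred_ram data is
# available in the MigrationStatus payload.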
7501 result = self.rpc.call_instance_finalize_migration_src(source_node,
7505 msg = result.fail_msg
7507 logging.error("Instance migration succeeded, but finalization failed"
7508 " on the source node: %s", msg)
7509 raise errors.OpExecError("Could not finalize instance migration: %s" %
7512 instance.primary_node = target_node
7514 # distribute new instance config to the other nodes
7515 self.cfg.Update(instance, self.feedback_fn)
7517 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7521 msg = result.fail_msg
7523 logging.error("Instance migration succeeded, but finalization failed"
7524 " on the target node: %s", msg)
7525 raise errors.OpExecError("Could not finalize instance migration: %s" %
7528 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7529 self._EnsureSecondary(source_node)
7530 self._WaitUntilSync()
7531 self._GoStandalone()
7532 self._GoReconnect(False)
7533 self._WaitUntilSync()
7535 self.feedback_fn("* done")
7537 def _ExecFailover(self):
7538 """Failover an instance.
7540 The failover is done by shutting it down on its present node and
7541 starting it on the secondary.
7544 instance = self.instance
7545 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7547 source_node = instance.primary_node
7548 target_node = self.target_node
7550 if instance.admin_up:
7551 self.feedback_fn("* checking disk consistency between source and target")
7552 for dev in instance.disks:
7553 # for drbd, these are drbd over lvm
7554 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7555 if primary_node.offline:
7556 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7558 (primary_node.name, dev.iv_name, target_node))
7559 elif not self.ignore_consistency:
7560 raise errors.OpExecError("Disk %s is degraded on target node,"
7561 " aborting failover" % dev.iv_name)
7563 self.feedback_fn("* not checking disk consistency as instance is not"
7566 self.feedback_fn("* shutting down instance on source node")
7567 logging.info("Shutting down instance %s on node %s",
7568 instance.name, source_node)
7570 result = self.rpc.call_instance_shutdown(source_node, instance,
7571 self.shutdown_timeout)
7572 msg = result.fail_msg
7574 if self.ignore_consistency or primary_node.offline:
7575 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7576 " proceeding anyway; please make sure node"
7577 " %s is down; error details: %s",
7578 instance.name, source_node, source_node, msg)
7580 raise errors.OpExecError("Could not shutdown instance %s on"
7582 (instance.name, source_node, msg))
7584 self.feedback_fn("* deactivating the instance's disks on source node")
7585 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7586 raise errors.OpExecError("Can't shut down the instance's disks")
7588 instance.primary_node = target_node
7589 # distribute new instance config to the other nodes
7590 self.cfg.Update(instance, self.feedback_fn)
7592 # Only start the instance if it's marked as up
7593 if instance.admin_up:
7594 self.feedback_fn("* activating the instance's disks on target node %s" %
7596 logging.info("Starting instance %s on node %s",
7597 instance.name, target_node)
7599 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7600 ignore_secondaries=True)
7602 _ShutdownInstanceDisks(self.lu, instance)
7603 raise errors.OpExecError("Can't activate the instance's disks")
7605 self.feedback_fn("* starting the instance on the target node %s" %
7607 result = self.rpc.call_instance_start(target_node, instance, None, None,
7609 msg = result.fail_msg
7611 _ShutdownInstanceDisks(self.lu, instance)
7612 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7613 (instance.name, target_node, msg))
7615 def Exec(self, feedback_fn):
7616 """Perform the migration.
7619 self.feedback_fn = feedback_fn
7620 self.source_node = self.instance.primary_node
7622 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7623 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7624 self.target_node = self.instance.secondary_nodes[0]
7625 # Otherwise self.target_node has been populated either
7626 # directly, or through an iallocator.
7628 self.all_nodes = [self.source_node, self.target_node]
7629 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7630 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7633 feedback_fn("Failover instance %s" % self.instance.name)
7634 self._ExecFailover()
7636 feedback_fn("Migrating instance %s" % self.instance.name)
7639 return self._ExecCleanup()
7641 return self._ExecMigration()
7644 def _CreateBlockDev(lu, node, instance, device, force_create,
7646 """Create a tree of block devices on a given node.
7648 If this device type has to be created on secondaries, create it and
7651 If not, just recurse to children keeping the same 'force' value.
7653 @param lu: the lu on whose behalf we execute
7654 @param node: the node on which to create the device
7655 @type instance: L{objects.Instance}
7656 @param instance: the instance which owns the device
7657 @type device: L{objects.Disk}
7658 @param device: the device to create
7659 @type force_create: boolean
7660 @param force_create: whether to force creation of this device; this
7661 will be changed to True whenever we find a device which has
7662 CreateOnSecondary() attribute
7663 @param info: the extra 'metadata' we should attach to the device
7664 (this will be represented as a LVM tag)
7665 @type force_open: boolean
7666 @param force_open: this parameter will be passed to the
7667 L{backend.BlockdevCreate} function where it specifies
7668 whether we run on primary or not, and it affects both
7669 the child assembly and the device's own Open() execution
7672 if device.CreateOnSecondary():
7676 for child in device.children:
7677 _CreateBlockDev(lu, node, instance, child, force_create,
7680 if not force_create:
7683 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7686 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7687 """Create a single block device on a given node.
7689 This will not recurse over children of the device, so they must be
7692 @param lu: the lu on whose behalf we execute
7693 @param node: the node on which to create the device
7694 @type instance: L{objects.Instance}
7695 @param instance: the instance which owns the device
7696 @type device: L{objects.Disk}
7697 @param device: the device to create
7698 @param info: the extra 'metadata' we should attach to the device
7699 (this will be represented as a LVM tag)
7700 @type force_open: boolean
7701 @param force_open: this parameter will be passed to the
7702 L{backend.BlockdevCreate} function where it specifies
7703 whether we run on primary or not, and it affects both
7704 the child assembly and the device's own Open() execution
7707 lu.cfg.SetDiskID(device, node)
7708 result = lu.rpc.call_blockdev_create(node, device, device.size,
7709 instance.name, force_open, info)
7710 result.Raise("Can't create block device %s on"
7711 " node %s for instance %s" % (device, node, instance.name))
7712 if device.physical_id is None:
7713 device.physical_id = result.payload
7716 def _GenerateUniqueNames(lu, exts):
7717 """Generate a suitable LV name.
7719 This will generate a logical volume name for the given instance.
7724 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7725 results.append("%s%s" % (new_id, val))
7729 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7730 iv_name, p_minor, s_minor):
7731 """Generate a drbd8 device complete with its children.
7734 assert len(vgnames) == len(names) == 2
7735 port = lu.cfg.AllocatePort()
7736 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7737 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7738 logical_id=(vgnames[0], names[0]))
7739 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7740 logical_id=(vgnames[1], names[1]))
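# Sketch of the resulting layout (informational note): each DRBD8 disk is
# backed by two local LVs, a data LV of the requested size and a fixed
# 128 MiB metadata LV, which become the children of the LD_DRBD8 device
# built below; its logical_id carries the two nodes, the allocated port and
# the DRBD minors.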
7741 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7742 logical_id=(primary, secondary, port,
7745 children=[dev_data, dev_meta],
7750 def _GenerateDiskTemplate(lu, template_name,
7751 instance_name, primary_node,
7752 secondary_nodes, disk_info,
7753 file_storage_dir, file_driver,
7754 base_index, feedback_fn):
7755 """Generate the entire disk layout for a given template type.
7758 #TODO: compute space requirements
7760 vgname = lu.cfg.GetVGName()
7761 disk_count = len(disk_info)
7763 if template_name == constants.DT_DISKLESS:
7765 elif template_name == constants.DT_PLAIN:
7766 if len(secondary_nodes) != 0:
7767 raise errors.ProgrammerError("Wrong template configuration")
7769 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7770 for i in range(disk_count)])
7771 for idx, disk in enumerate(disk_info):
7772 disk_index = idx + base_index
7773 vg = disk.get(constants.IDISK_VG, vgname)
7774 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7775 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7776 size=disk[constants.IDISK_SIZE],
7777 logical_id=(vg, names[idx]),
7778 iv_name="disk/%d" % disk_index,
7779 mode=disk[constants.IDISK_MODE])
7780 disks.append(disk_dev)
7781 elif template_name == constants.DT_DRBD8:
7782 if len(secondary_nodes) != 1:
7783 raise errors.ProgrammerError("Wrong template configuration")
7784 remote_node = secondary_nodes[0]
7785 minors = lu.cfg.AllocateDRBDMinor(
7786 [primary_node, remote_node] * len(disk_info), instance_name)
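# AllocateDRBDMinor is asked for one minor per (node, disk) pair, so for N
# disks the returned list interleaves primary and secondary minors; disk idx
# therefore uses minors[2 * idx] and minors[2 * idx + 1], as consumed below.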
7789 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7790 for i in range(disk_count)]):
7791 names.append(lv_prefix + "_data")
7792 names.append(lv_prefix + "_meta")
7793 for idx, disk in enumerate(disk_info):
7794 disk_index = idx + base_index
7795 data_vg = disk.get(constants.IDISK_VG, vgname)
7796 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7797 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7798 disk[constants.IDISK_SIZE],
7800 names[idx * 2:idx * 2 + 2],
7801 "disk/%d" % disk_index,
7802 minors[idx * 2], minors[idx * 2 + 1])
7803 disk_dev.mode = disk[constants.IDISK_MODE]
7804 disks.append(disk_dev)
7805 elif template_name == constants.DT_FILE:
7806 if len(secondary_nodes) != 0:
7807 raise errors.ProgrammerError("Wrong template configuration")
7809 opcodes.RequireFileStorage()
7811 for idx, disk in enumerate(disk_info):
7812 disk_index = idx + base_index
7813 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7814 size=disk[constants.IDISK_SIZE],
7815 iv_name="disk/%d" % disk_index,
7816 logical_id=(file_driver,
7817 "%s/disk%d" % (file_storage_dir,
7819 mode=disk[constants.IDISK_MODE])
7820 disks.append(disk_dev)
7821 elif template_name == constants.DT_SHARED_FILE:
7822 if len(secondary_nodes) != 0:
7823 raise errors.ProgrammerError("Wrong template configuration")
7825 opcodes.RequireSharedFileStorage()
7827 for idx, disk in enumerate(disk_info):
7828 disk_index = idx + base_index
7829 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7830 size=disk[constants.IDISK_SIZE],
7831 iv_name="disk/%d" % disk_index,
7832 logical_id=(file_driver,
7833 "%s/disk%d" % (file_storage_dir,
7835 mode=disk[constants.IDISK_MODE])
7836 disks.append(disk_dev)
7837 elif template_name == constants.DT_BLOCK:
7838 if len(secondary_nodes) != 0:
7839 raise errors.ProgrammerError("Wrong template configuration")
7841 for idx, disk in enumerate(disk_info):
7842 disk_index = idx + base_index
7843 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7844 size=disk[constants.IDISK_SIZE],
7845 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7846 disk[constants.IDISK_ADOPT]),
7847 iv_name="disk/%d" % disk_index,
7848 mode=disk[constants.IDISK_MODE])
7849 disks.append(disk_dev)
7852 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
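# Illustrative result (assumed input): for DT_PLAIN with a single
# {IDISK_SIZE: 1024, IDISK_MODE: "rw"} disk and base_index 0, the generated
# list holds one objects.Disk(dev_type=LD_LV, size=1024, iv_name="disk/0")
# whose logical_id is (vgname, "<generated-unique-id>.disk0").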
7856 def _GetInstanceInfoText(instance):
7857 """Compute that text that should be added to the disk's metadata.
7860 return "originstname+%s" % instance.name
7863 def _CalcEta(time_taken, written, total_size):
7864 """Calculates the ETA based on size written and total size.
7866 @param time_taken: The time taken so far
7867 @param written: amount written so far
7868 @param total_size: The total size of data to be written
7869 @return: The remaining time in seconds
7872 avg_time = time_taken / float(written)
7873 return (total_size - written) * avg_time
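# Illustrative arithmetic for _CalcEta (values assumed for the example):
# having written 512 MiB of 2048 MiB in 30 seconds gives
# avg_time = 30 / 512.0 ~= 0.0586 s per MiB, so the remaining
# 2048 - 512 = 1536 MiB need about 1536 * 0.0586 ~= 90 seconds:
#   _CalcEta(30, 512, 2048)  # -> 90.0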
7876 def _WipeDisks(lu, instance):
7877 """Wipes instance disks.
7879 @type lu: L{LogicalUnit}
7880 @param lu: the logical unit on whose behalf we execute
7881 @type instance: L{objects.Instance}
7882 @param instance: the instance whose disks we should wipe
7883 @return: the success of the wipe
7886 node = instance.primary_node
7888 for device in instance.disks:
7889 lu.cfg.SetDiskID(device, node)
7891 logging.info("Pause sync of instance %s disks", instance.name)
7892 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7894 for idx, success in enumerate(result.payload):
7896 logging.warn("pause-sync of instance %s for disk %d failed",
7900 for idx, device in enumerate(instance.disks):
7901 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7902 # MAX_WIPE_CHUNK at max
7903 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7904 constants.MIN_WIPE_CHUNK_PERCENT)
7905 # we _must_ make this an int, otherwise rounding errors will
7907 wipe_chunk_size = int(wipe_chunk_size)
7909 lu.LogInfo("* Wiping disk %d", idx)
7910 logging.info("Wiping disk %d for instance %s, node %s using"
7911 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7916 start_time = time.time()
7918 while offset < size:
7919 wipe_size = min(wipe_chunk_size, size - offset)
7920 logging.debug("Wiping disk %d, offset %s, chunk %s",
7921 idx, offset, wipe_size)
7922 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7923 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7924 (idx, offset, wipe_size))
7927 if now - last_output >= 60:
7928 eta = _CalcEta(now - start_time, offset, size)
7929 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7930 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7933 logging.info("Resume sync of instance %s disks", instance.name)
7935 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7937 for idx, success in enumerate(result.payload):
7939 lu.LogWarning("Resume sync of disk %d failed, please have a"
7940 " look at the status and troubleshoot the issue", idx)
7941 logging.warn("resume-sync of instance %s for disk %d failed",
7945 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7946 """Create all disks for an instance.
7948 This abstracts away some work from AddInstance.
7950 @type lu: L{LogicalUnit}
7951 @param lu: the logical unit on whose behalf we execute
7952 @type instance: L{objects.Instance}
7953 @param instance: the instance whose disks we should create
7955 @param to_skip: list of indices to skip
7956 @type target_node: string
7957 @param target_node: if passed, overrides the target node for creation
7959 @return: the success of the creation
7962 info = _GetInstanceInfoText(instance)
7963 if target_node is None:
7964 pnode = instance.primary_node
7965 all_nodes = instance.all_nodes
7970 if instance.disk_template in constants.DTS_FILEBASED:
7971 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7972 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7974 result.Raise("Failed to create directory '%s' on"
7975 " node %s" % (file_storage_dir, pnode))
7977 # Note: this needs to be kept in sync with adding of disks in
7978 # LUInstanceSetParams
7979 for idx, device in enumerate(instance.disks):
7980 if to_skip and idx in to_skip:
7982 logging.info("Creating volume %s for instance %s",
7983 device.iv_name, instance.name)
7985 for node in all_nodes:
7986 f_create = node == pnode
7987 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
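# Note: f_create doubles as both force_create and force_open in the call
# above, so the device tree is forcibly created and opened only on the
# primary node, while secondaries (e.g. the DRBD peer) get the devices
# assembled but not opened.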
7990 def _RemoveDisks(lu, instance, target_node=None):
7991 """Remove all disks for an instance.
7993 This abstracts away some work from `AddInstance()` and
7994 `RemoveInstance()`. Note that in case some of the devices couldn't
7995 be removed, the removal will continue with the other ones (compare
7996 with `_CreateDisks()`).
7998 @type lu: L{LogicalUnit}
7999 @param lu: the logical unit on whose behalf we execute
8000 @type instance: L{objects.Instance}
8001 @param instance: the instance whose disks we should remove
8002 @type target_node: string
8003 @param target_node: used to override the node on which to remove the disks
8005 @return: the success of the removal
8008 logging.info("Removing block devices for instance %s", instance.name)
8011 for device in instance.disks:
8013 edata = [(target_node, device)]
8015 edata = device.ComputeNodeTree(instance.primary_node)
8016 for node, disk in edata:
8017 lu.cfg.SetDiskID(disk, node)
8018 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8020 lu.LogWarning("Could not remove block device %s on node %s,"
8021 " continuing anyway: %s", device.iv_name, node, msg)
8024 if instance.disk_template == constants.DT_FILE:
8025 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8029 tgt = instance.primary_node
8030 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8032 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8033 file_storage_dir, instance.primary_node, result.fail_msg)
8039 def _ComputeDiskSizePerVG(disk_template, disks):
8040 """Compute disk size requirements in the volume group
8043 def _compute(disks, payload):
8044 """Universal algorithm.
8049 vgs[disk[constants.IDISK_VG]] = \
8050 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8054 # Required free disk space as a function of disk and swap space
8056 constants.DT_DISKLESS: {},
8057 constants.DT_PLAIN: _compute(disks, 0),
8058 # 128 MB are added for drbd metadata for each disk
8059 constants.DT_DRBD8: _compute(disks, 128),
8060 constants.DT_FILE: {},
8061 constants.DT_SHARED_FILE: {},
8064 if disk_template not in req_size_dict:
8065 raise errors.ProgrammerError("Disk template '%s' size requirement"
8066 " is unknown" % disk_template)
8068 return req_size_dict[disk_template]
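# Illustrative example (assumed input): for DT_DRBD8 with
# disks = [{IDISK_VG: "xenvg", IDISK_SIZE: 1024}, {IDISK_VG: "xenvg", IDISK_SIZE: 2048}]
# the result would be {"xenvg": (1024 + 128) + (2048 + 128)} = {"xenvg": 3328} MiB.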
8071 def _ComputeDiskSize(disk_template, disks):
8072 """Compute disk size requirements in the volume group
8075 # Required free disk space as a function of disk and swap space
8077 constants.DT_DISKLESS: None,
8078 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8079 # 128 MB are added for drbd metadata for each disk
8080 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8081 constants.DT_FILE: None,
8082 constants.DT_SHARED_FILE: 0,
8083 constants.DT_BLOCK: 0,
8086 if disk_template not in req_size_dict:
8087 raise errors.ProgrammerError("Disk template '%s' size requirement"
8088 " is unknown" % disk_template)
8090 return req_size_dict[disk_template]
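# Illustrative example (assumed sizes): two disks of 1024 and 2048 MiB need
# 3072 MiB under DT_PLAIN and (1024 + 128) + (2048 + 128) = 3328 MiB under
# DT_DRBD8; DISKLESS/FILE report None and SHARED_FILE/BLOCK report 0.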
8093 def _FilterVmNodes(lu, nodenames):
8094 """Filters out non-vm_capable nodes from a list.
8096 @type lu: L{LogicalUnit}
8097 @param lu: the logical unit for which we check
8098 @type nodenames: list
8099 @param nodenames: the list of nodes on which we should check
8101 @return: the list of vm-capable nodes
8104 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8105 return [name for name in nodenames if name not in vm_nodes]
8108 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8109 """Hypervisor parameter validation.
8111 This function abstracts the hypervisor parameter validation to be
8112 used in both instance create and instance modify.
8114 @type lu: L{LogicalUnit}
8115 @param lu: the logical unit for which we check
8116 @type nodenames: list
8117 @param nodenames: the list of nodes on which we should check
8118 @type hvname: string
8119 @param hvname: the name of the hypervisor we should use
8120 @type hvparams: dict
8121 @param hvparams: the parameters which we need to check
8122 @raise errors.OpPrereqError: if the parameters are not valid
8125 nodenames = _FilterVmNodes(lu, nodenames)
8126 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8129 for node in nodenames:
8133 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8136 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8137 """OS parameters validation.
8139 @type lu: L{LogicalUnit}
8140 @param lu: the logical unit for which we check
8141 @type required: boolean
8142 @param required: whether the validation should fail if the OS is not
8144 @type nodenames: list
8145 @param nodenames: the list of nodes on which we should check
8146 @type osname: string
8147 @param osname: the name of the OS we should use
8148 @type osparams: dict
8149 @param osparams: the parameters which we need to check
8150 @raise errors.OpPrereqError: if the parameters are not valid
8153 nodenames = _FilterVmNodes(lu, nodenames)
8154 result = lu.rpc.call_os_validate(required, nodenames, osname,
8155 [constants.OS_VALIDATE_PARAMETERS],
8157 for node, nres in result.items():
8158 # we don't check for offline cases since this should be run only
8159 # against the master node and/or an instance's nodes
8160 nres.Raise("OS Parameters validation failed on node %s" % node)
8161 if not nres.payload:
8162 lu.LogInfo("OS %s not found on node %s, validation skipped",
8166 class LUInstanceCreate(LogicalUnit):
8167 """Create an instance.
8170 HPATH = "instance-add"
8171 HTYPE = constants.HTYPE_INSTANCE
8174 def CheckArguments(self):
8178 # do not require name_check to ease forward/backward compatibility
8180 if self.op.no_install and self.op.start:
8181 self.LogInfo("No-installation mode selected, disabling startup")
8182 self.op.start = False
8183 # validate/normalize the instance name
8184 self.op.instance_name = \
8185 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8187 if self.op.ip_check and not self.op.name_check:
8188 # TODO: make the ip check more flexible and not depend on the name check
8189 raise errors.OpPrereqError("Cannot do IP address check without a name"
8190 " check", errors.ECODE_INVAL)
8192 # check nics' parameter names
8193 for nic in self.op.nics:
8194 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8196 # check disks: parameter names and consistent adopt/no-adopt strategy
8197 has_adopt = has_no_adopt = False
8198 for disk in self.op.disks:
8199 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8200 if constants.IDISK_ADOPT in disk:
8204 if has_adopt and has_no_adopt:
8205 raise errors.OpPrereqError("Either all disks are adopted or none is",
8208 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8209 raise errors.OpPrereqError("Disk adoption is not supported for the"
8210 " '%s' disk template" %
8211 self.op.disk_template,
8213 if self.op.iallocator is not None:
8214 raise errors.OpPrereqError("Disk adoption not allowed with an"
8215 " iallocator script", errors.ECODE_INVAL)
8216 if self.op.mode == constants.INSTANCE_IMPORT:
8217 raise errors.OpPrereqError("Disk adoption not allowed for"
8218 " instance import", errors.ECODE_INVAL)
8220 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8221 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8222 " but no 'adopt' parameter given" %
8223 self.op.disk_template,
8226 self.adopt_disks = has_adopt
8228 # instance name verification
8229 if self.op.name_check:
8230 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8231 self.op.instance_name = self.hostname1.name
8232 # used in CheckPrereq for ip ping check
8233 self.check_ip = self.hostname1.ip
8235 self.check_ip = None
8237 # file storage checks
8238 if (self.op.file_driver and
8239 not self.op.file_driver in constants.FILE_DRIVER):
8240 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8241 self.op.file_driver, errors.ECODE_INVAL)
8243 if self.op.disk_template == constants.DT_FILE:
8244 opcodes.RequireFileStorage()
8245 elif self.op.disk_template == constants.DT_SHARED_FILE:
8246 opcodes.RequireSharedFileStorage()
8248 ### Node/iallocator related checks
8249 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8251 if self.op.pnode is not None:
8252 if self.op.disk_template in constants.DTS_INT_MIRROR:
8253 if self.op.snode is None:
8254 raise errors.OpPrereqError("The networked disk templates need"
8255 " a mirror node", errors.ECODE_INVAL)
8257 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8259 self.op.snode = None
8261 self._cds = _GetClusterDomainSecret()
8263 if self.op.mode == constants.INSTANCE_IMPORT:
8264 # On import force_variant must be True, because if we forced it at
8265 # initial install, our only chance when importing it back is that it
8267 self.op.force_variant = True
8269 if self.op.no_install:
8270 self.LogInfo("No-installation mode has no effect during import")
8272 elif self.op.mode == constants.INSTANCE_CREATE:
8273 if self.op.os_type is None:
8274 raise errors.OpPrereqError("No guest OS specified",
8276 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8277 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8278 " installation" % self.op.os_type,
8280 if self.op.disk_template is None:
8281 raise errors.OpPrereqError("No disk template specified",
8284 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8285 # Check handshake to ensure both clusters have the same domain secret
8286 src_handshake = self.op.source_handshake
8287 if not src_handshake:
8288 raise errors.OpPrereqError("Missing source handshake",
8291 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8294 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8297 # Load and check source CA
8298 self.source_x509_ca_pem = self.op.source_x509_ca
8299 if not self.source_x509_ca_pem:
8300 raise errors.OpPrereqError("Missing source X509 CA",
8304 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8306 except OpenSSL.crypto.Error, err:
8307 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8308 (err, ), errors.ECODE_INVAL)
8310 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8311 if errcode is not None:
8312 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8315 self.source_x509_ca = cert
8317 src_instance_name = self.op.source_instance_name
8318 if not src_instance_name:
8319 raise errors.OpPrereqError("Missing source instance name",
8322 self.source_instance_name = \
8323 netutils.GetHostname(name=src_instance_name).name
8326 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8327 self.op.mode, errors.ECODE_INVAL)
8329 def ExpandNames(self):
8330 """ExpandNames for CreateInstance.
8332 Figure out the right locks for instance creation.
8335 self.needed_locks = {}
8337 instance_name = self.op.instance_name
8338 # this is just a preventive check, but someone might still add this
8339 # instance in the meantime, and creation will fail at lock-add time
8340 if instance_name in self.cfg.GetInstanceList():
8341 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8342 instance_name, errors.ECODE_EXISTS)
8344 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8346 if self.op.iallocator:
8347 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8349 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8350 nodelist = [self.op.pnode]
8351 if self.op.snode is not None:
8352 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8353 nodelist.append(self.op.snode)
8354 self.needed_locks[locking.LEVEL_NODE] = nodelist
8356 # in case of import lock the source node too
8357 if self.op.mode == constants.INSTANCE_IMPORT:
8358 src_node = self.op.src_node
8359 src_path = self.op.src_path
8361 if src_path is None:
8362 self.op.src_path = src_path = self.op.instance_name
8364 if src_node is None:
8365 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8366 self.op.src_node = None
8367 if os.path.isabs(src_path):
8368 raise errors.OpPrereqError("Importing an instance from a path"
8369 " requires a source node option",
8372 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8373 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8374 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8375 if not os.path.isabs(src_path):
8376 self.op.src_path = src_path = \
8377 utils.PathJoin(constants.EXPORT_DIR, src_path)
8379 def _RunAllocator(self):
8380 """Run the allocator based on input opcode.
8383 nics = [n.ToDict() for n in self.nics]
8384 ial = IAllocator(self.cfg, self.rpc,
8385 mode=constants.IALLOCATOR_MODE_ALLOC,
8386 name=self.op.instance_name,
8387 disk_template=self.op.disk_template,
8390 vcpus=self.be_full[constants.BE_VCPUS],
8391 memory=self.be_full[constants.BE_MEMORY],
8394 hypervisor=self.op.hypervisor,
8397 ial.Run(self.op.iallocator)
8400 raise errors.OpPrereqError("Can't compute nodes using"
8401 " iallocator '%s': %s" %
8402 (self.op.iallocator, ial.info),
8404 if len(ial.result) != ial.required_nodes:
8405 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8406 " of nodes (%s), required %s" %
8407 (self.op.iallocator, len(ial.result),
8408 ial.required_nodes), errors.ECODE_FAULT)
8409 self.op.pnode = ial.result[0]
8410 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8411 self.op.instance_name, self.op.iallocator,
8412 utils.CommaJoin(ial.result))
8413 if ial.required_nodes == 2:
8414 self.op.snode = ial.result[1]
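# Illustrative allocator outcome (hypothetical node names): for a DRBD8
# instance ial.required_nodes is 2 and ial.result could be
# ["node1.example.com", "node2.example.com"], making the first the primary
# and the second the secondary node.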
8416 def BuildHooksEnv(self):
8419 This runs on master, primary and secondary nodes of the instance.
8423 "ADD_MODE": self.op.mode,
8425 if self.op.mode == constants.INSTANCE_IMPORT:
8426 env["SRC_NODE"] = self.op.src_node
8427 env["SRC_PATH"] = self.op.src_path
8428 env["SRC_IMAGES"] = self.src_images
8430 env.update(_BuildInstanceHookEnv(
8431 name=self.op.instance_name,
8432 primary_node=self.op.pnode,
8433 secondary_nodes=self.secondaries,
8434 status=self.op.start,
8435 os_type=self.op.os_type,
8436 memory=self.be_full[constants.BE_MEMORY],
8437 vcpus=self.be_full[constants.BE_VCPUS],
8438 nics=_NICListToTuple(self, self.nics),
8439 disk_template=self.op.disk_template,
8440 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8441 for d in self.disks],
8444 hypervisor_name=self.op.hypervisor,
8450 def BuildHooksNodes(self):
8451 """Build hooks nodes.
8454 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8457 def _ReadExportInfo(self):
8458 """Reads the export information from disk.
8460 It will override the opcode source node and path with the actual
8461 information, if these two were not specified before.
8463 @return: the export information
8466 assert self.op.mode == constants.INSTANCE_IMPORT
8468 src_node = self.op.src_node
8469 src_path = self.op.src_path
8471 if src_node is None:
8472 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8473 exp_list = self.rpc.call_export_list(locked_nodes)
8475 for node in exp_list:
8476 if exp_list[node].fail_msg:
8478 if src_path in exp_list[node].payload:
8480 self.op.src_node = src_node = node
8481 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8485 raise errors.OpPrereqError("No export found for relative path %s" %
8486 src_path, errors.ECODE_INVAL)
8488 _CheckNodeOnline(self, src_node)
8489 result = self.rpc.call_export_info(src_node, src_path)
8490 result.Raise("No export or invalid export found in dir %s" % src_path)
8492 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8493 if not export_info.has_section(constants.INISECT_EXP):
8494 raise errors.ProgrammerError("Corrupted export config",
8495 errors.ECODE_ENVIRON)
8497 ei_version = export_info.get(constants.INISECT_EXP, "version")
8498 if (int(ei_version) != constants.EXPORT_VERSION):
8499 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8500 (ei_version, constants.EXPORT_VERSION),
8501 errors.ECODE_ENVIRON)
8504 def _ReadExportParams(self, einfo):
8505 """Use export parameters as defaults.
8507 In case the opcode doesn't specify (as in override) some instance
8508 parameters, then try to use them from the export information, if
8512 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8514 if self.op.disk_template is None:
8515 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8516 self.op.disk_template = einfo.get(constants.INISECT_INS,
8518 if self.op.disk_template not in constants.DISK_TEMPLATES:
8519 raise errors.OpPrereqError("Disk template specified in configuration"
8520 " file is not one of the allowed values:"
8521 " %s" % " ".join(constants.DISK_TEMPLATES))
8523 raise errors.OpPrereqError("No disk template specified and the export"
8524 " is missing the disk_template information",
8527 if not self.op.disks:
8529 # TODO: import the disk iv_name too
8530 for idx in range(constants.MAX_DISKS):
8531 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8532 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8533 disks.append({constants.IDISK_SIZE: disk_sz})
8534 self.op.disks = disks
8535 if not disks and self.op.disk_template != constants.DT_DISKLESS:
8536 raise errors.OpPrereqError("No disk info specified and the export"
8537 " is missing the disk information",
8540 if not self.op.nics:
8542 for idx in range(constants.MAX_NICS):
8543 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8545 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8546 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8553 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8554 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8556 if (self.op.hypervisor is None and
8557 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8558 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8560 if einfo.has_section(constants.INISECT_HYP):
8561 # use the export parameters but do not override the ones
8562 # specified by the user
8563 for name, value in einfo.items(constants.INISECT_HYP):
8564 if name not in self.op.hvparams:
8565 self.op.hvparams[name] = value
8567 if einfo.has_section(constants.INISECT_BEP):
8568 # use the parameters, without overriding
8569 for name, value in einfo.items(constants.INISECT_BEP):
8570 if name not in self.op.beparams:
8571 self.op.beparams[name] = value
8573 # try to read the parameters old style, from the main section
8574 for name in constants.BES_PARAMETERS:
8575 if (name not in self.op.beparams and
8576 einfo.has_option(constants.INISECT_INS, name)):
8577 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8579 if einfo.has_section(constants.INISECT_OSP):
8580 # use the parameters, without overriding
8581 for name, value in einfo.items(constants.INISECT_OSP):
8582 if name not in self.op.osparams:
8583 self.op.osparams[name] = value
8585 def _RevertToDefaults(self, cluster):
8586 """Revert the instance parameters to the default values.
8590 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8591 for name in self.op.hvparams.keys():
8592 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8593 del self.op.hvparams[name]
8595 be_defs = cluster.SimpleFillBE({})
8596 for name in self.op.beparams.keys():
8597 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8598 del self.op.beparams[name]
8600 nic_defs = cluster.SimpleFillNIC({})
8601 for nic in self.op.nics:
8602 for name in constants.NICS_PARAMETERS:
8603 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8606 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8607 for name in self.op.osparams.keys():
8608 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8609 del self.op.osparams[name]
8611 def _CalculateFileStorageDir(self):
8612 """Calculate final instance file storage dir.
8615 # file storage dir calculation/check
8616 self.instance_file_storage_dir = None
8617 if self.op.disk_template in constants.DTS_FILEBASED:
8618 # build the full file storage dir path
8621 if self.op.disk_template == constants.DT_SHARED_FILE:
8622 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8624 get_fsd_fn = self.cfg.GetFileStorageDir
8626 cfg_storagedir = get_fsd_fn()
8627 if not cfg_storagedir:
8628 raise errors.OpPrereqError("Cluster file storage dir not defined")
8629 joinargs.append(cfg_storagedir)
8631 if self.op.file_storage_dir is not None:
8632 joinargs.append(self.op.file_storage_dir)
8634 joinargs.append(self.op.instance_name)
8636 # pylint: disable=W0142
8637 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8639 def CheckPrereq(self):
8640 """Check prerequisites.
8643 self._CalculateFileStorageDir()
8645 if self.op.mode == constants.INSTANCE_IMPORT:
8646 export_info = self._ReadExportInfo()
8647 self._ReadExportParams(export_info)
8649 if (not self.cfg.GetVGName() and
8650 self.op.disk_template not in constants.DTS_NOT_LVM):
8651 raise errors.OpPrereqError("Cluster does not support lvm-based"
8652 " instances", errors.ECODE_STATE)
8654 if (self.op.hypervisor is None or
8655 self.op.hypervisor == constants.VALUE_AUTO):
8656 self.op.hypervisor = self.cfg.GetHypervisorType()
8658 cluster = self.cfg.GetClusterInfo()
8659 enabled_hvs = cluster.enabled_hypervisors
8660 if self.op.hypervisor not in enabled_hvs:
8661 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8662 " cluster (%s)" % (self.op.hypervisor,
8663 ",".join(enabled_hvs)),
8666 # Check tag validity
8667 for tag in self.op.tags:
8668 objects.TaggableObject.ValidateTag(tag)
8670 # check hypervisor parameter syntax (locally)
8671 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8672 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8674 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8675 hv_type.CheckParameterSyntax(filled_hvp)
8676 self.hv_full = filled_hvp
8677 # check that we don't specify global parameters on an instance
8678 _CheckGlobalHvParams(self.op.hvparams)
8680 # fill and remember the beparams dict
8681 default_beparams = cluster.beparams[constants.PP_DEFAULT]
8682 for param, value in self.op.beparams.iteritems():
8683 if value == constants.VALUE_AUTO:
8684 self.op.beparams[param] = default_beparams[param]
8685 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8686 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8688 # build os parameters
8689 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8691 # now that hvp/bep are in final format, let's reset to defaults,
8693 if self.op.identify_defaults:
8694 self._RevertToDefaults(cluster)
8698 for idx, nic in enumerate(self.op.nics):
8699 nic_mode_req = nic.get(constants.INIC_MODE, None)
8700 nic_mode = nic_mode_req
8701 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8702 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8704 # in routed mode, for the first nic, the default ip is 'auto'
8705 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8706 default_ip_mode = constants.VALUE_AUTO
8708 default_ip_mode = constants.VALUE_NONE
8710 # ip validity checks
8711 ip = nic.get(constants.INIC_IP, default_ip_mode)
8712 if ip is None or ip.lower() == constants.VALUE_NONE:
8714 elif ip.lower() == constants.VALUE_AUTO:
8715 if not self.op.name_check:
8716 raise errors.OpPrereqError("IP address set to auto but name checks"
8717 " have been skipped",
8719 nic_ip = self.hostname1.ip
8721 if not netutils.IPAddress.IsValid(ip):
8722 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8726 # TODO: check the ip address for uniqueness
8727 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8728 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8731 # MAC address verification
8732 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8733 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8734 mac = utils.NormalizeAndValidateMac(mac)
8737 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8738 except errors.ReservationError:
8739 raise errors.OpPrereqError("MAC address %s already in use"
8740 " in cluster" % mac,
8741 errors.ECODE_NOTUNIQUE)
8743 # Build nic parameters
8744 link = nic.get(constants.INIC_LINK, None)
8745 if link == constants.VALUE_AUTO:
8746 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8749 nicparams[constants.NIC_MODE] = nic_mode
8751 nicparams[constants.NIC_LINK] = link
8753 check_params = cluster.SimpleFillNIC(nicparams)
8754 objects.NIC.CheckParameterSyntax(check_params)
8755 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8757 # disk checks/pre-build
8758 default_vg = self.cfg.GetVGName()
8760 for disk in self.op.disks:
8761 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8762 if mode not in constants.DISK_ACCESS_SET:
8763 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8764 mode, errors.ECODE_INVAL)
8765 size = disk.get(constants.IDISK_SIZE, None)
8767 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8770 except (TypeError, ValueError):
8771 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8774 data_vg = disk.get(constants.IDISK_VG, default_vg)
8776 constants.IDISK_SIZE: size,
8777 constants.IDISK_MODE: mode,
8778 constants.IDISK_VG: data_vg,
8779 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8781 if constants.IDISK_ADOPT in disk:
8782 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8783 self.disks.append(new_disk)
8785 if self.op.mode == constants.INSTANCE_IMPORT:
8787 for idx in range(len(self.disks)):
8788 option = "disk%d_dump" % idx
8789 if export_info.has_option(constants.INISECT_INS, option):
8790 # FIXME: are the old os-es, disk sizes, etc. useful?
8791 export_name = export_info.get(constants.INISECT_INS, option)
8792 image = utils.PathJoin(self.op.src_path, export_name)
8793 disk_images.append(image)
8795 disk_images.append(False)
8797 self.src_images = disk_images
8799 old_name = export_info.get(constants.INISECT_INS, "name")
8800 if self.op.instance_name == old_name:
8801 for idx, nic in enumerate(self.nics):
8802 if nic.mac == constants.VALUE_AUTO:
8803 nic_mac_ini = "nic%d_mac" % idx
8804 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8806 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8808 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8809 if self.op.ip_check:
8810 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8811 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8812 (self.check_ip, self.op.instance_name),
8813 errors.ECODE_NOTUNIQUE)
8815 #### mac address generation
8816 # By generating the mac address here, both the allocator and the hooks get
8817 # the real final mac address rather than the 'auto' or 'generate' value.
8818 # There is a race condition between the generation and the instance object
8819 # creation, which means that we know the mac is valid now, but we're not
8820 # sure it will be when we actually add the instance. If things go bad
8821 # adding the instance will abort because of a duplicate mac, and the
8822 # creation job will fail.
8823 for nic in self.nics:
8824 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8825 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8829 if self.op.iallocator is not None:
8830 self._RunAllocator()
8832 #### node related checks
8834 # check primary node
8835 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8836 assert self.pnode is not None, \
8837 "Cannot retrieve locked node %s" % self.op.pnode
8839 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8840 pnode.name, errors.ECODE_STATE)
8842 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8843 pnode.name, errors.ECODE_STATE)
8844 if not pnode.vm_capable:
8845 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8846 " '%s'" % pnode.name, errors.ECODE_STATE)
8848 self.secondaries = []
8850 # mirror node verification
8851 if self.op.disk_template in constants.DTS_INT_MIRROR:
8852 if self.op.snode == pnode.name:
8853 raise errors.OpPrereqError("The secondary node cannot be the"
8854 " primary node", errors.ECODE_INVAL)
8855 _CheckNodeOnline(self, self.op.snode)
8856 _CheckNodeNotDrained(self, self.op.snode)
8857 _CheckNodeVmCapable(self, self.op.snode)
8858 self.secondaries.append(self.op.snode)
8860 nodenames = [pnode.name] + self.secondaries
8862 if not self.adopt_disks:
8863 # Check lv size requirements, if not adopting
8864 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8865 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8867 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8868 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8869 disk[constants.IDISK_ADOPT])
8870 for disk in self.disks])
8871 if len(all_lvs) != len(self.disks):
8872 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8874 for lv_name in all_lvs:
8876 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8877 # to ReserveLV use the same syntax
8878 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8879 except errors.ReservationError:
8880 raise errors.OpPrereqError("LV named %s used by another instance" %
8881 lv_name, errors.ECODE_NOTUNIQUE)
8883 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8884 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8886 node_lvs = self.rpc.call_lv_list([pnode.name],
8887 vg_names.payload.keys())[pnode.name]
8888 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8889 node_lvs = node_lvs.payload
8891 delta = all_lvs.difference(node_lvs.keys())
8893 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8894 utils.CommaJoin(delta),
8896 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8898 raise errors.OpPrereqError("Online logical volumes found, cannot"
8899 " adopt: %s" % utils.CommaJoin(online_lvs),
8901 # update the size of disk based on what is found
8902 for dsk in self.disks:
8903 dsk[constants.IDISK_SIZE] = \
8904 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8905 dsk[constants.IDISK_ADOPT])][0]))
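# The lv_list payload maps "vg/lv" names to tuples whose index 0 is the size
# in MiB (adopted here) and index 2 the online/in-use flag (checked above);
# the remaining fields are not consulted by this code.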
8907 elif self.op.disk_template == constants.DT_BLOCK:
8908 # Normalize and de-duplicate device paths
8909 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8910 for disk in self.disks])
8911 if len(all_disks) != len(self.disks):
8912 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8914 baddisks = [d for d in all_disks
8915 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8917 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8918 " cannot be adopted" %
8919 (", ".join(baddisks),
8920 constants.ADOPTABLE_BLOCKDEV_ROOT),
8923 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8924 list(all_disks))[pnode.name]
8925 node_disks.Raise("Cannot get block device information from node %s" %
8927 node_disks = node_disks.payload
8928 delta = all_disks.difference(node_disks.keys())
8930 raise errors.OpPrereqError("Missing block device(s): %s" %
8931 utils.CommaJoin(delta),
8933 for dsk in self.disks:
8934 dsk[constants.IDISK_SIZE] = \
8935 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8937 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8939 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8940 # check OS parameters (remotely)
8941 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8943 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8945 # memory check on primary node
8947 _CheckNodeFreeMemory(self, self.pnode.name,
8948 "creating instance %s" % self.op.instance_name,
8949 self.be_full[constants.BE_MEMORY],
8952 self.dry_run_result = list(nodenames)
8954 def Exec(self, feedback_fn):
8955 """Create and add the instance to the cluster.
8958 instance = self.op.instance_name
8959 pnode_name = self.pnode.name
8961 ht_kind = self.op.hypervisor
8962 if ht_kind in constants.HTS_REQ_PORT:
8963 network_port = self.cfg.AllocatePort()
8967 disks = _GenerateDiskTemplate(self,
8968 self.op.disk_template,
8969 instance, pnode_name,
8972 self.instance_file_storage_dir,
8973 self.op.file_driver,
8977 iobj = objects.Instance(name=instance, os=self.op.os_type,
8978 primary_node=pnode_name,
8979 nics=self.nics, disks=disks,
8980 disk_template=self.op.disk_template,
8982 network_port=network_port,
8983 beparams=self.op.beparams,
8984 hvparams=self.op.hvparams,
8985 hypervisor=self.op.hypervisor,
8986 osparams=self.op.osparams,
8990 for tag in self.op.tags:
8993 if self.adopt_disks:
8994 if self.op.disk_template == constants.DT_PLAIN:
8995 # rename LVs to the newly-generated names; we need to construct
8996 # 'fake' LV disks with the old data, plus the new unique_id
8997 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8999 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9000 rename_to.append(t_dsk.logical_id)
9001 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9002 self.cfg.SetDiskID(t_dsk, pnode_name)
9003 result = self.rpc.call_blockdev_rename(pnode_name,
9004 zip(tmp_disks, rename_to))
9005 result.Raise("Failed to rename adopted LVs")
9007 feedback_fn("* creating instance disks...")
9009 _CreateDisks(self, iobj)
9010 except errors.OpExecError:
9011 self.LogWarning("Device creation failed, reverting...")
9013 _RemoveDisks(self, iobj)
9015 self.cfg.ReleaseDRBDMinors(instance)
9018 feedback_fn("adding instance %s to cluster config" % instance)
9020 self.cfg.AddInstance(iobj, self.proc.GetECId())
9022 # Declare that we don't want to remove the instance lock anymore, as we've
9023 # added the instance to the config
9024 del self.remove_locks[locking.LEVEL_INSTANCE]
9026 if self.op.mode == constants.INSTANCE_IMPORT:
9027 # Release unused nodes
9028 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9031 _ReleaseLocks(self, locking.LEVEL_NODE)
9034 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9035 feedback_fn("* wiping instance disks...")
9037 _WipeDisks(self, iobj)
9038 except errors.OpExecError, err:
9039 logging.exception("Wiping disks failed")
9040 self.LogWarning("Wiping instance disks failed (%s)", err)
9044 # Something is already wrong with the disks, don't do anything else
9046 elif self.op.wait_for_sync:
9047 disk_abort = not _WaitForSync(self, iobj)
9048 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9049 # make sure the disks are not degraded (still sync-ing is ok)
9050 feedback_fn("* checking mirrors status")
9051 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9056 _RemoveDisks(self, iobj)
9057 self.cfg.RemoveInstance(iobj.name)
9058 # Make sure the instance lock gets removed
9059 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9060 raise errors.OpExecError("There are some degraded disks for"
9063 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9064 if self.op.mode == constants.INSTANCE_CREATE:
9065 if not self.op.no_install:
9066 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9067 not self.op.wait_for_sync)
9069 feedback_fn("* pausing disk sync to install instance OS")
9070 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9072 for idx, success in enumerate(result.payload):
9074 logging.warn("pause-sync of instance %s for disk %d failed",
9077 feedback_fn("* running the instance OS create scripts...")
9078 # FIXME: pass debug option from opcode to backend
9080 self.rpc.call_instance_os_add(pnode_name, iobj, False,
9081 self.op.debug_level)
9083 feedback_fn("* resuming disk sync")
9084 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9086 for idx, success in enumerate(result.payload):
9088 logging.warn("resume-sync of instance %s for disk %d failed",
9091 os_add_result.Raise("Could not add os for instance %s"
9092 " on node %s" % (instance, pnode_name))
9094 elif self.op.mode == constants.INSTANCE_IMPORT:
9095 feedback_fn("* running the instance OS import scripts...")
9099 for idx, image in enumerate(self.src_images):
9103 # FIXME: pass debug option from opcode to backend
9104 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9105 constants.IEIO_FILE, (image, ),
9106 constants.IEIO_SCRIPT,
9107 (iobj.disks[idx], idx),
9109 transfers.append(dt)
9112 masterd.instance.TransferInstanceData(self, feedback_fn,
9113 self.op.src_node, pnode_name,
9114 self.pnode.secondary_ip,
9116 if not compat.all(import_result):
9117 self.LogWarning("Some disks for instance %s on node %s were not"
9118 " imported successfully" % (instance, pnode_name))
9120 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9121 feedback_fn("* preparing remote import...")
9122 # The source cluster will stop the instance before attempting to make a
9123 # connection. In some cases stopping an instance can take a long time,
9124 # hence the shutdown timeout is added to the connection timeout.
9125 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9126 self.op.source_shutdown_timeout)
9127 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
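# Illustrative timeout arithmetic (RIE_CONNECT_TIMEOUT's value is assumed
# here, not quoted): if it were 60 s and source_shutdown_timeout were 120 s,
# the remote import would wait up to 180 s for the source cluster to connect.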
9129 assert iobj.primary_node == self.pnode.name
9131 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9132 self.source_x509_ca,
9133 self._cds, timeouts)
9134 if not compat.all(disk_results):
9135 # TODO: Should the instance still be started, even if some disks
9136 # failed to import (valid for local imports, too)?
9137 self.LogWarning("Some disks for instance %s on node %s were not"
9138 " imported successfully" % (instance, pnode_name))
9140 # Run rename script on newly imported instance
9141 assert iobj.name == instance
9142 feedback_fn("Running rename script for %s" % instance)
9143 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9144 self.source_instance_name,
9145 self.op.debug_level)
9147 self.LogWarning("Failed to run rename script for %s on node"
9148 " %s: %s" % (instance, pnode_name, result.fail_msg))
9151 # also checked in the prereq part
9152 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9156 iobj.admin_up = True
9157 self.cfg.Update(iobj, feedback_fn)
9158 logging.info("Starting instance %s on node %s", instance, pnode_name)
9159 feedback_fn("* starting instance...")
9160 result = self.rpc.call_instance_start(pnode_name, iobj,
9162 result.Raise("Could not start instance")
9164 return list(iobj.all_nodes)
9167 class LUInstanceConsole(NoHooksLU):
9168 """Connect to an instance's console.
9170 This is somewhat special in that it returns the command line that
9171 you need to run on the master node in order to connect to the
9177 def ExpandNames(self):
9178 self._ExpandAndLockInstance()
9180 def CheckPrereq(self):
9181 """Check prerequisites.
9183 This checks that the instance is in the cluster.
9186 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9187 assert self.instance is not None, \
9188 "Cannot retrieve locked instance %s" % self.op.instance_name
9189 _CheckNodeOnline(self, self.instance.primary_node)
9191 def Exec(self, feedback_fn):
9192 """Connect to the console of an instance
9195 instance = self.instance
9196 node = instance.primary_node
9198 node_insts = self.rpc.call_instance_list([node],
9199 [instance.hypervisor])[node]
9200 node_insts.Raise("Can't get node information from %s" % node)
9202 if instance.name not in node_insts.payload:
9203 if instance.admin_up:
9204 state = constants.INSTST_ERRORDOWN
9206 state = constants.INSTST_ADMINDOWN
9207 raise errors.OpExecError("Instance %s is not running (state %s)" %
9208 (instance.name, state))
9210 logging.debug("Connecting to console of %s on %s", instance.name, node)
9212 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9215 def _GetInstanceConsole(cluster, instance):
9216 """Returns console information for an instance.
9218 @type cluster: L{objects.Cluster}
9219 @type instance: L{objects.Instance}
9223 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9224 # beparams and hvparams are passed separately, to avoid editing the
9225 # instance and then saving the defaults in the instance itself.
9226 hvparams = cluster.FillHV(instance)
9227 beparams = cluster.FillBE(instance)
9228 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9230 assert console.instance == instance.name
9231 assert console.Validate()
9233 return console.ToDict()
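# Illustrative use (hypothetical caller): the returned dict is the serialized
# objects.InstanceConsole; a client could rebuild it with
# objects.InstanceConsole.FromDict(...) and then, depending on console.kind,
# either run the contained command or connect to the advertised host/port.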
9236 class LUInstanceReplaceDisks(LogicalUnit):
9237 """Replace the disks of an instance.
9240 HPATH = "mirrors-replace"
9241 HTYPE = constants.HTYPE_INSTANCE
9244 def CheckArguments(self):
9245 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9248 def ExpandNames(self):
9249 self._ExpandAndLockInstance()
9251 assert locking.LEVEL_NODE not in self.needed_locks
9252 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9254 assert self.op.iallocator is None or self.op.remote_node is None, \
9255 "Conflicting options"
9257 if self.op.remote_node is not None:
9258 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9260 # Warning: do not remove the locking of the new secondary here
9261 # unless DRBD8.AddChildren is changed to work in parallel;
9262 # currently it doesn't since parallel invocations of
9263 # FindUnusedMinor will conflict
9264 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9265 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9267 self.needed_locks[locking.LEVEL_NODE] = []
9268 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9270 if self.op.iallocator is not None:
9271 # iallocator will select a new node in the same group
9272 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9274 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9275 self.op.iallocator, self.op.remote_node,
9276 self.op.disks, False, self.op.early_release)
9278 self.tasklets = [self.replacer]
9280 def DeclareLocks(self, level):
9281 if level == locking.LEVEL_NODEGROUP:
9282 assert self.op.remote_node is None
9283 assert self.op.iallocator is not None
9284 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9286 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9287 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9288 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9290 elif level == locking.LEVEL_NODE:
9291 if self.op.iallocator is not None:
9292 assert self.op.remote_node is None
9293 assert not self.needed_locks[locking.LEVEL_NODE]
9295 # Lock member nodes of all locked groups
9296 self.needed_locks[locking.LEVEL_NODE] = [node_name
9297 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9298 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9300 self._LockInstancesNodes()
9302 def BuildHooksEnv(self):
9305 This runs on the master, the primary and all the secondaries.
9308 instance = self.replacer.instance
9310 "MODE": self.op.mode,
9311 "NEW_SECONDARY": self.op.remote_node,
9312 "OLD_SECONDARY": instance.secondary_nodes[0],
9314 env.update(_BuildInstanceHookEnvByObject(self, instance))
9317 def BuildHooksNodes(self):
9318 """Build hooks nodes.
9321 instance = self.replacer.instance
9323 self.cfg.GetMasterNode(),
9324 instance.primary_node,
9326 if self.op.remote_node is not None:
9327 nl.append(self.op.remote_node)
9330 def CheckPrereq(self):
9331 """Check prerequisites.
9334 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9335 self.op.iallocator is None)
9337 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9339 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9341 return LogicalUnit.CheckPrereq(self)
9344 class TLReplaceDisks(Tasklet):
9345 """Replaces disks for an instance.
9347 Note: Locking is not within the scope of this class.
9350 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9351 disks, delay_iallocator, early_release):
9352 """Initializes this class.
9355 Tasklet.__init__(self, lu)
9358 self.instance_name = instance_name
9360 self.iallocator_name = iallocator_name
9361 self.remote_node = remote_node
9363 self.delay_iallocator = delay_iallocator
9364 self.early_release = early_release
9367 self.instance = None
9368 self.new_node = None
9369 self.target_node = None
9370 self.other_node = None
9371 self.remote_node_info = None
9372 self.node_secondary_ip = None
9375 def CheckArguments(mode, remote_node, iallocator):
9376 """Helper function for users of this class.
9379 # check for valid parameter combination
9380 if mode == constants.REPLACE_DISK_CHG:
9381 if remote_node is None and iallocator is None:
9382 raise errors.OpPrereqError("When changing the secondary either an"
9383 " iallocator script must be used or the"
9384 " new node given", errors.ECODE_INVAL)
9386 if remote_node is not None and iallocator is not None:
9387 raise errors.OpPrereqError("Give either the iallocator or the new"
9388 " secondary, not both", errors.ECODE_INVAL)
9390 elif remote_node is not None or iallocator is not None:
9391 # Not replacing the secondary
9392 raise errors.OpPrereqError("The iallocator and new node options can"
9393 " only be used when changing the"
9394 " secondary node", errors.ECODE_INVAL)
9397 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9398 """Compute a new secondary node using an IAllocator.
9401 ial = IAllocator(lu.cfg, lu.rpc,
9402 mode=constants.IALLOCATOR_MODE_RELOC,
9404 relocate_from=list(relocate_from))
9406 ial.Run(iallocator_name)
9409 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9410 " %s" % (iallocator_name, ial.info),
9413 if len(ial.result) != ial.required_nodes:
9414 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9415 " of nodes (%s), required %s" %
9417 len(ial.result), ial.required_nodes),
9420 remote_node_name = ial.result[0]
9422 lu.LogInfo("Selected new secondary for instance '%s': %s",
9423 instance_name, remote_node_name)
9425 return remote_node_name
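# Sketch of the relocation result (hypothetical values): for an instance whose
# current secondary is "node2", ial.result is a single-element list such as
# ["node3"]; its first element becomes the new secondary logged above.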
9427 def _FindFaultyDisks(self, node_name):
9428 """Wrapper for L{_FindFaultyInstanceDisks}.
9431 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9434 def _CheckDisksActivated(self, instance):
9435 """Checks if the instance disks are activated.
9437 @param instance: The instance to check disks
9438 @return: True if they are activated, False otherwise
9441 nodes = instance.all_nodes
9443 for idx, dev in enumerate(instance.disks):
9445 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9446 self.cfg.SetDiskID(dev, node)
9448 result = self.rpc.call_blockdev_find(node, dev)
9452 elif result.fail_msg or not result.payload:
9457 def CheckPrereq(self):
9458 """Check prerequisites.
9460 This checks that the instance is in the cluster.
9463 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9464 assert instance is not None, \
9465 "Cannot retrieve locked instance %s" % self.instance_name
9467 if instance.disk_template != constants.DT_DRBD8:
9468 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9469 " instances", errors.ECODE_INVAL)
9471 if len(instance.secondary_nodes) != 1:
9472 raise errors.OpPrereqError("The instance has a strange layout,"
9473 " expected one secondary but found %d" %
9474 len(instance.secondary_nodes),
9477 if not self.delay_iallocator:
9478 self._CheckPrereq2()
9480 def _CheckPrereq2(self):
9481 """Check prerequisites, second part.
9483 This function should always be part of CheckPrereq. It was separated and is
9484 now called from Exec because during node evacuation iallocator was only
9485 called with an unmodified cluster model, not taking planned changes into account.
9489 instance = self.instance
9490 secondary_node = instance.secondary_nodes[0]
9492 if self.iallocator_name is None:
9493 remote_node = self.remote_node
9495 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9496 instance.name, instance.secondary_nodes)
9498 if remote_node is None:
9499 self.remote_node_info = None
9501 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9502 "Remote node '%s' is not locked" % remote_node
9504 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9505 assert self.remote_node_info is not None, \
9506 "Cannot retrieve locked node %s" % remote_node
9508 if remote_node == self.instance.primary_node:
9509 raise errors.OpPrereqError("The specified node is the primary node of"
9510 " the instance", errors.ECODE_INVAL)
9512 if remote_node == secondary_node:
9513 raise errors.OpPrereqError("The specified node is already the"
9514 " secondary node of the instance",
9517 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9518 constants.REPLACE_DISK_CHG):
9519 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9522 if self.mode == constants.REPLACE_DISK_AUTO:
9523 if not self._CheckDisksActivated(instance):
9524 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9525 " first" % self.instance_name,
9527 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9528 faulty_secondary = self._FindFaultyDisks(secondary_node)
9530 if faulty_primary and faulty_secondary:
9531 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9532 " one node and can not be repaired"
9533 " automatically" % self.instance_name,
9537 self.disks = faulty_primary
9538 self.target_node = instance.primary_node
9539 self.other_node = secondary_node
9540 check_nodes = [self.target_node, self.other_node]
9541 elif faulty_secondary:
9542 self.disks = faulty_secondary
9543 self.target_node = secondary_node
9544 self.other_node = instance.primary_node
9545 check_nodes = [self.target_node, self.other_node]
9551 # Non-automatic modes
9552 if self.mode == constants.REPLACE_DISK_PRI:
9553 self.target_node = instance.primary_node
9554 self.other_node = secondary_node
9555 check_nodes = [self.target_node, self.other_node]
9557 elif self.mode == constants.REPLACE_DISK_SEC:
9558 self.target_node = secondary_node
9559 self.other_node = instance.primary_node
9560 check_nodes = [self.target_node, self.other_node]
9562 elif self.mode == constants.REPLACE_DISK_CHG:
9563 self.new_node = remote_node
9564 self.other_node = instance.primary_node
9565 self.target_node = secondary_node
9566 check_nodes = [self.new_node, self.other_node]
9568 _CheckNodeNotDrained(self.lu, remote_node)
9569 _CheckNodeVmCapable(self.lu, remote_node)
9571 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9572 assert old_node_info is not None
9573 if old_node_info.offline and not self.early_release:
9574 # doesn't make sense to delay the release
9575 self.early_release = True
9576 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9577 " early-release mode", secondary_node)
9580 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9583 # If not specified all disks should be replaced
9585 self.disks = range(len(self.instance.disks))
9587 for node in check_nodes:
9588 _CheckNodeOnline(self.lu, node)
9590 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9593 if node_name is not None)
9595 # Release unneeded node locks
9596 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9598 # Release any owned node group
9599 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9600 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9602 # Check whether disks are valid
9603 for disk_idx in self.disks:
9604 instance.FindDisk(disk_idx)
9606 # Get secondary node IP addresses
9607 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9608 in self.cfg.GetMultiNodeInfo(touched_nodes))
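# node_secondary_ip ends up as a simple name -> secondary-IP mapping used by
# the DRBD network calls later on, e.g. (hypothetical addresses):
#   {"node1": "192.0.2.1", "node3": "192.0.2.3"}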
9610 def Exec(self, feedback_fn):
9611 """Execute disk replacement.
9613 This dispatches the disk replacement to the appropriate handler.
9616 if self.delay_iallocator:
9617 self._CheckPrereq2()
9620 # Verify owned locks before starting operation
9621 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9622 assert set(owned_nodes) == set(self.node_secondary_ip), \
9623 ("Incorrect node locks, owning %s, expected %s" %
9624 (owned_nodes, self.node_secondary_ip.keys()))
9626 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9627 assert list(owned_instances) == [self.instance_name], \
9628 "Instance '%s' not locked" % self.instance_name
9630 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9631 "Should not own any node group lock at this point"
9634 feedback_fn("No disks need replacement")
9637 feedback_fn("Replacing disk(s) %s for %s" %
9638 (utils.CommaJoin(self.disks), self.instance.name))
9640 activate_disks = (not self.instance.admin_up)
9642 # Activate the instance disks if we're replacing them on a down instance
9644 _StartInstanceDisks(self.lu, self.instance, True)
9647 # Should we replace the secondary node?
9648 if self.new_node is not None:
9649 fn = self._ExecDrbd8Secondary
9651 fn = self._ExecDrbd8DiskOnly
9653 result = fn(feedback_fn)
9655 # Deactivate the instance disks if we're replacing them on a down instance
9658 _SafeShutdownInstanceDisks(self.lu, self.instance)
9661 # Verify owned locks
9662 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9663 nodes = frozenset(self.node_secondary_ip)
9664 assert ((self.early_release and not owned_nodes) or
9665 (not self.early_release and not (set(owned_nodes) - nodes))), \
9666 ("Not owning the correct locks, early_release=%s, owned=%r,"
9667 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9671 def _CheckVolumeGroup(self, nodes):
9672 self.lu.LogInfo("Checking volume groups")
9674 vgname = self.cfg.GetVGName()
9676 # Make sure volume group exists on all involved nodes
9677 results = self.rpc.call_vg_list(nodes)
9679 raise errors.OpExecError("Can't list volume groups on the nodes")
9683 res.Raise("Error checking node %s" % node)
9684 if vgname not in res.payload:
9685 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9688 def _CheckDisksExistence(self, nodes):
9689 # Check disk existence
9690 for idx, dev in enumerate(self.instance.disks):
9691 if idx not in self.disks:
9695 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9696 self.cfg.SetDiskID(dev, node)
9698 result = self.rpc.call_blockdev_find(node, dev)
9700 msg = result.fail_msg
9701 if msg or not result.payload:
9703 msg = "disk not found"
9704 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9707 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9708 for idx, dev in enumerate(self.instance.disks):
9709 if idx not in self.disks:
9712 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9715 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9717 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9718 " replace disks for instance %s" %
9719 (node_name, self.instance.name))
9721 def _CreateNewStorage(self, node_name):
9722 """Create new storage on the primary or secondary node.
9724 This is only used for same-node replaces, not for changing the
9725 secondary node, hence we don't want to modify the existing disk.
9730 for idx, dev in enumerate(self.instance.disks):
9731 if idx not in self.disks:
9734 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9736 self.cfg.SetDiskID(dev, node_name)
9738 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9739 names = _GenerateUniqueNames(self.lu, lv_names)
9741 vg_data = dev.children[0].logical_id[0]
9742 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9743 logical_id=(vg_data, names[0]))
9744 vg_meta = dev.children[1].logical_id[0]
9745 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9746 logical_id=(vg_meta, names[1]))
9748 new_lvs = [lv_data, lv_meta]
9749 old_lvs = [child.Copy() for child in dev.children]
9750 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9752 # we pass force_create=True to force the LVM creation
9753 for new_lv in new_lvs:
9754 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9755 _GetInstanceInfoText(self.instance), False)
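# The mapping built above is consumed by the later steps; per replaced disk it
# roughly looks like (sketch):
#   iv_names["disk/0"] == (drbd_dev, [old_data_lv, old_meta_lv],
#                          [new_data_lv, new_meta_lv])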
9759 def _CheckDevices(self, node_name, iv_names):
9760 for name, (dev, _, _) in iv_names.iteritems():
9761 self.cfg.SetDiskID(dev, node_name)
9763 result = self.rpc.call_blockdev_find(node_name, dev)
9765 msg = result.fail_msg
9766 if msg or not result.payload:
9768 msg = "disk not found"
9769 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9772 if result.payload.is_degraded:
9773 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9775 def _RemoveOldStorage(self, node_name, iv_names):
9776 for name, (_, old_lvs, _) in iv_names.iteritems():
9777 self.lu.LogInfo("Remove logical volumes for %s" % name)
9780 self.cfg.SetDiskID(lv, node_name)
9782 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9784 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9785 hint="remove unused LVs manually")
9787 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9788 """Replace a disk on the primary or secondary for DRBD 8.
9790 The algorithm for replace is quite complicated:
9792 1. for each disk to be replaced:
9794 1. create new LVs on the target node with unique names
9795 1. detach old LVs from the drbd device
9796 1. rename old LVs to name_replaced.<time_t>
9797 1. rename new LVs to old LVs
9798 1. attach the new LVs (with the old names now) to the drbd device
9800 1. wait for sync across all devices
9802 1. for each modified disk:
9804 1. remove old LVs (which have the name name_replaced.<time_t>)
9806 Failures are not very well handled.
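In short, the per-disk LV juggling performed below is (sketch, hypothetical
names): detach [old_data, old_meta] from the drbd device; rename old_data to
old_data_replaced-<time_t> (same for the meta LV); rename new_data to old_data
(same for the meta LV); finally re-attach the LVs, now backed by the freshly
created storage.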
9811 # Step: check device activation
9812 self.lu.LogStep(1, steps_total, "Check device existence")
9813 self._CheckDisksExistence([self.other_node, self.target_node])
9814 self._CheckVolumeGroup([self.target_node, self.other_node])
9816 # Step: check other node consistency
9817 self.lu.LogStep(2, steps_total, "Check peer consistency")
9818 self._CheckDisksConsistency(self.other_node,
9819 self.other_node == self.instance.primary_node,
9822 # Step: create new storage
9823 self.lu.LogStep(3, steps_total, "Allocate new storage")
9824 iv_names = self._CreateNewStorage(self.target_node)
9826 # Step: for each lv, detach+rename*2+attach
9827 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9828 for dev, old_lvs, new_lvs in iv_names.itervalues():
9829 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9831 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9833 result.Raise("Can't detach drbd from local storage on node"
9834 " %s for device %s" % (self.target_node, dev.iv_name))
9836 #cfg.Update(instance)
9838 # ok, we created the new LVs, so now we know we have the needed
9839 # storage; as such, we proceed on the target node to rename
9840 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9841 # using the assumption that logical_id == physical_id (which in
9842 # turn is the unique_id on that node)
9844 # FIXME(iustin): use a better name for the replaced LVs
9845 temp_suffix = int(time.time())
9846 ren_fn = lambda d, suff: (d.physical_id[0],
9847 d.physical_id[1] + "_replaced-%s" % suff)
9849 # Build the rename list based on what LVs exist on the node
9850 rename_old_to_new = []
9851 for to_ren in old_lvs:
9852 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9853 if not result.fail_msg and result.payload:
9855 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9857 self.lu.LogInfo("Renaming the old LVs on the target node")
9858 result = self.rpc.call_blockdev_rename(self.target_node,
9860 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9862 # Now we rename the new LVs to the old LVs
9863 self.lu.LogInfo("Renaming the new LVs on the target node")
9864 rename_new_to_old = [(new, old.physical_id)
9865 for old, new in zip(old_lvs, new_lvs)]
9866 result = self.rpc.call_blockdev_rename(self.target_node,
9868 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9870 # Intermediate steps of in memory modifications
9871 for old, new in zip(old_lvs, new_lvs):
9872 new.logical_id = old.logical_id
9873 self.cfg.SetDiskID(new, self.target_node)
9875 # We need to modify old_lvs so that removal later removes the
9876 # right LVs, not the newly added ones; note that old_lvs is a copy here
9878 for disk in old_lvs:
9879 disk.logical_id = ren_fn(disk, temp_suffix)
9880 self.cfg.SetDiskID(disk, self.target_node)
9882 # Now that the new lvs have the old name, we can add them to the device
9883 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9884 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9886 msg = result.fail_msg
9888 for new_lv in new_lvs:
9889 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9892 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9893 hint=("cleanup manually the unused logical"
9895 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9898 if self.early_release:
9899 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9901 self._RemoveOldStorage(self.target_node, iv_names)
9902 # WARNING: we release both node locks here, do not do other RPCs
9903 # than WaitForSync to the primary node
9904 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9905 names=[self.target_node, self.other_node])
9908 # This can fail as the old devices are degraded and _WaitForSync
9909 # does a combined result over all disks, so we don't check its return value
9910 self.lu.LogStep(cstep, steps_total, "Sync devices")
9912 _WaitForSync(self.lu, self.instance)
9914 # Check all devices manually
9915 self._CheckDevices(self.instance.primary_node, iv_names)
9917 # Step: remove old storage
9918 if not self.early_release:
9919 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9921 self._RemoveOldStorage(self.target_node, iv_names)
9923 def _ExecDrbd8Secondary(self, feedback_fn):
9924 """Replace the secondary node for DRBD 8.
9926 The algorithm for replace is quite complicated:
9927 - for all disks of the instance:
9928 - create new LVs on the new node with same names
9929 - shutdown the drbd device on the old secondary
9930 - disconnect the drbd network on the primary
9931 - create the drbd device on the new secondary
9932 - network attach the drbd on the primary, using an artifice:
9933 the drbd code for Attach() will connect to the network if it
9934 finds a device which is connected to the good local disks but not network enabled
9936 - wait for sync across all devices
9937 - remove all disks from the old secondary
9939 Failures are not very well handled.
9944 pnode = self.instance.primary_node
9946 # Step: check device activation
9947 self.lu.LogStep(1, steps_total, "Check device existence")
9948 self._CheckDisksExistence([self.instance.primary_node])
9949 self._CheckVolumeGroup([self.instance.primary_node])
9951 # Step: check other node consistency
9952 self.lu.LogStep(2, steps_total, "Check peer consistency")
9953 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9955 # Step: create new storage
9956 self.lu.LogStep(3, steps_total, "Allocate new storage")
9957 for idx, dev in enumerate(self.instance.disks):
9958 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9959 (self.new_node, idx))
9960 # we pass force_create=True to force LVM creation
9961 for new_lv in dev.children:
9962 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9963 _GetInstanceInfoText(self.instance), False)
9965 # Step 4: drbd minors and drbd setup changes
9966 # after this, we must manually remove the drbd minors on both the
9967 # error and the success paths
9968 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9969 minors = self.cfg.AllocateDRBDMinor([self.new_node
9970 for dev in self.instance.disks],
9972 logging.debug("Allocated minors %r", minors)
9975 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9976 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9977 (self.new_node, idx))
9978 # create new devices on new_node; note that we create two IDs:
9979 # one without port, so the drbd will be activated without
9980 # networking information on the new node at this stage, and one
9981 # with network, for the latter activation in step 4
9982 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9983 if self.instance.primary_node == o_node1:
9986 assert self.instance.primary_node == o_node2, "Three-node instance?"
9989 new_alone_id = (self.instance.primary_node, self.new_node, None,
9990 p_minor, new_minor, o_secret)
9991 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9992 p_minor, new_minor, o_secret)
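# Sketch of the two IDs built above (hypothetical values), following the
# (node_a, node_b, port, minor_a, minor_b, secret) layout of a DRBD8
# logical_id:
#   new_alone_id = ("node1", "node3", None,  0, 5, "secret")  # no port yet
#   new_net_id   = ("node1", "node3", 11005, 0, 5, "secret")  # used later on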
9994 iv_names[idx] = (dev, dev.children, new_net_id)
9995 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9997 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9998 logical_id=new_alone_id,
9999 children=dev.children,
10002 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10003 _GetInstanceInfoText(self.instance), False)
10004 except errors.GenericError:
10005 self.cfg.ReleaseDRBDMinors(self.instance.name)
10008 # We have new devices, shutdown the drbd on the old secondary
10009 for idx, dev in enumerate(self.instance.disks):
10010 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10011 self.cfg.SetDiskID(dev, self.target_node)
10012 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10014 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10015 " node: %s" % (idx, msg),
10016 hint=("Please cleanup this device manually as"
10017 " soon as possible"))
10019 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10020 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10021 self.instance.disks)[pnode]
10023 msg = result.fail_msg
10025 # detaches didn't succeed (unlikely)
10026 self.cfg.ReleaseDRBDMinors(self.instance.name)
10027 raise errors.OpExecError("Can't detach the disks from the network on"
10028 " old node: %s" % (msg,))
10030 # if we managed to detach at least one, we update all the disks of
10031 # the instance to point to the new secondary
10032 self.lu.LogInfo("Updating instance configuration")
10033 for dev, _, new_logical_id in iv_names.itervalues():
10034 dev.logical_id = new_logical_id
10035 self.cfg.SetDiskID(dev, self.instance.primary_node)
10037 self.cfg.Update(self.instance, feedback_fn)
10039 # and now perform the drbd attach
10040 self.lu.LogInfo("Attaching primary drbds to new secondary"
10041 " (standalone => connected)")
10042 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10044 self.node_secondary_ip,
10045 self.instance.disks,
10046 self.instance.name,
10048 for to_node, to_result in result.items():
10049 msg = to_result.fail_msg
10051 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10053 hint=("please do a gnt-instance info to see the"
10054 " status of disks"))
10056 if self.early_release:
10057 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10059 self._RemoveOldStorage(self.target_node, iv_names)
10060 # WARNING: we release all node locks here, do not do other RPCs
10061 # than WaitForSync to the primary node
10062 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10063 names=[self.instance.primary_node,
10068 # This can fail as the old devices are degraded and _WaitForSync
10069 # does a combined result over all disks, so we don't check its return value
10070 self.lu.LogStep(cstep, steps_total, "Sync devices")
10072 _WaitForSync(self.lu, self.instance)
10074 # Check all devices manually
10075 self._CheckDevices(self.instance.primary_node, iv_names)
10077 # Step: remove old storage
10078 if not self.early_release:
10079 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10080 self._RemoveOldStorage(self.target_node, iv_names)
10083 class LURepairNodeStorage(NoHooksLU):
10084 """Repairs the volume group on a node.
10089 def CheckArguments(self):
10090 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10092 storage_type = self.op.storage_type
10094 if (constants.SO_FIX_CONSISTENCY not in
10095 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10096 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10097 " repaired" % storage_type,
10098 errors.ECODE_INVAL)
10100 def ExpandNames(self):
10101 self.needed_locks = {
10102 locking.LEVEL_NODE: [self.op.node_name],
10105 def _CheckFaultyDisks(self, instance, node_name):
10106 """Ensure faulty disks abort the opcode or at least warn."""
10108 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10110 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10111 " node '%s'" % (instance.name, node_name),
10112 errors.ECODE_STATE)
10113 except errors.OpPrereqError, err:
10114 if self.op.ignore_consistency:
10115 self.proc.LogWarning(str(err.args[0]))
10119 def CheckPrereq(self):
10120 """Check prerequisites.
10123 # Check whether any instance on this node has faulty disks
10124 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10125 if not inst.admin_up:
10127 check_nodes = set(inst.all_nodes)
10128 check_nodes.discard(self.op.node_name)
10129 for inst_node_name in check_nodes:
10130 self._CheckFaultyDisks(inst, inst_node_name)
10132 def Exec(self, feedback_fn):
10133 feedback_fn("Repairing storage unit '%s' on %s ..." %
10134 (self.op.name, self.op.node_name))
10136 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10137 result = self.rpc.call_storage_execute(self.op.node_name,
10138 self.op.storage_type, st_args,
10140 constants.SO_FIX_CONSISTENCY)
10141 result.Raise("Failed to repair storage unit '%s' on %s" %
10142 (self.op.name, self.op.node_name))
10145 class LUNodeEvacuate(NoHooksLU):
10146 """Evacuates instances off a list of nodes.
10151 def CheckArguments(self):
10152 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10154 def ExpandNames(self):
10155 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10157 if self.op.remote_node is not None:
10158 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10159 assert self.op.remote_node
10161 if self.op.remote_node == self.op.node_name:
10162 raise errors.OpPrereqError("Can not use evacuated node as a new"
10163 " secondary node", errors.ECODE_INVAL)
10165 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10166 raise errors.OpPrereqError("Without the use of an iallocator only"
10167 " secondary instances can be evacuated",
10168 errors.ECODE_INVAL)
10171 self.share_locks = _ShareAll()
10172 self.needed_locks = {
10173 locking.LEVEL_INSTANCE: [],
10174 locking.LEVEL_NODEGROUP: [],
10175 locking.LEVEL_NODE: [],
10178 if self.op.remote_node is None:
10179 # Iallocator will choose any node(s) in the same group
10180 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10182 group_nodes = frozenset([self.op.remote_node])
10184 # Determine nodes to be locked
10185 self.lock_nodes = set([self.op.node_name]) | group_nodes
10187 def _DetermineInstances(self):
10188 """Builds list of instances to operate on.
10191 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10193 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10194 # Primary instances only
10195 inst_fn = _GetNodePrimaryInstances
10196 assert self.op.remote_node is None, \
10197 "Evacuating primary instances requires iallocator"
10198 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10199 # Secondary instances only
10200 inst_fn = _GetNodeSecondaryInstances
10203 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10204 inst_fn = _GetNodeInstances
10206 return inst_fn(self.cfg, self.op.node_name)
10208 def DeclareLocks(self, level):
10209 if level == locking.LEVEL_INSTANCE:
10210 # Lock instances optimistically, needs verification once node and group
10211 # locks have been acquired
10212 self.needed_locks[locking.LEVEL_INSTANCE] = \
10213 set(i.name for i in self._DetermineInstances())
10215 elif level == locking.LEVEL_NODEGROUP:
10216 # Lock node groups optimistically, needs verification once nodes have been acquired
10218 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10219 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10221 elif level == locking.LEVEL_NODE:
10222 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10224 def CheckPrereq(self):
10226 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10227 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10228 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10230 assert owned_nodes == self.lock_nodes
10232 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10233 if owned_groups != wanted_groups:
10234 raise errors.OpExecError("Node groups changed since locks were acquired,"
10235 " current groups are '%s', used to be '%s'" %
10236 (utils.CommaJoin(wanted_groups),
10237 utils.CommaJoin(owned_groups)))
10239 # Determine affected instances
10240 self.instances = self._DetermineInstances()
10241 self.instance_names = [i.name for i in self.instances]
10243 if set(self.instance_names) != owned_instances:
10244 raise errors.OpExecError("Instances on node '%s' changed since locks"
10245 " were acquired, current instances are '%s',"
10246 " used to be '%s'" %
10247 (self.op.node_name,
10248 utils.CommaJoin(self.instance_names),
10249 utils.CommaJoin(owned_instances)))
10251 if self.instance_names:
10252 self.LogInfo("Evacuating instances from node '%s': %s",
10254 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10256 self.LogInfo("No instances to evacuate from node '%s'",
10259 if self.op.remote_node is not None:
10260 for i in self.instances:
10261 if i.primary_node == self.op.remote_node:
10262 raise errors.OpPrereqError("Node %s is the primary node of"
10263 " instance %s, cannot use it as"
10265 (self.op.remote_node, i.name),
10266 errors.ECODE_INVAL)
10268 def Exec(self, feedback_fn):
10269 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10271 if not self.instance_names:
10272 # No instances to evacuate
10275 elif self.op.iallocator is not None:
10276 # TODO: Implement relocation to other group
10277 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10278 evac_mode=self.op.mode,
10279 instances=list(self.instance_names))
10281 ial.Run(self.op.iallocator)
10283 if not ial.success:
10284 raise errors.OpPrereqError("Can't compute node evacuation using"
10285 " iallocator '%s': %s" %
10286 (self.op.iallocator, ial.info),
10287 errors.ECODE_NORES)
10289 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10291 elif self.op.remote_node is not None:
10292 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10294 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10295 remote_node=self.op.remote_node,
10297 mode=constants.REPLACE_DISK_CHG,
10298 early_release=self.op.early_release)]
10299 for instance_name in self.instance_names
10303 raise errors.ProgrammerError("No iallocator or remote node")
10305 return ResultWithJobs(jobs)
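# Sketch of the result in the remote_node branch (hypothetical names):
# evacuating two secondary instances onto "node4" yields one single-opcode job
# per instance, e.g.
#   jobs == [[OpInstanceReplaceDisks(instance_name="inst1", remote_node="node4",
#                                    mode=constants.REPLACE_DISK_CHG, ...)],
#            [OpInstanceReplaceDisks(instance_name="inst2", ...)]]
# which the job processor then submits on the LU's behalf.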
10308 def _SetOpEarlyRelease(early_release, op):
10309 """Sets C{early_release} flag on opcodes if available.
10313 op.early_release = early_release
10314 except AttributeError:
10315 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10320 def _NodeEvacDest(use_nodes, group, nodes):
10321 """Returns group or nodes depending on caller's choice.
10325 return utils.CommaJoin(nodes)
10330 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10331 """Unpacks the result of change-group and node-evacuate iallocator requests.
10333 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10334 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10336 @type lu: L{LogicalUnit}
10337 @param lu: Logical unit instance
10338 @type alloc_result: tuple/list
10339 @param alloc_result: Result from iallocator
10340 @type early_release: bool
10341 @param early_release: Whether to release locks early if possible
10342 @type use_nodes: bool
10343 @param use_nodes: Whether to display node names instead of groups
10346 (moved, failed, jobs) = alloc_result
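# For reference (sketch, simplified): "moved" holds (name, group, nodes)
# tuples, "failed" holds (name, reason) tuples, and "jobs" is a list of job
# definitions, each a list of serialized opcodes, e.g.
#   moved  == [("inst1", "group1", ["node3"])]
#   failed == [("inst2", "instance has no secondary node")]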
10349 lu.LogWarning("Unable to evacuate instances %s",
10350 utils.CommaJoin("%s (%s)" % (name, reason)
10351 for (name, reason) in failed))
10354 lu.LogInfo("Instances to be moved: %s",
10355 utils.CommaJoin("%s (to %s)" %
10356 (name, _NodeEvacDest(use_nodes, group, nodes))
10357 for (name, group, nodes) in moved))
10359 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10360 map(opcodes.OpCode.LoadOpCode, ops)) for ops in jobs]
10364 class LUInstanceGrowDisk(LogicalUnit):
10365 """Grow a disk of an instance.
10368 HPATH = "disk-grow"
10369 HTYPE = constants.HTYPE_INSTANCE
10372 def ExpandNames(self):
10373 self._ExpandAndLockInstance()
10374 self.needed_locks[locking.LEVEL_NODE] = []
10375 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10377 def DeclareLocks(self, level):
10378 if level == locking.LEVEL_NODE:
10379 self._LockInstancesNodes()
10381 def BuildHooksEnv(self):
10382 """Build hooks env.
10384 This runs on the master, the primary and all the secondaries.
10388 "DISK": self.op.disk,
10389 "AMOUNT": self.op.amount,
10391 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10394 def BuildHooksNodes(self):
10395 """Build hooks nodes.
10398 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10401 def CheckPrereq(self):
10402 """Check prerequisites.
10404 This checks that the instance is in the cluster.
10407 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10408 assert instance is not None, \
10409 "Cannot retrieve locked instance %s" % self.op.instance_name
10410 nodenames = list(instance.all_nodes)
10411 for node in nodenames:
10412 _CheckNodeOnline(self, node)
10414 self.instance = instance
10416 if instance.disk_template not in constants.DTS_GROWABLE:
10417 raise errors.OpPrereqError("Instance's disk layout does not support"
10418 " growing", errors.ECODE_INVAL)
10420 self.disk = instance.FindDisk(self.op.disk)
10422 if instance.disk_template not in (constants.DT_FILE,
10423 constants.DT_SHARED_FILE):
10424 # TODO: check the free disk space for file, when that feature will be implemented
10426 _CheckNodesFreeDiskPerVG(self, nodenames,
10427 self.disk.ComputeGrowth(self.op.amount))
10429 def Exec(self, feedback_fn):
10430 """Execute disk grow.
10433 instance = self.instance
10436 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10438 raise errors.OpExecError("Cannot activate block device to grow")
10440 # First run all grow ops in dry-run mode
10441 for node in instance.all_nodes:
10442 self.cfg.SetDiskID(disk, node)
10443 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10444 result.Raise("Grow request failed to node %s" % node)
10446 # We know that (as far as we can test) operations across different
10447 # nodes will succeed, time to run it for real
10448 for node in instance.all_nodes:
10449 self.cfg.SetDiskID(disk, node)
10450 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10451 result.Raise("Grow request failed to node %s" % node)
10453 # TODO: Rewrite code to work properly
10454 # DRBD goes into sync mode for a short amount of time after executing the
10455 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10456 # calling "resize" in sync mode fails. Sleeping for a short amount of
10457 # time is a work-around.
10460 disk.RecordGrow(self.op.amount)
10461 self.cfg.Update(instance, feedback_fn)
10462 if self.op.wait_for_sync:
10463 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10465 self.proc.LogWarning("Disk sync-ing has not returned a good"
10466 " status; please check the instance")
10467 if not instance.admin_up:
10468 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10469 elif not instance.admin_up:
10470 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10471 " not supposed to be running because no wait for"
10472 " sync mode was requested")
10475 class LUInstanceQueryData(NoHooksLU):
10476 """Query runtime instance data.
10481 def ExpandNames(self):
10482 self.needed_locks = {}
10484 # Use locking if requested or when non-static information is wanted
10485 if not (self.op.static or self.op.use_locking):
10486 self.LogWarning("Non-static data requested, locks need to be acquired")
10487 self.op.use_locking = True
10489 if self.op.instances or not self.op.use_locking:
10490 # Expand instance names right here
10491 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10493 # Will use acquired locks
10494 self.wanted_names = None
10496 if self.op.use_locking:
10497 self.share_locks = _ShareAll()
10499 if self.wanted_names is None:
10500 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10502 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10504 self.needed_locks[locking.LEVEL_NODE] = []
10505 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10507 def DeclareLocks(self, level):
10508 if self.op.use_locking and level == locking.LEVEL_NODE:
10509 self._LockInstancesNodes()
10511 def CheckPrereq(self):
10512 """Check prerequisites.
10514 This only checks the optional instance list against the existing names.
10517 if self.wanted_names is None:
10518 assert self.op.use_locking, "Locking was not used"
10519 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10521 self.wanted_instances = \
10522 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10524 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10525 """Returns the status of a block device
10528 if self.op.static or not node:
10531 self.cfg.SetDiskID(dev, node)
10533 result = self.rpc.call_blockdev_find(node, dev)
10537 result.Raise("Can't compute disk status for %s" % instance_name)
10539 status = result.payload
10543 return (status.dev_path, status.major, status.minor,
10544 status.sync_percent, status.estimated_time,
10545 status.is_degraded, status.ldisk_status)
10547 def _ComputeDiskStatus(self, instance, snode, dev):
10548 """Compute block device status.
10551 if dev.dev_type in constants.LDS_DRBD:
10552 # we change the snode then (otherwise we use the one passed in)
10553 if dev.logical_id[0] == instance.primary_node:
10554 snode = dev.logical_id[1]
10556 snode = dev.logical_id[0]
10558 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10559 instance.name, dev)
10560 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10563 dev_children = map(compat.partial(self._ComputeDiskStatus,
10570 "iv_name": dev.iv_name,
10571 "dev_type": dev.dev_type,
10572 "logical_id": dev.logical_id,
10573 "physical_id": dev.physical_id,
10574 "pstatus": dev_pstatus,
10575 "sstatus": dev_sstatus,
10576 "children": dev_children,
10581 def Exec(self, feedback_fn):
10582 """Gather and return data"""
10585 cluster = self.cfg.GetClusterInfo()
10587 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10588 for i in self.wanted_instances)
10589 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10590 if self.op.static or pnode.offline:
10591 remote_state = None
10593 self.LogWarning("Primary node %s is marked offline, returning static"
10594 " information only for instance %s" %
10595 (pnode.name, instance.name))
10597 remote_info = self.rpc.call_instance_info(instance.primary_node,
10599 instance.hypervisor)
10600 remote_info.Raise("Error checking node %s" % instance.primary_node)
10601 remote_info = remote_info.payload
10602 if remote_info and "state" in remote_info:
10603 remote_state = "up"
10605 remote_state = "down"
10607 if instance.admin_up:
10608 config_state = "up"
10610 config_state = "down"
10612 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10615 result[instance.name] = {
10616 "name": instance.name,
10617 "config_state": config_state,
10618 "run_state": remote_state,
10619 "pnode": instance.primary_node,
10620 "snodes": instance.secondary_nodes,
10622 # this happens to be the same format used for hooks
10623 "nics": _NICListToTuple(self, instance.nics),
10624 "disk_template": instance.disk_template,
10626 "hypervisor": instance.hypervisor,
10627 "network_port": instance.network_port,
10628 "hv_instance": instance.hvparams,
10629 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10630 "be_instance": instance.beparams,
10631 "be_actual": cluster.FillBE(instance),
10632 "os_instance": instance.osparams,
10633 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10634 "serial_no": instance.serial_no,
10635 "mtime": instance.mtime,
10636 "ctime": instance.ctime,
10637 "uuid": instance.uuid,
10643 class LUInstanceSetParams(LogicalUnit):
10644 """Modifies an instances's parameters.
10647 HPATH = "instance-modify"
10648 HTYPE = constants.HTYPE_INSTANCE
10651 def CheckArguments(self):
10652 if not (self.op.nics or self.op.disks or self.op.disk_template or
10653 self.op.hvparams or self.op.beparams or self.op.os_name):
10654 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10656 if self.op.hvparams:
10657 _CheckGlobalHvParams(self.op.hvparams)
10661 for disk_op, disk_dict in self.op.disks:
10662 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10663 if disk_op == constants.DDM_REMOVE:
10664 disk_addremove += 1
10666 elif disk_op == constants.DDM_ADD:
10667 disk_addremove += 1
10669 if not isinstance(disk_op, int):
10670 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10671 if not isinstance(disk_dict, dict):
10672 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10673 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10675 if disk_op == constants.DDM_ADD:
10676 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10677 if mode not in constants.DISK_ACCESS_SET:
10678 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10679 errors.ECODE_INVAL)
10680 size = disk_dict.get(constants.IDISK_SIZE, None)
10682 raise errors.OpPrereqError("Required disk parameter size missing",
10683 errors.ECODE_INVAL)
10686 except (TypeError, ValueError), err:
10687 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10688 str(err), errors.ECODE_INVAL)
10689 disk_dict[constants.IDISK_SIZE] = size
10691 # modification of disk
10692 if constants.IDISK_SIZE in disk_dict:
10693 raise errors.OpPrereqError("Disk size change not possible, use"
10694 " grow-disk", errors.ECODE_INVAL)
10696 if disk_addremove > 1:
10697 raise errors.OpPrereqError("Only one disk add or remove operation"
10698 " supported at a time", errors.ECODE_INVAL)
10700 if self.op.disks and self.op.disk_template is not None:
10701 raise errors.OpPrereqError("Disk template conversion and other disk"
10702 " changes not supported at the same time",
10703 errors.ECODE_INVAL)
10705 if (self.op.disk_template and
10706 self.op.disk_template in constants.DTS_INT_MIRROR and
10707 self.op.remote_node is None):
10708 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10709 " one requires specifying a secondary node",
10710 errors.ECODE_INVAL)
10714 for nic_op, nic_dict in self.op.nics:
10715 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10716 if nic_op == constants.DDM_REMOVE:
10719 elif nic_op == constants.DDM_ADD:
10722 if not isinstance(nic_op, int):
10723 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10724 if not isinstance(nic_dict, dict):
10725 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10726 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10728 # nic_dict should be a dict
10729 nic_ip = nic_dict.get(constants.INIC_IP, None)
10730 if nic_ip is not None:
10731 if nic_ip.lower() == constants.VALUE_NONE:
10732 nic_dict[constants.INIC_IP] = None
10734 if not netutils.IPAddress.IsValid(nic_ip):
10735 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10736 errors.ECODE_INVAL)
10738 nic_bridge = nic_dict.get("bridge", None)
10739 nic_link = nic_dict.get(constants.INIC_LINK, None)
10740 if nic_bridge and nic_link:
10741 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10742 " at the same time", errors.ECODE_INVAL)
10743 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10744 nic_dict["bridge"] = None
10745 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10746 nic_dict[constants.INIC_LINK] = None
10748 if nic_op == constants.DDM_ADD:
10749 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10750 if nic_mac is None:
10751 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10753 if constants.INIC_MAC in nic_dict:
10754 nic_mac = nic_dict[constants.INIC_MAC]
10755 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10756 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10758 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10759 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10760 " modifying an existing nic",
10761 errors.ECODE_INVAL)
10763 if nic_addremove > 1:
10764 raise errors.OpPrereqError("Only one NIC add or remove operation"
10765 " supported at a time", errors.ECODE_INVAL)
10767 def ExpandNames(self):
10768 self._ExpandAndLockInstance()
10769 self.needed_locks[locking.LEVEL_NODE] = []
10770 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10772 def DeclareLocks(self, level):
10773 if level == locking.LEVEL_NODE:
10774 self._LockInstancesNodes()
10775 if self.op.disk_template and self.op.remote_node:
10776 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10777 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10779 def BuildHooksEnv(self):
10780 """Build hooks env.
10782 This runs on the master, primary and secondaries.
10786 if constants.BE_MEMORY in self.be_new:
10787 args["memory"] = self.be_new[constants.BE_MEMORY]
10788 if constants.BE_VCPUS in self.be_new:
10789 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10790 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10791 # information at all.
10794 nic_override = dict(self.op.nics)
10795 for idx, nic in enumerate(self.instance.nics):
10796 if idx in nic_override:
10797 this_nic_override = nic_override[idx]
10799 this_nic_override = {}
10800 if constants.INIC_IP in this_nic_override:
10801 ip = this_nic_override[constants.INIC_IP]
10804 if constants.INIC_MAC in this_nic_override:
10805 mac = this_nic_override[constants.INIC_MAC]
10808 if idx in self.nic_pnew:
10809 nicparams = self.nic_pnew[idx]
10811 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10812 mode = nicparams[constants.NIC_MODE]
10813 link = nicparams[constants.NIC_LINK]
10814 args["nics"].append((ip, mac, mode, link))
10815 if constants.DDM_ADD in nic_override:
10816 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10817 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10818 nicparams = self.nic_pnew[constants.DDM_ADD]
10819 mode = nicparams[constants.NIC_MODE]
10820 link = nicparams[constants.NIC_LINK]
10821 args["nics"].append((ip, mac, mode, link))
10822 elif constants.DDM_REMOVE in nic_override:
10823 del args["nics"][-1]
10825 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10826 if self.op.disk_template:
10827 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10831 def BuildHooksNodes(self):
10832 """Build hooks nodes.
10835 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10838 def CheckPrereq(self):
10839 """Check prerequisites.
10841 This only checks the instance list against the existing names.
10844 # checking the new params on the primary/secondary nodes
10846 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10847 cluster = self.cluster = self.cfg.GetClusterInfo()
10848 assert self.instance is not None, \
10849 "Cannot retrieve locked instance %s" % self.op.instance_name
10850 pnode = instance.primary_node
10851 nodelist = list(instance.all_nodes)
10854 if self.op.os_name and not self.op.force:
10855 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10856 self.op.force_variant)
10857 instance_os = self.op.os_name
10859 instance_os = instance.os
10861 if self.op.disk_template:
10862 if instance.disk_template == self.op.disk_template:
10863 raise errors.OpPrereqError("Instance already has disk template %s" %
10864 instance.disk_template, errors.ECODE_INVAL)
10866 if (instance.disk_template,
10867 self.op.disk_template) not in self._DISK_CONVERSIONS:
10868 raise errors.OpPrereqError("Unsupported disk template conversion from"
10869 " %s to %s" % (instance.disk_template,
10870 self.op.disk_template),
10871 errors.ECODE_INVAL)
10872 _CheckInstanceDown(self, instance, "cannot change disk template")
10873 if self.op.disk_template in constants.DTS_INT_MIRROR:
10874 if self.op.remote_node == pnode:
10875 raise errors.OpPrereqError("Given new secondary node %s is the same"
10876 " as the primary node of the instance" %
10877 self.op.remote_node, errors.ECODE_STATE)
10878 _CheckNodeOnline(self, self.op.remote_node)
10879 _CheckNodeNotDrained(self, self.op.remote_node)
10880 # FIXME: here we assume that the old instance type is DT_PLAIN
10881 assert instance.disk_template == constants.DT_PLAIN
10882 disks = [{constants.IDISK_SIZE: d.size,
10883 constants.IDISK_VG: d.logical_id[0]}
10884 for d in instance.disks]
10885 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10886 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10888 # hvparams processing
10889 if self.op.hvparams:
10890 hv_type = instance.hypervisor
10891 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10892 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10893 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10896 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10897 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10898 self.hv_proposed = self.hv_new = hv_new # the new actual values
10899 self.hv_inst = i_hvdict # the new dict (without defaults)
10901 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
10903 self.hv_new = self.hv_inst = {}
10905 # beparams processing
10906 if self.op.beparams:
10907 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10909 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10910 be_new = cluster.SimpleFillBE(i_bedict)
10911 self.be_proposed = self.be_new = be_new # the new actual values
10912 self.be_inst = i_bedict # the new dict (without defaults)
10914 self.be_new = self.be_inst = {}
10915 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
10916 be_old = cluster.FillBE(instance)
10918 # CPU param validation -- checking every time a parameter is
10919 # changed to cover all cases where either CPU mask or vcpus have
10921 if (constants.BE_VCPUS in self.be_proposed and
10922 constants.HV_CPU_MASK in self.hv_proposed):
10924 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
10925 # Verify mask is consistent with number of vCPUs. Can skip this
10926 # test if only 1 entry in the CPU mask, which means same mask
10927 # is applied to all vCPUs.
10928 if (len(cpu_list) > 1 and
10929 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
10930 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the CPU mask [%s]" %
10932 (self.be_proposed[constants.BE_VCPUS],
10933 self.hv_proposed[constants.HV_CPU_MASK]),
10934 errors.ECODE_INVAL)
10936 # Only perform this test if a new CPU mask is given
10937 if constants.HV_CPU_MASK in self.hv_new:
10938 # Calculate the largest CPU number requested
10939 max_requested_cpu = max(map(max, cpu_list))
10940 # Check that all of the instance's nodes have enough physical CPUs to
10941 # satisfy the requested CPU mask
10942 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
10943 max_requested_cpu + 1, instance.hypervisor)
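# Worked example (hypothetical mask): a mask of "0-1:2-3" with BE_VCPUS == 2
# parses into [[0, 1], [2, 3]]; the entry count matches the vCPU count, the
# largest requested CPU is 3, so every node must expose at least 4 physical
# CPUs for the check above to pass.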
10945 # osparams processing
10946 if self.op.osparams:
10947 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10948 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10949 self.os_inst = i_osdict # the new dict (without defaults)
10955 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10956 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10957 mem_check_list = [pnode]
10958 if be_new[constants.BE_AUTO_BALANCE]:
10959 # either we changed auto_balance to yes or it was from before
10960 mem_check_list.extend(instance.secondary_nodes)
10961 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10962 instance.hypervisor)
10963 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10964 instance.hypervisor)
10965 pninfo = nodeinfo[pnode]
10966 msg = pninfo.fail_msg
10968 # Assume the primary node is unreachable and go ahead
10969 self.warn.append("Can't get info from primary node %s: %s" %
10971 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10972 self.warn.append("Node data from primary node %s doesn't contain"
10973 " free memory information" % pnode)
10974 elif instance_info.fail_msg:
10975 self.warn.append("Can't get instance runtime information: %s" %
10976 instance_info.fail_msg)
10978 if instance_info.payload:
10979 current_mem = int(instance_info.payload["memory"])
10981 # Assume instance not running
10982 # (there is a slight race condition here, but it's not very probable,
10983 # and we have no other way to check)
10985 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10986 pninfo.payload["memory_free"])
10988 raise errors.OpPrereqError("This change will prevent the instance"
10989 " from starting, due to %d MB of memory"
10990 " missing on its primary node" % miss_mem,
10991 errors.ECODE_NORES)
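# Worked example (hypothetical numbers): raising BE_MEMORY from 1024 to
# 4096 MB while the instance currently uses 1024 MB and the primary node
# reports 2048 MB free gives miss_mem = 4096 - 1024 - 2048 = 1024 > 0, so the
# change is refused.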
10993 if be_new[constants.BE_AUTO_BALANCE]:
10994 for node, nres in nodeinfo.items():
10995 if node not in instance.secondary_nodes:
10997 nres.Raise("Can't get info from secondary node %s" % node,
10998 prereq=True, ecode=errors.ECODE_STATE)
10999 if not isinstance(nres.payload.get("memory_free", None), int):
11000 raise errors.OpPrereqError("Secondary node %s didn't return free"
11001 " memory information" % node,
11002 errors.ECODE_STATE)
11003 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11004 raise errors.OpPrereqError("This change will prevent the instance"
11005 " from failover to its secondary node"
11006 " %s, due to not enough memory" % node,
11007 errors.ECODE_STATE)
11011 self.nic_pinst = {}
11012 for nic_op, nic_dict in self.op.nics:
11013 if nic_op == constants.DDM_REMOVE:
11014 if not instance.nics:
11015 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11016 errors.ECODE_INVAL)
11018 if nic_op != constants.DDM_ADD:
11020 if not instance.nics:
11021 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11022 " no NICs" % nic_op,
11023 errors.ECODE_INVAL)
11024 if nic_op < 0 or nic_op >= len(instance.nics):
11025 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11027 (nic_op, len(instance.nics) - 1),
11028 errors.ECODE_INVAL)
11029 old_nic_params = instance.nics[nic_op].nicparams
11030 old_nic_ip = instance.nics[nic_op].ip
11032 old_nic_params = {}
11035 update_params_dict = dict([(key, nic_dict[key])
11036 for key in constants.NICS_PARAMETERS
11037 if key in nic_dict])
11039 if "bridge" in nic_dict:
11040 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11042 new_nic_params = _GetUpdatedParams(old_nic_params,
11043 update_params_dict)
11044 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11045 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11046 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11047 self.nic_pinst[nic_op] = new_nic_params
11048 self.nic_pnew[nic_op] = new_filled_nic_params
11049 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11051 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11052 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11053 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11054 if msg:
11055 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11056 if self.op.force:
11057 self.warn.append(msg)
11058 else:
11059 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11060 if new_nic_mode == constants.NIC_MODE_ROUTED:
11061 if constants.INIC_IP in nic_dict:
11062 nic_ip = nic_dict[constants.INIC_IP]
11063 else:
11064 nic_ip = old_nic_ip
11065 if nic_ip is None:
11066 raise errors.OpPrereqError("Cannot set the nic ip to None"
11067 " on a routed nic", errors.ECODE_INVAL)
11068 if constants.INIC_MAC in nic_dict:
11069 nic_mac = nic_dict[constants.INIC_MAC]
11070 if nic_mac is None:
11071 raise errors.OpPrereqError("Cannot set the nic mac to None",
11072 errors.ECODE_INVAL)
11073 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11074 # otherwise generate the mac
11075 nic_dict[constants.INIC_MAC] = \
11076 self.cfg.GenerateMAC(self.proc.GetECId())
11077 else:
11078 # or validate/reserve the current one
11079 try:
11080 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11081 except errors.ReservationError:
11082 raise errors.OpPrereqError("MAC address %s already in use"
11083 " in cluster" % nic_mac,
11084 errors.ECODE_NOTUNIQUE)
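# Illustrative shape of self.op.nics consumed by the loop above (values are
# hypothetical, not from the source): [(constants.DDM_ADD, {"mac": "auto",
# "mode": "bridged", "link": "xen-br0"}), (0, {"ip": "198.51.100.10"})].
# DDM_ADD/DDM_REMOVE entries add or drop a NIC, while an integer entry
# modifies the NIC at that index.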
11087 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11088 raise errors.OpPrereqError("Disk operations not supported for"
11089 " diskless instances",
11090 errors.ECODE_INVAL)
11091 for disk_op, _ in self.op.disks:
11092 if disk_op == constants.DDM_REMOVE:
11093 if len(instance.disks) == 1:
11094 raise errors.OpPrereqError("Cannot remove the last disk of"
11095 " an instance", errors.ECODE_INVAL)
11096 _CheckInstanceDown(self, instance, "cannot remove disks")
11098 if (disk_op == constants.DDM_ADD and
11099 len(instance.disks) >= constants.MAX_DISKS):
11100 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11101 " add more" % constants.MAX_DISKS,
11102 errors.ECODE_STATE)
11103 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11104 # an existing disk
11105 if disk_op < 0 or disk_op >= len(instance.disks):
11106 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11107 " are 0 to %d" %
11108 (disk_op, len(instance.disks)),
11109 errors.ECODE_INVAL)
11113 def _ConvertPlainToDrbd(self, feedback_fn):
11114 """Converts an instance from plain to drbd.
11117 feedback_fn("Converting template to drbd")
11118 instance = self.instance
11119 pnode = instance.primary_node
11120 snode = self.op.remote_node
11122 # create a fake disk info for _GenerateDiskTemplate
11123 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11124 constants.IDISK_VG: d.logical_id[0]}
11125 for d in instance.disks]
11126 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11127 instance.name, pnode, [snode],
11128 disk_info, None, None, 0, feedback_fn)
11129 info = _GetInstanceInfoText(instance)
11130 feedback_fn("Creating additional volumes...")
11131 # first, create the missing data and meta devices
11132 for disk in new_disks:
11133 # unfortunately this is... not too nice
11134 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11135 info, True)
11136 for child in disk.children:
11137 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11138 # at this stage, all new LVs have been created, we can rename the
11139 # old ones
11140 feedback_fn("Renaming original volumes...")
11141 rename_list = [(o, n.children[0].logical_id)
11142 for (o, n) in zip(instance.disks, new_disks)]
11143 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11144 result.Raise("Failed to rename original LVs")
11146 feedback_fn("Initializing DRBD devices...")
11147 # all child devices are in place, we can now create the DRBD devices
11148 for disk in new_disks:
11149 for node in [pnode, snode]:
11150 f_create = node == pnode
11151 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11153 # at this point, the instance has been modified
11154 instance.disk_template = constants.DT_DRBD8
11155 instance.disks = new_disks
11156 self.cfg.Update(instance, feedback_fn)
11158 # disks are created, waiting for sync
11159 disk_abort = not _WaitForSync(self, instance,
11160 oneshot=not self.op.wait_for_sync)
11161 if disk_abort:
11162 raise errors.OpExecError("There are some degraded disks for"
11163 " this instance, please cleanup manually")
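# Summary of the plain-to-drbd conversion above: DRBD disk objects are
# generated that reuse the existing LVs as their data children, the missing
# data/meta LVs are created on both nodes, the original LVs are renamed into
# the new naming scheme, the DRBD devices are created on top, and the method
# then waits for the initial sync.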
11165 def _ConvertDrbdToPlain(self, feedback_fn):
11166 """Converts an instance from drbd to plain.
11169 instance = self.instance
11170 assert len(instance.secondary_nodes) == 1
11171 pnode = instance.primary_node
11172 snode = instance.secondary_nodes[0]
11173 feedback_fn("Converting template to plain")
11175 old_disks = instance.disks
11176 new_disks = [d.children[0] for d in old_disks]
11178 # copy over size and mode
11179 for parent, child in zip(old_disks, new_disks):
11180 child.size = parent.size
11181 child.mode = parent.mode
11183 # update instance structure
11184 instance.disks = new_disks
11185 instance.disk_template = constants.DT_PLAIN
11186 self.cfg.Update(instance, feedback_fn)
11188 feedback_fn("Removing volumes on the secondary node...")
11189 for disk in old_disks:
11190 self.cfg.SetDiskID(disk, snode)
11191 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11192 if msg:
11193 self.LogWarning("Could not remove block device %s on node %s,"
11194 " continuing anyway: %s", disk.iv_name, snode, msg)
11196 feedback_fn("Removing unneeded volumes on the primary node...")
11197 for idx, disk in enumerate(old_disks):
11198 meta = disk.children[1]
11199 self.cfg.SetDiskID(meta, pnode)
11200 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11201 if msg:
11202 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11203 " continuing anyway: %s", idx, pnode, msg)
11205 def Exec(self, feedback_fn):
11206 """Modifies an instance.
11208 All parameters take effect only at the next restart of the instance.
11211 # Process here the warnings from CheckPrereq, as we don't have a
11212 # feedback_fn there.
11213 for warn in self.warn:
11214 feedback_fn("WARNING: %s" % warn)
11216 result = []
11217 instance = self.instance
11218 # disk changes
11219 for disk_op, disk_dict in self.op.disks:
11220 if disk_op == constants.DDM_REMOVE:
11221 # remove the last disk
11222 device = instance.disks.pop()
11223 device_idx = len(instance.disks)
11224 for node, disk in device.ComputeNodeTree(instance.primary_node):
11225 self.cfg.SetDiskID(disk, node)
11226 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11227 if msg:
11228 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11229 " continuing anyway", device_idx, node, msg)
11230 result.append(("disk/%d" % device_idx, "remove"))
11231 elif disk_op == constants.DDM_ADD:
11233 if instance.disk_template in (constants.DT_FILE,
11234 constants.DT_SHARED_FILE):
11235 file_driver, file_path = instance.disks[0].logical_id
11236 file_path = os.path.dirname(file_path)
11237 else:
11238 file_driver = file_path = None
11239 disk_idx_base = len(instance.disks)
11240 new_disk = _GenerateDiskTemplate(self,
11241 instance.disk_template,
11242 instance.name, instance.primary_node,
11243 instance.secondary_nodes,
11244 [disk_dict],
11245 file_path,
11246 file_driver,
11247 disk_idx_base, feedback_fn)[0]
11248 instance.disks.append(new_disk)
11249 info = _GetInstanceInfoText(instance)
11251 logging.info("Creating volume %s for instance %s",
11252 new_disk.iv_name, instance.name)
11253 # Note: this needs to be kept in sync with _CreateDisks
11255 for node in instance.all_nodes:
11256 f_create = node == instance.primary_node
11257 try:
11258 _CreateBlockDev(self, node, instance, new_disk,
11259 f_create, info, f_create)
11260 except errors.OpExecError, err:
11261 self.LogWarning("Failed to create volume %s (%s) on"
11262 " node %s: %s",
11263 new_disk.iv_name, new_disk, node, err)
11264 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11265 (new_disk.size, new_disk.mode)))
11266 else:
11267 # change a given disk
11268 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11269 result.append(("disk.mode/%d" % disk_op,
11270 disk_dict[constants.IDISK_MODE]))
11272 if self.op.disk_template:
11273 r_shut = _ShutdownInstanceDisks(self, instance)
11274 if not r_shut:
11275 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11276 " proceed with disk template conversion")
11277 mode = (instance.disk_template, self.op.disk_template)
11278 try:
11279 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11280 finally:
11281 self.cfg.ReleaseDRBDMinors(instance.name)
11283 result.append(("disk_template", self.op.disk_template))
11286 for nic_op, nic_dict in self.op.nics:
11287 if nic_op == constants.DDM_REMOVE:
11288 # remove the last nic
11289 del instance.nics[-1]
11290 result.append(("nic.%d" % len(instance.nics), "remove"))
11291 elif nic_op == constants.DDM_ADD:
11292 # mac and bridge should be set, by now
11293 mac = nic_dict[constants.INIC_MAC]
11294 ip = nic_dict.get(constants.INIC_IP, None)
11295 nicparams = self.nic_pinst[constants.DDM_ADD]
11296 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11297 instance.nics.append(new_nic)
11298 result.append(("nic.%d" % (len(instance.nics) - 1),
11299 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11300 (new_nic.mac, new_nic.ip,
11301 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11302 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11303 )))
11304 else:
11305 for key in (constants.INIC_MAC, constants.INIC_IP):
11306 if key in nic_dict:
11307 setattr(instance.nics[nic_op], key, nic_dict[key])
11308 if nic_op in self.nic_pinst:
11309 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11310 for key, val in nic_dict.iteritems():
11311 result.append(("nic.%s/%d" % (key, nic_op), val))
11314 if self.op.hvparams:
11315 instance.hvparams = self.hv_inst
11316 for key, val in self.op.hvparams.iteritems():
11317 result.append(("hv/%s" % key, val))
11320 if self.op.beparams:
11321 instance.beparams = self.be_inst
11322 for key, val in self.op.beparams.iteritems():
11323 result.append(("be/%s" % key, val))
11326 if self.op.os_name:
11327 instance.os = self.op.os_name
11330 if self.op.osparams:
11331 instance.osparams = self.os_inst
11332 for key, val in self.op.osparams.iteritems():
11333 result.append(("os/%s" % key, val))
11335 self.cfg.Update(instance, feedback_fn)
11337 return result
11339 _DISK_CONVERSIONS = {
11340 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11341 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11342 }
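# The dispatch table above is keyed on (current_template, requested_template);
# Exec builds exactly that tuple and calls the matching converter with
# (self, feedback_fn). Only the two listed conversions are supported by this
# mapping; any other pair is expected to be rejected during the earlier
# prerequisite checks.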
11345 class LUInstanceChangeGroup(LogicalUnit):
11346 HPATH = "instance-change-group"
11347 HTYPE = constants.HTYPE_INSTANCE
11348 REQ_BGL = False
11350 def ExpandNames(self):
11351 self.share_locks = _ShareAll()
11352 self.needed_locks = {
11353 locking.LEVEL_NODEGROUP: [],
11354 locking.LEVEL_NODE: [],
11355 }
11357 self._ExpandAndLockInstance()
11359 if self.op.target_groups:
11360 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11361 self.op.target_groups)
11362 else:
11363 self.req_target_uuids = None
11365 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11367 def DeclareLocks(self, level):
11368 if level == locking.LEVEL_NODEGROUP:
11369 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11371 if self.req_target_uuids:
11372 lock_groups = set(self.req_target_uuids)
11374 # Lock all groups used by instance optimistically; this requires going
11375 # via the node before it's locked, requiring verification later on
11376 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11377 lock_groups.update(instance_groups)
11378 else:
11379 # No target groups, need to lock all of them
11380 lock_groups = locking.ALL_SET
11382 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11384 elif level == locking.LEVEL_NODE:
11385 if self.req_target_uuids:
11386 # Lock all nodes used by instances
11387 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11388 self._LockInstancesNodes()
11390 # Lock all nodes in all potential target groups
11391 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11392 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11393 member_nodes = [node_name
11394 for group in lock_groups
11395 for node_name in self.cfg.GetNodeGroup(group).members]
11396 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11397 else:
11398 # Lock all nodes as all groups are potential targets
11399 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11401 def CheckPrereq(self):
11402 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11403 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11404 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11406 assert (self.req_target_uuids is None or
11407 owned_groups.issuperset(self.req_target_uuids))
11408 assert owned_instances == set([self.op.instance_name])
11410 # Get instance information
11411 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11413 # Check if node groups for locked instance are still correct
11414 assert owned_nodes.issuperset(self.instance.all_nodes), \
11415 ("Instance %s's nodes changed while we kept the lock" %
11416 self.op.instance_name)
11418 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11419 owned_groups)
11421 if self.req_target_uuids:
11422 # User requested specific target groups
11423 self.target_uuids = self.req_target_uuids
11424 else:
11425 # All groups except those used by the instance are potential targets
11426 self.target_uuids = owned_groups - inst_groups
11428 conflicting_groups = self.target_uuids & inst_groups
11429 if conflicting_groups:
11430 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11431 " used by the instance '%s'" %
11432 (utils.CommaJoin(conflicting_groups),
11433 self.op.instance_name),
11434 errors.ECODE_INVAL)
11436 if not self.target_uuids:
11437 raise errors.OpPrereqError("There are no possible target groups",
11438 errors.ECODE_INVAL)
11440 def BuildHooksEnv(self):
11441 """Build hooks env.
11444 assert self.target_uuids
11446 env = {
11447 "TARGET_GROUPS": " ".join(self.target_uuids),
11448 }
11450 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11452 return env
11454 def BuildHooksNodes(self):
11455 """Build hooks nodes.
11458 mn = self.cfg.GetMasterNode()
11459 return ([mn], [mn])
11461 def Exec(self, feedback_fn):
11462 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11464 assert instances == [self.op.instance_name], "Instance not locked"
11466 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11467 instances=instances, target_groups=list(self.target_uuids))
11469 ial.Run(self.op.iallocator)
11471 if not ial.success:
11472 raise errors.OpPrereqError("Can't compute solution for changing group of"
11473 " instance '%s' using iallocator '%s': %s" %
11474 (self.op.instance_name, self.op.iallocator,
11475 ial.info),
11476 errors.ECODE_NORES)
11478 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11480 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11481 " instance '%s'", len(jobs), self.op.instance_name)
11483 return ResultWithJobs(jobs)
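# The change-group flow above mirrors group evacuation further below: the
# iallocator is asked for a CHG_GROUP solution, _LoadNodeEvacResult turns the
# returned per-instance move lists into sets of opcode jobs, and
# ResultWithJobs hands those jobs back for submission to the job queue.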
11486 class LUBackupQuery(NoHooksLU):
11487 """Query the exports list
11492 def ExpandNames(self):
11493 self.needed_locks = {}
11494 self.share_locks[locking.LEVEL_NODE] = 1
11495 if not self.op.nodes:
11496 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11497 else:
11498 self.needed_locks[locking.LEVEL_NODE] = \
11499 _GetWantedNodes(self, self.op.nodes)
11501 def Exec(self, feedback_fn):
11502 """Compute the list of all the exported system images.
11504 @rtype: dict
11505 @return: a dictionary with the structure node->(export-list)
11506 where export-list is a list of the instances exported on
11507 that node.
11509 """
11510 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11511 rpcresult = self.rpc.call_export_list(self.nodes)
11512 result = {}
11513 for node in rpcresult:
11514 if rpcresult[node].fail_msg:
11515 result[node] = False
11516 else:
11517 result[node] = rpcresult[node].payload
11519 return result
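# Example of the returned structure (hypothetical names and values):
# {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
# "node2.example.com": False} -- False marks a node whose export list could
# not be retrieved.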
11522 class LUBackupPrepare(NoHooksLU):
11523 """Prepares an instance for an export and returns useful information.
11528 def ExpandNames(self):
11529 self._ExpandAndLockInstance()
11531 def CheckPrereq(self):
11532 """Check prerequisites.
11535 instance_name = self.op.instance_name
11537 self.instance = self.cfg.GetInstanceInfo(instance_name)
11538 assert self.instance is not None, \
11539 "Cannot retrieve locked instance %s" % self.op.instance_name
11540 _CheckNodeOnline(self, self.instance.primary_node)
11542 self._cds = _GetClusterDomainSecret()
11544 def Exec(self, feedback_fn):
11545 """Prepares an instance for an export.
11548 instance = self.instance
11550 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11551 salt = utils.GenerateSecret(8)
11553 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11554 result = self.rpc.call_x509_cert_create(instance.primary_node,
11555 constants.RIE_CERT_VALIDITY)
11556 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11558 (name, cert_pem) = result.payload
11560 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11561 cert_pem)
11563 return {
11564 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11565 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11566 salt),
11567 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11568 }
11570 return None
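# For remote-mode exports the dictionary above carries everything the
# destination cluster needs to verify the source: a handshake derived from
# the cluster domain secret, the HMAC-signed name of the freshly created
# X509 key, and the signed CA certificate; local-mode exports need no
# preparation data.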
11573 class LUBackupExport(LogicalUnit):
11574 """Export an instance to an image in the cluster.
11577 HPATH = "instance-export"
11578 HTYPE = constants.HTYPE_INSTANCE
11579 REQ_BGL = False
11581 def CheckArguments(self):
11582 """Check the arguments.
11585 self.x509_key_name = self.op.x509_key_name
11586 self.dest_x509_ca_pem = self.op.destination_x509_ca
11588 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11589 if not self.x509_key_name:
11590 raise errors.OpPrereqError("Missing X509 key name for encryption",
11591 errors.ECODE_INVAL)
11593 if not self.dest_x509_ca_pem:
11594 raise errors.OpPrereqError("Missing destination X509 CA",
11595 errors.ECODE_INVAL)
11597 def ExpandNames(self):
11598 self._ExpandAndLockInstance()
11600 # Lock all nodes for local exports
11601 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11602 # FIXME: lock only instance primary and destination node
11604 # Sad but true, for now we have to lock all nodes, as we don't know where
11605 # the previous export might be, and in this LU we search for it and
11606 # remove it from its current node. In the future we could fix this by:
11607 # - making a tasklet to search (share-lock all), then create the
11608 # new one, then one to remove, after
11609 # - removing the removal operation altogether
11610 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11612 def DeclareLocks(self, level):
11613 """Last minute lock declaration."""
11614 # All nodes are locked anyway, so nothing to do here.
11616 def BuildHooksEnv(self):
11617 """Build hooks env.
11619 This will run on the master, primary node and target node.
11621 """
11622 env = {
11623 "EXPORT_MODE": self.op.mode,
11624 "EXPORT_NODE": self.op.target_node,
11625 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11626 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11627 # TODO: Generic function for boolean env variables
11628 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11629 }
11631 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11633 return env
11635 def BuildHooksNodes(self):
11636 """Build hooks nodes.
11639 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11641 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11642 nl.append(self.op.target_node)
11644 return (nl, nl)
11646 def CheckPrereq(self):
11647 """Check prerequisites.
11649 This checks that the instance and node names are valid.
11652 instance_name = self.op.instance_name
11654 self.instance = self.cfg.GetInstanceInfo(instance_name)
11655 assert self.instance is not None, \
11656 "Cannot retrieve locked instance %s" % self.op.instance_name
11657 _CheckNodeOnline(self, self.instance.primary_node)
11659 if (self.op.remove_instance and self.instance.admin_up and
11660 not self.op.shutdown):
11661 raise errors.OpPrereqError("Can not remove instance without shutting it"
11662 " down before")
11664 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11665 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11666 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11667 assert self.dst_node is not None
11669 _CheckNodeOnline(self, self.dst_node.name)
11670 _CheckNodeNotDrained(self, self.dst_node.name)
11673 self.dest_disk_info = None
11674 self.dest_x509_ca = None
11676 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11677 self.dst_node = None
11679 if len(self.op.target_node) != len(self.instance.disks):
11680 raise errors.OpPrereqError(("Received destination information for %s"
11681 " disks, but instance %s has %s disks") %
11682 (len(self.op.target_node), instance_name,
11683 len(self.instance.disks)),
11684 errors.ECODE_INVAL)
11686 cds = _GetClusterDomainSecret()
11688 # Check X509 key name
11689 try:
11690 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11691 except (TypeError, ValueError), err:
11692 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11694 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11695 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11696 errors.ECODE_INVAL)
11698 # Load and verify CA
11699 try:
11700 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11701 except OpenSSL.crypto.Error, err:
11702 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11703 (err, ), errors.ECODE_INVAL)
11705 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11706 if errcode is not None:
11707 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11708 (msg, ), errors.ECODE_INVAL)
11710 self.dest_x509_ca = cert
11712 # Verify target information
11713 disk_info = []
11714 for idx, disk_data in enumerate(self.op.target_node):
11715 try:
11716 (host, port, magic) = \
11717 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11718 except errors.GenericError, err:
11719 raise errors.OpPrereqError("Target info for disk %s: %s" %
11720 (idx, err), errors.ECODE_INVAL)
11722 disk_info.append((host, port, magic))
11724 assert len(disk_info) == len(self.op.target_node)
11725 self.dest_disk_info = disk_info
11727 else:
11728 raise errors.ProgrammerError("Unhandled export mode %r" %
11729 self.op.mode)
11731 # instance disk type verification
11732 # TODO: Implement export support for file-based disks
11733 for disk in self.instance.disks:
11734 if disk.dev_type == constants.LD_FILE:
11735 raise errors.OpPrereqError("Export not supported for instances with"
11736 " file-based disks", errors.ECODE_INVAL)
11738 def _CleanupExports(self, feedback_fn):
11739 """Removes exports of current instance from all other nodes.
11741 If an instance in a cluster with nodes A..D was exported to node C, its
11742 exports will be removed from the nodes A, B and D.
11745 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11747 nodelist = self.cfg.GetNodeList()
11748 nodelist.remove(self.dst_node.name)
11750 # on one-node clusters nodelist will be empty after the removal
11751 # if we proceed the backup would be removed because OpBackupQuery
11752 # substitutes an empty list with the full cluster node list.
11753 iname = self.instance.name
11754 if nodelist:
11755 feedback_fn("Removing old exports for instance %s" % iname)
11756 exportlist = self.rpc.call_export_list(nodelist)
11757 for node in exportlist:
11758 if exportlist[node].fail_msg:
11759 continue
11760 if iname in exportlist[node].payload:
11761 msg = self.rpc.call_export_remove(node, iname).fail_msg
11762 if msg:
11763 self.LogWarning("Could not remove older export for instance %s"
11764 " on node %s: %s", iname, node, msg)
11766 def Exec(self, feedback_fn):
11767 """Export an instance to an image in the cluster.
11770 assert self.op.mode in constants.EXPORT_MODES
11772 instance = self.instance
11773 src_node = instance.primary_node
11775 if self.op.shutdown:
11776 # shutdown the instance, but not the disks
11777 feedback_fn("Shutting down instance %s" % instance.name)
11778 result = self.rpc.call_instance_shutdown(src_node, instance,
11779 self.op.shutdown_timeout)
11780 # TODO: Maybe ignore failures if ignore_remove_failures is set
11781 result.Raise("Could not shutdown instance %s on"
11782 " node %s" % (instance.name, src_node))
11784 # set the disks ID correctly since call_instance_start needs the
11785 # correct drbd minor to create the symlinks
11786 for disk in instance.disks:
11787 self.cfg.SetDiskID(disk, src_node)
11789 activate_disks = (not instance.admin_up)
11791 if activate_disks:
11792 # Activate the instance disks if we're exporting a stopped instance
11793 feedback_fn("Activating disks for %s" % instance.name)
11794 _StartInstanceDisks(self, instance, None)
11796 try:
11797 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11798 instance)
11800 helper.CreateSnapshots()
11801 try:
11802 if (self.op.shutdown and instance.admin_up and
11803 not self.op.remove_instance):
11804 assert not activate_disks
11805 feedback_fn("Starting instance %s" % instance.name)
11806 result = self.rpc.call_instance_start(src_node, instance,
11807 None, None)
11808 msg = result.fail_msg
11809 if msg:
11810 feedback_fn("Failed to start instance: %s" % msg)
11811 _ShutdownInstanceDisks(self, instance)
11812 raise errors.OpExecError("Could not start instance: %s" % msg)
11814 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11815 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11816 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11817 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11818 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11820 (key_name, _, _) = self.x509_key_name
11822 dest_ca_pem = \
11823 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11824 self.dest_x509_ca)
11826 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11827 key_name, dest_ca_pem,
11828 timeouts)
11829 finally:
11830 helper.Cleanup()
11832 # Check for backwards compatibility
11833 assert len(dresults) == len(instance.disks)
11834 assert compat.all(isinstance(i, bool) for i in dresults), \
11835 "Not all results are boolean: %r" % dresults
11837 finally:
11838 if activate_disks:
11839 feedback_fn("Deactivating disks for %s" % instance.name)
11840 _ShutdownInstanceDisks(self, instance)
11842 if not (compat.all(dresults) and fin_resu):
11843 failures = []
11844 if not fin_resu:
11845 failures.append("export finalization")
11846 if not compat.all(dresults):
11847 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11848 if not dsk)
11849 failures.append("disk export: disk(s) %s" % fdsk)
11851 raise errors.OpExecError("Export failed, errors in %s" %
11852 utils.CommaJoin(failures))
11854 # At this point, the export was successful, we can cleanup/finish
11856 # Remove instance if requested
11857 if self.op.remove_instance:
11858 feedback_fn("Removing instance %s" % instance.name)
11859 _RemoveInstance(self, feedback_fn, instance,
11860 self.op.ignore_remove_failures)
11862 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11863 self._CleanupExports(feedback_fn)
11865 return fin_resu, dresults
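# fin_resu reports whether export finalization succeeded as a whole, while
# dresults holds one boolean per instance disk; e.g. (True, [True, False])
# would mean finalization worked but the second disk failed to export
# (illustrative values only).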
11868 class LUBackupRemove(NoHooksLU):
11869 """Remove exports related to the named instance.
11874 def ExpandNames(self):
11875 self.needed_locks = {}
11876 # We need all nodes to be locked in order for RemoveExport to work, but we
11877 # don't need to lock the instance itself, as nothing will happen to it (and
11878 # we can remove exports also for a removed instance)
11879 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11881 def Exec(self, feedback_fn):
11882 """Remove any export.
11885 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11886 # If the instance was not found we'll try with the name that was passed in.
11887 # This will only work if it was an FQDN, though.
11888 fqdn_warn = False
11889 if not instance_name:
11890 fqdn_warn = True
11891 instance_name = self.op.instance_name
11893 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11894 exportlist = self.rpc.call_export_list(locked_nodes)
11895 found = False
11896 for node in exportlist:
11897 msg = exportlist[node].fail_msg
11898 if msg:
11899 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11900 continue
11901 if instance_name in exportlist[node].payload:
11902 found = True
11903 result = self.rpc.call_export_remove(node, instance_name)
11904 msg = result.fail_msg
11905 if msg:
11906 logging.error("Could not remove export for instance %s"
11907 " on node %s: %s", instance_name, node, msg)
11909 if fqdn_warn and not found:
11910 feedback_fn("Export not found. If trying to remove an export belonging"
11911 " to a deleted instance please use its Fully Qualified"
11912 " Domain Name.")
11915 class LUGroupAdd(LogicalUnit):
11916 """Logical unit for creating node groups.
11919 HPATH = "group-add"
11920 HTYPE = constants.HTYPE_GROUP
11921 REQ_BGL = False
11923 def ExpandNames(self):
11924 # We need the new group's UUID here so that we can create and acquire the
11925 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11926 # that it should not check whether the UUID exists in the configuration.
11927 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11928 self.needed_locks = {}
11929 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11931 def CheckPrereq(self):
11932 """Check prerequisites.
11934 This checks that the given group name is not an existing node group
11935 already.
11937 """
11938 try:
11939 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11940 except errors.OpPrereqError:
11941 pass
11942 else:
11943 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11944 " node group (UUID: %s)" %
11945 (self.op.group_name, existing_uuid),
11946 errors.ECODE_EXISTS)
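# The try/except/else above inverts the usual pattern: LookupNodeGroup is
# expected to fail for a genuinely new name, so reaching the else branch
# (i.e. the lookup succeeded) is the error case.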
11948 if self.op.ndparams:
11949 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11951 def BuildHooksEnv(self):
11952 """Build hooks env.
11955 return {
11956 "GROUP_NAME": self.op.group_name,
11957 }
11959 def BuildHooksNodes(self):
11960 """Build hooks nodes.
11963 mn = self.cfg.GetMasterNode()
11964 return ([mn], [mn])
11966 def Exec(self, feedback_fn):
11967 """Add the node group to the cluster.
11970 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11971 uuid=self.group_uuid,
11972 alloc_policy=self.op.alloc_policy,
11973 ndparams=self.op.ndparams)
11975 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11976 del self.remove_locks[locking.LEVEL_NODEGROUP]
11979 class LUGroupAssignNodes(NoHooksLU):
11980 """Logical unit for assigning nodes to groups.
11985 def ExpandNames(self):
11986 # These raise errors.OpPrereqError on their own:
11987 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11988 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11990 # We want to lock all the affected nodes and groups. We have readily
11991 # available the list of nodes, and the *destination* group. To gather the
11992 # list of "source" groups, we need to fetch node information later on.
11993 self.needed_locks = {
11994 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11995 locking.LEVEL_NODE: self.op.nodes,
11996 }
11998 def DeclareLocks(self, level):
11999 if level == locking.LEVEL_NODEGROUP:
12000 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12002 # Try to get all affected nodes' groups without having the group or node
12003 # lock yet. Needs verification later in the code flow.
12004 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12006 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12008 def CheckPrereq(self):
12009 """Check prerequisites.
12012 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12013 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12014 frozenset(self.op.nodes))
12016 expected_locks = (set([self.group_uuid]) |
12017 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12018 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12019 if actual_locks != expected_locks:
12020 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12021 " current groups are '%s', used to be '%s'" %
12022 (utils.CommaJoin(expected_locks),
12023 utils.CommaJoin(actual_locks)))
12025 self.node_data = self.cfg.GetAllNodesInfo()
12026 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12027 instance_data = self.cfg.GetAllInstancesInfo()
12029 if self.group is None:
12030 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12031 (self.op.group_name, self.group_uuid))
12033 (new_splits, previous_splits) = \
12034 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12035 for node in self.op.nodes],
12036 self.node_data, instance_data)
12038 if new_splits:
12039 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12041 if not self.op.force:
12042 raise errors.OpExecError("The following instances get split by this"
12043 " change and --force was not given: %s" %
12044 fmt_new_splits)
12045 else:
12046 self.LogWarning("This operation will split the following instances: %s",
12047 fmt_new_splits)
12049 if previous_splits:
12050 self.LogWarning("In addition, these already-split instances continue"
12051 " to be split across groups: %s",
12052 utils.CommaJoin(utils.NiceSort(previous_splits)))
12054 def Exec(self, feedback_fn):
12055 """Assign nodes to a new group.
12058 for node in self.op.nodes:
12059 self.node_data[node].group = self.group_uuid
12061 # FIXME: Depends on side-effects of modifying the result of
12062 # C{cfg.GetAllNodesInfo}
12064 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12066 @staticmethod
12067 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12068 """Check for split instances after a node assignment.
12070 This method considers a series of node assignments as an atomic operation,
12071 and returns information about split instances after applying the set of
12074 In particular, it returns information about newly split instances, and
12075 instances that were already split, and remain so after the change.
12077 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12078 considered.
12080 @type changes: list of (node_name, new_group_uuid) pairs.
12081 @param changes: list of node assignments to consider.
12082 @param node_data: a dict with data for all nodes
12083 @param instance_data: a dict with all instances to consider
12084 @rtype: a two-tuple
12085 @return: a list of instances that were previously okay and become split as
12086 a consequence of this change, and a list of instances that were already
12087 split and that this change does not fix.
12089 """
12090 changed_nodes = dict((node, group) for node, group in changes
12091 if node_data[node].group != group)
12093 all_split_instances = set()
12094 previously_split_instances = set()
12096 def InstanceNodes(instance):
12097 return [instance.primary_node] + list(instance.secondary_nodes)
12099 for inst in instance_data.values():
12100 if inst.disk_template not in constants.DTS_INT_MIRROR:
12101 continue
12103 instance_nodes = InstanceNodes(inst)
12105 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12106 previously_split_instances.add(inst.name)
12108 if len(set(changed_nodes.get(node, node_data[node].group)
12109 for node in instance_nodes)) > 1:
12110 all_split_instances.add(inst.name)
12112 return (list(all_split_instances - previously_split_instances),
12113 list(previously_split_instances & all_split_instances))
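# Illustrative example of the split computation (hypothetical names): for a
# DRBD instance on nodes n1 and n2, both currently in group G1, the change
# list [("n1", "G2")] leaves its nodes spanning {"G2", "G1"}, so the instance
# appears in the first returned list (newly split); an instance that already
# spanned two groups and is not reunited by the change ends up in the second
# list instead.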
12116 class _GroupQuery(_QueryBase):
12117 FIELDS = query.GROUP_FIELDS
12119 def ExpandNames(self, lu):
12120 lu.needed_locks = {}
12122 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12123 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12125 if not self.names:
12126 self.wanted = [name_to_uuid[name]
12127 for name in utils.NiceSort(name_to_uuid.keys())]
12128 else:
12129 # Accept names to be either names or UUIDs.
12130 missing = []
12131 self.wanted = []
12132 all_uuid = frozenset(self._all_groups.keys())
12134 for name in self.names:
12135 if name in all_uuid:
12136 self.wanted.append(name)
12137 elif name in name_to_uuid:
12138 self.wanted.append(name_to_uuid[name])
12139 else:
12140 missing.append(name)
12142 if missing:
12143 raise errors.OpPrereqError("Some groups do not exist: %s" %
12144 utils.CommaJoin(missing),
12145 errors.ECODE_NOENT)
12147 def DeclareLocks(self, lu, level):
12148 pass
12150 def _GetQueryData(self, lu):
12151 """Computes the list of node groups and their attributes.
12154 do_nodes = query.GQ_NODE in self.requested_data
12155 do_instances = query.GQ_INST in self.requested_data
12157 group_to_nodes = None
12158 group_to_instances = None
12160 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12161 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12162 # latter GetAllInstancesInfo() is not enough, for we have to go through
12163 # instance->node. Hence, we will need to process nodes even if we only need
12164 # instance information.
12165 if do_nodes or do_instances:
12166 all_nodes = lu.cfg.GetAllNodesInfo()
12167 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12168 node_to_group = {}
12170 for node in all_nodes.values():
12171 if node.group in group_to_nodes:
12172 group_to_nodes[node.group].append(node.name)
12173 node_to_group[node.name] = node.group
12175 if do_instances:
12176 all_instances = lu.cfg.GetAllInstancesInfo()
12177 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12179 for instance in all_instances.values():
12180 node = instance.primary_node
12181 if node in node_to_group:
12182 group_to_instances[node_to_group[node]].append(instance.name)
12184 if not do_nodes:
12185 # Do not pass on node information if it was not requested.
12186 group_to_nodes = None
12188 return query.GroupQueryData([self._all_groups[uuid]
12189 for uuid in self.wanted],
12190 group_to_nodes, group_to_instances)
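# Example of the maps built above (hypothetical values): group_to_nodes =
# {"group-uuid-1": ["node1", "node2"]} and group_to_instances =
# {"group-uuid-1": ["inst1"]}; either map stays None when the corresponding
# GQ_NODE/GQ_INST data was not requested.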
12193 class LUGroupQuery(NoHooksLU):
12194 """Logical unit for querying node groups.
12199 def CheckArguments(self):
12200 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12201 self.op.output_fields, False)
12203 def ExpandNames(self):
12204 self.gq.ExpandNames(self)
12206 def DeclareLocks(self, level):
12207 self.gq.DeclareLocks(self, level)
12209 def Exec(self, feedback_fn):
12210 return self.gq.OldStyleQuery(self)
12213 class LUGroupSetParams(LogicalUnit):
12214 """Modifies the parameters of a node group.
12217 HPATH = "group-modify"
12218 HTYPE = constants.HTYPE_GROUP
12219 REQ_BGL = False
12221 def CheckArguments(self):
12222 all_changes = [
12223 self.op.ndparams,
12224 self.op.alloc_policy,
12225 ]
12227 if all_changes.count(None) == len(all_changes):
12228 raise errors.OpPrereqError("Please pass at least one modification",
12229 errors.ECODE_INVAL)
12231 def ExpandNames(self):
12232 # This raises errors.OpPrereqError on its own:
12233 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12235 self.needed_locks = {
12236 locking.LEVEL_NODEGROUP: [self.group_uuid],
12237 }
12239 def CheckPrereq(self):
12240 """Check prerequisites.
12243 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12245 if self.group is None:
12246 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12247 (self.op.group_name, self.group_uuid))
12249 if self.op.ndparams:
12250 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12251 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12252 self.new_ndparams = new_ndparams
12254 def BuildHooksEnv(self):
12255 """Build hooks env.
12258 return {
12259 "GROUP_NAME": self.op.group_name,
12260 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12261 }
12263 def BuildHooksNodes(self):
12264 """Build hooks nodes.
12267 mn = self.cfg.GetMasterNode()
12268 return ([mn], [mn])
12270 def Exec(self, feedback_fn):
12271 """Modifies the node group.
12273 """
12274 result = []
12276 if self.op.ndparams:
12277 self.group.ndparams = self.new_ndparams
12278 result.append(("ndparams", str(self.group.ndparams)))
12280 if self.op.alloc_policy:
12281 self.group.alloc_policy = self.op.alloc_policy
12283 self.cfg.Update(self.group, feedback_fn)
12284 return result
12287 class LUGroupRemove(LogicalUnit):
12288 HPATH = "group-remove"
12289 HTYPE = constants.HTYPE_GROUP
12290 REQ_BGL = False
12292 def ExpandNames(self):
12293 # This will raise errors.OpPrereqError on its own:
12294 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12295 self.needed_locks = {
12296 locking.LEVEL_NODEGROUP: [self.group_uuid],
12297 }
12299 def CheckPrereq(self):
12300 """Check prerequisites.
12302 This checks that the given group name exists as a node group, that it is
12303 empty (i.e., contains no nodes), and that it is not the last group of the
12304 cluster.
12306 """
12307 # Verify that the group is empty.
12308 group_nodes = [node.name
12309 for node in self.cfg.GetAllNodesInfo().values()
12310 if node.group == self.group_uuid]
12312 if group_nodes:
12313 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12314 " nodes: %s" %
12315 (self.op.group_name,
12316 utils.CommaJoin(utils.NiceSort(group_nodes))),
12317 errors.ECODE_STATE)
12319 # Verify the cluster would not be left group-less.
12320 if len(self.cfg.GetNodeGroupList()) == 1:
12321 raise errors.OpPrereqError("Group '%s' is the only group,"
12322 " cannot be removed" %
12323 self.op.group_name,
12324 errors.ECODE_STATE)
12326 def BuildHooksEnv(self):
12327 """Build hooks env.
12330 return {
12331 "GROUP_NAME": self.op.group_name,
12332 }
12334 def BuildHooksNodes(self):
12335 """Build hooks nodes.
12338 mn = self.cfg.GetMasterNode()
12339 return ([mn], [mn])
12341 def Exec(self, feedback_fn):
12342 """Remove the node group.
12344 """
12345 try:
12346 self.cfg.RemoveNodeGroup(self.group_uuid)
12347 except errors.ConfigurationError:
12348 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12349 (self.op.group_name, self.group_uuid))
12351 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12354 class LUGroupRename(LogicalUnit):
12355 HPATH = "group-rename"
12356 HTYPE = constants.HTYPE_GROUP
12357 REQ_BGL = False
12359 def ExpandNames(self):
12360 # This raises errors.OpPrereqError on its own:
12361 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12363 self.needed_locks = {
12364 locking.LEVEL_NODEGROUP: [self.group_uuid],
12365 }
12367 def CheckPrereq(self):
12368 """Check prerequisites.
12370 Ensures requested new name is not yet used.
12372 """
12373 try:
12374 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12375 except errors.OpPrereqError:
12376 pass
12377 else:
12378 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12379 " node group (UUID: %s)" %
12380 (self.op.new_name, new_name_uuid),
12381 errors.ECODE_EXISTS)
12383 def BuildHooksEnv(self):
12384 """Build hooks env.
12387 return {
12388 "OLD_NAME": self.op.group_name,
12389 "NEW_NAME": self.op.new_name,
12390 }
12392 def BuildHooksNodes(self):
12393 """Build hooks nodes.
12396 mn = self.cfg.GetMasterNode()
12398 all_nodes = self.cfg.GetAllNodesInfo()
12399 all_nodes.pop(mn, None)
12401 run_nodes = [mn]
12402 run_nodes.extend(node.name for node in all_nodes.values()
12403 if node.group == self.group_uuid)
12405 return (run_nodes, run_nodes)
12407 def Exec(self, feedback_fn):
12408 """Rename the node group.
12411 group = self.cfg.GetNodeGroup(self.group_uuid)
12413 if group is None:
12414 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12415 (self.op.group_name, self.group_uuid))
12417 group.name = self.op.new_name
12418 self.cfg.Update(group, feedback_fn)
12420 return self.op.new_name
12423 class LUGroupEvacuate(LogicalUnit):
12424 HPATH = "group-evacuate"
12425 HTYPE = constants.HTYPE_GROUP
12426 REQ_BGL = False
12428 def ExpandNames(self):
12429 # This raises errors.OpPrereqError on its own:
12430 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12432 if self.op.target_groups:
12433 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12434 self.op.target_groups)
12435 else:
12436 self.req_target_uuids = []
12438 if self.group_uuid in self.req_target_uuids:
12439 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12440 " as a target group (targets are %s)" %
12441 (self.group_uuid,
12442 utils.CommaJoin(self.req_target_uuids)),
12443 errors.ECODE_INVAL)
12445 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12447 self.share_locks = _ShareAll()
12448 self.needed_locks = {
12449 locking.LEVEL_INSTANCE: [],
12450 locking.LEVEL_NODEGROUP: [],
12451 locking.LEVEL_NODE: [],
12452 }
12454 def DeclareLocks(self, level):
12455 if level == locking.LEVEL_INSTANCE:
12456 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12458 # Lock instances optimistically, needs verification once node and group
12459 # locks have been acquired
12460 self.needed_locks[locking.LEVEL_INSTANCE] = \
12461 self.cfg.GetNodeGroupInstances(self.group_uuid)
12463 elif level == locking.LEVEL_NODEGROUP:
12464 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12466 if self.req_target_uuids:
12467 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12469 # Lock all groups used by instances optimistically; this requires going
12470 # via the node before it's locked, requiring verification later on
12471 lock_groups.update(group_uuid
12472 for instance_name in
12473 self.owned_locks(locking.LEVEL_INSTANCE)
12474 for group_uuid in
12475 self.cfg.GetInstanceNodeGroups(instance_name))
12476 else:
12477 # No target groups, need to lock all of them
12478 lock_groups = locking.ALL_SET
12480 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12482 elif level == locking.LEVEL_NODE:
12483 # This will only lock the nodes in the group to be evacuated which
12484 # contain actual instances
12485 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12486 self._LockInstancesNodes()
12488 # Lock all nodes in group to be evacuated and target groups
12489 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12490 assert self.group_uuid in owned_groups
12491 member_nodes = [node_name
12492 for group in owned_groups
12493 for node_name in self.cfg.GetNodeGroup(group).members]
12494 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12496 def CheckPrereq(self):
12497 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12498 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12499 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12501 assert owned_groups.issuperset(self.req_target_uuids)
12502 assert self.group_uuid in owned_groups
12504 # Check if locked instances are still correct
12505 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12507 # Get instance information
12508 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12510 # Check if node groups for locked instances are still correct
12511 for instance_name in owned_instances:
12512 inst = self.instances[instance_name]
12513 assert owned_nodes.issuperset(inst.all_nodes), \
12514 "Instance %s's nodes changed while we kept the lock" % instance_name
12516 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12517 owned_groups)
12519 assert self.group_uuid in inst_groups, \
12520 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12522 if self.req_target_uuids:
12523 # User requested specific target groups
12524 self.target_uuids = self.req_target_uuids
12525 else:
12526 # All groups except the one to be evacuated are potential targets
12527 self.target_uuids = [group_uuid for group_uuid in owned_groups
12528 if group_uuid != self.group_uuid]
12530 if not self.target_uuids:
12531 raise errors.OpPrereqError("There are no possible target groups",
12532 errors.ECODE_INVAL)
12534 def BuildHooksEnv(self):
12535 """Build hooks env.
12538 return {
12539 "GROUP_NAME": self.op.group_name,
12540 "TARGET_GROUPS": " ".join(self.target_uuids),
12541 }
12543 def BuildHooksNodes(self):
12544 """Build hooks nodes.
12547 mn = self.cfg.GetMasterNode()
12549 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12551 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12553 return (run_nodes, run_nodes)
12555 def Exec(self, feedback_fn):
12556 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12558 assert self.group_uuid not in self.target_uuids
12560 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12561 instances=instances, target_groups=self.target_uuids)
12563 ial.Run(self.op.iallocator)
12565 if not ial.success:
12566 raise errors.OpPrereqError("Can't compute group evacuation using"
12567 " iallocator '%s': %s" %
12568 (self.op.iallocator, ial.info),
12569 errors.ECODE_NORES)
12571 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12573 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12574 len(jobs), self.op.group_name)
12576 return ResultWithJobs(jobs)
12579 class TagsLU(NoHooksLU): # pylint: disable=W0223
12580 """Generic tags LU.
12582 This is an abstract class which is the parent of all the other tags LUs.
12585 def ExpandNames(self):
12586 self.group_uuid = None
12587 self.needed_locks = {}
12588 if self.op.kind == constants.TAG_NODE:
12589 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12590 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12591 elif self.op.kind == constants.TAG_INSTANCE:
12592 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12593 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12594 elif self.op.kind == constants.TAG_NODEGROUP:
12595 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12597 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12598 # not possible to acquire the BGL based on opcode parameters)
12600 def CheckPrereq(self):
12601 """Check prerequisites.
12604 if self.op.kind == constants.TAG_CLUSTER:
12605 self.target = self.cfg.GetClusterInfo()
12606 elif self.op.kind == constants.TAG_NODE:
12607 self.target = self.cfg.GetNodeInfo(self.op.name)
12608 elif self.op.kind == constants.TAG_INSTANCE:
12609 self.target = self.cfg.GetInstanceInfo(self.op.name)
12610 elif self.op.kind == constants.TAG_NODEGROUP:
12611 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12612 else:
12613 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12614 str(self.op.kind), errors.ECODE_INVAL)
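# After CheckPrereq, self.target is the taggable configuration object that
# matches op.kind: the cluster, a node, an instance or a node group. For
# example (hypothetical name), kind=constants.TAG_NODE with
# name="node1.example.com" resolves to that node's configuration object.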
12617 class LUTagsGet(TagsLU):
12618 """Returns the tags of a given object.
12623 def ExpandNames(self):
12624 TagsLU.ExpandNames(self)
12626 # Share locks as this is only a read operation
12627 self.share_locks = _ShareAll()
12629 def Exec(self, feedback_fn):
12630 """Returns the tag list.
12633 return list(self.target.GetTags())
12636 class LUTagsSearch(NoHooksLU):
12637 """Searches the tags for a given pattern.
12642 def ExpandNames(self):
12643 self.needed_locks = {}
12645 def CheckPrereq(self):
12646 """Check prerequisites.
12648 This checks the pattern passed for validity by compiling it.
12651 try:
12652 self.re = re.compile(self.op.pattern)
12653 except re.error, err:
12654 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12655 (self.op.pattern, err), errors.ECODE_INVAL)
12657 def Exec(self, feedback_fn):
12658 """Returns the tag list.
12661 cfg = self.cfg
12662 tgts = [("/cluster", cfg.GetClusterInfo())]
12663 ilist = cfg.GetAllInstancesInfo().values()
12664 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12665 nlist = cfg.GetAllNodesInfo().values()
12666 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12667 tgts.extend(("/nodegroup/%s" % n.name, n)
12668 for n in cfg.GetAllNodeGroupsInfo().values())
12669 results = []
12670 for path, target in tgts:
12671 for tag in target.GetTags():
12672 if self.re.search(tag):
12673 results.append((path, tag))
12675 return results
12677 class LUTagsSet(TagsLU):
12678 """Sets a tag on a given object.
12683 def CheckPrereq(self):
12684 """Check prerequisites.
12686 This checks the type and length of the tag name and value.
12689 TagsLU.CheckPrereq(self)
12690 for tag in self.op.tags:
12691 objects.TaggableObject.ValidateTag(tag)
12693 def Exec(self, feedback_fn):
12694 """Sets the tag.
12696 """
12697 try:
12698 for tag in self.op.tags:
12699 self.target.AddTag(tag)
12700 except errors.TagError, err:
12701 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12702 self.cfg.Update(self.target, feedback_fn)
12705 class LUTagsDel(TagsLU):
12706 """Delete a list of tags from a given object.
12711 def CheckPrereq(self):
12712 """Check prerequisites.
12714 This checks that we have the given tag.
12717 TagsLU.CheckPrereq(self)
12718 for tag in self.op.tags:
12719 objects.TaggableObject.ValidateTag(tag)
12720 del_tags = frozenset(self.op.tags)
12721 cur_tags = self.target.GetTags()
12723 diff_tags = del_tags - cur_tags
12724 if diff_tags:
12725 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12726 raise errors.OpPrereqError("Tag(s) %s not found" %
12727 (utils.CommaJoin(diff_names), ),
12728 errors.ECODE_NOENT)
12730 def Exec(self, feedback_fn):
12731 """Remove the tag from the object.
12734 for tag in self.op.tags:
12735 self.target.RemoveTag(tag)
12736 self.cfg.Update(self.target, feedback_fn)
12739 class LUTestDelay(NoHooksLU):
12740 """Sleep for a specified amount of time.
12742 This LU sleeps on the master and/or nodes for a specified amount of
12743 time.
12745 """
12746 REQ_BGL = False
12748 def ExpandNames(self):
12749 """Expand names and set required locks.
12751 This expands the node list, if any.
12754 self.needed_locks = {}
12755 if self.op.on_nodes:
12756 # _GetWantedNodes can be used here, but is not always appropriate to use
12757 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12758 # more information.
12759 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12760 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12762 def _TestDelay(self):
12763 """Do the actual sleep.
12766 if self.op.on_master:
12767 if not utils.TestDelay(self.op.duration):
12768 raise errors.OpExecError("Error during master delay test")
12769 if self.op.on_nodes:
12770 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12771 for node, node_result in result.items():
12772 node_result.Raise("Failure during rpc call to node %s" % node)
12774 def Exec(self, feedback_fn):
12775 """Execute the test delay opcode, with the wanted repetitions.
12778 if self.op.repeat == 0:
12779 self._TestDelay()
12780 else:
12781 top_value = self.op.repeat - 1
12782 for i in range(self.op.repeat):
12783 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12784 self._TestDelay()
12787 class LUTestJqueue(NoHooksLU):
12788 """Utility LU to test some aspects of the job queue.
12790 """
12791 REQ_BGL = False
12793 # Must be lower than default timeout for WaitForJobChange to see whether it
12794 # notices changed jobs
12795 _CLIENT_CONNECT_TIMEOUT = 20.0
12796 _CLIENT_CONFIRM_TIMEOUT = 60.0
12798 @classmethod
12799 def _NotifyUsingSocket(cls, cb, errcls):
12800 """Opens a Unix socket and waits for another program to connect.
12803 @param cb: Callback to send socket name to client
12804 @type errcls: class
12805 @param errcls: Exception class to use for errors
12808 # Using a temporary directory as there's no easy way to create temporary
12809 # sockets without writing a custom loop around tempfile.mktemp and
12810 # socket.bind
12811 tmpdir = tempfile.mkdtemp()
12812 try:
12813 tmpsock = utils.PathJoin(tmpdir, "sock")
12815 logging.debug("Creating temporary socket at %s", tmpsock)
12816 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12817 try:
12818 sock.bind(tmpsock)
12819 sock.listen(1)
12821 # Send details to client
12822 cb(tmpsock)
12824 # Wait for client to connect before continuing
12825 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12826 try:
12827 (conn, _) = sock.accept()
12828 except socket.error, err:
12829 raise errcls("Client didn't connect in time (%s)" % err)
12830 finally:
12831 sock.close()
12832 finally:
12833 # Remove as soon as client is connected
12834 shutil.rmtree(tmpdir)
12836 # Wait for client to close
12837 try:
12838 try:
12839 # pylint: disable=E1101
12840 # Instance of '_socketobject' has no ... member
12841 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12842 conn.recv(1)
12843 except socket.error, err:
12844 raise errcls("Client failed to confirm notification (%s)" % err)
12845 finally:
12846 conn.close()
12848 def _SendNotification(self, test, arg, sockname):
12849 """Sends a notification to the client.
12852 @param test: Test name
12853 @param arg: Test argument (depends on test)
12854 @type sockname: string
12855 @param sockname: Socket path
12858 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12860 def _Notify(self, prereq, test, arg):
12861 """Notifies the client of a test.
12863 @type prereq: bool
12864 @param prereq: Whether this is a prereq-phase test
12865 @type test: string
12866 @param test: Test name
12867 @param arg: Test argument (depends on test)
12869 """
12870 if prereq:
12871 errcls = errors.OpPrereqError
12872 else:
12873 errcls = errors.OpExecError
12875 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12876 test, arg),
12877 errcls)
12879 def CheckArguments(self):
12880 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12881 self.expandnames_calls = 0
12883 def ExpandNames(self):
12884 checkargs_calls = getattr(self, "checkargs_calls", 0)
12885 if checkargs_calls < 1:
12886 raise errors.ProgrammerError("CheckArguments was not called")
12888 self.expandnames_calls += 1
12890 if self.op.notify_waitlock:
12891 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12893 self.LogInfo("Expanding names")
12895 # Get lock on master node (just to get a lock, not for a particular reason)
12896 self.needed_locks = {
12897 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12898 }
12900 def Exec(self, feedback_fn):
12901 if self.expandnames_calls < 1:
12902 raise errors.ProgrammerError("ExpandNames was not called")
12904 if self.op.notify_exec:
12905 self._Notify(False, constants.JQT_EXEC, None)
12907 self.LogInfo("Executing")
12909 if self.op.log_messages:
12910 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12911 for idx, msg in enumerate(self.op.log_messages):
12912 self.LogInfo("Sending log message %s", idx + 1)
12913 feedback_fn(constants.JQT_MSGPREFIX + msg)
12914 # Report how many test messages have been sent
12915 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12917 if self.op.fail:
12918 raise errors.OpExecError("Opcode failure was requested")
12920 return True
12923 class IAllocator(object):
12924 """IAllocator framework.
12926 An IAllocator instance has four sets of attributes:
12927 - cfg that is needed to query the cluster
12928 - input data (all members of the _KEYS class attribute are required)
12929 - four buffer attributes (in|out_data|text), that represent the
12930 input (to the external script) in text and data structure format,
12931 and the output from it, again in two formats
12932 - the result variables from the script (success, info, nodes) for
12933 easy usage
12935 """
12936 # pylint: disable=R0902
12937 # lots of instance attributes
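# Rough usage sketch (mirrors LUTestAllocator.Exec further below; instance and
# node names are invented and "hail" is only an example allocator name):
#
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_RELOC,
#                    name="instance1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")
#   if not ial.success:
#     raise errors.OpExecError("Relocation failed: %s" % ial.info)
#   new_nodes = ial.result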
12939 def __init__(self, cfg, rpc, mode, **kwargs):
12942 # init buffer variables
12943 self.in_text = self.out_text = self.in_data = self.out_data = None
12944 # init all input fields so that pylint is happy
12946 self.memory = self.disks = self.disk_template = None
12947 self.os = self.tags = self.nics = self.vcpus = None
12948 self.hypervisor = None
12949 self.relocate_from = None
12951 self.instances = None
12952 self.evac_mode = None
12953 self.target_groups = []
12955 self.required_nodes = None
12956 # init result fields
12957 self.success = self.info = self.result = None
12960 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12962 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12963 " IAllocator" % self.mode)
12965 keyset = [n for (n, _) in keydata]
12968 if key not in keyset:
12969 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12970 " IAllocator" % key)
12971 setattr(self, key, kwargs[key])
12974 if key not in kwargs:
12975 raise errors.ProgrammerError("Missing input parameter '%s' to"
12976 " IAllocator" % key)
12977 self._BuildInputData(compat.partial(fn, self), keydata)
12979 def _ComputeClusterData(self):
12980 """Compute the generic allocator input data.
12982 This is the data that is independent of the actual operation.
12986 cluster_info = cfg.GetClusterInfo()
12989 "version": constants.IALLOCATOR_VERSION,
12990 "cluster_name": cfg.GetClusterName(),
12991 "cluster_tags": list(cluster_info.GetTags()),
12992 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12993 # we don't have job IDs
12995 ninfo = cfg.GetAllNodesInfo()
12996 iinfo = cfg.GetAllInstancesInfo().values()
12997 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13000 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13002 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13003 hypervisor_name = self.hypervisor
13004 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13005 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13007 hypervisor_name = cluster_info.enabled_hypervisors[0]
13009 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13012 self.rpc.call_all_instances_info(node_list,
13013 cluster_info.enabled_hypervisors)
13015 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13017 config_ndata = self._ComputeBasicNodeData(ninfo)
13018 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13019 i_list, config_ndata)
13020 assert len(data["nodes"]) == len(ninfo), \
13021 "Incomplete node data computed"
13023 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13025 self.in_data = data
13028 def _ComputeNodeGroupData(cfg):
13029 """Compute node groups data.
13032 ng = dict((guuid, {
13033 "name": gdata.name,
13034 "alloc_policy": gdata.alloc_policy,
13036 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
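# Example of the resulting structure (illustrative values only):
#
#   {"uuid-of-group": {"name": "default", "alloc_policy": "preferred"}}
#
# i.e. a dict indexed by group UUID, which ends up under
# self.in_data["nodegroups"].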
13041 def _ComputeBasicNodeData(node_cfg):
13042 """Compute global node data.
13045 @returns: a dict mapping each node name to a dict of its static (configuration-derived) attributes
13048 # fill in static (config-based) values
13049 node_results = dict((ninfo.name, {
13050 "tags": list(ninfo.GetTags()),
13051 "primary_ip": ninfo.primary_ip,
13052 "secondary_ip": ninfo.secondary_ip,
13053 "offline": ninfo.offline,
13054 "drained": ninfo.drained,
13055 "master_candidate": ninfo.master_candidate,
13056 "group": ninfo.group,
13057 "master_capable": ninfo.master_capable,
13058 "vm_capable": ninfo.vm_capable,
13060 for ninfo in node_cfg.values())
13062 return node_results
13065 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13067 """Compute global node data.
13069 @param node_results: the basic node structures as filled from the config
13072 # make a copy of the current dict
13073 node_results = dict(node_results)
13074 for nname, nresult in node_data.items():
13075 assert nname in node_results, "Missing basic data for node %s" % nname
13076 ninfo = node_cfg[nname]
13078 if not (ninfo.offline or ninfo.drained):
13079 nresult.Raise("Can't get data for node %s" % nname)
13080 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13082 remote_info = nresult.payload
13084 for attr in ["memory_total", "memory_free", "memory_dom0",
13085 "vg_size", "vg_free", "cpu_total"]:
13086 if attr not in remote_info:
13087 raise errors.OpExecError("Node '%s' didn't return attribute"
13088 " '%s'" % (nname, attr))
13089 if not isinstance(remote_info[attr], int):
13090 raise errors.OpExecError("Node '%s' returned invalid value"
13092 (nname, attr, remote_info[attr]))
13093 # compute memory used by primary instances
13094 i_p_mem = i_p_up_mem = 0
13095 for iinfo, beinfo in i_list:
13096 if iinfo.primary_node == nname:
13097 i_p_mem += beinfo[constants.BE_MEMORY]
13098 if iinfo.name not in node_iinfo[nname].payload:
13101 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13102 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13103 remote_info["memory_free"] -= max(0, i_mem_diff)
13106 i_p_up_mem += beinfo[constants.BE_MEMORY]
13108 # compute memory used by instances
13110 "total_memory": remote_info["memory_total"],
13111 "reserved_memory": remote_info["memory_dom0"],
13112 "free_memory": remote_info["memory_free"],
13113 "total_disk": remote_info["vg_size"],
13114 "free_disk": remote_info["vg_free"],
13115 "total_cpus": remote_info["cpu_total"],
13116 "i_pri_memory": i_p_mem,
13117 "i_pri_up_memory": i_p_up_mem,
13119 pnr_dyn.update(node_results[nname])
13120 node_results[nname] = pnr_dyn
13122 return node_results
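# Worked example of the memory accounting above (numbers invented): a primary
# instance with BE_MEMORY=512 that the hypervisor reports as using only
# 300 MiB lowers the node's "free_memory" by max(0, 512 - 300) = 212, so that
# memory promised to an instance but not currently used still counts as
# committed.  "i_pri_memory" sums BE_MEMORY over all primary instances, while
# "i_pri_up_memory" only counts the ones that are administratively up.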
13125 def _ComputeInstanceData(cluster_info, i_list):
13126 """Compute global instance data.
13130 for iinfo, beinfo in i_list:
13132 for nic in iinfo.nics:
13133 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13137 "mode": filled_params[constants.NIC_MODE],
13138 "link": filled_params[constants.NIC_LINK],
13140 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13141 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13142 nic_data.append(nic_dict)
13144 "tags": list(iinfo.GetTags()),
13145 "admin_up": iinfo.admin_up,
13146 "vcpus": beinfo[constants.BE_VCPUS],
13147 "memory": beinfo[constants.BE_MEMORY],
13149 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13151 "disks": [{constants.IDISK_SIZE: dsk.size,
13152 constants.IDISK_MODE: dsk.mode}
13153 for dsk in iinfo.disks],
13154 "disk_template": iinfo.disk_template,
13155 "hypervisor": iinfo.hypervisor,
13157 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13159 instance_data[iinfo.name] = pir
13161 return instance_data
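# Abridged example of one entry in the returned dict (values invented; the NIC
# entries are the nic_data built above):
#
#   {"tags": [], "admin_up": True, "vcpus": 2, "memory": 512,
#    "nics": [{"mode": "bridged", "link": "xen-br0", "bridge": "xen-br0"}],
#    "nodes": ["node1.example.com", "node2.example.com"],
#    "disks": [{"size": 10240, "mode": "rw"}],
#    "disk_template": "drbd", "hypervisor": "xen-pvm",
#    "disk_space_total": 20736}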
13163 def _AddNewInstance(self):
13164 """Add new instance data to allocator structure.
13166 This, in combination with _ComputeClusterData, will create the
13167 correct structure needed as input for the allocator.
13169 The checks for the completeness of the opcode must have already been done.
13173 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13175 if self.disk_template in constants.DTS_INT_MIRROR:
13176 self.required_nodes = 2
13178 self.required_nodes = 1
13182 "disk_template": self.disk_template,
13185 "vcpus": self.vcpus,
13186 "memory": self.memory,
13187 "disks": self.disks,
13188 "disk_space_total": disk_space,
13190 "required_nodes": self.required_nodes,
13191 "hypervisor": self.hypervisor,
13196 def _AddRelocateInstance(self):
13197 """Add relocate instance data to allocator structure.
13199 This, in combination with _ComputeClusterData, will create the
13200 correct structure needed as input for the allocator.
13202 The checks for the completeness of the opcode must have already been done.
13206 instance = self.cfg.GetInstanceInfo(self.name)
13207 if instance is None:
13208 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13209 " IAllocator" % self.name)
13211 if instance.disk_template not in constants.DTS_MIRRORED:
13212 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13213 errors.ECODE_INVAL)
13215 if instance.disk_template in constants.DTS_INT_MIRROR and \
13216 len(instance.secondary_nodes) != 1:
13217 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
13218 errors.ECODE_STATE)
13220 self.required_nodes = 1
13221 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13222 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13226 "disk_space_total": disk_space,
13227 "required_nodes": self.required_nodes,
13228 "relocate_from": self.relocate_from,
13232 def _AddNodeEvacuate(self):
13233 """Get data for node-evacuate requests.
13237 "instances": self.instances,
13238 "evac_mode": self.evac_mode,
13241 def _AddChangeGroup(self):
13242 """Get data for change-group requests.
13246 "instances": self.instances,
13247 "target_groups": self.target_groups,
13250 def _BuildInputData(self, fn, keydata):
13251 """Build input data structures.
13254 self._ComputeClusterData()
13257 request["type"] = self.mode
13258 for keyname, keytype in keydata:
13259 if keyname not in request:
13260 raise errors.ProgrammerError("Request parameter %s is missing" %
13262 val = request[keyname]
13263 if not keytype(val):
13264 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13265 " validation, value %s, expected"
13266 " type %s" % (keyname, val, keytype))
13267 self.in_data["request"] = request
13269 self.in_text = serializer.Dump(self.in_data)
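# The serialized request handed to the iallocator script therefore looks
# roughly like this (heavily abridged, values invented):
#
#   {"version": 2,
#    "cluster_name": "cluster.example.com",
#    "cluster_tags": [],
#    "enabled_hypervisors": ["xen-pvm"],
#    "nodegroups": {...}, "nodes": {...}, "instances": {...},
#    "request": {"type": "allocate", "name": "inst1.example.com",
#                "memory": 512, "disks": [...], ...}}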
13271 _STRING_LIST = ht.TListOf(ht.TString)
13272 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13273 # pylint: disable=E1101
13274 # Class '...' has no 'OP_ID' member
13275 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13276 opcodes.OpInstanceMigrate.OP_ID,
13277 opcodes.OpInstanceReplaceDisks.OP_ID])
13281 ht.TListOf(ht.TAnd(ht.TIsLength(3),
13282 ht.TItems([ht.TNonEmptyString,
13283 ht.TNonEmptyString,
13284 ht.TListOf(ht.TNonEmptyString),
13287 ht.TListOf(ht.TAnd(ht.TIsLength(2),
13288 ht.TItems([ht.TNonEmptyString,
13291 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13292 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13295 constants.IALLOCATOR_MODE_ALLOC:
13298 ("name", ht.TString),
13299 ("memory", ht.TInt),
13300 ("disks", ht.TListOf(ht.TDict)),
13301 ("disk_template", ht.TString),
13302 ("os", ht.TString),
13303 ("tags", _STRING_LIST),
13304 ("nics", ht.TListOf(ht.TDict)),
13305 ("vcpus", ht.TInt),
13306 ("hypervisor", ht.TString),
13308 constants.IALLOCATOR_MODE_RELOC:
13309 (_AddRelocateInstance,
13310 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13312 constants.IALLOCATOR_MODE_NODE_EVAC:
13313 (_AddNodeEvacuate, [
13314 ("instances", _STRING_LIST),
13315 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13317 constants.IALLOCATOR_MODE_CHG_GROUP:
13318 (_AddChangeGroup, [
13319 ("instances", _STRING_LIST),
13320 ("target_groups", _STRING_LIST),
13324 def Run(self, name, validate=True, call_fn=None):
13325 """Run an instance allocator and return the results.
13328 if call_fn is None:
13329 call_fn = self.rpc.call_iallocator_runner
13331 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13332 result.Raise("Failure while running the iallocator script")
13334 self.out_text = result.payload
13336 self._ValidateResult()
13338 def _ValidateResult(self):
13339 """Process the allocator results.
13341 This will process and, if successful, save the result in
13342 self.out_data and the other result attributes.
13346 rdict = serializer.Load(self.out_text)
13347 except Exception, err:
13348 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13350 if not isinstance(rdict, dict):
13351 raise errors.OpExecError("Can't parse iallocator results: not a dict")
13353 # TODO: remove backwards compatibility in later versions
13354 if "nodes" in rdict and "result" not in rdict:
13355 rdict["result"] = rdict["nodes"]
13358 for key in "success", "info", "result":
13359 if key not in rdict:
13360 raise errors.OpExecError("Can't parse iallocator results:"
13361 " missing key '%s'" % key)
13362 setattr(self, key, rdict[key])
13364 if not self._result_check(self.result):
13365 raise errors.OpExecError("Iallocator returned invalid result,"
13366 " expected %s, got %s" %
13367 (self._result_check, self.result),
13368 errors.ECODE_INVAL)
13370 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13371 assert self.relocate_from is not None
13372 assert self.required_nodes == 1
13374 node2group = dict((name, ndata["group"])
13375 for (name, ndata) in self.in_data["nodes"].items())
13377 fn = compat.partial(self._NodesToGroups, node2group,
13378 self.in_data["nodegroups"])
13380 instance = self.cfg.GetInstanceInfo(self.name)
13381 request_groups = fn(self.relocate_from + [instance.primary_node])
13382 result_groups = fn(rdict["result"] + [instance.primary_node])
13384 if self.success and not set(result_groups).issubset(request_groups):
13385 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13386 " differ from original groups (%s)" %
13387 (utils.CommaJoin(result_groups),
13388 utils.CommaJoin(request_groups)))
13390 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13391 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13393 self.out_data = rdict
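# A minimal well-formed allocator reply for an allocation request would
# therefore look like (illustrative only):
#
#   {"success": True, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
#
# Replies from older allocators that use "nodes" instead of "result" are still
# accepted via the backwards-compatibility shim above.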
13396 def _NodesToGroups(node2group, groups, nodes):
13397 """Returns a list of unique group names for a list of nodes.
13399 @type node2group: dict
13400 @param node2group: Map from node name to group UUID
13402 @param groups: Group information
13404 @param nodes: Node names
13411 group_uuid = node2group[node]
13413 # Ignore unknown node
13417 group = groups[group_uuid]
13419 # Can't find group, let's use UUID
13420 group_name = group_uuid
13422 group_name = group["name"]
13424 result.add(group_name)
13426 return sorted(result)
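# Worked example (invented data): with
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}
# the call _NodesToGroups(node2group, groups, ["node1", "node2", "ghost"])
# returns ["default", "uuid-b"]: unknown nodes are ignored, and groups whose
# UUID is not in the groups mapping fall back to the UUID itself.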
13429 class LUTestAllocator(NoHooksLU):
13430 """Run allocator tests.
13432 This LU runs the allocator tests
13435 def CheckPrereq(self):
13436 """Check prerequisites.
13438 This checks the opcode parameters depending on the direction and mode of the test.
13441 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13442 for attr in ["memory", "disks", "disk_template",
13443 "os", "tags", "nics", "vcpus"]:
13444 if not hasattr(self.op, attr):
13445 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13446 attr, errors.ECODE_INVAL)
13447 iname = self.cfg.ExpandInstanceName(self.op.name)
13448 if iname is not None:
13449 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13450 iname, errors.ECODE_EXISTS)
13451 if not isinstance(self.op.nics, list):
13452 raise errors.OpPrereqError("Invalid parameter 'nics'",
13453 errors.ECODE_INVAL)
13454 if not isinstance(self.op.disks, list):
13455 raise errors.OpPrereqError("Invalid parameter 'disks'",
13456 errors.ECODE_INVAL)
13457 for row in self.op.disks:
13458 if (not isinstance(row, dict) or
13459 constants.IDISK_SIZE not in row or
13460 not isinstance(row[constants.IDISK_SIZE], int) or
13461 constants.IDISK_MODE not in row or
13462 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13463 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13464 " parameter", errors.ECODE_INVAL)
13465 if self.op.hypervisor is None:
13466 self.op.hypervisor = self.cfg.GetHypervisorType()
13467 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13468 fname = _ExpandInstanceName(self.cfg, self.op.name)
13469 self.op.name = fname
13470 self.relocate_from = \
13471 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13472 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13473 constants.IALLOCATOR_MODE_NODE_EVAC):
13474 if not self.op.instances:
13475 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13476 self.op.instances = _GetWantedInstances(self, self.op.instances)
13478 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13479 self.op.mode, errors.ECODE_INVAL)
13481 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13482 if self.op.allocator is None:
13483 raise errors.OpPrereqError("Missing allocator name",
13484 errors.ECODE_INVAL)
13485 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13486 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13487 self.op.direction, errors.ECODE_INVAL)
13489 def Exec(self, feedback_fn):
13490 """Run the allocator test.
13493 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13494 ial = IAllocator(self.cfg, self.rpc,
13497 memory=self.op.memory,
13498 disks=self.op.disks,
13499 disk_template=self.op.disk_template,
13503 vcpus=self.op.vcpus,
13504 hypervisor=self.op.hypervisor,
13506 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13507 ial = IAllocator(self.cfg, self.rpc,
13510 relocate_from=list(self.relocate_from),
13512 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13513 ial = IAllocator(self.cfg, self.rpc,
13515 instances=self.op.instances,
13516 target_groups=self.op.target_groups)
13517 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13518 ial = IAllocator(self.cfg, self.rpc,
13520 instances=self.op.instances,
13521 evac_mode=self.op.evac_mode)
13523 raise errors.ProgrammerError("Uncaught mode %s in"
13524 " LUTestAllocator.Exec", self.op.mode)
13526 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13527 result = ial.in_text
13529 ial.Run(self.op.allocator, validate=False)
13530 result = ial.out_text
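# Rough usage sketch (assuming the corresponding opcode class is
# opcodes.OpTestAllocator; values invented): submitting
#
#   opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
#                           mode=constants.IALLOCATOR_MODE_RELOC,
#                           name="inst1.example.com", allocator=None)
#
# only returns the generated request text, while IALLOCATOR_DIR_OUT runs the
# allocator named by "allocator" and returns its raw output.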
13534 #: Query type implementations
13536 constants.QR_INSTANCE: _InstanceQuery,
13537 constants.QR_NODE: _NodeQuery,
13538 constants.QR_GROUP: _GroupQuery,
13539 constants.QR_OS: _OsQuery,
13542 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13545 def _GetQueryImplementation(name):
13546 """Returns the implementation for a query type.
13548 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13552 return _QUERY_IMPL[name]
13554 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13555 errors.ECODE_INVAL)
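# Example (illustrative): query LUs presumably resolve their helper class via
# this function, e.g.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#
# while an unknown resource name raises OpPrereqError with ECODE_INVAL.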