4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80 @type jobs: list of lists of L{opcode.OpCode}
81 @param jobs: A list of lists of opcode objects
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # Defaults; subclasses that run hooks must redefine HPATH and HTYPE
  HPATH = None
  HTYPE = None
  REQ_BGL = True
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable=C0103
137 self.LogStep = processor.LogStep # pylint: disable=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
211 # The implementation of this method is mandatory only if the new LU is
212 # concurrent, so that old LUs don't need to be changed all at the same
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
242 it should be idempotent - no cluster or system changes are
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
264 errors.OpExecError for failures that are somewhat dealt with in
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError
  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
316 @param feedback_fn: function used send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
323 # API must be kept, thus we ignore the unused argument and could
324 # be a function warnings
325 # pylint: disable=W0613,R0201
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to just lock some instance's nodes, or
359 to just lock primaries or secondary nodes, if needed.
361 If should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
373 # TODO: check if we're really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # No hook path/type: the hooks machinery skips LUs with HPATH None
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
  def CheckPrereq(self):
    """Check prerequisites for this tasklets.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()
  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()
  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()
  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)
  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561 """Checks if the owned node groups are still correct for an instance.
563 @type cfg: L{config.ConfigWriter}
564 @param cfg: The cluster configuration
565 @type instance_name: string
566 @param instance_name: Instance name
567 @type owned_groups: set or frozenset
568 @param owned_groups: List of currently owned node groups
571 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
573 if not owned_groups.issuperset(inst_groups):
574 raise errors.OpPrereqError("Instance %s's node groups changed since"
575 " locks were acquired, current groups are"
576 " are '%s', owning groups '%s'; retry the"
579 utils.CommaJoin(inst_groups),
580 utils.CommaJoin(owned_groups)),
586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587 """Checks if the instances in a node group are still correct.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type group_uuid: string
592 @param group_uuid: Node group UUID
593 @type owned_instances: set or frozenset
594 @param owned_instances: List of currently owned instances
597 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598 if owned_instances != wanted_instances:
599 raise errors.OpPrereqError("Instances in node group '%s' changed since"
600 " locks were acquired, wanted '%s', have '%s';"
601 " retry the operation" %
603 utils.CommaJoin(wanted_instances),
604 utils.CommaJoin(owned_instances)),
607 return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  # An explicit (non-empty) list means those names; otherwise all nodes
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
661 def _GetUpdatedParams(old_params, update_dict,
662 use_default=True, use_none=False):
663 """Return the new version of a parameter dictionary.
665 @type old_params: dict
666 @param old_params: old parameters
667 @type update_dict: dict
668 @param update_dict: dict containing new parameter values, or
669 constants.VALUE_DEFAULT to reset the parameter to its default
671 @param use_default: boolean
672 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
673 values as 'to be deleted' values
674 @param use_none: boolean
675 @type use_none: whether to recognise C{None} values as 'to be
678 @return: the new parameter dictionary
681 params_copy = copy.deepcopy(old_params)
682 for key, val in update_dict.iteritems():
683 if ((use_default and val == constants.VALUE_DEFAULT) or
684 (use_none and val is None)):
690 params_copy[key] = val
694 def _ReleaseLocks(lu, level, names=None, keep=None):
695 """Releases locks owned by an LU.
697 @type lu: L{LogicalUnit}
698 @param level: Lock level
699 @type names: list or None
700 @param names: Names of locks to release
701 @type keep: list or None
702 @param keep: Names of locks to retain
705 assert not (keep is not None and names is not None), \
706 "Only one of the 'names' and the 'keep' parameters can be given"
708 if names is not None:
709 should_release = names.__contains__
711 should_release = lambda name: name not in keep
713 should_release = None
719 # Determine which locks to release
720 for name in lu.owned_locks(level):
721 if should_release(name):
726 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
728 # Release just some locks
729 lu.glm.release(level, names=release)
731 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
734 lu.glm.release(level)
736 assert not lu.glm.is_owned(level), "No locks should be owned"
739 def _MapInstanceDisksToNodes(instances):
740 """Creates a map from (node, volume) to instance name.
742 @type instances: list of L{objects.Instance}
743 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
746 return dict(((node, vol), inst.name)
747 for inst in instances
748 for (node, vols) in inst.MapLVsByNode().items()
752 def _RunPostHook(lu, node_name):
753 """Runs the post-hook for an opcode on a single node.
756 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
758 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
760 # pylint: disable=W0702
761 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @type selected: list
  @param selected: the requested output field names
  @raise errors.OpPrereqError: if any requested field is unknown

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  @param params: hypervisor parameter names to validate
  @raise errors.OpPrereqError: if any parameter is a global one

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
798 def _CheckNodeOnline(lu, node, msg=None):
799 """Ensure that a given node is online.
801 @param lu: the LU on behalf of which we make the check
802 @param node: the node to check
803 @param msg: if passed, should be a message to replace the default one
804 @raise errors.OpPrereqError: if the node is offline
808 msg = "Can't use offline node"
809 if lu.cfg.GetNodeInfo(node).offline:
810 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
813 def _CheckNodeNotDrained(lu, node):
814 """Ensure that a given node is not drained.
816 @param lu: the LU on behalf of which we make the check
817 @param node: the node to check
818 @raise errors.OpPrereqError: if the node is drained
821 if lu.cfg.GetNodeInfo(node).drained:
822 raise errors.OpPrereqError("Can't use drained node %s" % node,
826 def _CheckNodeVmCapable(lu, node):
827 """Ensure that a given node is vm capable.
829 @param lu: the LU on behalf of which we make the check
830 @param node: the node to check
831 @raise errors.OpPrereqError: if the node is not vm capable
834 if not lu.cfg.GetNodeInfo(node).vm_capable:
835 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  @return: the one-line content of the cluster domain secret file

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running.

  Checks both the configured state (admin_up) and the live state as
  reported by the primary node's hypervisor.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type instance: L{objects.Instance}
  @param instance: the instance to check
  @type reason: string
  @param reason: text appended to the error messages
  @raise errors.OpPrereqError: if the instance is marked up or is running

  """
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
908 def _ExpandItemName(fn, name, kind):
909 """Expand an item name.
911 @param fn: the function to use for expansion
912 @param name: requested item name
913 @param kind: text description ('Node' or 'Instance')
914 @return: the resolved (full) name
915 @raise errors.OpPrereqError: if the item is not found
919 if full_name is None:
920 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Resolve a (possibly shortened) node name via L{_ExpandItemName}."""
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Resolve a (possibly shortened) instance name via L{_ExpandItemName}."""
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936 memory, vcpus, nics, disk_template, disks,
937 bep, hvp, hypervisor_name, tags):
938 """Builds instance related env variables for hooks
940 This builds the hook environment from individual variables.
943 @param name: the name of the instance
944 @type primary_node: string
945 @param primary_node: the name of the instance's primary node
946 @type secondary_nodes: list
947 @param secondary_nodes: list of secondary nodes as strings
948 @type os_type: string
949 @param os_type: the name of the instance's OS
950 @type status: boolean
951 @param status: the should_run status of the instance
953 @param memory: the memory size of the instance
955 @param vcpus: the count of VCPUs the instance has
957 @param nics: list of tuples (ip, mac, mode, link) representing
958 the NICs the instance has
959 @type disk_template: string
960 @param disk_template: the disk template of the instance
962 @param disks: the list of (size, mode) pairs
964 @param bep: the backend parameters for the instance
966 @param hvp: the hypervisor parameters for the instance
967 @type hypervisor_name: string
968 @param hypervisor_name: the hypervisor for the instance
970 @param tags: list of instance tags as strings
972 @return: the hook environment for this instance
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
1030 def _NICListToTuple(lu, nics):
1031 """Build a list of nic information tuples.
1033 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034 value in LUInstanceQueryData.
1036 @type lu: L{LogicalUnit}
1037 @param lu: the logical unit on whose behalf we execute
1038 @type nics: list of L{objects.NIC}
1039 @param nics: list of nics to convert to hooks tuples
1043 cluster = lu.cfg.GetClusterInfo()
1047 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048 mode = filled_params[constants.NIC_MODE]
1049 link = filled_params[constants.NIC_LINK]
1050 hooks_nics.append((ip, mac, mode, link))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1093 def _AdjustCandidatePool(lu, exceptions):
1094 """Adjust the candidate pool after node operations.
1097 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1099 lu.LogInfo("Promoted nodes to master candidate role: %s",
1100 utils.CommaJoin(node.name for node in mod_list))
1101 for name in mod_list:
1102 lu.context.ReaddNode(name)
1103 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1105 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110 """Decide whether I should promote myself as a master candidate.
1113 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115 # the new node will increase mc_max with one, so:
1116 mc_should = min(mc_should + 1, cp_size)
1117 return mc_now < mc_should
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  Raises via C{result.Raise} (as a prerequisite error) if the remote node
  reports a missing bridge.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  # only issue the RPC if there is at least one bridged NIC to check
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param node: node to check on; defaults to the instance's primary node

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    # the OS declares no variants, so passing one is an error
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  """Return the instances from the configuration matching a predicate.

  @param fn: predicate called with each L{objects.Instance}

  """
  all_instances = cfg.GetAllInstancesInfo().values()
  return list(filter(fn, all_instances))
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param node_name: name of the node to report instances for

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param node_name: name of the node to report instances for

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param node_name: name of the node to report instances for

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param storage_type: one of the C{constants.ST_*} storage type constants
  @rtype: list
  @return: the extra arguments to pass to the storage backend

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1210 for dev in instance.disks:
1211 cfg.SetDiskID(dev, node_name)
1213 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214 result.Raise("Failed to get disk status from node %s" % node_name,
1215 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1217 for idx, bdev_status in enumerate(result.payload):
1218 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # hooks run only on the master node
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master
1364 def _VerifyCertificate(filename):
1365 """Verifies a certificate for L{LUClusterVerifyConfig}.
1367 @type filename: string
1368 @param filename: Path to PEM file
1372 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373 utils.ReadFile(filename))
1374 except Exception, err: # pylint: disable=W0703
1375 return (LUClusterVerifyConfig.ETYPE_ERROR,
1376 "Failed to load X509 certificate %s: %s" % (filename, err))
1379 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380 constants.SSL_CERT_EXPIRATION_ERROR)
1383 fnamemsg = "While verifying %s: %s" % (filename, msg)
1388 return (None, fnamemsg)
1389 elif errcode == utils.CERT_WARNING:
1390 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391 elif errcode == utils.CERT_ERROR:
1392 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1394 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @param instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Submit one verification job per node group (plus config check).

    """
    jobs = []

    if self.op.group_name:
      # verifying a single group only
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # only the group verification opcode accepts skip_checks
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555 """Verifies the cluster config.
1560 def _VerifyHVP(self, hvp_data):
1561 """Verifies locally the syntax of the hypervisor parameters.
1564 for item, hv_name, hv_params in hvp_data:
1565 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1568 hv_class = hypervisor.GetHypervisor(hv_name)
1569 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570 hv_class.CheckParameterSyntax(hv_params)
1571 except errors.GenericError, err:
1572 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1574 def ExpandNames(self):
1575 # Information can be safely retrieved as the BGL is acquired in exclusive
1577 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579 self.all_node_info = self.cfg.GetAllNodesInfo()
1580 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581 self.needed_locks = {}
1583 def Exec(self, feedback_fn):
1584 """Verify integrity of cluster, performing various test on nodes.
1588 self._feedback_fn = feedback_fn
1590 feedback_fn("* Verifying cluster config")
1592 for msg in self.cfg.VerifyConfig():
1593 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1595 feedback_fn("* Verifying cluster certificate files")
1597 for cert_filename in constants.ALL_CERT_FILES:
1598 (errcode, msg) = _VerifyCertificate(cert_filename)
1599 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1601 feedback_fn("* Verifying hypervisor parameters")
1603 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604 self.all_inst_info.values()))
1606 feedback_fn("* Verifying all nodes belong to an existing group")
1608 # We do this verification here because, should this bogus circumstance
1609 # occur, it would never be caught by VerifyGroup, which only acts on
1610 # nodes/instances reachable from existing node groups.
1612 dangling_nodes = set(node.name for node in self.all_node_info.values()
1613 if node.group not in self.all_group_info)
1615 dangling_instances = {}
1616 no_node_instances = []
1618 for inst in self.all_inst_info.values():
1619 if inst.primary_node in dangling_nodes:
1620 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621 elif inst.primary_node not in self.all_node_info:
1622 no_node_instances.append(inst.name)
1627 utils.CommaJoin(dangling_instances.get(node.name,
1629 for node in dangling_nodes]
1631 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632 "the following nodes (and their instances) belong to a non"
1633 " existing group: %s", utils.CommaJoin(pretty_dangling))
1635 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636 "the following instances have a non-existing primary-node:"
1637 " %s", utils.CommaJoin(no_node_instances))
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643 """Verifies the status of a node group.
1646 HPATH = "cluster-verify"
1647 HTYPE = constants.HTYPE_CLUSTER
1650 _HOOKS_INDENT_RE = re.compile("^", re.M)
1652 class NodeImage(object):
1653 """A class representing the logical and physical status of a node.
1656 @ivar name: the node name to which this object refers
1657 @ivar volumes: a structure as returned from
1658 L{ganeti.backend.GetVolumeList} (runtime)
1659 @ivar instances: a list of running instances (runtime)
1660 @ivar pinst: list of configured primary instances (config)
1661 @ivar sinst: list of configured secondary instances (config)
1662 @ivar sbp: dictionary of {primary-node: list of instances} for all
1663 instances for which this node is secondary (config)
1664 @ivar mfree: free memory, as reported by hypervisor (runtime)
1665 @ivar dfree: free disk, as reported by the node (runtime)
1666 @ivar offline: the offline status (config)
1667 @type rpc_fail: boolean
1668 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1669 not whether the individual keys were correct) (runtime)
1670 @type lvm_fail: boolean
1671 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672 @type hyp_fail: boolean
1673 @ivar hyp_fail: whether the RPC call didn't return the instance list
1674 @type ghost: boolean
1675 @ivar ghost: whether this is a known node or not (config)
1676 @type os_fail: boolean
1677 @ivar os_fail: whether the RPC call didn't return valid OS data
1679 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680 @type vm_capable: boolean
1681 @ivar vm_capable: whether the node can host instances
1684 def __init__(self, offline=False, name=None, vm_capable=True):
1693 self.offline = offline
1694 self.vm_capable = vm_capable
1695 self.rpc_fail = False
1696 self.lvm_fail = False
1697 self.hyp_fail = False
1699 self.os_fail = False
1702 def ExpandNames(self):
1703 # This raises errors.OpPrereqError on its own:
1704 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1706 # Get instances in node group; this is unsafe and needs verification later
1707 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1709 self.needed_locks = {
1710 locking.LEVEL_INSTANCE: inst_names,
1711 locking.LEVEL_NODEGROUP: [self.group_uuid],
1712 locking.LEVEL_NODE: [],
1715 self.share_locks = _ShareAll()
1717 def DeclareLocks(self, level):
1718 if level == locking.LEVEL_NODE:
1719 # Get members of node group; this is unsafe and needs verification later
1720 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1722 all_inst_info = self.cfg.GetAllInstancesInfo()
1724 # In Exec(), we warn about mirrored instances that have primary and
1725 # secondary living in separate node groups. To fully verify that
1726 # volumes for these instances are healthy, we will need to do an
1727 # extra call to their secondaries. We ensure here those nodes will
1729 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730 # Important: access only the instances whose lock is owned
1731 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732 nodes.update(all_inst_info[inst].secondary_nodes)
1734 self.needed_locks[locking.LEVEL_NODE] = nodes
1736 def CheckPrereq(self):
1737 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1740 group_nodes = set(self.group_info.members)
1741 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1744 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1746 unlocked_instances = \
1747 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1750 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751 utils.CommaJoin(unlocked_nodes))
1753 if unlocked_instances:
1754 raise errors.OpPrereqError("Missing lock for instances: %s" %
1755 utils.CommaJoin(unlocked_instances))
1757 self.all_node_info = self.cfg.GetAllNodesInfo()
1758 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1760 self.my_node_names = utils.NiceSort(group_nodes)
1761 self.my_inst_names = utils.NiceSort(group_instances)
1763 self.my_node_info = dict((name, self.all_node_info[name])
1764 for name in self.my_node_names)
1766 self.my_inst_info = dict((name, self.all_inst_info[name])
1767 for name in self.my_inst_names)
1769 # We detect here the nodes that will need the extra RPC calls for verifying
1770 # split LV volumes; they should be locked.
1771 extra_lv_nodes = set()
1773 for inst in self.my_inst_info.values():
1774 if inst.disk_template in constants.DTS_INT_MIRROR:
1775 group = self.my_node_info[inst.primary_node].group
1776 for nname in inst.secondary_nodes:
1777 if self.all_node_info[nname].group != group:
1778 extra_lv_nodes.add(nname)
1780 unlocked_lv_nodes = \
1781 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1783 if unlocked_lv_nodes:
1784 raise errors.OpPrereqError("these nodes could be locked: %s" %
1785 utils.CommaJoin(unlocked_lv_nodes))
1786 self.extra_lv_nodes = list(extra_lv_nodes)
1788 def _VerifyNode(self, ninfo, nresult):
1789 """Perform some basic validation on data returned from a node.
1791 - check the result data structure is well formed and has all the
1793 - check ganeti version
1795 @type ninfo: L{objects.Node}
1796 @param ninfo: the node to check
1797 @param nresult: the results from the node
1799 @return: whether overall this call was successful (and we can expect
1800 reasonable values in the respose)
1804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1806 # main result, nresult should be a non-empty dict
1807 test = not nresult or not isinstance(nresult, dict)
1808 _ErrorIf(test, self.ENODERPC, node,
1809 "unable to verify node: no data returned")
1813 # compares ganeti version
1814 local_version = constants.PROTOCOL_VERSION
1815 remote_version = nresult.get("version", None)
1816 test = not (remote_version and
1817 isinstance(remote_version, (list, tuple)) and
1818 len(remote_version) == 2)
1819 _ErrorIf(test, self.ENODERPC, node,
1820 "connection to node returned invalid data")
1824 test = local_version != remote_version[0]
1825 _ErrorIf(test, self.ENODEVERSION, node,
1826 "incompatible protocol versions: master %s,"
1827 " node %s", local_version, remote_version[0])
1831 # node seems compatible, we can actually try to look into its results
1833 # full package version
1834 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835 self.ENODEVERSION, node,
1836 "software version mismatch: master %s, node %s",
1837 constants.RELEASE_VERSION, remote_version[1],
1838 code=self.ETYPE_WARNING)
1840 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841 if ninfo.vm_capable and isinstance(hyp_result, dict):
1842 for hv_name, hv_result in hyp_result.iteritems():
1843 test = hv_result is not None
1844 _ErrorIf(test, self.ENODEHV, node,
1845 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1847 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848 if ninfo.vm_capable and isinstance(hvp_result, list):
1849 for item, hv_name, hv_result in hvp_result:
1850 _ErrorIf(True, self.ENODEHV, node,
1851 "hypervisor %s parameter verify failure (source %s): %s",
1852 hv_name, item, hv_result)
1854 test = nresult.get(constants.NV_NODESETUP,
1855 ["Missing NODESETUP results"])
1856 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1861 def _VerifyNodeTime(self, ninfo, nresult,
1862 nvinfo_starttime, nvinfo_endtime):
1863 """Check the node time.
1865 @type ninfo: L{objects.Node}
1866 @param ninfo: the node to check
1867 @param nresult: the remote results for the node
1868 @param nvinfo_starttime: the start time of the RPC call
1869 @param nvinfo_endtime: the end time of the RPC call
1873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1875 ntime = nresult.get(constants.NV_TIME, None)
1877 ntime_merged = utils.MergeTime(ntime)
1878 except (ValueError, TypeError):
1879 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1882 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1889 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890 "Node time diverges by at least %s from master node time",
1893 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894 """Check the node LVM results.
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param vg_name: the configured VG name
1906 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1908 # checks vg existence and size > 20G
1909 vglist = nresult.get(constants.NV_VGLIST, None)
1911 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1913 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914 constants.MIN_VG_SIZE)
1915 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1918 pvlist = nresult.get(constants.NV_PVLIST, None)
1919 test = pvlist is None
1920 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1922 # check that ':' is not present in PV names, since it's a
1923 # special character for lvcreate (denotes the range of PEs to
1925 for _, pvname, owner_vg in pvlist:
1926 test = ":" in pvname
1927 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928 " '%s' of VG '%s'", pvname, owner_vg)
1930 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931 """Check the node bridges.
1933 @type ninfo: L{objects.Node}
1934 @param ninfo: the node to check
1935 @param nresult: the remote results for the node
1936 @param bridges: the expected list of bridges
1943 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1945 missing = nresult.get(constants.NV_BRIDGES, None)
1946 test = not isinstance(missing, list)
1947 _ErrorIf(test, self.ENODENET, node,
1948 "did not return valid bridge information")
1950 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951 utils.CommaJoin(sorted(missing)))
1953 def _VerifyNodeNetwork(self, ninfo, nresult):
1954 """Check the node network connectivity results.
1956 @type ninfo: L{objects.Node}
1957 @param ninfo: the node to check
1958 @param nresult: the remote results for the node
1962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1964 test = constants.NV_NODELIST not in nresult
1965 _ErrorIf(test, self.ENODESSH, node,
1966 "node hasn't returned node ssh connectivity data")
1968 if nresult[constants.NV_NODELIST]:
1969 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970 _ErrorIf(True, self.ENODESSH, node,
1971 "ssh communication with node '%s': %s", a_node, a_msg)
1973 test = constants.NV_NODENETTEST not in nresult
1974 _ErrorIf(test, self.ENODENET, node,
1975 "node hasn't returned node tcp connectivity data")
1977 if nresult[constants.NV_NODENETTEST]:
1978 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1980 _ErrorIf(True, self.ENODENET, node,
1981 "tcp communication with node '%s': %s",
1982 anode, nresult[constants.NV_NODENETTEST][anode])
1984 test = constants.NV_MASTERIP not in nresult
1985 _ErrorIf(test, self.ENODENET, node,
1986 "node hasn't returned node master IP reachability data")
1988 if not nresult[constants.NV_MASTERIP]:
1989 if node == self.master_node:
1990 msg = "the master node cannot reach the master IP (not configured?)"
1992 msg = "cannot reach the master IP"
1993 _ErrorIf(True, self.ENODENET, node, msg)
1995 def _VerifyInstance(self, instance, instanceconfig, node_image,
1997 """Verify an instance.
1999 This function checks to see if the required block devices are
2000 available on the instance's node.
2003 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004 node_current = instanceconfig.primary_node
2006 node_vol_should = {}
2007 instanceconfig.MapLVsByNode(node_vol_should)
2009 for node in node_vol_should:
2010 n_img = node_image[node]
2011 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012 # ignore missing volumes on offline or broken nodes
2014 for volume in node_vol_should[node]:
2015 test = volume not in n_img.volumes
2016 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017 "volume %s missing on node %s", volume, node)
2019 if instanceconfig.admin_up:
2020 pri_img = node_image[node_current]
2021 test = instance not in pri_img.instances and not pri_img.offline
2022 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023 "instance not running on its primary node %s",
2026 diskdata = [(nname, success, status, idx)
2027 for (nname, disks) in diskstatus.items()
2028 for idx, (success, status) in enumerate(disks)]
2030 for nname, success, bdev_status, idx in diskdata:
2031 # the 'ghost node' construction in Exec() ensures that we have a
2033 snode = node_image[nname]
2034 bad_snode = snode.ghost or snode.offline
2035 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036 self.EINSTANCEFAULTYDISK, instance,
2037 "couldn't retrieve status for disk/%s on %s: %s",
2038 idx, nname, bdev_status)
2039 _ErrorIf((instanceconfig.admin_up and success and
2040 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041 self.EINSTANCEFAULTYDISK, instance,
2042 "disk/%s on %s is faulty", idx, nname)
2044 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045 """Verify if there are any unknown volumes in the cluster.
2047 The .os, .swap and backup volumes are ignored. All other volumes are
2048 reported as unknown.
2050 @type reserved: L{ganeti.utils.FieldSet}
2051 @param reserved: a FieldSet of reserved volume names
2054 for node, n_img in node_image.items():
2055 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056 # skip non-healthy nodes
2058 for volume in n_img.volumes:
2059 test = ((node not in node_vol_should or
2060 volume not in node_vol_should[node]) and
2061 not reserved.Matches(volume))
2062 self._ErrorIf(test, self.ENODEORPHANLV, node,
2063 "volume %s is unknown", volume)
2065 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066 """Verify N+1 Memory Resilience.
2068 Check that if one single node dies we can still start all the
2069 instances it was primary for.
2072 cluster_info = self.cfg.GetClusterInfo()
2073 for node, n_img in node_image.items():
2074 # This code checks that every node which is now listed as
2075 # secondary has enough memory to host all instances it is
2076 # supposed to should a single other node in the cluster fail.
2077 # FIXME: not ready for failover to an arbitrary node
2078 # FIXME: does not support file-backed instances
2079 # WARNING: we currently take into account down instances as well
2080 # as up ones, considering that even if they're down someone
2081 # might want to start them even in the event of a node failure.
2083 # we're skipping offline nodes from the N+1 warning, since
2084 # most likely we don't have good memory infromation from them;
2085 # we already list instances living on such nodes, and that's
2088 for prinode, instances in n_img.sbp.items():
2090 for instance in instances:
2091 bep = cluster_info.FillBE(instance_cfg[instance])
2092 if bep[constants.BE_AUTO_BALANCE]:
2093 needed_mem += bep[constants.BE_MEMORY]
2094 test = n_img.mfree < needed_mem
2095 self._ErrorIf(test, self.ENODEN1, node,
2096 "not enough memory to accomodate instance failovers"
2097 " should node %s fail (%dMiB needed, %dMiB available)",
2098 prinode, needed_mem, n_img.mfree)
# NOTE(review): this listing has gaps (e.g. original lines 2099-2100, the
# @classmethod decorator, and several guards are not visible); comments below
# hedge where the supporting line is missing.
2101 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102 (files_all, files_all_opt, files_mc, files_vm)):
2103 """Verifies file checksums collected from all nodes.
2105 @param errorif: Callback for reporting errors
2106 @param nodeinfo: List of L{objects.Node} objects
2107 @param master_node: Name of master node
2108 @param all_nvinfo: RPC results
# The four file categories (all / all-optional / master-candidate / vm-capable)
# must be disjoint: the union's size must equal the sum of the sizes.
2111 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2112 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2113 "Found file listed in more than one file list"
2115 # Define functions determining which nodes to consider for a file
# (file-set, node-filter) pairs; a None filter presumably selects all nodes
# (the `fn is None` guard is not visible in this chunk -- TODO confirm).
2118 (files_all_opt, None),
2119 (files_mc, lambda node: (node.master_candidate or
2120 node.name == master_node)),
2121 (files_vm, lambda node: node.vm_capable),
2124 # Build mapping from filename to list of nodes which should have the file
2126 for (files, fn) in files2nodefn:
2128 filenodes = nodeinfo
2130 filenodes = filter(fn, nodeinfo)
2131 nodefiles.update((filename,
2132 frozenset(map(operator.attrgetter("name"), filenodes)))
2133 for filename in files)
2135 assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
# fileinfo: filename -> {checksum -> set of node names reporting that checksum}
2137 fileinfo = dict((filename, {}) for filename in nodefiles)
# Nodes with unusable results; excluded from missing/unexpected computations.
2138 ignore_nodes = set()
2140 for node in nodeinfo:
2142 ignore_nodes.add(node.name)
2145 nresult = all_nvinfo[node.name]
2147 if nresult.fail_msg or not nresult.payload:
2150 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2152 test = not (node_files and isinstance(node_files, dict))
2153 errorif(test, cls.ENODEFILECHECK, node.name,
2154 "Node did not return file checksum data")
2156 ignore_nodes.add(node.name)
2159 # Build per-checksum mapping from filename to nodes having it
2160 for (filename, checksum) in node_files.items():
2161 assert filename in nodefiles
2162 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2164 for (filename, checksums) in fileinfo.items():
# Checksums are digest strings; anything 10 chars or shorter is bogus.
2165 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2167 # Nodes having the file
2168 with_file = frozenset(node_name
2169 for nodes in fileinfo[filename].values()
2170 for node_name in nodes) - ignore_nodes
2172 expected_nodes = nodefiles[filename] - ignore_nodes
2174 # Nodes missing file
2175 missing_file = expected_nodes - with_file
2177 if filename in files_all_opt:
# Optional files must exist either on every expected node or on none.
2179 errorif(missing_file and missing_file != expected_nodes,
2180 cls.ECLUSTERFILECHECK, None,
2181 "File %s is optional, but it must exist on all or no"
2182 " nodes (not found on %s)",
2183 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2185 # Non-optional files
2186 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2187 "File %s is missing from node(s) %s", filename,
2188 utils.CommaJoin(utils.NiceSort(missing_file)))
2190 # Warn if a node has a file it shouldn't
2191 unexpected = with_file - expected_nodes
2193 cls.ECLUSTERFILECHECK, None,
2194 "File %s should not exist on node(s) %s",
2195 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2197 # See if there are multiple versions of the file
2198 test = len(checksums) > 1
2200 variants = ["variant %s on %s" %
2201 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2202 for (idx, (checksum, nodes)) in
2203 enumerate(sorted(checksums.items()))]
2207 errorif(test, cls.ECLUSTERFILECHECK, None,
2208 "File %s found with %s different checksums (%s)",
2209 filename, len(checksums), "; ".join(variants))
2211 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2213 """Verifies the node DRBD status.
2215 @type ninfo: L{objects.Node}
2216 @param ninfo: the node to check
2217 @param nresult: the remote results for the node
2218 @param instanceinfo: the dict of instances
2219 @param drbd_helper: the configured DRBD usermode helper
2220 @param drbd_map: the DRBD map as returned by
2221 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2225 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2228 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
# NOTE(review): PEP 8 prefers `helper_result is None` over `== None`.
2229 test = (helper_result == None)
2230 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231 "no drbd usermode helper returned")
# helper_result is a (status, payload) pair; payload is the helper path on
# success or an error message on failure.
2233 status, payload = helper_result
2235 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236 "drbd usermode helper check unsuccessful: %s", payload)
2237 test = status and (payload != drbd_helper)
2238 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2239 "wrong drbd usermode helper: %s", payload)
2241 # compute the DRBD minors
# node_drbd: minor number -> (instance name, whether it must be active)
2243 for minor, instance in drbd_map[node].items():
2244 test = instance not in instanceinfo
2245 _ErrorIf(test, self.ECLUSTERCFG, None,
2246 "ghost instance '%s' in temporary DRBD map", instance)
2247 # ghost instance should not be running, but otherwise we
2248 # don't give double warnings (both ghost instance and
2249 # unallocated minor in use)
2251 node_drbd[minor] = (instance, False)
2253 instance = instanceinfo[instance]
2254 node_drbd[minor] = (instance.name, instance.admin_up)
2256 # and now check them
2257 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2258 test = not isinstance(used_minors, (tuple, list))
2259 _ErrorIf(test, self.ENODEDRBD, node,
2260 "cannot parse drbd status file: %s", str(used_minors))
2262 # we cannot check drbd status
# Cross-check expected vs. actually used minors in both directions.
2265 for minor, (iname, must_exist) in node_drbd.items():
2266 test = minor not in used_minors and must_exist
2267 _ErrorIf(test, self.ENODEDRBD, node,
2268 "drbd minor %d of instance %s is not active", minor, iname)
2269 for minor in used_minors:
2270 test = minor not in node_drbd
2271 _ErrorIf(test, self.ENODEDRBD, node,
2272 "unallocated drbd minor %d is in use", minor)
2274 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2275 """Builds the node OS structures.
2277 @type ninfo: L{objects.Node}
2278 @param ninfo: the node to check
2279 @param nresult: the remote results for the node
2280 @param nimg: the node image object
2284 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2286 remote_os = nresult.get(constants.NV_OSLIST, None)
# The OS list must be a list of 7-element entries (name, path, status,
# diagnose message, variants, parameters, api versions).
2287 test = (not isinstance(remote_os, list) or
2288 not compat.all(isinstance(v, list) and len(v) == 7
2289 for v in remote_os))
2291 _ErrorIf(test, self.ENODEOS, node,
2292 "node hasn't returned valid OS data")
# os_dict: OS name -> list of (path, status, diagnose, variants, parameters,
# api versions) tuples; duplicate names accumulate multiple entries.
2301 for (name, os_path, status, diagnose,
2302 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2304 if name not in os_dict:
2307 # parameters is a list of lists instead of list of tuples due to
2308 # JSON lacking a real tuple type, fix it:
2309 parameters = [tuple(v) for v in parameters]
2310 os_dict[name].append((os_path, status, diagnose,
2311 set(variants), set(parameters), set(api_ver)))
2313 nimg.oslist = os_dict
2315 def _VerifyNodeOS(self, ninfo, nimg, base):
2316 """Verifies the node OS list.
2318 @type ninfo: L{objects.Node}
2319 @param ninfo: the node to check
2320 @param nimg: the node image object
2321 @param base: the 'template' node we match against (e.g. from the master)
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
# Pretty-print a list of (key, value) parameter pairs for error messages.
2329 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2330 for os_name, os_data in nimg.oslist.items():
2331 assert os_data, "Empty OS status for OS %s?!" % os_name
# Only the first entry is honoured; extra entries are reported below.
2332 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2333 _ErrorIf(not f_status, self.ENODEOS, node,
2334 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2335 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2336 "OS '%s' has multiple entries (first one shadows the rest): %s",
2337 os_name, utils.CommaJoin([v[0] for v in os_data]))
2338 # comparisons with the 'base' image
2339 test = os_name not in base.oslist
2340 _ErrorIf(test, self.ENODEOS, node,
2341 "Extra OS %s not present on reference node (%s)",
2345 assert base.oslist[os_name], "Base node has empty OS status?"
2346 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2348 # base OS is invalid, skipping
# Compare this node's OS attributes against the reference node's.
2350 for kind, a, b in [("API version", f_api, b_api),
2351 ("variants list", f_var, b_var),
2352 ("parameters", beautify_params(f_param),
2353 beautify_params(b_param))]:
2354 _ErrorIf(a != b, self.ENODEOS, node,
2355 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2356 kind, os_name, base.name,
2357 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2359 # check any missing OSes
2360 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2361 _ErrorIf(missing, self.ENODEOS, node,
2362 "OSes present on reference node %s but missing on this node: %s",
2363 base.name, utils.CommaJoin(missing))
2365 def _VerifyOob(self, ninfo, nresult):
2366 """Verifies out of band functionality of a node.
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the remote results for the node
2374 # We just have to verify the paths on master and/or master candidates
2375 # as the oob helper is invoked on the master
2376 if ((ninfo.master_candidate or ninfo.master_capable) and
2377 constants.NV_OOB_PATHS in nresult):
# A non-empty path_result is the error text itself; truthiness triggers the
# error report and the same value doubles as the message.
2378 for path_result in nresult[constants.NV_OOB_PATHS]:
2379 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2381 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2382 """Verifies and updates the node volume data.
2384 This function will update a L{NodeImage}'s internal structures
2385 with data from the remote call.
2387 @type ninfo: L{objects.Node}
2388 @param ninfo: the node to check
2389 @param nresult: the remote results for the node
2390 @param nimg: the node image object
2391 @param vg_name: the configured VG name
2395 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# Pessimistic default: assume LVM data collection failed until proven good.
2397 nimg.lvm_fail = True
2398 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
# A string payload carries the node-side error message; only a dict is a
# valid LV listing (branch heads for the success case are not visible here).
2401 elif isinstance(lvdata, basestring):
2402 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2403 utils.SafeEncode(lvdata))
2404 elif not isinstance(lvdata, dict):
2405 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2407 nimg.volumes = lvdata
2408 nimg.lvm_fail = False
2410 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2411 """Verifies and updates the node instance list.
2413 If the listing was successful, then updates this node's instance
2414 list. Otherwise, it marks the RPC call as failed for the instance
2417 @type ninfo: L{objects.Node}
2418 @param ninfo: the node to check
2419 @param nresult: the remote results for the node
2420 @param nimg: the node image object
2423 idata = nresult.get(constants.NV_INSTANCELIST, None)
# Anything but a list (including the None default) counts as a failed call.
2424 test = not isinstance(idata, list)
2425 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2426 " (instancelist): %s", utils.SafeEncode(str(idata)))
2428 nimg.hyp_fail = True
2430 nimg.instances = idata
2432 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2433 """Verifies and computes a node information map
2435 @type ninfo: L{objects.Node}
2436 @param ninfo: the node to check
2437 @param nresult: the remote results for the node
2438 @param nimg: the node image object
2439 @param vg_name: the configured VG name
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2445 # try to read free memory (from the hypervisor)
2446 hv_info = nresult.get(constants.NV_HVINFO, None)
2447 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2448 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
# nimg.mfree: free memory in MiB as reported by the hypervisor (presumably;
# the unit is implied by the N+1 messages elsewhere -- confirm).
2451 nimg.mfree = int(hv_info["memory_free"])
2452 except (ValueError, TypeError):
2453 _ErrorIf(True, self.ENODERPC, node,
2454 "node returned invalid nodeinfo, check hypervisor")
2456 # FIXME: devise a free space model for file based instances as well
2457 if vg_name is not None:
2458 test = (constants.NV_VGLIST not in nresult or
2459 vg_name not in nresult[constants.NV_VGLIST])
2460 _ErrorIf(test, self.ENODELVM, node,
2461 "node didn't return data for the volume group '%s'"
2462 " - it is either missing or broken", vg_name)
# nimg.dfree: free disk space in the configured volume group.
2465 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2466 except (ValueError, TypeError):
2467 _ErrorIf(True, self.ENODERPC, node,
2468 "node returned invalid LVM info, check LVM status")
2470 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2471 """Gets per-disk status information for all instances.
2473 @type nodelist: list of strings
2474 @param nodelist: Node names
2475 @type node_image: dict of (name, L{objects.Node})
2476 @param node_image: Node objects
2477 @type instanceinfo: dict of (name, L{objects.Instance})
2478 @param instanceinfo: Instance objects
2479 @rtype: {instance: {node: [(succes, payload)]}}
2480 @return: a dictionary of per-instance dictionaries with nodes as
2481 keys and disk information as values; the disk information is a
2482 list of tuples (success, payload)
2485 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2488 node_disks_devonly = {}
2489 diskless_instances = set()
2490 diskless = constants.DT_DISKLESS
2492 for nname in nodelist:
# Consider both primary and secondary instances of the node.
2493 node_instances = list(itertools.chain(node_image[nname].pinst,
2494 node_image[nname].sinst))
2495 diskless_instances.update(inst for inst in node_instances
2496 if instanceinfo[inst].disk_template == diskless)
2497 disks = [(inst, disk)
2498 for inst in node_instances
2499 for disk in instanceinfo[inst].disks]
2502 # No need to collect data
2505 node_disks[nname] = disks
2507 # Creating copies as SetDiskID below will modify the objects and that can
2508 # lead to incorrect data returned from nodes
2509 devonly = [dev.Copy() for (_, dev) in disks]
2512 self.cfg.SetDiskID(dev, nname)
2514 node_disks_devonly[nname] = devonly
2516 assert len(node_disks) == len(node_disks_devonly)
2518 # Collect data from all nodes with disks
2519 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2522 assert len(result) == len(node_disks)
2526 for (nname, nres) in result.items():
2527 disks = node_disks[nname]
2530 # No data from this node
# Synthesize one failure entry per disk so every disk has a status.
2531 data = len(disks) * [(False, "node offline")]
2534 _ErrorIf(msg, self.ENODERPC, nname,
2535 "while getting disk information: %s", msg)
2537 # No data from this node
2538 data = len(disks) * [(False, msg)]
# Sanitize the payload: only well-formed (success, payload) 2-tuples pass.
2541 for idx, i in enumerate(nres.payload):
2542 if isinstance(i, (tuple, list)) and len(i) == 2:
2545 logging.warning("Invalid result from node %s, entry %d: %s",
2547 data.append((False, "Invalid result from the remote node"))
2549 for ((inst, _), status) in zip(disks, data):
2550 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2552 # Add empty entries for diskless instances.
2553 for inst in diskless_instances:
2554 assert inst not in instdisk
# Sanity-check the shape of the resulting mapping before returning it.
2557 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2558 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2559 compat.all(isinstance(s, (tuple, list)) and
2560 len(s) == 2 for s in statuses)
2561 for inst, nnames in instdisk.items()
2562 for nname, statuses in nnames.items())
2563 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2568 def _SshNodeSelector(group_uuid, all_nodes):
2569 """Create endless iterators for all potential SSH check hosts.
# Candidate nodes are those outside the given group (the rest of the filter
# condition, original line 2574, is not visible in this chunk).
2572 nodes = [node for node in all_nodes
2573 if (node.group != group_uuid and
2575 keyfunc = operator.attrgetter("group")
# One endless, sorted-by-name cycle of node names per foreign group; the
# input to groupby is pre-sorted by the same key, as groupby requires.
2577 return map(itertools.cycle,
2578 [sorted(map(operator.attrgetter("name"), names))
2579 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2583 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2584 """Choose which nodes should talk to which other nodes.
2586 We will make nodes contact all nodes in their group, and one node from
2589 @warning: This algorithm has a known issue if one node group is much
2590 smaller than others (e.g. just one node). In such a case all other
2591 nodes will talk to the single node.
2594 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2595 sel = cls._SshNodeSelector(group_uuid, all_nodes)
# For each online node, draw the next name from every per-group cycle, so
# successive nodes contact different members of each foreign group.
# (`i.next()` is Python 2 iterator syntax.)
2597 return (online_nodes,
2598 dict((name, sorted([i.next() for i in sel]))
2599 for name in online_nodes))
2601 def BuildHooksEnv(self):
# Builds the environment exported to cluster-verify hooks: cluster-wide tags
# plus one NODE_TAGS_<name> entry per node in this group.
2604 Cluster-Verify hooks just ran in the post phase and their failure makes
2605 the output be logged in the verify output and the verification to fail.
2609 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2612 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2613 for node in self.my_node_info.values())
2617 def BuildHooksNodes(self):
2618 """Build hooks nodes.
# Run no pre-hooks; run post-hooks on every node of this group.
2621 return ([], self.my_node_names)
# NOTE(review): large method reproduced from a gapped listing -- many guard
# and else-branch lines are not visible; the sparse comments added below only
# state what the visible lines establish.
2623 def Exec(self, feedback_fn):
2624 """Verify integrity of the node group, performing various test on nodes.
2627 # This method has too many local variables. pylint: disable=R0914
2628 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2630 if not self.my_node_names:
2632 feedback_fn("* Empty node group, skipping verification")
2636 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2637 verbose = self.op.verbose
2638 self._feedback_fn = feedback_fn
2640 vg_name = self.cfg.GetVGName()
2641 drbd_helper = self.cfg.GetDRBDHelper()
2642 cluster = self.cfg.GetClusterInfo()
2643 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2644 hypervisors = cluster.enabled_hypervisors
2645 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2647 i_non_redundant = [] # Non redundant instances
2648 i_non_a_balanced = [] # Non auto-balanced instances
2649 n_offline = 0 # Count of offline nodes
2650 n_drained = 0 # Count of nodes being drained
2651 node_vol_should = {}
2653 # FIXME: verify OS list
2656 filemap = _ComputeAncillaryFiles(cluster, False)
2658 # do local checksums
2659 master_node = self.master_node = self.cfg.GetMasterNode()
2660 master_ip = self.cfg.GetMasterIP()
2662 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
# Request payload for the node_verify RPC: maps NV_* check identifiers to
# their per-check arguments.
2664 node_verify_param = {
2665 constants.NV_FILELIST:
2666 utils.UniqueSequence(filename
2667 for files in filemap
2668 for filename in files),
2669 constants.NV_NODELIST:
2670 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2671 self.all_node_info.values()),
2672 constants.NV_HYPERVISOR: hypervisors,
2673 constants.NV_HVPARAMS:
2674 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2675 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2676 for node in node_data_list
2677 if not node.offline],
2678 constants.NV_INSTANCELIST: hypervisors,
2679 constants.NV_VERSION: None,
2680 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2681 constants.NV_NODESETUP: None,
2682 constants.NV_TIME: None,
2683 constants.NV_MASTERIP: (master_node, master_ip),
2684 constants.NV_OSLIST: None,
2685 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
# LVM/DRBD checks are only requested when a volume group is configured.
2688 if vg_name is not None:
2689 node_verify_param[constants.NV_VGLIST] = None
2690 node_verify_param[constants.NV_LVLIST] = vg_name
2691 node_verify_param[constants.NV_PVLIST] = [vg_name]
2692 node_verify_param[constants.NV_DRBDLIST] = None
2695 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2698 # FIXME: this needs to be changed per node-group, not cluster-wide
# Collect every bridge referenced by the default NIC params or any instance
# NIC in bridged mode, for the NV_BRIDGES check.
2700 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2701 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702 bridges.add(default_nicpp[constants.NIC_LINK])
2703 for instance in self.my_inst_info.values():
2704 for nic in instance.nics:
2705 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2706 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707 bridges.add(full_nic[constants.NIC_LINK])
2710 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2712 # Build our expected cluster state
2713 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2715 vm_capable=node.vm_capable))
2716 for node in node_data_list)
2720 for node in self.all_node_info.values():
2721 path = _SupportsOob(self.cfg, node)
2722 if path and path not in oob_paths:
2723 oob_paths.append(path)
2726 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2728 for instance in self.my_inst_names:
2729 inst_config = self.my_inst_info[instance]
# Instances may reference nodes outside this group (or even unknown nodes);
# represent those with ghost NodeImage entries.
2731 for nname in inst_config.all_nodes:
2732 if nname not in node_image:
2733 gnode = self.NodeImage(name=nname)
2734 gnode.ghost = (nname not in self.all_node_info)
2735 node_image[nname] = gnode
2737 inst_config.MapLVsByNode(node_vol_should)
2739 pnode = inst_config.primary_node
2740 node_image[pnode].pinst.append(instance)
2742 for snode in inst_config.secondary_nodes:
2743 nimg = node_image[snode]
2744 nimg.sinst.append(instance)
2745 if pnode not in nimg.sbp:
2746 nimg.sbp[pnode] = []
2747 nimg.sbp[pnode].append(instance)
2749 # At this point, we have the in-memory data structures complete,
2750 # except for the runtime information, which we'll gather next
2752 # Due to the way our RPC system works, exact response times cannot be
2753 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2754 # time before and after executing the request, we can at least have a time
2756 nvinfo_starttime = time.time()
2757 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2759 self.cfg.GetClusterName())
2760 nvinfo_endtime = time.time()
2762 if self.extra_lv_nodes and vg_name is not None:
2764 self.rpc.call_node_verify(self.extra_lv_nodes,
2765 {constants.NV_LVLIST: vg_name},
2766 self.cfg.GetClusterName())
2768 extra_lv_nvinfo = {}
2770 all_drbd_map = self.cfg.ComputeDRBDMap()
2772 feedback_fn("* Gathering disk information (%s nodes)" %
2773 len(self.my_node_names))
2774 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2777 feedback_fn("* Verifying configuration file consistency")
2779 # If not all nodes are being checked, we need to make sure the master node
2780 # and a non-checked vm_capable node are in the list.
2781 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2783 vf_nvinfo = all_nvinfo.copy()
2784 vf_node_info = list(self.my_node_info.values())
2785 additional_nodes = []
2786 if master_node not in self.my_node_info:
2787 additional_nodes.append(master_node)
2788 vf_node_info.append(self.all_node_info[master_node])
2789 # Add the first vm_capable node we find which is not included
2790 for node in absent_nodes:
2791 nodeinfo = self.all_node_info[node]
2792 if nodeinfo.vm_capable and not nodeinfo.offline:
2793 additional_nodes.append(node)
2794 vf_node_info.append(self.all_node_info[node])
# Re-run just the file checksum check on the added nodes and merge results.
2796 key = constants.NV_FILELIST
2797 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2798 {key: node_verify_param[key]},
2799 self.cfg.GetClusterName()))
2801 vf_nvinfo = all_nvinfo
2802 vf_node_info = self.my_node_info.values()
2804 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2806 feedback_fn("* Verifying node status")
# refos_img (first node with usable OS data) is the reference for
# _VerifyNodeOS; its initialization line is not visible in this chunk.
2810 for node_i in node_data_list:
2812 nimg = node_image[node]
2816 feedback_fn("* Skipping offline node %s" % (node,))
2820 if node == master_node:
2822 elif node_i.master_candidate:
2823 ntype = "master candidate"
2824 elif node_i.drained:
2830 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2832 msg = all_nvinfo[node].fail_msg
2833 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2835 nimg.rpc_fail = True
2838 nresult = all_nvinfo[node].payload
2840 nimg.call_ok = self._VerifyNode(node_i, nresult)
2841 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2842 self._VerifyNodeNetwork(node_i, nresult)
2843 self._VerifyOob(node_i, nresult)
2846 self._VerifyNodeLVM(node_i, nresult, vg_name)
2847 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2850 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2851 self._UpdateNodeInstances(node_i, nresult, nimg)
2852 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2853 self._UpdateNodeOS(node_i, nresult, nimg)
2855 if not nimg.os_fail:
2856 if refos_img is None:
2858 self._VerifyNodeOS(node_i, nimg, refos_img)
2859 self._VerifyNodeBridges(node_i, nresult, bridges)
2861 # Check whether all running instancies are primary for the node. (This
2862 # can no longer be done from _VerifyInstance below, since some of the
2863 # wrong instances could be from other node groups.)
2864 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2866 for inst in non_primary_inst:
2867 test = inst in self.all_inst_info
2868 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2869 "instance should not run on node %s", node_i.name)
2870 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2871 "node is running unknown instance %s", inst)
2873 for node, result in extra_lv_nvinfo.items():
2874 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2875 node_image[node], vg_name)
2877 feedback_fn("* Verifying instance status")
2878 for instance in self.my_inst_names:
2880 feedback_fn("* Verifying instance %s" % instance)
2881 inst_config = self.my_inst_info[instance]
2882 self._VerifyInstance(instance, inst_config, node_image,
2884 inst_nodes_offline = []
2886 pnode = inst_config.primary_node
2887 pnode_img = node_image[pnode]
2888 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2889 self.ENODERPC, pnode, "instance %s, connection to"
2890 " primary node failed", instance)
2892 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2893 self.EINSTANCEBADNODE, instance,
2894 "instance is marked as running and lives on offline node %s",
2895 inst_config.primary_node)
2897 # If the instance is non-redundant we cannot survive losing its primary
2898 # node, so we are not N+1 compliant. On the other hand we have no disk
2899 # templates with more than one secondary so that situation is not well
2901 # FIXME: does not support file-backed instances
2902 if not inst_config.secondary_nodes:
2903 i_non_redundant.append(instance)
2905 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2906 instance, "instance has multiple secondary nodes: %s",
2907 utils.CommaJoin(inst_config.secondary_nodes),
2908 code=self.ETYPE_WARNING)
# For internally-mirrored templates (e.g. DRBD), warn when an instance's
# nodes span more than one node group.
2910 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2911 pnode = inst_config.primary_node
2912 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2913 instance_groups = {}
2915 for node in instance_nodes:
2916 instance_groups.setdefault(self.all_node_info[node].group,
2920 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2921 # Sort so that we always list the primary node first.
2922 for group, nodes in sorted(instance_groups.items(),
2923 key=lambda (_, nodes): pnode in nodes,
2926 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2927 instance, "instance has primary and secondary nodes in"
2928 " different groups: %s", utils.CommaJoin(pretty_list),
2929 code=self.ETYPE_WARNING)
2931 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2932 i_non_a_balanced.append(instance)
2934 for snode in inst_config.secondary_nodes:
2935 s_img = node_image[snode]
2936 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2937 "instance %s, connection to secondary node failed", instance)
2940 inst_nodes_offline.append(snode)
2942 # warn that the instance lives on offline nodes
2943 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2944 "instance has offline secondary node(s) %s",
2945 utils.CommaJoin(inst_nodes_offline))
2946 # ... or ghost/non-vm_capable nodes
2947 for node in inst_config.all_nodes:
2948 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2949 "instance lives on ghost node %s", node)
2950 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2951 instance, "instance lives on non-vm_capable node %s", node)
2953 feedback_fn("* Verifying orphan volumes")
2954 reserved = utils.FieldSet(*cluster.reserved_lvs)
2956 # We will get spurious "unknown volume" warnings if any node of this group
2957 # is secondary for an instance whose primary is in another group. To avoid
2958 # them, we find these instances and add their volumes to node_vol_should.
2959 for inst in self.all_inst_info.values():
2960 for secondary in inst.secondary_nodes:
2961 if (secondary in self.my_node_info
2962 and inst.name not in self.my_inst_info):
2963 inst.MapLVsByNode(node_vol_should)
2966 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2968 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2969 feedback_fn("* Verifying N+1 Memory redundancy")
2970 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2972 feedback_fn("* Other Notes")
2974 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2975 % len(i_non_redundant))
2977 if i_non_a_balanced:
2978 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2979 % len(i_non_a_balanced))
2982 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2985 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2989 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2990 """Analyze the post-hooks' result
2992 This method analyses the hook result, handles it, and sends some
2993 nicely-formatted feedback back to the user.
2995 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2996 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2997 @param hooks_results: the results of the multi-node hooks rpc call
2998 @param feedback_fn: function used send feedback back to the caller
2999 @param lu_result: previous Exec result
3000 @return: the new Exec result, based on the previous result
3004 # We only really run POST phase hooks, only for non-empty groups,
3005 # and are only interested in their results
3006 if not self.my_node_names:
3009 elif phase == constants.HOOKS_PHASE_POST:
3010 # Used to change hooks' output to proper indentation
3011 feedback_fn("* Hooks Results")
3012 assert hooks_results, "invalid result from hooks"
3014 for node_name in hooks_results:
3015 res = hooks_results[node_name]
# msg is presumably res.fail_msg; its assignment (line 3016) is not
# visible in this chunk -- confirm against the full file.
3017 test = msg and not res.offline
3018 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3019 "Communication failure in hooks execution: %s", msg)
3020 if res.offline or msg:
3021 # No need to investigate payload if node is offline or gave
3024 for script, hkr, output in res.payload:
3025 test = hkr == constants.HKR_FAIL
3026 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3027 "Script %s failed, output:", script)
# Re-indent the hook script's output before relaying it to the user.
3029 output = self._HOOKS_INDENT_RE.sub(" ", output)
3030 feedback_fn("%s" % output)
3036 class LUClusterVerifyDisks(NoHooksLU):
3037 """Verifies the cluster disks status.
# Acquires all node-group locks in shared mode; the real work is delegated
# to per-group child jobs below.
3042 def ExpandNames(self):
3043 self.share_locks = _ShareAll()
3044 self.needed_locks = {
3045 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3048 def Exec(self, feedback_fn):
3049 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3051 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3052 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3053 for group in group_names])
3056 class LUGroupVerifyDisks(NoHooksLU):
3057 """Verifies the status of all disks in a node group.
3062 def ExpandNames(self):
3063 # Raises errors.OpPrereqError on its own if group can't be found
3064 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3066 self.share_locks = _ShareAll()
3067 self.needed_locks = {
3068 locking.LEVEL_INSTANCE: [],
3069 locking.LEVEL_NODEGROUP: [],
3070 locking.LEVEL_NODE: [],
3073 def DeclareLocks(self, level):
3074 if level == locking.LEVEL_INSTANCE:
3075 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3077 # Lock instances optimistically, needs verification once node and group
3078 # locks have been acquired
3079 self.needed_locks[locking.LEVEL_INSTANCE] = \
3080 self.cfg.GetNodeGroupInstances(self.group_uuid)
3082 elif level == locking.LEVEL_NODEGROUP:
3083 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3085 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3086 set([self.group_uuid] +
3087 # Lock all groups used by instances optimistically; this requires
3088 # going via the node before it's locked, requiring verification
3091 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3092 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3094 elif level == locking.LEVEL_NODE:
3095 # This will only lock the nodes in the group to be verified which contain
3097 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3098 self._LockInstancesNodes()
3100 # Lock all nodes in group to be verified
3101 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3102 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3103 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3105 def CheckPrereq(self):
3106 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3107 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3108 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3110 assert self.group_uuid in owned_groups
3112 # Check if locked instances are still correct
3113 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3115 # Get instance information
3116 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3118 # Check if node groups for locked instances are still correct
3119 for (instance_name, inst) in self.instances.items():
3120 assert owned_nodes.issuperset(inst.all_nodes), \
3121 "Instance %s's nodes changed while we kept the lock" % instance_name
3123 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3126 assert self.group_uuid in inst_groups, \
3127 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3129 def Exec(self, feedback_fn):
3130 """Verify integrity of cluster disks.
3132 @rtype: tuple of three items
3133 @return: a tuple of (dict of node-to-node_error, list of instances
3134 which need activate-disks, dict of instance: (node, volume) for
3139 res_instances = set()
# nv_dict: (node name, LV name) -> owning instance, for all locked
# instances; entries are popped as LVs are reported back by the nodes.
3142 nv_dict = _MapInstanceDisksToNodes([inst
3143 for inst in self.instances.values()
# Only query nodes we both locked and that are vm_capable.
3147 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3148 set(self.cfg.GetVmCapableNodeList()))
3150 node_lvs = self.rpc.call_lv_list(nodes, [])
3152 for (node, node_res) in node_lvs.items():
3153 if node_res.offline:
3156 msg = node_res.fail_msg
3158 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3159 res_nodes[node] = msg
# An LV that exists but is not online means its instance needs
# activate-disks.
3162 for lv_name, (_, _, lv_online) in node_res.payload.items():
3163 inst = nv_dict.pop((node, lv_name), None)
3164 if not (lv_online or inst is None):
3165 res_instances.add(inst)
3167 # any leftover items in nv_dict are missing LVs, let's arrange the data
3169 for key, inst in nv_dict.iteritems():
3170 res_missing.setdefault(inst, []).append(list(key))
3172 return (res_nodes, list(res_instances), res_missing)
3175 class LUClusterRepairDiskSizes(NoHooksLU):
3176 """Verifies the cluster disks sizes.
# If an explicit instance list was given, lock only those instances (and
# later their primary nodes); otherwise lock everything.
3181 def ExpandNames(self):
3182 if self.op.instances:
3183 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3184 self.needed_locks = {
3185 locking.LEVEL_NODE: [],
3186 locking.LEVEL_INSTANCE: self.wanted_names,
3188 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# wanted_names is None here as a marker for "all instances"; resolved in
# CheckPrereq from the locks actually acquired.
3190 self.wanted_names = None
3191 self.needed_locks = {
3192 locking.LEVEL_NODE: locking.ALL_SET,
3193 locking.LEVEL_INSTANCE: locking.ALL_SET,
# Node locks are shared (read-only on nodes), instance locks exclusive.
3195 self.share_locks = {
3196 locking.LEVEL_NODE: 1,
3197 locking.LEVEL_INSTANCE: 0,
3200 def DeclareLocks(self, level):
3201 if level == locking.LEVEL_NODE and self.wanted_names is not None:
# Only primary nodes are needed: disk sizes are queried there.
3202 self._LockInstancesNodes(primary_only=True)
3204 def CheckPrereq(self):
3205 """Check prerequisites.
3207 This only checks the optional instance list against the existing names.
3210 if self.wanted_names is None:
3211 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3213 self.wanted_instances = \
3214 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3216 def _EnsureChildSizes(self, disk):
3217 """Ensure children of the disk have the needed disk size.
3219 This is valid mainly for DRBD8 and fixes an issue where the
3220 children have smaller disk size.
3222 @param disk: an L{ganeti.objects.Disk} object
# Returns True when a size was changed (so callers know to update the
# configuration); recursion descends only into the data child, not the
# DRBD metadata device.
3225 if disk.dev_type == constants.LD_DRBD8:
3226 assert disk.children, "Empty children for DRBD8?"
3227 fchild = disk.children[0]
3228 mismatch = fchild.size < disk.size
3230 self.LogInfo("Child disk has size %d, parent %d, fixing",
3231 fchild.size, disk.size)
3232 fchild.size = disk.size
3234 # and we recurse on this child only, not on the metadev
3235 return self._EnsureChildSizes(fchild) or mismatch
3239 def Exec(self, feedback_fn):
3240 """Verify the size of cluster disks.
3243 # TODO: check child disks too
3244 # TODO: check differences in size between primary/secondary nodes
# Group disks by primary node so each node is queried once via RPC.
3246 for instance in self.wanted_instances:
3247 pnode = instance.primary_node
3248 if pnode not in per_node_disks:
3249 per_node_disks[pnode] = []
3250 for idx, disk in enumerate(instance.disks):
3251 per_node_disks[pnode].append((instance, idx, disk))
3254 for node, dskl in per_node_disks.items():
# Work on copies so physical IDs can be set without touching the config.
3255 newl = [v[2].Copy() for v in dskl]
3257 self.cfg.SetDiskID(dsk, node)
3258 result = self.rpc.call_blockdev_getsize(node, newl)
# Failures from a node are logged and the node skipped, not fatal.
3260 self.LogWarning("Failure in blockdev_getsize call to node"
3261 " %s, ignoring", node)
3263 if len(result.payload) != len(dskl):
3264 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3265 " result.payload=%s", node, len(dskl), result.payload)
3266 self.LogWarning("Invalid result from node %s, ignoring node results",
# Compare recorded size against the actual block device size and fix the
# configuration when they disagree.
3269 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3271 self.LogWarning("Disk %d of instance %s did not return size"
3272 " information, ignoring", idx, instance.name)
3274 if not isinstance(size, (int, long)):
3275 self.LogWarning("Disk %d of instance %s did not return valid"
3276 " size information, ignoring", idx, instance.name)
3279 if size != disk.size:
3280 self.LogInfo("Disk %d of instance %s has mismatched size,"
3281 " correcting: recorded %d, actual %d", idx,
3282 instance.name, disk.size, size)
3284 self.cfg.Update(instance, feedback_fn)
3285 changed.append((instance.name, idx, size))
3286 if self._EnsureChildSizes(disk):
3287 self.cfg.Update(instance, feedback_fn)
3288 changed.append((instance.name, idx, disk.size))
3292 class LUClusterRename(LogicalUnit):
3293 """Rename the cluster.
3296 HPATH = "cluster-rename"
3297 HTYPE = constants.HTYPE_CLUSTER
3299 def BuildHooksEnv(self):
# Hook environment: old cluster name as target, new name as parameter.
3304 "OP_TARGET": self.cfg.GetClusterName(),
3305 "NEW_NAME": self.op.name,
3308 def BuildHooksNodes(self):
3309 """Build hooks nodes.
# Pre-hooks run on the master only; post-hooks on all nodes.
3312 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3314 def CheckPrereq(self):
3315 """Verify that the passed name is a valid one.
# Resolve the requested name with the cluster's primary IP family and
# refuse a no-op rename or a new IP that is already live on the network.
3318 hostname = netutils.GetHostname(name=self.op.name,
3319 family=self.cfg.GetPrimaryIPFamily())
3321 new_name = hostname.name
3322 self.ip = new_ip = hostname.ip
3323 old_name = self.cfg.GetClusterName()
3324 old_ip = self.cfg.GetMasterIP()
3325 if new_name == old_name and new_ip == old_ip:
3326 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3327 " cluster has changed",
3329 if new_ip != old_ip:
3330 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3331 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3332 " reachable on the network" %
3333 new_ip, errors.ECODE_NOTUNIQUE)
# Store the resolved FQDN back so Exec works with the canonical name.
3335 self.op.name = new_name
3337 def Exec(self, feedback_fn):
3338 """Rename the cluster.
3341 clustername = self.op.name
3344 # shutdown the master IP
3345 master = self.cfg.GetMasterNode()
3346 result = self.rpc.call_node_stop_master(master, False)
3347 result.Raise("Could not disable the master role")
# Update cluster name/IP in the configuration while the master IP is down.
3350 cluster = self.cfg.GetClusterInfo()
3351 cluster.cluster_name = clustername
3352 cluster.master_ip = ip
3353 self.cfg.Update(cluster, feedback_fn)
3355 # update the known hosts file
3356 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3357 node_list = self.cfg.GetOnlineNodeList()
# The master already has the fresh file; distribute to the other nodes.
3359 node_list.remove(master)
3362 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
# Restart the master role; failure here is only warned about since the
# rename itself has already been committed.
3364 result = self.rpc.call_node_start_master(master, False, False)
3365 msg = result.fail_msg
3367 self.LogWarning("Could not re-enable the master role on"
3368 " the master, please restart manually: %s", msg)
3373 class LUClusterSetParams(LogicalUnit):
3374 """Change the parameters of the cluster.
3377 HPATH = "cluster-modify"
3378 HTYPE = constants.HTYPE_CLUSTER
# Validate the UID-pool related arguments early, before any locking.
3381 def CheckArguments(self):
3385 if self.op.uid_pool:
3386 uidpool.CheckUidPool(self.op.uid_pool)
3388 if self.op.add_uids:
3389 uidpool.CheckUidPool(self.op.add_uids)
3391 if self.op.remove_uids:
3392 uidpool.CheckUidPool(self.op.remove_uids)
3394 def ExpandNames(self):
3395 # FIXME: in the future maybe other cluster params won't require checking on
3396 # all nodes to be modified.
3397 self.needed_locks = {
3398 locking.LEVEL_NODE: locking.ALL_SET,
3400 self.share_locks[locking.LEVEL_NODE] = 1
3402 def BuildHooksEnv(self):
# Hook environment: the cluster name and the (possibly None) new VG name.
3407 "OP_TARGET": self.cfg.GetClusterName(),
3408 "NEW_VG_NAME": self.op.vg_name,
3411 def BuildHooksNodes(self):
3412 """Build hooks nodes.
3415 mn = self.cfg.GetMasterNode()
3418 def CheckPrereq(self):
3419 """Check prerequisites.
3421 This checks whether the given params don't conflict and
3422 if the given volume group is valid.
# An empty-but-not-None vg_name/drbd_helper means "disable"; that is only
# allowed when no disk of the corresponding type exists.
3425 if self.op.vg_name is not None and not self.op.vg_name:
3426 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3427 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3428 " instances exist", errors.ECODE_INVAL)
3430 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3431 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3432 raise errors.OpPrereqError("Cannot disable drbd helper while"
3433 " drbd-based instances exist",
3436 node_list = self.owned_locks(locking.LEVEL_NODE)
3438 # if vg_name not None, checks given volume group on all nodes
3440 vglist = self.rpc.call_vg_list(node_list)
3441 for node in node_list:
3442 msg = vglist[node].fail_msg
3444 # ignoring down node
3445 self.LogWarning("Error while gathering data on node %s"
3446 " (ignoring node): %s", node, msg)
3448 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3450 constants.MIN_VG_SIZE)
3452 raise errors.OpPrereqError("Error on node '%s': %s" %
3453 (node, vgstatus), errors.ECODE_ENVIRON)
# Verify every (online) node runs exactly the requested DRBD helper.
3455 if self.op.drbd_helper:
3456 # checks given drbd helper on all nodes
3457 helpers = self.rpc.call_drbd_helper(node_list)
3458 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3460 self.LogInfo("Not checking drbd helper on offline node %s", node)
3462 msg = helpers[node].fail_msg
3464 raise errors.OpPrereqError("Error checking drbd helper on node"
3465 " '%s': %s" % (node, msg),
3466 errors.ECODE_ENVIRON)
3467 node_helper = helpers[node].payload
3468 if node_helper != self.op.drbd_helper:
3469 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3470 (node, node_helper), errors.ECODE_ENVIRON)
3472 self.cluster = cluster = self.cfg.GetClusterInfo()
3473 # validate params changes
# For each parameter family, type-check the input and compute the new
# filled dictionary (stored on self for Exec to commit).
3474 if self.op.beparams:
3475 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3476 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3478 if self.op.ndparams:
3479 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3480 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3482 # TODO: we need a more general way to handle resetting
3483 # cluster-level parameters to default values
3484 if self.new_ndparams["oob_program"] == "":
3485 self.new_ndparams["oob_program"] = \
3486 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3488 if self.op.nicparams:
3489 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3490 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3491 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3494 # check all instances for consistency
# Simulate the new NIC defaults against every existing NIC and collect all
# problems before rejecting the change, so the user sees them all at once.
3495 for instance in self.cfg.GetAllInstancesInfo().values():
3496 for nic_idx, nic in enumerate(instance.nics):
3497 params_copy = copy.deepcopy(nic.nicparams)
3498 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3500 # check parameter syntax
3502 objects.NIC.CheckParameterSyntax(params_filled)
3503 except errors.ConfigurationError, err:
3504 nic_errors.append("Instance %s, nic/%d: %s" %
3505 (instance.name, nic_idx, err))
3507 # if we're moving instances to routed, check that they have an ip
3508 target_mode = params_filled[constants.NIC_MODE]
3509 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3510 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3511 " address" % (instance.name, nic_idx))
3513 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3514 "\n".join(nic_errors))
3516 # hypervisor list/parameters
# Merge requested hvparams on top of a copy of the current ones.
3517 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3518 if self.op.hvparams:
3519 for hv_name, hv_dict in self.op.hvparams.items():
3520 if hv_name not in self.new_hvparams:
3521 self.new_hvparams[hv_name] = hv_dict
3523 self.new_hvparams[hv_name].update(hv_dict)
3525 # os hypervisor parameters
3526 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3528 for os_name, hvs in self.op.os_hvp.items():
3529 if os_name not in self.new_os_hvp:
3530 self.new_os_hvp[os_name] = hvs
3532 for hv_name, hv_dict in hvs.items():
3533 if hv_name not in self.new_os_hvp[os_name]:
3534 self.new_os_hvp[os_name][hv_name] = hv_dict
3536 self.new_os_hvp[os_name][hv_name].update(hv_dict)
# OS parameters: merge per-OS updates; an OS whose parameters all get
# removed is dropped entirely, otherwise validated on the master node.
3539 self.new_osp = objects.FillDict(cluster.osparams, {})
3540 if self.op.osparams:
3541 for os_name, osp in self.op.osparams.items():
3542 if os_name not in self.new_osp:
3543 self.new_osp[os_name] = {}
3545 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3548 if not self.new_osp[os_name]:
3549 # we removed all parameters
3550 del self.new_osp[os_name]
3552 # check the parameter validity (remote check)
3553 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3554 os_name, self.new_osp[os_name])
3556 # changes to the hypervisor list
3557 if self.op.enabled_hypervisors is not None:
3558 self.hv_list = self.op.enabled_hypervisors
3559 for hv in self.hv_list:
3560 # if the hypervisor doesn't already exist in the cluster
3561 # hvparams, we initialize it to empty, and then (in both
3562 # cases) we make sure to fill the defaults, as we might not
3563 # have a complete defaults list if the hypervisor wasn't
3565 if hv not in new_hvp:
3567 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3568 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3570 self.hv_list = cluster.enabled_hypervisors
3572 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3573 # either the enabled list has changed, or the parameters have, validate
3574 for hv_name, hv_params in self.new_hvparams.items():
3575 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3576 (self.op.enabled_hypervisors and
3577 hv_name in self.op.enabled_hypervisors)):
3578 # either this is a new hypervisor, or its parameters have changed
3579 hv_class = hypervisor.GetHypervisor(hv_name)
3580 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3581 hv_class.CheckParameterSyntax(hv_params)
3582 _CheckHVParams(self, node_list, hv_name, hv_params)
3585 # no need to check any newly-enabled hypervisors, since the
3586 # defaults have already been checked in the above code-block
3587 for os_name, os_hvp in self.new_os_hvp.items():
3588 for hv_name, hv_params in os_hvp.items():
3589 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3590 # we need to fill in the new os_hvp on top of the actual hv_p
3591 cluster_defaults = self.new_hvparams.get(hv_name, {})
3592 new_osp = objects.FillDict(cluster_defaults, hv_params)
3593 hv_class = hypervisor.GetHypervisor(hv_name)
3594 hv_class.CheckParameterSyntax(new_osp)
3595 _CheckHVParams(self, node_list, hv_name, new_osp)
# The default iallocator must resolve to an executable in the search path.
3597 if self.op.default_iallocator:
3598 alloc_script = utils.FindFile(self.op.default_iallocator,
3599 constants.IALLOCATOR_SEARCH_PATH,
3601 if alloc_script is None:
3602 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3603 " specified" % self.op.default_iallocator,
3606 def Exec(self, feedback_fn):
3607 """Change the parameters of the cluster.
# Commit each requested change; values were validated in CheckPrereq.
3610 if self.op.vg_name is not None:
3611 new_volume = self.op.vg_name
3614 if new_volume != self.cfg.GetVGName():
3615 self.cfg.SetVGName(new_volume)
3617 feedback_fn("Cluster LVM configuration already in desired"
3618 " state, not changing")
3619 if self.op.drbd_helper is not None:
3620 new_helper = self.op.drbd_helper
3623 if new_helper != self.cfg.GetDRBDHelper():
3624 self.cfg.SetDRBDHelper(new_helper)
3626 feedback_fn("Cluster DRBD helper already in desired state,"
3628 if self.op.hvparams:
3629 self.cluster.hvparams = self.new_hvparams
3631 self.cluster.os_hvp = self.new_os_hvp
3632 if self.op.enabled_hypervisors is not None:
3633 self.cluster.hvparams = self.new_hvparams
3634 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3635 if self.op.beparams:
3636 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3637 if self.op.nicparams:
3638 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3639 if self.op.osparams:
3640 self.cluster.osparams = self.new_osp
3641 if self.op.ndparams:
3642 self.cluster.ndparams = self.new_ndparams
3644 if self.op.candidate_pool_size is not None:
3645 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3646 # we need to update the pool size here, otherwise the save will fail
3647 _AdjustCandidatePool(self, [])
3649 if self.op.maintain_node_health is not None:
3650 self.cluster.maintain_node_health = self.op.maintain_node_health
3652 if self.op.prealloc_wipe_disks is not None:
3653 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3655 if self.op.add_uids is not None:
3656 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3658 if self.op.remove_uids is not None:
3659 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3661 if self.op.uid_pool is not None:
3662 self.cluster.uid_pool = self.op.uid_pool
3664 if self.op.default_iallocator is not None:
3665 self.cluster.default_iallocator = self.op.default_iallocator
3667 if self.op.reserved_lvs is not None:
3668 self.cluster.reserved_lvs = self.op.reserved_lvs
# Shared helper for modifying the hidden/blacklisted OS lists; `mods` is a
# list of (DDM_ADD/DDM_REMOVE, os-name) pairs.
3670 def helper_os(aname, mods, desc):
3672 lst = getattr(self.cluster, aname)
3673 for key, val in mods:
3674 if key == constants.DDM_ADD:
3676 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3679 elif key == constants.DDM_REMOVE:
3683 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3685 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3687 if self.op.hidden_os:
3688 helper_os("hidden_os", self.op.hidden_os, "hidden")
3690 if self.op.blacklisted_os:
3691 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
# Changing the master netdev requires taking the master IP down first and
# bringing it back up on the new device after the config is saved.
3693 if self.op.master_netdev:
3694 master = self.cfg.GetMasterNode()
3695 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3696 self.cluster.master_netdev)
3697 result = self.rpc.call_node_stop_master(master, False)
3698 result.Raise("Could not disable the master ip")
3699 feedback_fn("Changing master_netdev from %s to %s" %
3700 (self.cluster.master_netdev, self.op.master_netdev))
3701 self.cluster.master_netdev = self.op.master_netdev
3703 self.cfg.Update(self.cluster, feedback_fn)
3705 if self.op.master_netdev:
3706 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3707 self.op.master_netdev)
3708 result = self.rpc.call_node_start_master(master, False, False)
3710 self.LogWarning("Could not re-enable the master ip on"
3711 " the master, please restart manually: %s",
3715 def _UploadHelper(lu, nodes, fname):
3716 """Helper for uploading a file and showing warnings.
# Copies local file `fname` (if it exists) to the given nodes via RPC;
# per-node failures are only warned about, never raised.
3719 if os.path.exists(fname):
3720 result = lu.rpc.call_upload_file(nodes, fname)
3721 for to_node, to_result in result.items():
3722 msg = to_result.fail_msg
# NOTE(review): the guard deciding when to warn is elided from this view;
# presumably the warning is emitted only when msg is non-empty.
3724 msg = ("Copy of file %s to node %s failed: %s" %
3725 (fname, to_node, msg))
3726 lu.proc.LogWarning(msg)
3729 def _ComputeAncillaryFiles(cluster, redist):
3730 """Compute files external to Ganeti which need to be consistent.
3732 @type redist: boolean
3733 @param redist: Whether to include files which need to be redistributed
# Returns four disjoint file sets: for all nodes, optionally on all nodes,
# on master candidates only, and on VM-capable nodes only.
3736 # Compute files for all nodes
3738 constants.SSH_KNOWN_HOSTS_FILE,
3739 constants.CONFD_HMAC_KEY,
3740 constants.CLUSTER_DOMAIN_SECRET_FILE,
3744 files_all.update(constants.ALL_CERT_FILES)
3745 files_all.update(ssconf.SimpleStore().GetFileList())
3747 # we need to ship at least the RAPI certificate
3748 files_all.add(constants.RAPI_CERT_FILE)
3750 if cluster.modify_etc_hosts:
3751 files_all.add(constants.ETC_HOSTS)
3753 # Files which must either exist on all nodes or on none
3754 files_all_opt = set([
3755 constants.RAPI_USERS_FILE,
3758 # Files which should only be on master candidates
3761 files_mc.add(constants.CLUSTER_CONF_FILE)
3763 # Files which should only be on VM-capable nodes
3764 files_vm = set(filename
3765 for hv_name in cluster.enabled_hypervisors
3766 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3768 # Filenames must be unique
3769 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3770 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3771 "Found file listed in more than one file list"
3773 return (files_all, files_all_opt, files_mc, files_vm)
3776 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3777 """Distribute additional files which are part of the cluster configuration.
3779 ConfigWriter takes care of distributing the config and ssconf files, but
3780 there are more files which should be distributed to all nodes. This function
3781 makes sure those are copied.
3783 @param lu: calling logical unit
3784 @param additional_nodes: list of nodes not in the config to distribute to
3785 @type additional_vm: boolean
3786 @param additional_vm: whether the additional nodes are vm-capable or not
3789 # Gather target nodes
3790 cluster = lu.cfg.GetClusterInfo()
3791 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3793 online_nodes = lu.cfg.GetOnlineNodeList()
3794 vm_nodes = lu.cfg.GetVmCapableNodeList()
3796 if additional_nodes is not None:
3797 online_nodes.extend(additional_nodes)
# Additional nodes are only added to the VM-capable set when the caller
# declared them vm-capable (guard partially elided from this view).
3799 vm_nodes.extend(additional_nodes)
3801 # Never distribute to master node
3802 for nodelist in [online_nodes, vm_nodes]:
3803 if master_info.name in nodelist:
3804 nodelist.remove(master_info.name)
3807 (files_all, files_all_opt, files_mc, files_vm) = \
3808 _ComputeAncillaryFiles(cluster, True)
3810 # Never re-distribute configuration file from here
3811 assert not (constants.CLUSTER_CONF_FILE in files_all or
3812 constants.CLUSTER_CONF_FILE in files_vm)
3813 assert not files_mc, "Master candidates not handled in this function"
# Map each target node list to the file set it should receive.
3816 (online_nodes, files_all),
3817 (online_nodes, files_all_opt),
3818 (vm_nodes, files_vm),
# Upload every file of every group; _UploadHelper warns on failure.
3822 for (node_list, files) in filemap:
3824 _UploadHelper(lu, node_list, fname)
3827 class LUClusterRedistConf(NoHooksLU):
3828 """Force the redistribution of cluster configuration.
3830 This is a very simple LU.
# All node locks are taken in shared mode: this LU only pushes files out,
# it does not modify node state.
3835 def ExpandNames(self):
3836 self.needed_locks = {
3837 locking.LEVEL_NODE: locking.ALL_SET,
3839 self.share_locks[locking.LEVEL_NODE] = 1
3841 def Exec(self, feedback_fn):
3842 """Redistribute the configuration.
# Saving the (unmodified) cluster object forces config/ssconf distribution;
# the helper then copies the remaining ancillary files.
3845 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3846 _RedistributeAncillaryFiles(self)
3849 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3850 """Sleep and poll for an instance's disk to sync.
# Polls the primary node's mirror status until the disks are in sync (or
# once, when oneshot is set). Returns True when not degraded at the end.
3853 if not instance.disks or disks is not None and not disks:
3856 disks = _ExpandCheckDisks(instance, disks)
3859 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3861 node = instance.primary_node
3864 lu.cfg.SetDiskID(dev, node)
3866 # TODO: Convert to utils.Retry
# Allow a few retries when the RPC fails before giving up entirely.
3869 degr_retries = 10 # in seconds, as we sleep 1 second each time
3873 cumul_degraded = False
3874 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3875 msg = rstats.fail_msg
3877 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
# Raised only after the retries are exhausted (retry bookkeeping elided).
3880 raise errors.RemoteError("Can't contact node %s for mirror data,"
3881 " aborting." % node)
3884 rstats = rstats.payload
3886 for i, mstat in enumerate(rstats):
3888 lu.LogWarning("Can't compute data for node %s/%s",
3889 node, disks[i].iv_name)
# A degraded device without a sync percentage means degradation that the
# ongoing sync will not fix by itself.
3892 cumul_degraded = (cumul_degraded or
3893 (mstat.is_degraded and mstat.sync_percent is None))
3894 if mstat.sync_percent is not None:
3896 if mstat.estimated_time is not None:
3897 rem_time = ("%s remaining (estimated)" %
3898 utils.FormatSeconds(mstat.estimated_time))
3899 max_time = mstat.estimated_time
3901 rem_time = "no time estimate"
3902 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3903 (disks[i].iv_name, mstat.sync_percent, rem_time))
3905 # if we're done but degraded, let's do a few small retries, to
3906 # make sure we see a stable and not transient situation; therefore
3907 # we force restart of the loop
3908 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3909 logging.info("Degraded disks found, %d retries left", degr_retries)
# Sleep is bounded to 60s regardless of the reported estimate.
3917 time.sleep(min(60, max_time))
3920 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3921 return not cumul_degraded
3924 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3925 """Check that mirrors are not degraded.
3927 The ldisk parameter, if True, will change the test from the
3928 is_degraded attribute (which represents overall non-ok status for
3929 the device(s)) to the ldisk (representing the local storage status).
# Returns a boolean: True when the device (and, recursively, all its
# children) looks healthy on the given node.
3932 lu.cfg.SetDiskID(dev, node)
# Only query the device where it is actually assembled.
3936 if on_primary or dev.AssembleOnSecondary():
3937 rstats = lu.rpc.call_blockdev_find(node, dev)
3938 msg = rstats.fail_msg
3940 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3942 elif not rstats.payload:
3943 lu.LogWarning("Can't find disk on node %s", node)
3947 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3949 result = result and not rstats.payload.is_degraded
# Recurse into child devices; note the recursive call does not propagate
# the ldisk flag (it always uses the default False).
3952 for child in dev.children:
3953 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3958 class LUOobCommand(NoHooksLU):
3959 """Logical unit for OOB handling.
# Commands that must not be run against the master node itself.
3963 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3965 def ExpandNames(self):
3966 """Gather locks we need.
# Lock only the named nodes, or all nodes when none were given.
3969 if self.op.node_names:
3970 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3971 lock_names = self.op.node_names
3973 lock_names = locking.ALL_SET
3975 self.needed_locks = {
3976 locking.LEVEL_NODE: lock_names,
3979 def CheckPrereq(self):
3980 """Check prerequisites.
3983 - the node exists in the configuration
3986 Any errors are signaled by raising errors.OpPrereqError.
3990 self.master_node = self.cfg.GetMasterNode()
3992 assert self.op.power_delay >= 0.0
# Explicit node list: refuse destructive commands on the master, with a
# helpful hint when the master itself has an OOB handler.
3994 if self.op.node_names:
3995 if (self.op.command in self._SKIP_MASTER and
3996 self.master_node in self.op.node_names):
3997 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3998 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4000 if master_oob_handler:
4001 additional_text = ("run '%s %s %s' if you want to operate on the"
4002 " master regardless") % (master_oob_handler,
4006 additional_text = "it does not support out-of-band operations"
4008 raise errors.OpPrereqError(("Operating on the master node %s is not"
4009 " allowed for %s; %s") %
4010 (self.master_node, self.op.command,
4011 additional_text), errors.ECODE_INVAL)
# No node list given: operate on all nodes, silently dropping the master
# for destructive commands.
4013 self.op.node_names = self.cfg.GetNodeList()
4014 if self.op.command in self._SKIP_MASTER:
4015 self.op.node_names.remove(self.master_node)
4017 if self.op.command in self._SKIP_MASTER:
4018 assert self.master_node not in self.op.node_names
# Resolve node objects and sanity-check each target node.
4020 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4022 raise errors.OpPrereqError("Node %s not found" % node_name,
4025 self.nodes.append(node)
4027 if (not self.op.ignore_status and
4028 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4029 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4030 " not marked offline") % node_name,
4033 def Exec(self, feedback_fn):
4034 """Execute OOB and return result if we expect any.
# Runs the OOB program for each node (via the master node's RPC) and
# builds a per-node result list of (status, payload) entries.
4037 master_node = self.master_node
4040 for idx, node in enumerate(utils.NiceSort(self.nodes,
4041 key=lambda node: node.name)):
4042 node_entry = [(constants.RS_NORMAL, node.name)]
4043 ret.append(node_entry)
4045 oob_program = _SupportsOob(self.cfg, node)
# Nodes without OOB support are reported as unavailable.
4048 node_entry.append((constants.RS_UNAVAIL, None))
4051 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4052 self.op.command, oob_program, node.name)
4053 result = self.rpc.call_run_oob(master_node, oob_program,
4054 self.op.command, node.name,
# RPC failure or invalid payload yields an RS_NODATA entry for the node.
4058 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4059 node.name, result.fail_msg)
4060 node_entry.append((constants.RS_NODATA, None))
4063 self._CheckPayload(result)
4064 except errors.OpExecError, err:
4065 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4067 node_entry.append((constants.RS_NODATA, None))
4069 if self.op.command == constants.OOB_HEALTH:
4070 # For health we should log important events
4071 for item, status in result.payload:
4072 if status in [constants.OOB_STATUS_WARNING,
4073 constants.OOB_STATUS_CRITICAL]:
4074 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4075 item, node.name, status)
# Track the node's recorded power state for power-related commands.
4077 if self.op.command == constants.OOB_POWER_ON:
4079 elif self.op.command == constants.OOB_POWER_OFF:
4080 node.powered = False
4081 elif self.op.command == constants.OOB_POWER_STATUS:
4082 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4083 if powered != node.powered:
4084 logging.warning(("Recorded power state (%s) of node '%s' does not"
4085 " match actual power state (%s)"), node.powered,
4088 # For configuration changing commands we should update the node
4089 if self.op.command in (constants.OOB_POWER_ON,
4090 constants.OOB_POWER_OFF):
4091 self.cfg.Update(node, feedback_fn)
4093 node_entry.append((constants.RS_NORMAL, result.payload))
# Optional delay between consecutive power-ons (not after the last node).
4095 if (self.op.command == constants.OOB_POWER_ON and
4096 idx < len(self.nodes) - 1):
4097 time.sleep(self.op.power_delay)
4101 def _CheckPayload(self, result):
4102 """Checks if the payload is valid.
4104 @param result: RPC result
4105 @raises errors.OpExecError: If payload is not valid
# Per-command payload shape checks; all problems are collected and raised
# together as a single OpExecError.
4109 if self.op.command == constants.OOB_HEALTH:
4110 if not isinstance(result.payload, list):
4111 errs.append("command 'health' is expected to return a list but got %s" %
4112 type(result.payload))
4114 for item, status in result.payload:
4115 if status not in constants.OOB_STATUSES:
4116 errs.append("health item '%s' has invalid status '%s'" %
4119 if self.op.command == constants.OOB_POWER_STATUS:
4120 if not isinstance(result.payload, dict):
4121 errs.append("power-status is expected to return a dict but got %s" %
4122 type(result.payload))
4124 if self.op.command in [
4125 constants.OOB_POWER_ON,
4126 constants.OOB_POWER_OFF,
4127 constants.OOB_POWER_CYCLE,
4129 if result.payload is not None:
4130 errs.append("%s is expected to not return payload but got '%s'" %
4131 (self.op.command, result.payload))
4134 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4135 utils.CommaJoin(errs))
4138 class _OsQuery(_QueryBase):
4139 FIELDS = query.OS_FIELDS
4141 def ExpandNames(self, lu):
4142 # Lock all nodes in shared mode
4143 # Temporary removal of locks, should be reverted later
4144 # TODO: reintroduce locks when they are lighter-weight
4145 lu.needed_locks = {}
4146 #self.share_locks[locking.LEVEL_NODE] = 1
4147 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4149 # The following variables interact with _QueryBase._GetNames
4151 self.wanted = self.names
4153 self.wanted = locking.ALL_SET
4155 self.do_locking = self.use_locking
# No per-level locks are declared; locking is disabled above.
4157 def DeclareLocks(self, lu, level):
4161 def _DiagnoseByOS(rlist):
4162 """Remaps a per-node return list into an a per-os per-node dictionary
4164 @param rlist: a map with node names as keys and OS objects as values
4167 @return: a dictionary with osnames as keys and as value another
4168 map, with nodes as keys and tuples of (path, status, diagnose,
4169 variants, parameters, api_versions) as values, eg::
4171 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4172 (/srv/..., False, "invalid api")],
4173 "node2": [(/srv/..., True, "", [], [])]}
4178 # we build here the list of nodes that didn't fail the RPC (at RPC
4179 # level), so that nodes with a non-responding node daemon don't
4180 # make all OSes invalid
4181 good_nodes = [node_name for node_name in rlist
4182 if not rlist[node_name].fail_msg]
4183 for node_name, nr in rlist.items():
# Skip failed or empty answers (the skip statement is elided here).
4184 if nr.fail_msg or not nr.payload:
4186 for (name, path, status, diagnose, variants,
4187 params, api_versions) in nr.payload:
4188 if name not in all_os:
4189 # build a list of nodes for this os containing empty lists
4190 # for each node in node_list
4192 for nname in good_nodes:
4193 all_os[name][nname] = []
4194 # convert params from [name, help] to (name, help)
4195 params = [tuple(v) for v in params]
4196 all_os[name][node_name].append((path, status, diagnose,
4197 variants, params, api_versions))
4200 def _GetQueryData(self, lu):
4201 """Computes the list of nodes and their attributes.
4204 # Locking is not used
4205 assert not (compat.any(lu.glm.is_owned(level)
4206 for level in locking.LEVELS
4207 if level != locking.LEVEL_CLUSTER) or
4208 self.do_locking or self.use_locking)
# Diagnose OSes only on online, VM-capable nodes.
4210 valid_nodes = [node.name
4211 for node in lu.cfg.GetAllNodesInfo().values()
4212 if not node.offline and node.vm_capable]
4213 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4214 cluster = lu.cfg.GetClusterInfo()
# Build one OsInfo per OS, intersecting variants/parameters/API versions
# across nodes so only values consistent everywhere are reported.
4218 for (os_name, os_data) in pol.items():
4219 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4220 hidden=(os_name in cluster.hidden_os),
4221 blacklisted=(os_name in cluster.blacklisted_os))
4225 api_versions = set()
4227 for idx, osl in enumerate(os_data.values()):
# An OS is valid only if it is valid on every node it appears on.
4228 info.valid = bool(info.valid and osl and osl[0][1])
4232 (node_variants, node_params, node_api) = osl[0][3:6]
# First node seeds the sets; later nodes narrow them (elided branch
# condition distinguishes the two cases).
4235 variants.update(node_variants)
4236 parameters.update(node_params)
4237 api_versions.update(node_api)
4239 # Filter out inconsistent values
4240 variants.intersection_update(node_variants)
4241 parameters.intersection_update(node_params)
4242 api_versions.intersection_update(node_api)
4244 info.variants = list(variants)
4245 info.parameters = list(parameters)
4246 info.api_versions = list(api_versions)
4248 data[os_name] = info
4250 # Prepare data in requested order
4251 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: Requested output fields
    @param names: Requested OS names (may be empty for "all")
    @return: A query-language filter expression, or C{None} for no filter

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_conditions = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                         for fname in ["hidden", "blacklisted"]
                         if fname not in fields]
    if "valid" not in fields:
      status_conditions.append([qlang.OP_TRUE, "valid"])

    if status_conditions:
      status_filter = [qlang.OP_AND] + status_conditions
    else:
      status_filter = None

    # Combine the name and status filters, dropping whichever is unset
    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    # Delegate the actual work to the OS query implementation; locking is
    # never used for OS queries, hence use_locking=False
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    # FIX: iterating the dictionary directly yields only its keys (instance
    # names), so unpacking into (instance_name, instance) raised ValueError;
    # ".items()" is needed to get (name, instance) pairs
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      # best-effort: the node may already be dead, so only warn
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
# NOTE(review): extract looks truncated -- the "if self.names:" /
# "else:" headers around the wanted-node selection, the "else:" branches
# that default live_data/oob_support/groups to None, and the docstring
# terminators appear to be missing; confirm against the full file.
# Purpose: query implementation for node objects; gathers static config
# data plus (optionally) live RPC data, instance mappings, OOB support
# and node-group info.
4391 class _NodeQuery(_QueryBase):
4392 FIELDS = query.NODE_FIELDS
4393
4394 def ExpandNames(self, lu):
4395 lu.needed_locks = {}
4396 lu.share_locks = _ShareAll()
4399 self.wanted = _GetWantedNodes(lu, self.names)
4401 self.wanted = locking.ALL_SET
# Locking is only needed when live (RPC) data was requested.
4403 self.do_locking = (self.use_locking and
4404 query.NQ_LIVE in self.requested_data)
4407 # If any non-static field is requested we need to lock the nodes
4408 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4409
4410 def DeclareLocks(self, lu, level):
4413 def _GetQueryData(self, lu):
4414 """Computes the list of nodes and their attributes.
4417 all_info = lu.cfg.GetAllNodesInfo()
4419 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4421 # Gather data as requested
4422 if query.NQ_LIVE in self.requested_data:
4423 # filter out non-vm_capable nodes
4424 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4426 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4427 lu.cfg.GetHypervisorType())
# Nodes whose RPC failed are simply left out of live_data.
4428 live_data = dict((name, nresult.payload)
4429 for (name, nresult) in node_data.items()
4430 if not nresult.fail_msg and nresult.payload)
4434 if query.NQ_INST in self.requested_data:
4435 node_to_primary = dict([(name, set()) for name in nodenames])
4436 node_to_secondary = dict([(name, set()) for name in nodenames])
4438 inst_data = lu.cfg.GetAllInstancesInfo()
4440 for inst in inst_data.values():
4441 if inst.primary_node in node_to_primary:
4442 node_to_primary[inst.primary_node].add(inst.name)
4443 for secnode in inst.secondary_nodes:
4444 if secnode in node_to_secondary:
4445 node_to_secondary[secnode].add(inst.name)
4447 node_to_primary = None
4448 node_to_secondary = None
4450 if query.NQ_OOB in self.requested_data:
# NOTE(review): ".iteritems()" is Python-2-only; would need ".items()"
# under Python 3.
4451 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4452 for name, node in all_info.iteritems())
4456 if query.NQ_GROUP in self.requested_data:
4457 groups = lu.cfg.GetAllNodeGroupsInfo()
4461 return query.NodeQueryData([all_info[name] for name in nodenames],
4462 live_data, lu.cfg.GetMasterNode(),
4463 node_to_primary, node_to_secondary, groups,
4464 oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # Build a name filter from the requested node names and hand all the
    # real work to the node query implementation
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
# NOTE(review): extract looks truncated -- the "else:" in ExpandNames,
# the "if msg: ... continue" error handling in Exec, the empty-payload
# guard, the per-volume "node_output = []" init, and the value
# assignments for the "node"/"phys"/"vg"/"name" field branches appear to
# be missing; confirm against the full file.
# Purpose: list LVM volumes on the selected nodes, one output row per
# volume, with the columns chosen by op.output_fields.
4485 class LUNodeQueryvols(NoHooksLU):
4486 """Logical unit for getting volumes on node(s).
4490 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4491 _FIELDS_STATIC = utils.FieldSet("node")
4493 def CheckArguments(self):
4494 _CheckOutputFields(static=self._FIELDS_STATIC,
4495 dynamic=self._FIELDS_DYNAMIC,
4496 selected=self.op.output_fields)
4498 def ExpandNames(self):
4499 self.needed_locks = {}
4500 self.share_locks[locking.LEVEL_NODE] = 1
# No explicit node list means "all nodes".
4501 if not self.op.nodes:
4502 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4504 self.needed_locks[locking.LEVEL_NODE] = \
4505 _GetWantedNodes(self, self.op.nodes)
4507 def Exec(self, feedback_fn):
4508 """Computes the list of nodes and their attributes.
4511 nodenames = self.owned_locks(locking.LEVEL_NODE)
4512 volumes = self.rpc.call_node_volumes(nodenames)
# Map (node, vg/volume) -> instance name so the "instance" column can be
# filled in.
4514 ilist = self.cfg.GetAllInstancesInfo()
4515 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4518 for node in nodenames:
4519 nresult = volumes[node]
4522 msg = nresult.fail_msg
4524 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4527 node_vols = sorted(nresult.payload,
4528 key=operator.itemgetter("dev"))
4530 for vol in node_vols:
4532 for field in self.op.output_fields:
4535 elif field == "phys":
4539 elif field == "name":
4541 elif field == "size":
# Sizes are reported as float strings; truncate to whole mebibytes.
4542 val = int(float(vol["size"]))
4543 elif field == "instance":
# "-" marks a volume not belonging to any instance disk.
4544 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4546 raise errors.ParameterError(field)
4547 node_output.append(str(val))
4549 output.append(node_output)
# NOTE(review): extract looks truncated -- the "if self.op.nodes:" /
# "else:" headers in ExpandNames, the "if msg: ... continue" handling,
# the per-row "out = []" initialization, the "val = node" assignment for
# the SF_NODE branch, and the trailing result/append lines appear to be
# missing; confirm against the full file.
# Purpose: list storage units of a given type on the selected nodes,
# with output columns chosen by op.output_fields.
4554 class LUNodeQueryStorage(NoHooksLU):
4555 """Logical unit for getting information on storage units on node(s).
4558 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4561 def CheckArguments(self):
4562 _CheckOutputFields(static=self._FIELDS_STATIC,
4563 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4564 selected=self.op.output_fields)
4566 def ExpandNames(self):
4567 self.needed_locks = {}
4568 self.share_locks[locking.LEVEL_NODE] = 1
4571 self.needed_locks[locking.LEVEL_NODE] = \
4572 _GetWantedNodes(self, self.op.nodes)
4574 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4576 def Exec(self, feedback_fn):
4577 """Computes the list of nodes and their attributes.
4580 self.nodes = self.owned_locks(locking.LEVEL_NODE)
# The name column is always fetched (even if not requested) because the
# results are keyed and sorted by it below.
4582 # Always get name to sort by
4583 if constants.SF_NAME in self.op.output_fields:
4584 fields = self.op.output_fields[:]
4586 fields = [constants.SF_NAME] + self.op.output_fields
4588 # Never ask for node or type as it's only known to the LU
4589 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4590 while extra in fields:
4591 fields.remove(extra)
# Map field name -> column index in the RPC result rows.
4593 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4594 name_idx = field_idx[constants.SF_NAME]
4596 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4597 data = self.rpc.call_storage_list(self.nodes,
4598 self.op.storage_type, st_args,
4599 self.op.name, fields)
4603 for node in utils.NiceSort(self.nodes):
4604 nresult = data[node]
4608 msg = nresult.fail_msg
4610 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
# Key rows by the storage unit name so they can be emitted sorted.
4613 rows = dict([(row[name_idx], row) for row in nresult.payload])
4615 for name in utils.NiceSort(rows.keys()):
4620 for field in self.op.output_fields:
4621 if field == constants.SF_NODE:
4623 elif field == constants.SF_TYPE:
4624 val = self.op.storage_type
4625 elif field in field_idx:
4626 val = row[field_idx[field]]
4628 raise errors.ParameterError(field)
# NOTE(review): extract looks truncated -- initializations such as
# live_data/bad_nodes/offline_nodes, the "for name in nodes:" loop
# header, several "if"/"else" headers and docstring terminators appear
# to be missing; confirm against the full file.
# Purpose: query implementation for instance objects; optionally
# collects live hypervisor data, disk usage, console info and node-group
# data depending on the requested fields.
4637 class _InstanceQuery(_QueryBase):
4638 FIELDS = query.INSTANCE_FIELDS
4639
4640 def ExpandNames(self, lu):
4641 lu.needed_locks = {}
4642 lu.share_locks = _ShareAll()
4645 self.wanted = _GetWantedInstances(lu, self.names)
4647 self.wanted = locking.ALL_SET
# Locks are only taken when live data is requested.
4649 self.do_locking = (self.use_locking and
4650 query.IQ_LIVE in self.requested_data)
4652 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4653 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4654 lu.needed_locks[locking.LEVEL_NODE] = []
4655 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4657 self.do_grouplocks = (self.do_locking and
4658 query.IQ_NODES in self.requested_data)
4659
4660 def DeclareLocks(self, lu, level):
4662 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4663 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4665 # Lock all groups used by instances optimistically; this requires going
4666 # via the node before it's locked, requiring verification later on
4667 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4669 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4670 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4671 elif level == locking.LEVEL_NODE:
4672 lu._LockInstancesNodes() # pylint: disable=W0212
4673
4675 def _CheckGroupLocks(lu):
4676 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4677 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4679 # Check if node groups for locked instances are still correct
4680 for instance_name in owned_instances:
4681 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4682
4683 def _GetQueryData(self, lu):
4684 """Computes the list of instances and their attributes.
4687 if self.do_grouplocks:
4688 self._CheckGroupLocks(lu)
4690 cluster = lu.cfg.GetClusterInfo()
4691 all_info = lu.cfg.GetAllInstancesInfo()
4693 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4695 instance_list = [all_info[name] for name in instance_names]
4696 nodes = frozenset(itertools.chain(*(inst.all_nodes
4697 for inst in instance_list)))
4698 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4701 wrongnode_inst = set()
4703 # Gather data as requested
4704 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4706 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4708 result = node_data[name]
4710 # offline nodes will be in both lists
4711 assert result.fail_msg
4712 offline_nodes.append(name)
4714 bad_nodes.append(name)
4715 elif result.payload:
4716 for inst in result.payload:
4717 if inst in all_info:
4718 if all_info[inst].primary_node == name:
# NOTE(review): updating with the node's *entire* payload inside a
# per-instance check looks suspicious -- presumably only "inst" should
# be merged here; verify against upstream history.
4719 live_data.update(result.payload)
4721 wrongnode_inst.add(inst)
4723 # orphan instance; we don't list it here as we don't
4724 # handle this case yet in the output of instance listing
4725 logging.warning("Orphan instance '%s' found on node %s",
4727 # else no instance is alive
4731 if query.IQ_DISKUSAGE in self.requested_data:
4732 disk_usage = dict((inst.name,
4733 _ComputeDiskSize(inst.disk_template,
4734 [{constants.IDISK_SIZE: disk.size}
4735 for disk in inst.disks]))
4736 for inst in instance_list)
4740 if query.IQ_CONSOLE in self.requested_data:
4742 for inst in instance_list:
4743 if inst.name in live_data:
4744 # Instance is running
4745 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4747 consinfo[inst.name] = None
4748 assert set(consinfo.keys()) == set(instance_names)
4752 if query.IQ_NODES in self.requested_data:
4753 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4755 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4756 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4757 for uuid in set(map(operator.attrgetter("group"),
4763 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4764 disk_usage, offline_nodes, bad_nodes,
4765 live_data, wrongnode_inst, consinfo,
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # Resolve the query implementation class for the requested resource
    # kind, then instantiate it with the caller-supplied filter and fields
    impl_cls = _GetQueryImplementation(self.op.what)

    self.impl = impl_cls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # Only the field definitions of the implementation are needed; no
    # instance is created
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    # Field queries touch no cluster objects, hence no locks
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
# NOTE(review): extract looks truncated -- the "try:"/"except KeyError:"
# around the MODIFIABLE_STORAGE_FIELDS lookup, the "if diff:" guard, the
# error-code arguments and the dict-closing brace in ExpandNames appear
# to be missing; confirm against the full file.
# Purpose: change parameters of a single storage unit on one node, after
# validating that the storage type and the requested fields are
# modifiable.
4808 class LUNodeModifyStorage(NoHooksLU):
4809 """Logical unit for modifying a storage volume on a node.
4814 def CheckArguments(self):
4815 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4817 storage_type = self.op.storage_type
# Look up which fields may be changed for this storage type.
4820 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4822 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4823 " modified" % storage_type,
# Reject any requested change outside the modifiable field set.
4826 diff = set(self.op.changes.keys()) - modifiable
4828 raise errors.OpPrereqError("The following fields can not be modified for"
4829 " storage units of type '%s': %r" %
4830 (storage_type, list(diff)),
4833 def ExpandNames(self):
4834 self.needed_locks = {
4835 locking.LEVEL_NODE: self.op.node_name,
4838 def Exec(self, feedback_fn):
4839 """Computes the list of nodes and their attributes.
4842 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4843 result = self.rpc.call_storage_modify(self.op.node_name,
4844 self.op.storage_type, st_args,
4845 self.op.name, self.op.changes)
4846 result.Raise("Failed to modify storage unit '%s' on %s" %
4847 (self.op.name, self.op.node_name))
# NOTE(review): extract looks truncated -- numerous interior lines
# (HPATH, docstring terminators, "cfg = self.cfg", "if self.op.readd:" /
# "else:" headers, error-code arguments, the vm_enabled/group keyword
# arguments of objects.Node, the "exceptions" list before the
# self-promotion decision, etc.) appear to be missing; confirm against
# the full file before editing.
# Purpose: add a new node to the cluster, or re-add a previously removed
# one (op.readd), validating addressing, reachability and capability
# flags first.
4850 class LUNodeAdd(LogicalUnit):
4851 """Logical unit for adding node to the cluster.
4855 HTYPE = constants.HTYPE_NODE
# Capability flags copied between opcode and node object.
4856 _NFLAGS = ["master_capable", "vm_capable"]
4857
4858 def CheckArguments(self):
4859 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4860 # validate/normalize the node name
4861 self.hostname = netutils.GetHostname(name=self.op.node_name,
4862 family=self.primary_ip_family)
4863 self.op.node_name = self.hostname.name
4865 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4866 raise errors.OpPrereqError("Cannot readd the master node",
# A readded node keeps its previous group; passing one is an error.
4869 if self.op.readd and self.op.group:
4870 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4871 " being readded", errors.ECODE_INVAL)
4872
4873 def BuildHooksEnv(self):
4876 This will run on all nodes before, and on all nodes + the new node after.
4880 "OP_TARGET": self.op.node_name,
4881 "NODE_NAME": self.op.node_name,
4882 "NODE_PIP": self.op.primary_ip,
4883 "NODE_SIP": self.op.secondary_ip,
4884 "MASTER_CAPABLE": str(self.op.master_capable),
4885 "VM_CAPABLE": str(self.op.vm_capable),
4887
4888 def BuildHooksNodes(self):
4889 """Build hooks nodes.
4892 # Exclude added node
4893 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4894 post_nodes = pre_nodes + [self.op.node_name, ]
4896 return (pre_nodes, post_nodes)
4897
4898 def CheckPrereq(self):
4899 """Check prerequisites.
4902 - the new node is not already in the config
4904 - its parameters (single/dual homed) matches the cluster
4906 Any errors are signaled by raising errors.OpPrereqError.
4910 hostname = self.hostname
4911 node = hostname.name
4912 primary_ip = self.op.primary_ip = hostname.ip
# Without an explicit secondary IP the node is single-homed, which is
# only possible with an IPv4 primary address.
4913 if self.op.secondary_ip is None:
4914 if self.primary_ip_family == netutils.IP6Address.family:
4915 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4916 " IPv4 address must be given as secondary",
4918 self.op.secondary_ip = primary_ip
4920 secondary_ip = self.op.secondary_ip
4921 if not netutils.IP4Address.IsValid(secondary_ip):
4922 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4923 " address" % secondary_ip, errors.ECODE_INVAL)
4925 node_list = cfg.GetNodeList()
4926 if not self.op.readd and node in node_list:
4927 raise errors.OpPrereqError("Node %s is already in the configuration" %
4928 node, errors.ECODE_EXISTS)
4929 elif self.op.readd and node not in node_list:
4930 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4933 self.changed_primary_ip = False
# Check the new addresses against every existing node for conflicts.
4935 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4936 if self.op.readd and node == existing_node_name:
4937 if existing_node.secondary_ip != secondary_ip:
4938 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4939 " address configuration as before",
4941 if existing_node.primary_ip != primary_ip:
4942 self.changed_primary_ip = True
4946 if (existing_node.primary_ip == primary_ip or
4947 existing_node.secondary_ip == primary_ip or
4948 existing_node.primary_ip == secondary_ip or
4949 existing_node.secondary_ip == secondary_ip):
4950 raise errors.OpPrereqError("New node ip address(es) conflict with"
4951 " existing node %s" % existing_node.name,
4952 errors.ECODE_NOTUNIQUE)
4954 # After this 'if' block, None is no longer a valid value for the
4955 # _capable op attributes
# On readd, unspecified capability flags inherit the old node's values;
# otherwise they default to True.
4957 old_node = self.cfg.GetNodeInfo(node)
4958 assert old_node is not None, "Can't retrieve locked node %s" % node
4959 for attr in self._NFLAGS:
4960 if getattr(self.op, attr) is None:
4961 setattr(self.op, attr, getattr(old_node, attr))
4963 for attr in self._NFLAGS:
4964 if getattr(self.op, attr) is None:
4965 setattr(self.op, attr, True)
4967 if self.op.readd and not self.op.vm_capable:
4968 pri, sec = cfg.GetNodeInstances(node)
4970 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4971 " flag set to false, but it already holds"
4972 " instances" % node,
4975 # check that the type of the node (single versus dual homed) is the
4976 # same as for the master
4977 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4978 master_singlehomed = myself.secondary_ip == myself.primary_ip
4979 newbie_singlehomed = secondary_ip == primary_ip
4980 if master_singlehomed != newbie_singlehomed:
4981 if master_singlehomed:
4982 raise errors.OpPrereqError("The master has no secondary ip but the"
4983 " new node has one",
4986 raise errors.OpPrereqError("The master has a secondary ip but the"
4987 " new node doesn't have one",
4990 # checks reachability
4991 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4992 raise errors.OpPrereqError("Node not reachable by ping",
4993 errors.ECODE_ENVIRON)
4995 if not newbie_singlehomed:
4996 # check reachability from my secondary ip to newbie's secondary ip
4997 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4998 source=myself.secondary_ip):
4999 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5000 " based ping to node daemon port",
5001 errors.ECODE_ENVIRON)
# Decide whether the node should immediately become a master candidate.
5008 if self.op.master_capable:
5009 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5011 self.master_candidate = False
# On readd reuse the existing node object; otherwise build a fresh one.
5014 self.new_node = old_node
5016 node_group = cfg.LookupNodeGroup(self.op.group)
5017 self.new_node = objects.Node(name=node,
5018 primary_ip=primary_ip,
5019 secondary_ip=secondary_ip,
5020 master_candidate=self.master_candidate,
5021 offline=False, drained=False,
5024 if self.op.ndparams:
5025 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5026
5027 def Exec(self, feedback_fn):
5028 """Adds the new node to the cluster.
5031 new_node = self.new_node
5032 node = new_node.name
5034 # We adding a new node so we assume it's powered
5035 new_node.powered = True
5037 # for re-adds, reset the offline/drained/master-candidate flags;
5038 # we need to reset here, otherwise offline would prevent RPC calls
5039 # later in the procedure; this also means that if the re-add
5040 # fails, we are left with a non-offlined, broken node
5042 new_node.drained = new_node.offline = False # pylint: disable=W0201
5043 self.LogInfo("Readding a node, the offline/drained flags were reset")
5044 # if we demote the node, we do cleanup later in the procedure
5045 new_node.master_candidate = self.master_candidate
5046 if self.changed_primary_ip:
5047 new_node.primary_ip = self.op.primary_ip
5049 # copy the master/vm_capable flags
5050 for attr in self._NFLAGS:
5051 setattr(new_node, attr, getattr(self.op, attr))
5053 # notify the user about any possible mc promotion
5054 if new_node.master_candidate:
5055 self.LogInfo("Node will be a master candidate")
5057 if self.op.ndparams:
5058 new_node.ndparams = self.op.ndparams
5060 new_node.ndparams = {}
# Protocol versions must match exactly between master and new node.
5062 # check connectivity
5063 result = self.rpc.call_version([node])[node]
5064 result.Raise("Can't get version information from node %s" % node)
5065 if constants.PROTOCOL_VERSION == result.payload:
5066 logging.info("Communication to node %s fine, sw version %s match",
5067 node, result.payload)
5069 raise errors.OpExecError("Version mismatch master version %s,"
5070 " node version %s" %
5071 (constants.PROTOCOL_VERSION, result.payload))
5073 # Add node to our /etc/hosts, and add key to known_hosts
5074 if self.cfg.GetClusterInfo().modify_etc_hosts:
5075 master_node = self.cfg.GetMasterNode()
5076 result = self.rpc.call_etc_hosts_modify(master_node,
5077 constants.ETC_HOSTS_ADD,
5080 result.Raise("Can't update hosts file with new host data")
5082 if new_node.secondary_ip != new_node.primary_ip:
5083 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
# Run a node-verify pass from the master against the new node only.
5086 node_verify_list = [self.cfg.GetMasterNode()]
5087 node_verify_param = {
5088 constants.NV_NODELIST: ([node], {}),
5089 # TODO: do a node-net-test as well?
5092 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5093 self.cfg.GetClusterName())
5094 for verifier in node_verify_list:
5095 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5096 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5098 for failed in nl_payload:
5099 feedback_fn("ssh/hostname verification failed"
5100 " (checking from %s): %s" %
5101 (verifier, nl_payload[failed]))
5102 raise errors.OpExecError("ssh/hostname verification failed")
# Readd path updates config/context; add path registers the node anew.
5105 _RedistributeAncillaryFiles(self)
5106 self.context.ReaddNode(new_node)
5107 # make sure we redistribute the config
5108 self.cfg.Update(new_node, feedback_fn)
5109 # and make sure the new node will not have old files around
5110 if not new_node.master_candidate:
5111 result = self.rpc.call_node_demote_from_mc(new_node.name)
5112 msg = result.fail_msg
5114 self.LogWarning("Node failed to demote itself from master"
5115 " candidate status: %s" % msg)
5117 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5118 additional_vm=self.op.vm_capable)
5119 self.context.AddNode(new_node, self.proc.GetECId())
# NOTE(review): extract looks truncated -- the docstring terminator and
# the "_F2R = {" opening line before the flag-tuple entries appear to be
# missing; confirm against the full file.
# Purpose: LU that modifies node parameters/roles; the constants below
# map (master_candidate, drained, offline) flag tuples to symbolic node
# roles and back.
5122 class LUNodeSetParams(LogicalUnit):
5123 """Modifies the parameters of a node.
5125 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5126 to the node role (as _ROLE_*)
5127 @cvar _R2F: a dictionary from node role to tuples of flags
5128 @cvar _FLAGS: a list of attribute names corresponding to the flags
5131 HPATH = "node-modify"
5132 HTYPE = constants.HTYPE_NODE
5134 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
# At most one of (mc, drained, offline) may be set; all-False is the
# regular role.
5136 (True, False, False): _ROLE_CANDIDATE,
5137 (False, True, False): _ROLE_DRAINED,
5138 (False, False, True): _ROLE_OFFLINE,
5139 (False, False, False): _ROLE_REGULAR,
# Inverse mapping: role -> flag tuple.
5141 _R2F = dict((v, k) for k, v in _F2R.items())
5142 _FLAGS = ["master_candidate", "drained", "offline"]
# NOTE(review): extract looks truncated -- the errors.ECODE_INVAL
# arguments closing several OpPrereqError calls appear to be missing;
# confirm against the full file.
# Purpose: validate that at least one modification was requested, that
# at most one exclusive state flag is being set, and compute the
# locking strategy.
5144 def CheckArguments(self):
5145 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5146 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5147 self.op.master_capable, self.op.vm_capable,
5148 self.op.secondary_ip, self.op.ndparams]
# All-None means the opcode requests no change at all.
5149 if all_mods.count(None) == len(all_mods):
5150 raise errors.OpPrereqError("Please pass at least one modification",
# The mc/drained/offline states are mutually exclusive.
5152 if all_mods.count(True) > 1:
5153 raise errors.OpPrereqError("Can't set the node into more than one"
5154 " state at the same time",
5157 # Boolean value that tells us whether we might be demoting from MC
5158 self.might_demote = (self.op.master_candidate == False or
5159 self.op.offline == True or
5160 self.op.drained == True or
5161 self.op.master_capable == False)
5163 if self.op.secondary_ip:
5164 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5165 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5166 " address" % self.op.secondary_ip,
# Auto-promotion on demotion requires locking all nodes; a secondary-ip
# change requires looking at (and initially locking) all instances.
5169 self.lock_all = self.op.auto_promote and self.might_demote
5170 self.lock_instances = self.op.secondary_ip is not None
# NOTE(review): extract looks truncated -- the "if self.lock_all:" /
# "else:" headers selecting between the two needed_locks assignments
# appear to be missing; confirm against the full file.
# Purpose: lock either all nodes (when auto-promotion may be needed) or
# just the target node, plus all instances when the secondary IP
# changes.
5172 def ExpandNames(self):
5174 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5176 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5178 if self.lock_instances:
5179 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
# NOTE(review): extract looks truncated -- the "instances_keep = []"
# initialization before the release loop appears to be missing; confirm
# against the full file.
# Purpose: after instance locks are held but before node locks are
# taken, release instance locks that are unrelated to the target node.
5181 def DeclareLocks(self, level):
5182 # If we have locked all instances, before waiting to lock nodes, release
5183 # all the ones living on nodes unrelated to the current operation.
5184 if level == locking.LEVEL_NODE and self.lock_instances:
5185 self.affected_instances = []
5186 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5189 # Build list of instances to release
5190 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5191 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
# Keep only internally-mirrored (e.g. DRBD) instances touching the node.
5192 if (instance.disk_template in constants.DTS_INT_MIRROR and
5193 self.op.node_name in instance.all_nodes):
5194 instances_keep.append(instance_name)
5195 self.affected_instances.append(instance)
5197 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5199 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5200 set(instances_keep))
# NOTE(review): extract looks truncated -- the docstring open/close and
# the "return {" / "}" lines wrapping the dict entries appear to be
# missing; confirm against the full file.
# Purpose: hook environment exposing the requested flag values (string
# "None" for unchanged flags).
5202 def BuildHooksEnv(self):
5205 This runs on the master node.
5209 "OP_TARGET": self.op.node_name,
5210 "MASTER_CANDIDATE": str(self.op.master_candidate),
5211 "OFFLINE": str(self.op.offline),
5212 "DRAINED": str(self.op.drained),
5213 "MASTER_CAPABLE": str(self.op.master_capable),
5214 "VM_CAPABLE": str(self.op.vm_capable),
# NOTE(review): extract looks truncated -- the docstring terminator and
# the return statement using "nl" appear to be missing; confirm against
# the full file.
# Purpose: hooks run on the master and on the node being modified.
5217 def BuildHooksNodes(self):
5218 """Build hooks nodes.
5221 nl = [self.cfg.GetMasterNode(), self.op.node_name]
# NOTE(review): extract looks truncated -- various "if"/"else" headers,
# error-code arguments, the "if ipri or isec:" guard after
# GetNodeInstances, the "new_role = old_role" assignment for the
# keep-role branch, and the offline/online branch headers around the
# secondary-ip checks appear to be missing; confirm against the full
# file before editing.
# Purpose: validate the requested node modifications and compute the
# resulting role transition (self.old_role -> self.new_role) plus the
# merged ndparams.
5224 def CheckPrereq(self):
5225 """Check prerequisites.
5227 This only checks the instance list against the existing names.
5230 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5232 if (self.op.master_candidate is not None or
5233 self.op.drained is not None or
5234 self.op.offline is not None):
5235 # we can't change the master's node flags
5236 if self.op.node_name == self.cfg.GetMasterNode():
5237 raise errors.OpPrereqError("The master role can be changed"
5238 " only via master-failover",
5241 if self.op.master_candidate and not node.master_capable:
5242 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5243 " it a master candidate" % node.name,
# Unsetting vm_capable is only allowed on nodes hosting no instances.
5246 if self.op.vm_capable == False:
5247 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5249 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5250 " the vm_capable flag" % node.name,
5253 if node.master_candidate and self.might_demote and not self.lock_all:
5254 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5255 # check if after removing the current node, we're missing master
5257 (mc_remaining, mc_should, _) = \
5258 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5259 if mc_remaining < mc_should:
5260 raise errors.OpPrereqError("Not enough master candidates, please"
5261 " pass auto promote option to allow"
5262 " promotion", errors.ECODE_STATE)
5264 self.old_flags = old_flags = (node.master_candidate,
5265 node.drained, node.offline)
5266 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5267 self.old_role = old_role = self._F2R[old_flags]
5269 # Check for ineffective changes
5270 for attr in self._FLAGS:
5271 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5272 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5273 setattr(self.op, attr, None)
5275 # Past this point, any flag change to False means a transition
5276 # away from the respective state, as only real changes are kept
5278 # TODO: We might query the real power state if it supports OOB
5279 if _SupportsOob(self.cfg, node):
# De-offlining a powered-off OOB node is refused unless power-on is
# also requested.
5280 if self.op.offline is False and not (node.powered or
5281 self.op.powered == True):
5282 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5283 " offline status can be reset") %
5285 elif self.op.powered is not None:
5286 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5287 " as it does not support out-of-band"
5288 " handling") % self.op.node_name)
5290 # If we're being deofflined/drained, we'll MC ourself if needed
5291 if (self.op.drained == False or self.op.offline == False or
5292 (self.op.master_capable and not node.master_capable)):
5293 if _DecideSelfPromotion(self):
5294 self.op.master_candidate = True
5295 self.LogInfo("Auto-promoting node to master candidate")
5297 # If we're no longer master capable, we'll demote ourselves from MC
5298 if self.op.master_capable == False and node.master_candidate:
5299 self.LogInfo("Demoting from master candidate")
5300 self.op.master_candidate = False
# Compute the new role from whichever exclusive flag (if any) is set.
5303 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5304 if self.op.master_candidate:
5305 new_role = self._ROLE_CANDIDATE
5306 elif self.op.drained:
5307 new_role = self._ROLE_DRAINED
5308 elif self.op.offline:
5309 new_role = self._ROLE_OFFLINE
5310 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5311 # False is still in new flags, which means we're un-setting (the
5313 new_role = self._ROLE_REGULAR
5314 else: # no new flags, nothing, keep old role
5317 self.new_role = new_role
5319 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5320 # Trying to transition out of offline status
5321 result = self.rpc.call_version([node.name])[node.name]
5323 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5324 " to report its version: %s" %
5325 (node.name, result.fail_msg),
5328 self.LogWarning("Transitioning node from offline to online state"
5329 " without using re-add. Please make sure the node"
5332 if self.op.secondary_ip:
5333 # Ok even without locking, because this can't be changed by any LU
5334 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5335 master_singlehomed = master.secondary_ip == master.primary_ip
5336 if master_singlehomed and self.op.secondary_ip:
5337 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5338 " homed cluster", errors.ECODE_INVAL)
5341 if self.affected_instances:
5342 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5343 " node has instances (%s) configured"
5344 " to use it" % self.affected_instances)
5346 # On online nodes, check that no instances are running, and that
5347 # the node has the new ip and we can reach it.
5348 for instance in self.affected_instances:
5349 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5351 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5352 if master.name != node.name:
5353 # check reachability from master secondary ip to new secondary ip
5354 if not netutils.TcpPing(self.op.secondary_ip,
5355 constants.DEFAULT_NODED_PORT,
5356 source=master.secondary_ip):
5357 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5358 " based ping to node daemon port",
5359 errors.ECODE_ENVIRON)
# Merge new ndparams over the node's existing ones and type-check them.
5361 if self.op.ndparams:
5362 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5363 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5364 self.new_ndparams = new_ndparams
5366 def Exec(self, feedback_fn):
5371 old_role = self.old_role
5372 new_role = self.new_role
5376 if self.op.ndparams:
5377 node.ndparams = self.new_ndparams
5379 if self.op.powered is not None:
5380 node.powered = self.op.powered
5382 for attr in ["master_capable", "vm_capable"]:
5383 val = getattr(self.op, attr)
5385 setattr(node, attr, val)
5386 result.append((attr, str(val)))
5388 if new_role != old_role:
5389 # Tell the node to demote itself, if no longer MC and not offline
5390 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5391 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5393 self.LogWarning("Node failed to demote itself: %s", msg)
5395 new_flags = self._R2F[new_role]
5396 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5398 result.append((desc, str(nf)))
5399 (node.master_candidate, node.drained, node.offline) = new_flags
5401 # we locked all nodes, we adjust the CP before updating this node
5403 _AdjustCandidatePool(self, [node.name])
5405 if self.op.secondary_ip:
5406 node.secondary_ip = self.op.secondary_ip
5407 result.append(("secondary_ip", self.op.secondary_ip))
5409 # this will trigger configuration file update, if needed
5410 self.cfg.Update(node, feedback_fn)
5412 # this will trigger job queue propagation or cleanup if the mc
5414 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5415 self.context.ReaddNode(node)
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  This is a last-resort operation: the target node reboots itself via the
  node daemon, without clean shutdown of instances.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # powercycling the master would take the whole cluster down; require
    # an explicit --force
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # read-only query of in-memory config; no locks needed
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: dict with the static cluster configuration values

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # read-only query; no locks needed
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    @return: list of values, one per requested output field, in order

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        # the queue is drained iff the drain file exists
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        # CheckArguments validated the fields, so this is a programming error
        raise errors.ParameterError(field)
      values.append(entry)
    return values
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # node locks are computed later, once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: list of (node, instance-visible name, node-visible path)
        tuples for the assembled disks

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # drop the recorded size so the node uses whatever is on disk
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Assembles all disks and raises L{errors.OpExecError} on failure,
  shutting the partially-assembled disks back down first.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # roll back whatever was assembled before reporting the error
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      # forced: shut down disks even if the instance looks running
      _ShutdownInstanceDisks(self, instance)
    else:
      # safe path: refuses to act on a running instance
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  # refuse to touch the disks of an instance that is still running
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5749 def _ExpandCheckDisks(instance, disks):
5750 """Return the instance disks selected by the disks list
5752 @type disks: list of L{objects.Disk} or None
5753 @param disks: selected disks
5754 @rtype: list of L{objects.Disk}
5755 @return: selected instance disks to act on
5759 return instance.disks
5761 if not set(disks).issubset(instance.disks):
5762 raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored and make the function return False; errors on offline
  secondary nodes are always tolerated.

  @return: True if all shutdowns succeeded (or were ignorable),
      False otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # a failure counts unless it's an ignorable primary error or the
        # node is marked offline
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  # a missing or non-integer value means the node couldn't report memory
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  Delegates the per-VG check to L{_CheckNodesFreeDiskOnVG}; the first
  VG that fails aborts the whole check.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg_name, vg_req_mib in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, vg_req_mib)
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    # non-integer means the node couldn't compute free space for this VG
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      # with an offline primary we can only record the intent to start
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      # record the desired state before attempting the start
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        # start failed: don't leave the disks assembled
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboot is handled entirely by the node daemon
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot: shut everything down and start it again
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      # record the desired state before attempting the shutdown
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        # best-effort: still try to release the disks below
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      # always release the disks, even if the OS scripts failed
      _ShutdownInstanceDisks(self, inst)
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      # replacement nodes must match the instance's node count exactly
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      # empty list means "recreate all disks"
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
# Logical unit renaming an existing (stopped) instance, including its
# configuration entry, its lock, optional file-storage directory and the
# in-instance OS rename script.
6390 class LUInstanceRename(LogicalUnit):
6391 """Rename an instance.
6394 HPATH = "instance-rename"
6395 HTYPE = constants.HTYPE_INSTANCE
6397 def CheckArguments(self):
# The IP uniqueness check relies on the resolved hostname, so it cannot be
# requested without the name check.
6401 if self.op.ip_check and not self.op.name_check:
6402 # TODO: make the ip check more flexible and not depend on the name check
6403 raise errors.OpPrereqError("IP address check requires a name check",
6406 def BuildHooksEnv(self):
6409 This runs on master, primary and secondary nodes of the instance.
# Standard instance hook environment plus the target name.
6412 env = _BuildInstanceHookEnvByObject(self, self.instance)
6413 env["INSTANCE_NEW_NAME"] = self.op.new_name
6416 def BuildHooksNodes(self):
6417 """Build hooks nodes.
# Hooks run on the master and on all of the instance's nodes.
6420 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6423 def CheckPrereq(self):
6424 """Check prerequisites.
6426 This checks that the instance is in the cluster and is not running.
6429 self.op.instance_name = _ExpandInstanceName(self.cfg,
6430 self.op.instance_name)
6431 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6432 assert instance is not None
# Renaming requires a reachable primary node and a stopped instance.
6433 _CheckNodeOnline(self, instance.primary_node)
6434 _CheckInstanceDown(self, instance, "cannot rename")
6435 self.instance = instance
6437 new_name = self.op.new_name
6438 if self.op.name_check:
6439 hostname = netutils.GetHostname(name=new_name)
6440 if hostname.name != new_name:
6441 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
# Reject resolutions that do not share a name component with the input
# (e.g. a wildcard DNS answer), to avoid renaming to an unrelated name.
6443 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6444 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6445 " same as given hostname '%s'") %
6446 (hostname.name, self.op.new_name),
6448 new_name = self.op.new_name = hostname.name
# A TCP ping on the noded port tells us the new IP is already live,
# hence already in use by some host.
6449 if (self.op.ip_check and
6450 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6451 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6452 (hostname.ip, new_name),
6453 errors.ECODE_NOTUNIQUE)
# The new name must not clash with another instance in the cluster.
6455 instance_list = self.cfg.GetInstanceList()
6456 if new_name in instance_list and new_name != instance.name:
6457 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6458 new_name, errors.ECODE_EXISTS)
6460 def Exec(self, feedback_fn):
6461 """Rename the instance.
6464 inst = self.instance
6465 old_name = inst.name
# File-based templates store disks under a per-instance directory that
# must be renamed on the primary node as well.
6467 rename_file_storage = False
6468 if (inst.disk_template in constants.DTS_FILEBASED and
6469 self.op.new_name != inst.name):
6470 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6471 rename_file_storage = True
6473 self.cfg.RenameInstance(inst.name, self.op.new_name)
6474 # Change the instance lock. This is definitely safe while we hold the BGL.
6475 # Otherwise the new lock would have to be added in acquired mode.
6477 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6478 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6480 # re-read the instance from the configuration after rename
6481 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6483 if rename_file_storage:
6484 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6485 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6486 old_file_storage_dir,
6487 new_file_storage_dir)
# The config rename has already happened, hence the explicit note in the
# error message when the directory rename fails.
6488 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6489 " (but the instance has been renamed in Ganeti)" %
6490 (inst.primary_node, old_file_storage_dir,
6491 new_file_storage_dir))
# Disks must be active for the OS rename script to run.
6493 _StartInstanceDisks(self, inst, None)
6495 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6496 old_name, self.op.debug_level)
6497 msg = result.fail_msg
# A failing OS rename script is only a warning: Ganeti's own rename has
# already been committed above.
6499 msg = ("Could not run OS rename script for instance %s on node %s"
6500 " (but the instance has been renamed in Ganeti): %s" %
6501 (inst.name, inst.primary_node, msg))
6502 self.proc.LogWarning(msg)
# presumably executed in a finally: block on an elided line — TODO confirm
6504 _ShutdownInstanceDisks(self, inst)
# Logical unit removing an instance: shut it down (best-effort if
# ignore_failures is set), then delete disks, config entry and lock.
6509 class LUInstanceRemove(LogicalUnit):
6510 """Remove an instance.
6513 HPATH = "instance-remove"
6514 HTYPE = constants.HTYPE_INSTANCE
6517 def ExpandNames(self):
6518 self._ExpandAndLockInstance()
# Node locks are computed later (DeclareLocks) from the instance's nodes.
6519 self.needed_locks[locking.LEVEL_NODE] = []
6520 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6522 def DeclareLocks(self, level):
6523 if level == locking.LEVEL_NODE:
6524 self._LockInstancesNodes()
6526 def BuildHooksEnv(self):
6529 This runs on master, primary and secondary nodes of the instance.
6532 env = _BuildInstanceHookEnvByObject(self, self.instance)
6533 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6536 def BuildHooksNodes(self):
6537 """Build hooks nodes.
# Pre-hooks run only on the master; post-hooks also on the (former)
# instance nodes.
6540 nl = [self.cfg.GetMasterNode()]
6541 nl_post = list(self.instance.all_nodes) + nl
6542 return (nl, nl_post)
6544 def CheckPrereq(self):
6545 """Check prerequisites.
6547 This checks that the instance is in the cluster.
6550 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6551 assert self.instance is not None, \
6552 "Cannot retrieve locked instance %s" % self.op.instance_name
6554 def Exec(self, feedback_fn):
6555 """Remove the instance.
6558 instance = self.instance
6559 logging.info("Shutting down instance %s on node %s",
6560 instance.name, instance.primary_node)
6562 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6563 self.op.shutdown_timeout)
6564 msg = result.fail_msg
# With ignore_failures a failed shutdown only produces a warning and the
# removal proceeds; otherwise the operation aborts here.
6566 if self.op.ignore_failures:
6567 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6569 raise errors.OpExecError("Could not shutdown instance %s on"
6571 (instance.name, instance.primary_node, msg))
# Actual disk/config removal is shared with other LUs via _RemoveInstance.
6573 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
# Shared helper: tear down an instance's disks, drop it from the cluster
# configuration and schedule removal of its lock.
6576 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6577 """Utility function to remove an instance.
6580 logging.info("Removing block devices for instance %s", instance.name)
# Disk removal failure is fatal unless the caller asked to ignore it, in
# which case the instance is still removed from the configuration.
6582 if not _RemoveDisks(lu, instance):
6583 if not ignore_failures:
6584 raise errors.OpExecError("Can't remove instance's disks")
6585 feedback_fn("Warning: can't remove instance's disks")
6587 logging.info("Removing instance %s out of cluster config", instance.name)
6589 lu.cfg.RemoveInstance(instance.name)
6591 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6592 "Instance lock removal conflict"
6594 # Remove lock for the instance
# The lock manager drops this lock once the LU finishes.
6595 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# Thin LU wrapper: all work is delegated to an _InstanceQuery helper built
# from the opcode's name filter, output fields and locking flag.
6598 class LUInstanceQuery(NoHooksLU):
6599 """Logical unit for querying instances.
6602 # pylint: disable=W0142
6605 def CheckArguments(self):
6606 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6607 self.op.output_fields, self.op.use_locking)
6609 def ExpandNames(self):
6610 self.iq.ExpandNames(self)
6612 def DeclareLocks(self, level):
6613 self.iq.DeclareLocks(self, level)
6615 def Exec(self, feedback_fn):
# Returns data in the legacy (pre-query2) result format.
6616 return self.iq.OldStyleQuery(self)
# Logical unit failing over an instance (shutdown on the primary, start on
# the target); the actual work is done by a TLMigrateInstance tasklet with
# failover=True.
6619 class LUInstanceFailover(LogicalUnit):
6620 """Failover an instance.
6623 HPATH = "instance-failover"
6624 HTYPE = constants.HTYPE_INSTANCE
6627 def CheckArguments(self):
6628 """Check the arguments.
# getattr() with a default — presumably for opcodes submitted by older
# clients that lack these fields; TODO confirm.
6631 self.iallocator = getattr(self.op, "iallocator", None)
6632 self.target_node = getattr(self.op, "target_node", None)
6634 def ExpandNames(self):
6635 self._ExpandAndLockInstance()
6637 if self.op.target_node is not None:
6638 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
# Node locks are filled in by DeclareLocks below.
6640 self.needed_locks[locking.LEVEL_NODE] = []
6641 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6643 ignore_consistency = self.op.ignore_consistency
6644 shutdown_timeout = self.op.shutdown_timeout
6645 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6648 ignore_consistency=ignore_consistency,
6649 shutdown_timeout=shutdown_timeout)
6650 self.tasklets = [self._migrater]
6652 def DeclareLocks(self, level):
6653 if level == locking.LEVEL_NODE:
6654 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Externally-mirrored templates can move to any node: without an explicit
# target we must lock all nodes (for the iallocator run); with one, just
# the primary and the target.
6655 if instance.disk_template in constants.DTS_EXT_MIRROR:
6656 if self.op.target_node is None:
6657 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6659 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6660 self.op.target_node]
6661 del self.recalculate_locks[locking.LEVEL_NODE]
6663 self._LockInstancesNodes()
6665 def BuildHooksEnv(self):
6668 This runs on master, primary and secondary nodes of the instance.
6671 instance = self._migrater.instance
6672 source_node = instance.primary_node
6673 target_node = self.op.target_node
6675 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6676 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6677 "OLD_PRIMARY": source_node,
6678 "NEW_PRIMARY": target_node,
# For internally-mirrored (DRBD) instances the secondary swaps roles with
# the primary; other templates export empty secondary values.
6681 if instance.disk_template in constants.DTS_INT_MIRROR:
6682 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6683 env["NEW_SECONDARY"] = source_node
6685 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6687 env.update(_BuildInstanceHookEnvByObject(self, instance))
6691 def BuildHooksNodes(self):
6692 """Build hooks nodes.
6695 instance = self._migrater.instance
6696 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6697 return (nl, nl + [instance.primary_node])
# Logical unit for live (or non-live) migration; structurally parallel to
# LUInstanceFailover, also delegating to a TLMigrateInstance tasklet.
6700 class LUInstanceMigrate(LogicalUnit):
6701 """Migrate an instance.
6703 This is migration without shutting down, compared to the failover,
6704 which is done with shutdown.
6707 HPATH = "instance-migrate"
6708 HTYPE = constants.HTYPE_INSTANCE
6711 def ExpandNames(self):
6712 self._ExpandAndLockInstance()
6714 if self.op.target_node is not None:
6715 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
# Node locks are computed in DeclareLocks below.
6717 self.needed_locks[locking.LEVEL_NODE] = []
6718 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6720 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6721 cleanup=self.op.cleanup,
# allow_failover lets the tasklet fall back to failover when migration is
# not possible (e.g. the instance is down).
6723 fallback=self.op.allow_failover)
6724 self.tasklets = [self._migrater]
6726 def DeclareLocks(self, level):
6727 if level == locking.LEVEL_NODE:
6728 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Same locking strategy as LUInstanceFailover: all nodes when the
# iallocator must pick a target, otherwise only primary + target.
6729 if instance.disk_template in constants.DTS_EXT_MIRROR:
6730 if self.op.target_node is None:
6731 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6733 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6734 self.op.target_node]
6735 del self.recalculate_locks[locking.LEVEL_NODE]
6737 self._LockInstancesNodes()
6739 def BuildHooksEnv(self):
6742 This runs on master, primary and secondary nodes of the instance.
6745 instance = self._migrater.instance
6746 source_node = instance.primary_node
6747 target_node = self.op.target_node
6748 env = _BuildInstanceHookEnvByObject(self, instance)
6750 "MIGRATE_LIVE": self._migrater.live,
6751 "MIGRATE_CLEANUP": self.op.cleanup,
6752 "OLD_PRIMARY": source_node,
6753 "NEW_PRIMARY": target_node,
# DRBD instances: roles swap between primary and secondary. NOTE(review):
# here the empty case uses None, while LUInstanceFailover uses "" — an
# apparent inconsistency in the exported hook environment.
6756 if instance.disk_template in constants.DTS_INT_MIRROR:
6757 env["OLD_SECONDARY"] = target_node
6758 env["NEW_SECONDARY"] = source_node
6760 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6764 def BuildHooksNodes(self):
6765 """Build hooks nodes.
6768 instance = self._migrater.instance
6769 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6770 return (nl, nl + [instance.primary_node])
# Logical unit moving an instance by full data copy: shut down at source,
# create disks on the target, copy device contents, then delete the
# originals and (optionally) restart.
6773 class LUInstanceMove(LogicalUnit):
6774 """Move an instance by data-copying.
6777 HPATH = "instance-move"
6778 HTYPE = constants.HTYPE_INSTANCE
6781 def ExpandNames(self):
6782 self._ExpandAndLockInstance()
6783 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6784 self.op.target_node = target_node
# Lock the target now; the primary node lock is appended in DeclareLocks.
6785 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6786 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6788 def DeclareLocks(self, level):
6789 if level == locking.LEVEL_NODE:
# Only the primary is needed; a move never touches secondaries (the
# supported disk types below have none).
6790 self._LockInstancesNodes(primary_only=True)
6792 def BuildHooksEnv(self):
6795 This runs on master, primary and secondary nodes of the instance.
6799 "TARGET_NODE": self.op.target_node,
6800 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6802 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6805 def BuildHooksNodes(self):
6806 """Build hooks nodes.
6810 self.cfg.GetMasterNode(),
6811 self.instance.primary_node,
6812 self.op.target_node,
6816 def CheckPrereq(self):
6817 """Check prerequisites.
6819 This checks that the instance is in the cluster.
6822 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6823 assert self.instance is not None, \
6824 "Cannot retrieve locked instance %s" % self.op.instance_name
6826 node = self.cfg.GetNodeInfo(self.op.target_node)
6827 assert node is not None, \
6828 "Cannot retrieve locked node %s" % self.op.target_node
6830 self.target_node = target_node = node.name
6832 if target_node == instance.primary_node:
6833 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6834 (instance.name, target_node),
6837 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Data copy only works for plain devices (LVM logical volumes and files);
# composite layouts such as DRBD cannot be dd-copied this way.
6839 for idx, dsk in enumerate(instance.disks):
6840 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6841 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6842 " cannot copy" % idx, errors.ECODE_STATE)
6844 _CheckNodeOnline(self, target_node)
6845 _CheckNodeNotDrained(self, target_node)
6846 _CheckNodeVmCapable(self, target_node)
# Memory is only checked when the instance will be restarted afterwards.
6848 if instance.admin_up:
6849 # check memory requirements on the secondary node
6850 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6851 instance.name, bep[constants.BE_MEMORY],
6852 instance.hypervisor)
6854 self.LogInfo("Not checking memory on the secondary node as"
6855 " instance will not be started")
6857 # check bridge existance
6858 _CheckInstanceBridgesExist(self, instance, node=target_node)
6860 def Exec(self, feedback_fn):
6861 """Move an instance.
6863 The move is done by shutting it down on its present node, copying
6864 the data over (slow) and starting it on the new node.
6867 instance = self.instance
6869 source_node = instance.primary_node
6870 target_node = self.target_node
6872 self.LogInfo("Shutting down instance %s on source node %s",
6873 instance.name, source_node)
6875 result = self.rpc.call_instance_shutdown(source_node, instance,
6876 self.op.shutdown_timeout)
6877 msg = result.fail_msg
# ignore_consistency turns a failed shutdown into a warning, but then the
# admin must make sure the source node is really down.
6879 if self.op.ignore_consistency:
6880 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6881 " Proceeding anyway. Please make sure node"
6882 " %s is down. Error details: %s",
6883 instance.name, source_node, source_node, msg)
6885 raise errors.OpExecError("Could not shutdown instance %s on"
6887 (instance.name, source_node, msg))
6889 # create the target disks
6891 _CreateDisks(self, instance, target_node=target_node)
# On creation failure: roll back target disks and release reserved DRBD
# minors before propagating the error.
6892 except errors.OpExecError:
6893 self.LogWarning("Device creation failed, reverting...")
6895 _RemoveDisks(self, instance, target_node=target_node)
6897 self.cfg.ReleaseDRBDMinors(instance.name)
6900 cluster_name = self.cfg.GetClusterInfo().cluster_name
6903 # activate, get path, copy the data over
# Errors are accumulated in 'errs' (initialized on an elided line) so all
# disks are attempted before aborting.
6904 for idx, disk in enumerate(instance.disks):
6905 self.LogInfo("Copying data for disk %d", idx)
6906 result = self.rpc.call_blockdev_assemble(target_node, disk,
6907 instance.name, True, idx)
6909 self.LogWarning("Can't assemble newly created disk %d: %s",
6910 idx, result.fail_msg)
6911 errs.append(result.fail_msg)
6913 dev_path = result.payload
# blockdev_export streams the source device's contents onto the assembled
# target device.
6914 result = self.rpc.call_blockdev_export(source_node, disk,
6915 target_node, dev_path,
6918 self.LogWarning("Can't copy data over for disk %d: %s",
6919 idx, result.fail_msg)
6920 errs.append(result.fail_msg)
6924 self.LogWarning("Some disks failed to copy, aborting")
6926 _RemoveDisks(self, instance, target_node=target_node)
6928 self.cfg.ReleaseDRBDMinors(instance.name)
6929 raise errors.OpExecError("Errors during disk copy: %s" %
# Copy succeeded: flip the primary node and persist the config before
# touching the old disks.
6932 instance.primary_node = target_node
6933 self.cfg.Update(instance, feedback_fn)
6935 self.LogInfo("Removing the disks on the original node")
6936 _RemoveDisks(self, instance, target_node=source_node)
6938 # Only start the instance if it's marked as up
6939 if instance.admin_up:
6940 self.LogInfo("Starting instance %s on node %s",
6941 instance.name, target_node)
6943 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6944 ignore_secondaries=True)
6946 _ShutdownInstanceDisks(self, instance)
6947 raise errors.OpExecError("Can't activate the instance's disks")
6949 result = self.rpc.call_instance_start(target_node, instance,
6951 msg = result.fail_msg
6953 _ShutdownInstanceDisks(self, instance)
6954 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6955 (instance.name, target_node, msg))
# Logical unit evacuating a node's primary instances by submitting one
# OpInstanceMigrate job per instance (see ResultWithJobs in this module).
6958 class LUNodeMigrate(LogicalUnit):
6959 """Migrate all instances from a node.
6962 HPATH = "node-migrate"
6963 HTYPE = constants.HTYPE_NODE
6966 def CheckArguments(self):
6969 def ExpandNames(self):
6970 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# Shared locks suffice: this LU only reads the config and submits jobs.
6972 self.share_locks = _ShareAll()
6973 self.needed_locks = {
6974 locking.LEVEL_NODE: [self.op.node_name],
6977 def BuildHooksEnv(self):
6980 This runs on the master, the primary and all the secondaries.
6984 "NODE_NAME": self.op.node_name,
6987 def BuildHooksNodes(self):
6988 """Build hooks nodes.
6991 nl = [self.cfg.GetMasterNode()]
6994 def CheckPrereq(self):
6997 def Exec(self, feedback_fn):
6998 # Prepare jobs for migration instances
# One single-opcode job per primary instance of the node.
7000 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7003 iallocator=self.op.iallocator,
7004 target_node=self.op.target_node)]
7005 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7008 # TODO: Run iallocator in this opcode and pass correct placement options to
7009 # OpInstanceMigrate. Since other jobs can modify the cluster between
7010 # running the iallocator and the actual migration, a good consistency model
7011 # will have to be found.
# Sanity check: we should hold exactly the one node lock we asked for.
7013 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7014 frozenset([self.op.node_name]))
# The processor submits these jobs and reports their IDs to the caller.
7016 return ResultWithJobs(jobs)
7019 class TLMigrateInstance(Tasklet):
7020 """Tasklet class for instance migration.
7023 @ivar live: whether the migration will be done live or non-live;
7024 this variable is initialized only after CheckPrereq has run
7025 @type cleanup: boolean
7026 @ivar cleanup: Whether we cleanup from a failed migration
7027 @type iallocator: string
7028 @ivar iallocator: The iallocator used to determine target_node
7029 @type target_node: string
7030 @ivar target_node: If given, the target_node to reallocate the instance to
7031 @type failover: boolean
7032 @ivar failover: Whether operation results in failover or migration
7033 @type fallback: boolean
7034 @ivar fallback: Whether fallback to failover is allowed if migration not
7036 @type ignore_consistency: boolean
7037 @ivar ignore_consistency: Whether we should ignore consistency between source
7039 @type shutdown_timeout: int
7040 @ivar shutdown_timeout: In case of failover timeout of the shutdown
# Store the migration/failover parameters; no cluster state is touched
# until CheckPrereq runs.
7043 def __init__(self, lu, instance_name, cleanup=False,
7044 failover=False, fallback=False,
7045 ignore_consistency=False,
7046 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7047 """Initializes this class.
7050 Tasklet.__init__(self, lu)
7053 self.instance_name = instance_name
7054 self.cleanup = cleanup
# 'live' gets its real value in CheckPrereq, from op.live/op.mode or the
# hypervisor's default migration mode.
7055 self.live = False # will be overridden later
7056 self.failover = failover
7057 self.fallback = fallback
7058 self.ignore_consistency = ignore_consistency
7059 self.shutdown_timeout = shutdown_timeout
# TLMigrateInstance.CheckPrereq: validate the instance/target combination,
# possibly downgrade migration to failover, pick the target node (given,
# iallocator-chosen, or the DRBD secondary) and settle the live/non-live
# mode.
7061 def CheckPrereq(self):
7062 """Check prerequisites.
7064 This checks that the instance is in the cluster.
7067 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7068 instance = self.cfg.GetInstanceInfo(instance_name)
7069 assert instance is not None
7070 self.instance = instance
# A down instance cannot be live-migrated; with fallback allowed we switch
# to failover instead of failing.
7072 if (not self.cleanup and not instance.admin_up and not self.failover and
7074 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7076 self.failover = True
# Only mirrored disk templates (DRBD or externally mirrored) support
# migration/failover at all.
7078 if instance.disk_template not in constants.DTS_MIRRORED:
7083 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7084 " %s" % (instance.disk_template, text),
7087 if instance.disk_template in constants.DTS_EXT_MIRROR:
# Exactly one of iallocator/target_node must have been given.
7088 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7090 if self.lu.op.iallocator:
7091 self._RunAllocator()
7093 # We set set self.target_node as it is required by
7095 self.target_node = self.lu.op.target_node
7097 # self.target_node is already populated, either directly or by the
7099 target_node = self.target_node
7100 if self.target_node == instance.primary_node:
# NOTE(review): unlike the other prereq errors in this method, this
# OpPrereqError is raised without an errors.ECODE_* argument.
7101 raise errors.OpPrereqError("Cannot migrate instance %s"
7102 " to its primary (%s)" %
7103 (instance.name, instance.primary_node))
7105 if len(self.lu.tasklets) == 1:
7106 # It is safe to release locks only when we're the only tasklet
# Shrink the node lock set to just the two nodes involved.
7108 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7109 keep=[instance.primary_node, self.target_node])
# Internally-mirrored (DRBD) branch: the target is fixed — it must be the
# configured secondary node.
7112 secondary_nodes = instance.secondary_nodes
7113 if not secondary_nodes:
7114 raise errors.ConfigurationError("No secondary node but using"
7115 " %s disk template" %
7116 instance.disk_template)
7117 target_node = secondary_nodes[0]
7118 if self.lu.op.iallocator or (self.lu.op.target_node and
7119 self.lu.op.target_node != target_node):
7121 text = "failed over"
7124 raise errors.OpPrereqError("Instances with disk template %s cannot"
7125 " be %s to arbitrary nodes"
7126 " (neither an iallocator nor a target"
7127 " node can be passed)" %
7128 (instance.disk_template, text),
7131 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7133 # check memory requirements on the secondary node
7134 if not self.failover or instance.admin_up:
7135 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7136 instance.name, i_be[constants.BE_MEMORY],
7137 instance.hypervisor)
7139 self.lu.LogInfo("Not checking memory on the secondary node as"
7140 " instance will not be started")
7142 # check bridge existance
7143 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7145 if not self.cleanup:
7146 _CheckNodeNotDrained(self.lu, target_node)
7147 if not self.failover:
# Ask the hypervisor on the primary whether migration is possible; if not
# and fallback is allowed, degrade to failover instead of failing.
7148 result = self.rpc.call_instance_migratable(instance.primary_node,
7150 if result.fail_msg and self.fallback:
7151 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7153 self.failover = True
7155 result.Raise("Can't migrate, please use failover",
7156 prereq=True, ecode=errors.ECODE_STATE)
7158 assert not (self.failover and self.cleanup)
# Resolve the live/non-live decision: 'live' and 'mode' are mutually
# exclusive; absent both, the hypervisor's default mode is used.
7160 if not self.failover:
7161 if self.lu.op.live is not None and self.lu.op.mode is not None:
7162 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7163 " parameters are accepted",
7165 if self.lu.op.live is not None:
7167 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7169 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7170 # reset the 'live' parameter to None so that repeated
7171 # invocations of CheckPrereq do not raise an exception
7172 self.lu.op.live = None
7173 elif self.lu.op.mode is None:
7174 # read the default value from the hypervisor
7175 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7177 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7179 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7181 # Failover is never live
# Ask the configured iallocator for a relocation target and store it in
# self.target_node.
7184 def _RunAllocator(self):
7185 """Run the allocator based on input opcode.
7188 ial = IAllocator(self.cfg, self.rpc,
7189 mode=constants.IALLOCATOR_MODE_RELOC,
7190 name=self.instance_name,
7191 # TODO See why hail breaks with a single node below
# The primary node is listed twice as relocate_from — see the TODO above;
# apparently a workaround for the 'hail' allocator.
7192 relocate_from=[self.instance.primary_node,
7193 self.instance.primary_node],
7196 ial.Run(self.lu.op.iallocator)
# An allocator failure or a wrong-sized result is a prerequisite error:
# nothing has been changed yet.
7199 raise errors.OpPrereqError("Can't compute nodes using"
7200 " iallocator '%s': %s" %
7201 (self.lu.op.iallocator, ial.info),
7203 if len(ial.result) != ial.required_nodes:
7204 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7205 " of nodes (%s), required %s" %
7206 (self.lu.op.iallocator, len(ial.result),
7207 ial.required_nodes), errors.ECODE_FAULT)
7208 self.target_node = ial.result[0]
7209 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7210 self.instance_name, self.lu.op.iallocator,
7211 utils.CommaJoin(ial.result))
# Poll all involved nodes until the DRBD devices report fully synced,
# giving periodic progress feedback.
7213 def _WaitUntilSync(self):
7214 """Poll with custom rpc for disk sync.
7216 This uses our own step-based rpc call.
7219 self.feedback_fn("* wait until resync is done")
# NOTE(review): the enclosing loop header and the all_done/min_percent
# initializations are on elided lines in this excerpt.
7223 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7225 self.instance.disks)
7227 for node, nres in result.items():
7228 nres.Raise("Cannot resync disks on node %s" % node)
# Each node reports (done_flag, sync_percent); aggregate the minimum
# progress across nodes for the feedback message.
7229 node_done, node_percent = nres.payload
7230 all_done = all_done and node_done
7231 if node_percent is not None:
7232 min_percent = min(min_percent, node_percent)
7234 if min_percent < 100:
7235 self.feedback_fn(" - progress: %.1f%%" % min_percent)
# Demote the given node to DRBD secondary by closing the instance's block
# devices there.
7238 def _EnsureSecondary(self, node):
7239 """Demote a node to secondary.
7242 self.feedback_fn("* switching node %s to secondary mode" % node)
# SetDiskID fills in the node-specific physical device information before
# the RPC call.
7244 for dev in self.instance.disks:
7245 self.cfg.SetDiskID(dev, node)
7247 result = self.rpc.call_blockdev_close(node, self.instance.name,
7248 self.instance.disks)
7249 result.Raise("Cannot change disk to secondary on node %s" % node)
# Put the DRBD devices on all involved nodes into standalone (disconnected)
# mode; any per-node failure aborts the operation.
7251 def _GoStandalone(self):
7252 """Disconnect from the network.
7255 self.feedback_fn("* changing into standalone mode")
7256 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7257 self.instance.disks)
7258 for node, nres in result.items():
7259 nres.Raise("Cannot disconnect disks node %s" % node)
# Reconnect the DRBD devices on all involved nodes, either in dual-master
# mode (multimaster=True, needed during live migration) or single-master.
7261 def _GoReconnect(self, multimaster):
7262 """Reconnect to the network.
# presumably an elided if/else assigns "multi-master" for the True case —
# TODO confirm
7268 msg = "single-master"
7269 self.feedback_fn("* changing disks into %s mode" % msg)
7270 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7271 self.instance.disks,
7272 self.instance.name, multimaster)
7273 for node, nres in result.items():
7274 nres.Raise("Cannot change disks config on node %s" % node)
# Recover from a previously failed/interrupted migration: figure out which
# node actually runs the instance, fix the config if needed, and bring the
# DRBD devices back to a clean single-master state.
7276 def _ExecCleanup(self):
7277 """Try to cleanup after a failed migration.
7279 The cleanup is done by:
7280 - check that the instance is running only on one node
7281 (and update the config if needed)
7282 - change disks on its secondary node to secondary
7283 - wait until disks are fully synchronized
7284 - disconnect from the network
7285 - change disks into single-master mode
7286 - wait again until disks are fully synchronized
7289 instance = self.instance
7290 target_node = self.target_node
7291 source_node = self.source_node
7293 # check running on only one node
7294 self.feedback_fn("* checking where the instance actually runs"
7295 " (if this hangs, the hypervisor might be in"
7297 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7298 for node, result in ins_l.items():
7299 result.Raise("Can't contact node %s" % node)
7301 runningon_source = instance.name in ins_l[source_node].payload
7302 runningon_target = instance.name in ins_l[target_node].payload
# Running on both or on neither node is ambiguous — require manual
# intervention rather than guessing.
7304 if runningon_source and runningon_target:
7305 raise errors.OpExecError("Instance seems to be running on two nodes,"
7306 " or the hypervisor is confused; you will have"
7307 " to ensure manually that it runs only on one"
7308 " and restart this operation")
7310 if not (runningon_source or runningon_target):
7311 raise errors.OpExecError("Instance does not seem to be running at all;"
7312 " in this case it's safer to repair by"
7313 " running 'gnt-instance stop' to ensure disk"
7314 " shutdown, and then restarting it")
7316 if runningon_target:
7317 # the migration has actually succeeded, we need to update the config
7318 self.feedback_fn("* instance running on secondary node (%s),"
7319 " updating config" % target_node)
7320 instance.primary_node = target_node
7321 self.cfg.Update(instance, self.feedback_fn)
# Whichever node does NOT run the instance gets demoted to secondary.
7322 demoted_node = source_node
7324 self.feedback_fn("* instance confirmed to be running on its"
7325 " primary node (%s)" % source_node)
7326 demoted_node = target_node
7328 if instance.disk_template in constants.DTS_INT_MIRROR:
7329 self._EnsureSecondary(demoted_node)
7331 self._WaitUntilSync()
# A standalone device cannot sync; errors here are deliberately ignored
# and fixed by the reconnect sequence below.
7332 except errors.OpExecError:
7333 # we ignore here errors, since if the device is standalone, it
7334 # won't be able to sync
7336 self._GoStandalone()
7337 self._GoReconnect(False)
7338 self._WaitUntilSync()
7340 self.feedback_fn("* done")
# Best-effort rollback of the disk state after a failed migration attempt:
# demote the target and return to single-master mode.
7342 def _RevertDiskStatus(self):
7343 """Try to revert the disk status after a failed migration.
7346 target_node = self.target_node
# Externally mirrored disks need no DRBD state rollback; presumably an
# elided 'return' follows this test — TODO confirm.
7347 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7351 self._EnsureSecondary(target_node)
7352 self._GoStandalone()
7353 self._GoReconnect(False)
7354 self._WaitUntilSync()
# Rollback failure is only logged (Python 2 'except E, err' syntax): the
# admin must recover the instance manually.
7355 except errors.OpExecError, err:
7356 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7357 " please try to recover the instance manually;"
7358 " error '%s'" % str(err))
# Tell the hypervisor on the target node to abort an in-flight migration;
# failures are logged but never raised.
7360 def _AbortMigration(self):
7361 """Call the hypervisor code to abort a started migration.
7364 instance = self.instance
7365 target_node = self.target_node
7366 migration_info = self.migration_info
7368 abort_result = self.rpc.call_finalize_migration(target_node,
7372 abort_msg = abort_result.fail_msg
7374 logging.error("Aborting migration failed on target node %s: %s",
7375 target_node, abort_msg)
7376 # Don't raise an exception here, as we stil have to try to revert the
7377 # disk status, even if this step failed.
# Drive a full live/non-live migration: verify disk consistency, switch
# DRBD to dual-master, migrate via the hypervisor, then demote the old
# primary and return to single-master. Failures before the switch-over
# trigger _AbortMigration/_RevertDiskStatus.
7379 def _ExecMigration(self):
7380 """Migrate an instance.
7382 The migrate is done by:
7383 - change the disks into dual-master mode
7384 - wait until disks are fully synchronized again
7385 - migrate the instance
7386 - change disks on the new secondary node (the old primary) to secondary
7387 - wait until disks are fully synchronized
7388 - change disks into single-master mode
7391 instance = self.instance
7392 target_node = self.target_node
7393 source_node = self.source_node
# Degraded disks on the target would corrupt data after switch-over, so
# this is checked up front.
7395 self.feedback_fn("* checking disk consistency between source and target")
7396 for dev in instance.disks:
7397 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7398 raise errors.OpExecError("Disk %s is degraded or not fully"
7399 " synchronized on target node,"
7400 " aborting migration" % dev.iv_name)
7402 # First get the migration information from the remote node
7403 result = self.rpc.call_migration_info(source_node, instance)
7404 msg = result.fail_msg
7406 log_err = ("Failed fetching source migration information from %s: %s" %
7408 logging.error(log_err)
7409 raise errors.OpExecError(log_err)
7411 self.migration_info = migration_info = result.payload
# DRBD (internally mirrored) templates need the dual-master dance; for
# externally mirrored ones the storage layer handles the move.
7413 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7414 # Then switch the disks to master/master mode
7415 self._EnsureSecondary(target_node)
7416 self._GoStandalone()
7417 self._GoReconnect(True)
7418 self._WaitUntilSync()
7420 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7421 result = self.rpc.call_accept_instance(target_node,
7424 self.nodes_ip[target_node])
7426 msg = result.fail_msg
# Pre-migration failure: abort on the target and roll the disks back
# before raising.
7428 logging.error("Instance pre-migration failed, trying to revert"
7429 " disk status: %s", msg)
7430 self.feedback_fn("Pre-migration failed, aborting")
7431 self._AbortMigration()
7432 self._RevertDiskStatus()
7433 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7434 (instance.name, msg))
7436 self.feedback_fn("* migrating instance to %s" % target_node)
7437 result = self.rpc.call_instance_migrate(source_node, instance,
7438 self.nodes_ip[target_node],
7440 msg = result.fail_msg
7442 logging.error("Instance migration failed, trying to revert"
7443 " disk status: %s", msg)
7444 self.feedback_fn("Migration failed, aborting")
7445 self._AbortMigration()
7446 self._RevertDiskStatus()
7447 raise errors.OpExecError("Could not migrate instance %s: %s" %
7448 (instance.name, msg))
# Migration succeeded: record the new primary and propagate the config.
7450 instance.primary_node = target_node
7451 # distribute new instance config to the other nodes
7452 self.cfg.Update(instance, self.feedback_fn)
7454 result = self.rpc.call_finalize_migration(target_node,
7458 msg = result.fail_msg
7460 logging.error("Instance migration succeeded, but finalization failed:"
7462 raise errors.OpExecError("Could not finalize instance migration: %s" %
# Demote the old primary and restore the normal single-master DRBD state.
7465 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7466 self._EnsureSecondary(source_node)
7467 self._WaitUntilSync()
7468 self._GoStandalone()
7469 self._GoReconnect(False)
7470 self._WaitUntilSync()
7472 self.feedback_fn("* done")
7474 def _ExecFailover(self):
7475 """Failover an instance.
7477 The failover is done by shutting it down on its present node and
7478 starting it on the secondary.
7481 instance = self.instance
7482 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7484 source_node = instance.primary_node
7485 target_node = self.target_node
7487 if instance.admin_up:
7488 self.feedback_fn("* checking disk consistency between source and target")
7489 for dev in instance.disks:
7490 # for drbd, these are drbd over lvm
7491 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7492 if primary_node.offline:
7493 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7495 (primary_node.name, dev.iv_name, target_node))
7496 elif not self.ignore_consistency:
7497 raise errors.OpExecError("Disk %s is degraded on target node,"
7498 " aborting failover" % dev.iv_name)
7500 self.feedback_fn("* not checking disk consistency as instance is not"
7503 self.feedback_fn("* shutting down instance on source node")
7504 logging.info("Shutting down instance %s on node %s",
7505 instance.name, source_node)
7507 result = self.rpc.call_instance_shutdown(source_node, instance,
7508 self.shutdown_timeout)
7509 msg = result.fail_msg
7511 if self.ignore_consistency or primary_node.offline:
7512 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7513 " proceeding anyway; please make sure node"
7514 " %s is down; error details: %s",
7515 instance.name, source_node, source_node, msg)
7517 raise errors.OpExecError("Could not shutdown instance %s on"
7519 (instance.name, source_node, msg))
7521 self.feedback_fn("* deactivating the instance's disks on source node")
7522 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7523 raise errors.OpExecError("Can't shut down the instance's disks")
7525 instance.primary_node = target_node
7526 # distribute new instance config to the other nodes
7527 self.cfg.Update(instance, self.feedback_fn)
7529 # Only start the instance if it's marked as up
7530 if instance.admin_up:
7531 self.feedback_fn("* activating the instance's disks on target node %s" %
7533 logging.info("Starting instance %s on node %s",
7534 instance.name, target_node)
7536 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7537 ignore_secondaries=True)
7539 _ShutdownInstanceDisks(self.lu, instance)
7540 raise errors.OpExecError("Can't activate the instance's disks")
7542 self.feedback_fn("* starting the instance on the target node %s" %
7544 result = self.rpc.call_instance_start(target_node, instance, None, None,
7546 msg = result.fail_msg
7548 _ShutdownInstanceDisks(self.lu, instance)
7549 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7550 (instance.name, target_node, msg))
7552 def Exec(self, feedback_fn):
7553 """Perform the migration.
7556 self.feedback_fn = feedback_fn
7557 self.source_node = self.instance.primary_node
7559 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7560 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7561 self.target_node = self.instance.secondary_nodes[0]
7562 # Otherwise self.target_node has been populated either
7563 # directly, or through an iallocator.
7565 self.all_nodes = [self.source_node, self.target_node]
7566 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7567 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7570 feedback_fn("Failover instance %s" % self.instance.name)
7571 self._ExecFailover()
7573 feedback_fn("Migrating instance %s" % self.instance.name)
7576 return self._ExecCleanup()
7578 return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be change to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    # devices of this type must exist on secondaries too, so force creation
    # from here downwards
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution
  @raise errors.OpExecError: if the remote creation call fails

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    # remember the node-reported unique ID of the newly-created device
    device.physical_id = result.payload
7653 def _GenerateUniqueNames(lu, exts):
7654 """Generate a suitable LV name.
7656 This will generate a logical volume name for the given instance.
7661 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7662 results.append("%s%s" % (new_id, val))
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  @param lu: the lu on whose behalf we execute
  @param primary: the primary node name
  @param secondary: the secondary node name
  @param size: size of the data volume, in mebibytes
  @param vgnames: two volume group names, for data and metadata
  @param names: two LV names, for data and metadata
  @param iv_name: the instance-visible name of the device
  @param p_minor: DRBD minor on the primary node
  @param s_minor: DRBD minor on the secondary node
  @return: the L{objects.Disk} describing the DRBD8 device

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  # metadata volume has a fixed size of 128 MB per DRBD8 design
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template (one of the constants.DT_* values)
  @param instance_name: the name of the owning instance
  @param primary_node: the primary node of the instance
  @param secondary_nodes: the secondary nodes (must match the template's
      mirroring requirements)
  @param disk_info: list of disk definition dicts (size, mode, vg, ...)
  @param file_storage_dir: directory in which file-based disks live
  @param file_driver: the driver for file-based disks
  @param base_index: offset at which to start numbering the disks
  @param feedback_fn: function used to report progress
  @return: list of L{objects.Disk} objects
  @raise errors.ProgrammerError: for an invalid template or a template /
      secondary-node-count mismatch

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    # each disk needs two LVs: data and metadata
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
def _GetInstanceInfoText(instance):
  """Compute that text that should be added to the disk's metadata.

  @type instance: L{objects.Instance}
  @param instance: the instance whose disks are being tagged
  @return: the tag string, derived from the instance name

  """
  return "originstname+%s" % instance.name
7800 def _CalcEta(time_taken, written, total_size):
7801 """Calculates the ETA based on size written and total size.
7803 @param time_taken: The time taken so far
7804 @param written: amount written so far
7805 @param total_size: The total size of data to be written
7806 @return: The remaining time in seconds
7809 avg_time = time_taken / float(written)
7810 return (total_size - written) * avg_time
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # report progress at most once a minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    # always resume syncing, even if the wipe failed halfway
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    # create only on the given node (e.g. during instance move)
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      # full creation (force_open included) only on the primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # remove only from the given node
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template of the instance
  @param disks: list of disk definition dicts
  @return: dict mapping volume group name to the required size in
      mebibytes (empty dict for templates not using volume groups)
  @raise errors.ProgrammerError: for unknown disk templates

  """
  def _compute(disks, payload):
    """Universal algorithm.

    Sums, per volume group, the disk sizes plus a fixed per-disk payload.

    """
    vgs = {}
    for disk in disks:
      # BUGFIX: key by the disk's actual VG name; the previous code looked
      # up the literal IDISK_VG constant, so sizes never accumulated per VG
      vg_name = disk[constants.IDISK_VG]
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template of the instance
  @param disks: list of disk definition dicts
  @return: the required size in mebibytes, 0 for templates where the
      space is provided externally, or None where no check applies
  @raise errors.ProgrammerError: for unknown disk templates

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
8030 def _FilterVmNodes(lu, nodenames):
8031 """Filters out non-vm_capable nodes from a list.
8033 @type lu: L{LogicalUnit}
8034 @param lu: the logical unit for which we check
8035 @type nodenames: list
8036 @param nodenames: the list of nodes on which we should check
8038 @return: the list of vm-capable nodes
8041 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8042 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstract the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      # offline nodes cannot validate anything; skip them
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the hypervisor we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
8103 class LUInstanceCreate(LogicalUnit):
8104 """Create an instance.
8107 HPATH = "instance-add"
8108 HTYPE = constants.HTYPE_INSTANCE
8111 def CheckArguments(self):
8115 # do not require name_check to ease forward/backward compatibility
8117 if self.op.no_install and self.op.start:
8118 self.LogInfo("No-installation mode selected, disabling startup")
8119 self.op.start = False
8120 # validate/normalize the instance name
8121 self.op.instance_name = \
8122 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8124 if self.op.ip_check and not self.op.name_check:
8125 # TODO: make the ip check more flexible and not depend on the name check
8126 raise errors.OpPrereqError("Cannot do IP address check without a name"
8127 " check", errors.ECODE_INVAL)
8129 # check nics' parameter names
8130 for nic in self.op.nics:
8131 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8133 # check disks. parameter names and consistent adopt/no-adopt strategy
8134 has_adopt = has_no_adopt = False
8135 for disk in self.op.disks:
8136 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8137 if constants.IDISK_ADOPT in disk:
8141 if has_adopt and has_no_adopt:
8142 raise errors.OpPrereqError("Either all disks are adopted or none is",
8145 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8146 raise errors.OpPrereqError("Disk adoption is not supported for the"
8147 " '%s' disk template" %
8148 self.op.disk_template,
8150 if self.op.iallocator is not None:
8151 raise errors.OpPrereqError("Disk adoption not allowed with an"
8152 " iallocator script", errors.ECODE_INVAL)
8153 if self.op.mode == constants.INSTANCE_IMPORT:
8154 raise errors.OpPrereqError("Disk adoption not allowed for"
8155 " instance import", errors.ECODE_INVAL)
8157 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8158 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8159 " but no 'adopt' parameter given" %
8160 self.op.disk_template,
8163 self.adopt_disks = has_adopt
8165 # instance name verification
8166 if self.op.name_check:
8167 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8168 self.op.instance_name = self.hostname1.name
8169 # used in CheckPrereq for ip ping check
8170 self.check_ip = self.hostname1.ip
8172 self.check_ip = None
8174 # file storage checks
8175 if (self.op.file_driver and
8176 not self.op.file_driver in constants.FILE_DRIVER):
8177 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8178 self.op.file_driver, errors.ECODE_INVAL)
8180 if self.op.disk_template == constants.DT_FILE:
8181 opcodes.RequireFileStorage()
8182 elif self.op.disk_template == constants.DT_SHARED_FILE:
8183 opcodes.RequireSharedFileStorage()
8185 ### Node/iallocator related checks
8186 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8188 if self.op.pnode is not None:
8189 if self.op.disk_template in constants.DTS_INT_MIRROR:
8190 if self.op.snode is None:
8191 raise errors.OpPrereqError("The networked disk templates need"
8192 " a mirror node", errors.ECODE_INVAL)
8194 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8196 self.op.snode = None
8198 self._cds = _GetClusterDomainSecret()
8200 if self.op.mode == constants.INSTANCE_IMPORT:
8201 # On import force_variant must be True, because if we forced it at
8202 # initial install, our only chance when importing it back is that it
8204 self.op.force_variant = True
8206 if self.op.no_install:
8207 self.LogInfo("No-installation mode has no effect during import")
8209 elif self.op.mode == constants.INSTANCE_CREATE:
8210 if self.op.os_type is None:
8211 raise errors.OpPrereqError("No guest OS specified",
8213 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8214 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8215 " installation" % self.op.os_type,
8217 if self.op.disk_template is None:
8218 raise errors.OpPrereqError("No disk template specified",
8221 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8222 # Check handshake to ensure both clusters have the same domain secret
8223 src_handshake = self.op.source_handshake
8224 if not src_handshake:
8225 raise errors.OpPrereqError("Missing source handshake",
8228 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8231 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8234 # Load and check source CA
8235 self.source_x509_ca_pem = self.op.source_x509_ca
8236 if not self.source_x509_ca_pem:
8237 raise errors.OpPrereqError("Missing source X509 CA",
8241 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8243 except OpenSSL.crypto.Error, err:
8244 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8245 (err, ), errors.ECODE_INVAL)
8247 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8248 if errcode is not None:
8249 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8252 self.source_x509_ca = cert
8254 src_instance_name = self.op.source_instance_name
8255 if not src_instance_name:
8256 raise errors.OpPrereqError("Missing source instance name",
8259 self.source_instance_name = \
8260 netutils.GetHostname(name=src_instance_name).name
8263 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8264 self.op.mode, errors.ECODE_INVAL)
8266 def ExpandNames(self):
8267 """ExpandNames for CreateInstance.
8269 Figure out the right locks for instance creation.
8272 self.needed_locks = {}
8274 instance_name = self.op.instance_name
8275 # this is just a preventive check, but someone might still add this
8276 # instance in the meantime, and creation will fail at lock-add time
8277 if instance_name in self.cfg.GetInstanceList():
8278 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8279 instance_name, errors.ECODE_EXISTS)
8281 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8283 if self.op.iallocator:
8284 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8286 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8287 nodelist = [self.op.pnode]
8288 if self.op.snode is not None:
8289 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8290 nodelist.append(self.op.snode)
8291 self.needed_locks[locking.LEVEL_NODE] = nodelist
8293 # in case of import lock the source node too
8294 if self.op.mode == constants.INSTANCE_IMPORT:
8295 src_node = self.op.src_node
8296 src_path = self.op.src_path
8298 if src_path is None:
8299 self.op.src_path = src_path = self.op.instance_name
8301 if src_node is None:
8302 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8303 self.op.src_node = None
8304 if os.path.isabs(src_path):
8305 raise errors.OpPrereqError("Importing an instance from a path"
8306 " requires a source node option",
8309 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8310 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8311 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8312 if not os.path.isabs(src_path):
8313 self.op.src_path = src_path = \
8314 utils.PathJoin(constants.EXPORT_DIR, src_path)
8316 def _RunAllocator(self):
8317 """Run the allocator based on input opcode.
8320 nics = [n.ToDict() for n in self.nics]
8321 ial = IAllocator(self.cfg, self.rpc,
8322 mode=constants.IALLOCATOR_MODE_ALLOC,
8323 name=self.op.instance_name,
8324 disk_template=self.op.disk_template,
8327 vcpus=self.be_full[constants.BE_VCPUS],
8328 memory=self.be_full[constants.BE_MEMORY],
8331 hypervisor=self.op.hypervisor,
8334 ial.Run(self.op.iallocator)
8337 raise errors.OpPrereqError("Can't compute nodes using"
8338 " iallocator '%s': %s" %
8339 (self.op.iallocator, ial.info),
8341 if len(ial.result) != ial.required_nodes:
8342 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8343 " of nodes (%s), required %s" %
8344 (self.op.iallocator, len(ial.result),
8345 ial.required_nodes), errors.ECODE_FAULT)
8346 self.op.pnode = ial.result[0]
8347 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8348 self.op.instance_name, self.op.iallocator,
8349 utils.CommaJoin(ial.result))
8350 if ial.required_nodes == 2:
8351 self.op.snode = ial.result[1]
8353 def BuildHooksEnv(self):
8356 This runs on master, primary and secondary nodes of the instance.
8360 "ADD_MODE": self.op.mode,
8362 if self.op.mode == constants.INSTANCE_IMPORT:
8363 env["SRC_NODE"] = self.op.src_node
8364 env["SRC_PATH"] = self.op.src_path
8365 env["SRC_IMAGES"] = self.src_images
8367 env.update(_BuildInstanceHookEnv(
8368 name=self.op.instance_name,
8369 primary_node=self.op.pnode,
8370 secondary_nodes=self.secondaries,
8371 status=self.op.start,
8372 os_type=self.op.os_type,
8373 memory=self.be_full[constants.BE_MEMORY],
8374 vcpus=self.be_full[constants.BE_VCPUS],
8375 nics=_NICListToTuple(self, self.nics),
8376 disk_template=self.op.disk_template,
8377 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8378 for d in self.disks],
8381 hypervisor_name=self.op.hypervisor,
8387 def BuildHooksNodes(self):
8388 """Build hooks nodes.
8391 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8394 def _ReadExportInfo(self):
8395 """Reads the export information from disk.
8397 It will override the opcode source node and path with the actual
8398 information, if these two were not specified before.
8400 @return: the export information
8403 assert self.op.mode == constants.INSTANCE_IMPORT
8405 src_node = self.op.src_node
8406 src_path = self.op.src_path
8408 if src_node is None:
8409 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8410 exp_list = self.rpc.call_export_list(locked_nodes)
8412 for node in exp_list:
8413 if exp_list[node].fail_msg:
8415 if src_path in exp_list[node].payload:
8417 self.op.src_node = src_node = node
8418 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8422 raise errors.OpPrereqError("No export found for relative path %s" %
8423 src_path, errors.ECODE_INVAL)
8425 _CheckNodeOnline(self, src_node)
8426 result = self.rpc.call_export_info(src_node, src_path)
8427 result.Raise("No export or invalid export found in dir %s" % src_path)
8429 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8430 if not export_info.has_section(constants.INISECT_EXP):
8431 raise errors.ProgrammerError("Corrupted export config",
8432 errors.ECODE_ENVIRON)
8434 ei_version = export_info.get(constants.INISECT_EXP, "version")
8435 if (int(ei_version) != constants.EXPORT_VERSION):
8436 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8437 (ei_version, constants.EXPORT_VERSION),
8438 errors.ECODE_ENVIRON)
# Fill opcode fields that the user left unset from the export's INI-style
# config (einfo is a SerializableConfigParser — see _ReadExportInfo above).
# NOTE(review): this listing elides some lines (else branches, closing
# parens); comments below describe only what is visible here.
8441 def _ReadExportParams(self, einfo):
8442 """Use export parameters as defaults.
8444 In case the opcode doesn't specify (as in override) some instance
8445 parameters, then try to use them from the export information, if
# The OS type always comes from the export (no user-override path visible).
8449 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8451 if self.op.disk_template is None:
8452 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8453 self.op.disk_template = einfo.get(constants.INISECT_INS,
8456 raise errors.OpPrereqError("No disk template specified and the export"
8457 " is missing the disk_template information",
# Rebuild the disk list from disk<N>_size entries when none was given.
8460 if not self.op.disks:
8461 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8463 # TODO: import the disk iv_name too
8464 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8465 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8466 disks.append({constants.IDISK_SIZE: disk_sz})
8467 self.op.disks = disks
8469 raise errors.OpPrereqError("No disk info specified and the export"
8470 " is missing the disk information",
# Same idea for NICs: read each nic<N>_<param> (mode/link/... plus ip, mac).
8473 if (not self.op.nics and
8474 einfo.has_option(constants.INISECT_INS, "nic_count")):
8476 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8478 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8479 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8484 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8485 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8487 if (self.op.hypervisor is None and
8488 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8489 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8491 if einfo.has_section(constants.INISECT_HYP):
8492 # use the export parameters but do not override the ones
8493 # specified by the user
8494 for name, value in einfo.items(constants.INISECT_HYP):
8495 if name not in self.op.hvparams:
8496 self.op.hvparams[name] = value
8498 if einfo.has_section(constants.INISECT_BEP):
8499 # use the parameters, without overriding
8500 for name, value in einfo.items(constants.INISECT_BEP):
8501 if name not in self.op.beparams:
8502 self.op.beparams[name] = value
8504 # try to read the parameters old style, from the main section
8505 for name in constants.BES_PARAMETERS:
8506 if (name not in self.op.beparams and
8507 einfo.has_option(constants.INISECT_INS, name)):
8508 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8510 if einfo.has_section(constants.INISECT_OSP):
8511 # use the parameters, without overriding
8512 for name, value in einfo.items(constants.INISECT_OSP):
8513 if name not in self.op.osparams:
8514 self.op.osparams[name] = value
# Strip from the opcode every hv/be/nic/os parameter whose value equals the
# cluster default, so the stored instance only records explicit overrides.
# Used when self.op.identify_defaults is set (see CheckPrereq).
8516 def _RevertToDefaults(self, cluster):
8517 """Revert the instance parameters to the default values.
# Compare against cluster defaults filled with an empty override dict.
8521 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
# .keys() snapshot allows deleting from the dict while iterating (Python 2).
8522 for name in self.op.hvparams.keys():
8523 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8524 del self.op.hvparams[name]
8526 be_defs = cluster.SimpleFillBE({})
8527 for name in self.op.beparams.keys():
8528 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8529 del self.op.beparams[name]
8531 nic_defs = cluster.SimpleFillNIC({})
8532 for nic in self.op.nics:
8533 for name in constants.NICS_PARAMETERS:
# deletion of the matching nic entry is on an elided line in this listing
8534 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8537 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8538 for name in self.op.osparams.keys():
8539 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8540 del self.op.osparams[name]
# Compute self.instance_file_storage_dir for file-based disk templates;
# stays None for non-file templates.
8542 def _CalculateFileStorageDir(self):
8543 """Calculate final instance file storage dir.
8546 # file storage dir calculation/check
8547 self.instance_file_storage_dir = None
8548 if self.op.disk_template in constants.DTS_FILEBASED:
8549 # build the full file storage dir path
# Pick the cluster-level base dir: shared-file template uses the shared dir.
8552 if self.op.disk_template == constants.DT_SHARED_FILE:
8553 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8555 get_fsd_fn = self.cfg.GetFileStorageDir
8557 cfg_storagedir = get_fsd_fn()
8558 if not cfg_storagedir:
8559 raise errors.OpPrereqError("Cluster file storage dir not defined")
8560 joinargs.append(cfg_storagedir)
# Optional per-instance subdirectory requested in the opcode.
8562 if self.op.file_storage_dir is not None:
8563 joinargs.append(self.op.file_storage_dir)
8565 joinargs.append(self.op.instance_name)
8567 # pylint: disable=W0142
8568 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
# Prerequisite checks for instance creation: merges export defaults (import
# mode), validates hv/be/os/nic/disk parameters, runs the iallocator if
# requested, and verifies primary/secondary nodes and disk adoption data.
# NOTE(review): several lines are elided from this listing; comments only
# describe what is visible.
8570 def CheckPrereq(self):
8571 """Check prerequisites.
8574 self._CalculateFileStorageDir()
# In import mode, read the export and use it to fill unset opcode fields.
8576 if self.op.mode == constants.INSTANCE_IMPORT:
8577 export_info = self._ReadExportInfo()
8578 self._ReadExportParams(export_info)
8580 if (not self.cfg.GetVGName() and
8581 self.op.disk_template not in constants.DTS_NOT_LVM):
8582 raise errors.OpPrereqError("Cluster does not support lvm-based"
8583 " instances", errors.ECODE_STATE)
8585 if self.op.hypervisor is None:
8586 self.op.hypervisor = self.cfg.GetHypervisorType()
8588 cluster = self.cfg.GetClusterInfo()
8589 enabled_hvs = cluster.enabled_hypervisors
8590 if self.op.hypervisor not in enabled_hvs:
8591 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8592 " cluster (%s)" % (self.op.hypervisor,
8593 ",".join(enabled_hvs)),
8596 # Check tag validity
8597 for tag in self.op.tags:
8598 objects.TaggableObject.ValidateTag(tag)
8600 # check hypervisor parameter syntax (locally)
8601 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8602 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8604 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8605 hv_type.CheckParameterSyntax(filled_hvp)
# Remember the fully-filled hvparams for later use (e.g. hooks).
8606 self.hv_full = filled_hvp
8607 # check that we don't specify global parameters on an instance
8608 _CheckGlobalHvParams(self.op.hvparams)
8610 # fill and remember the beparams dict
8611 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8612 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8614 # build os parameters
8615 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8617 # now that hvp/bep are in final format, let's reset to defaults,
8619 if self.op.identify_defaults:
8620 self._RevertToDefaults(cluster)
# --- NIC verification and building of objects.NIC instances ---
8624 for idx, nic in enumerate(self.op.nics):
8625 nic_mode_req = nic.get(constants.INIC_MODE, None)
8626 nic_mode = nic_mode_req
8627 if nic_mode is None:
8628 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8630 # in routed mode, for the first nic, the default ip is 'auto'
8631 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8632 default_ip_mode = constants.VALUE_AUTO
8634 default_ip_mode = constants.VALUE_NONE
8636 # ip validity checks
8637 ip = nic.get(constants.INIC_IP, default_ip_mode)
8638 if ip is None or ip.lower() == constants.VALUE_NONE:
8640 elif ip.lower() == constants.VALUE_AUTO:
# 'auto' ip is only meaningful when the instance name was resolved.
8641 if not self.op.name_check:
8642 raise errors.OpPrereqError("IP address set to auto but name checks"
8643 " have been skipped",
8645 nic_ip = self.hostname1.ip
8647 if not netutils.IPAddress.IsValid(ip):
8648 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8652 # TODO: check the ip address for uniqueness
8653 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8654 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8657 # MAC address verification
8658 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8659 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8660 mac = utils.NormalizeAndValidateMac(mac)
# Reserve the user-given MAC under this execution's id to detect clashes.
8663 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8664 except errors.ReservationError:
8665 raise errors.OpPrereqError("MAC address %s already in use"
8666 " in cluster" % mac,
8667 errors.ECODE_NOTUNIQUE)
8669 # Build nic parameters
8670 link = nic.get(constants.INIC_LINK, None)
8673 nicparams[constants.NIC_MODE] = nic_mode_req
8675 nicparams[constants.NIC_LINK] = link
8677 check_params = cluster.SimpleFillNIC(nicparams)
8678 objects.NIC.CheckParameterSyntax(check_params)
8679 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8681 # disk checks/pre-build
8682 default_vg = self.cfg.GetVGName()
8684 for disk in self.op.disks:
8685 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8686 if mode not in constants.DISK_ACCESS_SET:
8687 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8688 mode, errors.ECODE_INVAL)
8689 size = disk.get(constants.IDISK_SIZE, None)
8691 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8694 except (TypeError, ValueError):
8695 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8698 data_vg = disk.get(constants.IDISK_VG, default_vg)
8700 constants.IDISK_SIZE: size,
8701 constants.IDISK_MODE: mode,
8702 constants.IDISK_VG: data_vg,
# meta VG defaults to the data VG when not given explicitly
8703 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8705 if constants.IDISK_ADOPT in disk:
8706 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8707 self.disks.append(new_disk)
8709 if self.op.mode == constants.INSTANCE_IMPORT:
8711 # Check that the new instance doesn't have less disks than the export
8712 instance_disks = len(self.disks)
8713 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8714 if instance_disks < export_disks:
8715 raise errors.OpPrereqError("Not enough disks to import."
8716 " (instance: %d, export: %d)" %
8717 (instance_disks, export_disks),
# Build the per-disk source image list; False marks disks with no dump.
8721 for idx in range(export_disks):
8722 option = "disk%d_dump" % idx
8723 if export_info.has_option(constants.INISECT_INS, option):
8724 # FIXME: are the old os-es, disk sizes, etc. useful?
8725 export_name = export_info.get(constants.INISECT_INS, option)
8726 image = utils.PathJoin(self.op.src_path, export_name)
8727 disk_images.append(image)
8729 disk_images.append(False)
8731 self.src_images = disk_images
8733 old_name = export_info.get(constants.INISECT_INS, "name")
8735 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8736 except (TypeError, ValueError), err:
8737 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8738 " an integer: %s" % str(err),
# If re-importing under the same name, reuse the exported MACs where the
# user asked for automatic ones.
8740 if self.op.instance_name == old_name:
8741 for idx, nic in enumerate(self.nics):
8742 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8743 nic_mac_ini = "nic%d_mac" % idx
8744 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8746 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8748 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8749 if self.op.ip_check:
8750 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8751 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8752 (self.check_ip, self.op.instance_name),
8753 errors.ECODE_NOTUNIQUE)
8755 #### mac address generation
8756 # By generating here the mac address both the allocator and the hooks get
8757 # the real final mac address rather than the 'auto' or 'generate' value.
8758 # There is a race condition between the generation and the instance object
8759 # creation, which means that we know the mac is valid now, but we're not
8760 # sure it will be when we actually add the instance. If things go bad
8761 # adding the instance will abort because of a duplicate mac, and the
8762 # creation job will fail.
8763 for nic in self.nics:
8764 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8765 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
# Let the iallocator pick the node(s) when requested; it fills self.op.pnode.
8769 if self.op.iallocator is not None:
8770 self._RunAllocator()
8772 #### node related checks
8774 # check primary node
8775 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8776 assert self.pnode is not None, \
8777 "Cannot retrieve locked node %s" % self.op.pnode
8779 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8780 pnode.name, errors.ECODE_STATE)
8782 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8783 pnode.name, errors.ECODE_STATE)
8784 if not pnode.vm_capable:
8785 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8786 " '%s'" % pnode.name, errors.ECODE_STATE)
8788 self.secondaries = []
8790 # mirror node verification
8791 if self.op.disk_template in constants.DTS_INT_MIRROR:
8792 if self.op.snode == pnode.name:
8793 raise errors.OpPrereqError("The secondary node cannot be the"
8794 " primary node", errors.ECODE_INVAL)
8795 _CheckNodeOnline(self, self.op.snode)
8796 _CheckNodeNotDrained(self, self.op.snode)
8797 _CheckNodeVmCapable(self, self.op.snode)
8798 self.secondaries.append(self.op.snode)
8800 nodenames = [pnode.name] + self.secondaries
8802 if not self.adopt_disks:
8803 # Check lv size requirements, if not adopting
8804 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8805 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8807 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8808 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8809 disk[constants.IDISK_ADOPT])
8810 for disk in self.disks])
8811 if len(all_lvs) != len(self.disks):
8812 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8814 for lv_name in all_lvs:
8816 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8817 # to ReserveLV uses the same syntax
8818 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8819 except errors.ReservationError:
8820 raise errors.OpPrereqError("LV named %s used by another instance" %
8821 lv_name, errors.ECODE_NOTUNIQUE)
8823 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8824 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8826 node_lvs = self.rpc.call_lv_list([pnode.name],
8827 vg_names.payload.keys())[pnode.name]
8828 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8829 node_lvs = node_lvs.payload
8831 delta = all_lvs.difference(node_lvs.keys())
8833 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8834 utils.CommaJoin(delta),
# index [2] of the lv_list payload is the "online/in-use" flag here
8836 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8838 raise errors.OpPrereqError("Online logical volumes found, cannot"
8839 " adopt: %s" % utils.CommaJoin(online_lvs),
8841 # update the size of disk based on what is found
8842 for dsk in self.disks:
8843 dsk[constants.IDISK_SIZE] = \
8844 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8845 dsk[constants.IDISK_ADOPT])][0]))
8847 elif self.op.disk_template == constants.DT_BLOCK:
8848 # Normalize and de-duplicate device paths
8849 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8850 for disk in self.disks])
8851 if len(all_disks) != len(self.disks):
8852 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8854 baddisks = [d for d in all_disks
8855 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8857 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8858 " cannot be adopted" %
8859 (", ".join(baddisks),
8860 constants.ADOPTABLE_BLOCKDEV_ROOT),
8863 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8864 list(all_disks))[pnode.name]
8865 node_disks.Raise("Cannot get block device information from node %s" %
8867 node_disks = node_disks.payload
8868 delta = all_disks.difference(node_disks.keys())
8870 raise errors.OpPrereqError("Missing block device(s): %s" %
8871 utils.CommaJoin(delta),
8873 for dsk in self.disks:
8874 dsk[constants.IDISK_SIZE] = \
8875 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8877 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8879 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8880 # check OS parameters (remotely)
8881 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8883 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8885 # memory check on primary node
8887 _CheckNodeFreeMemory(self, self.pnode.name,
8888 "creating instance %s" % self.op.instance_name,
8889 self.be_full[constants.BE_MEMORY],
# dry-run returns the node names that would be used
8892 self.dry_run_result = list(nodenames)
# Execution phase of instance creation: allocate a network port, create or
# adopt the disks, add the instance to the config, install/import the OS,
# and optionally start the instance. NOTE(review): this listing elides some
# lines; comments describe only what is visible.
8894 def Exec(self, feedback_fn):
8895 """Create and add the instance to the cluster.
8898 instance = self.op.instance_name
8899 pnode_name = self.pnode.name
8901 ht_kind = self.op.hypervisor
# Only hypervisors in HTS_REQ_PORT need a console/network port allocated.
8902 if ht_kind in constants.HTS_REQ_PORT:
8903 network_port = self.cfg.AllocatePort()
8907 disks = _GenerateDiskTemplate(self,
8908 self.op.disk_template,
8909 instance, pnode_name,
8912 self.instance_file_storage_dir,
8913 self.op.file_driver,
8917 iobj = objects.Instance(name=instance, os=self.op.os_type,
8918 primary_node=pnode_name,
8919 nics=self.nics, disks=disks,
8920 disk_template=self.op.disk_template,
8922 network_port=network_port,
8923 beparams=self.op.beparams,
8924 hvparams=self.op.hvparams,
8925 hypervisor=self.op.hypervisor,
8926 osparams=self.op.osparams,
8930 for tag in self.op.tags:
8933 if self.adopt_disks:
8934 if self.op.disk_template == constants.DT_PLAIN:
8935 # rename LVs to the newly-generated names; we need to construct
8936 # 'fake' LV disks with the old data, plus the new unique_id
8937 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8939 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8940 rename_to.append(t_dsk.logical_id)
# swap in the adopted LV name so the rename maps old name -> new name
8941 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8942 self.cfg.SetDiskID(t_dsk, pnode_name)
8943 result = self.rpc.call_blockdev_rename(pnode_name,
8944 zip(tmp_disks, rename_to))
8945 result.Raise("Failed to rename adoped LVs")
8947 feedback_fn("* creating instance disks...")
8949 _CreateDisks(self, iobj)
8950 except errors.OpExecError:
8951 self.LogWarning("Device creation failed, reverting...")
8953 _RemoveDisks(self, iobj)
# release any DRBD minors reserved for this instance before bailing out
8955 self.cfg.ReleaseDRBDMinors(instance)
8958 feedback_fn("adding instance %s to cluster config" % instance)
8960 self.cfg.AddInstance(iobj, self.proc.GetECId())
8962 # Declare that we don't want to remove the instance lock anymore, as we've
8963 # added the instance to the config
8964 del self.remove_locks[locking.LEVEL_INSTANCE]
8966 if self.op.mode == constants.INSTANCE_IMPORT:
8967 # Release unused nodes
8968 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8971 _ReleaseLocks(self, locking.LEVEL_NODE)
8974 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8975 feedback_fn("* wiping instance disks...")
8977 _WipeDisks(self, iobj)
# wiping failures are reported but do not abort the creation
8978 except errors.OpExecError, err:
8979 logging.exception("Wiping disks failed")
8980 self.LogWarning("Wiping instance disks failed (%s)", err)
8984 # Something is already wrong with the disks, don't do anything else
8986 elif self.op.wait_for_sync:
8987 disk_abort = not _WaitForSync(self, iobj)
8988 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8989 # make sure the disks are not degraded (still sync-ing is ok)
8990 feedback_fn("* checking mirrors status")
8991 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
# On degraded disks: roll back disks + config entry, keep the lock removal.
8996 _RemoveDisks(self, iobj)
8997 self.cfg.RemoveInstance(iobj.name)
8998 # Make sure the instance lock gets removed
8999 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9000 raise errors.OpExecError("There are some degraded disks for"
9003 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9004 if self.op.mode == constants.INSTANCE_CREATE:
9005 if not self.op.no_install:
# Pause DRBD sync during OS install to speed it up, resume afterwards.
9006 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9007 not self.op.wait_for_sync)
9009 feedback_fn("* pausing disk sync to install instance OS")
9010 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9012 for idx, success in enumerate(result.payload):
9014 logging.warn("pause-sync of instance %s for disk %d failed",
9017 feedback_fn("* running the instance OS create scripts...")
9018 # FIXME: pass debug option from opcode to backend
9019 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9020 self.op.debug_level)
9022 feedback_fn("* resuming disk sync")
9023 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9025 for idx, success in enumerate(result.payload):
9027 logging.warn("resume-sync of instance %s for disk %d failed",
9030 result.Raise("Could not add os for instance %s"
9031 " on node %s" % (instance, pnode_name))
9033 elif self.op.mode == constants.INSTANCE_IMPORT:
9034 feedback_fn("* running the instance OS import scripts...")
# Build one DiskTransfer per source image (local file -> import script).
9038 for idx, image in enumerate(self.src_images):
9042 # FIXME: pass debug option from opcode to backend
9043 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9044 constants.IEIO_FILE, (image, ),
9045 constants.IEIO_SCRIPT,
9046 (iobj.disks[idx], idx),
9048 transfers.append(dt)
9051 masterd.instance.TransferInstanceData(self, feedback_fn,
9052 self.op.src_node, pnode_name,
9053 self.pnode.secondary_ip,
9055 if not compat.all(import_result):
9056 self.LogWarning("Some disks for instance %s on node %s were not"
9057 " imported successfully" % (instance, pnode_name))
9059 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9060 feedback_fn("* preparing remote import...")
9061 # The source cluster will stop the instance before attempting to make a
9062 # connection. In some cases stopping an instance can take a long time,
9063 # hence the shutdown timeout is added to the connection timeout.
9064 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9065 self.op.source_shutdown_timeout)
9066 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9068 assert iobj.primary_node == self.pnode.name
9070 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9071 self.source_x509_ca,
9072 self._cds, timeouts)
9073 if not compat.all(disk_results):
9074 # TODO: Should the instance still be started, even if some disks
9075 # failed to import (valid for local imports, too)?
9076 self.LogWarning("Some disks for instance %s on node %s were not"
9077 " imported successfully" % (instance, pnode_name))
9079 # Run rename script on newly imported instance
9080 assert iobj.name == instance
9081 feedback_fn("Running rename script for %s" % instance)
9082 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9083 self.source_instance_name,
9084 self.op.debug_level)
# rename failures are non-fatal: the instance is already imported
9086 self.LogWarning("Failed to run rename script for %s on node"
9087 " %s: %s" % (instance, pnode_name, result.fail_msg))
9090 # also checked in the prereq part
9091 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
# Optionally start the instance (condition elided in this listing).
9095 iobj.admin_up = True
9096 self.cfg.Update(iobj, feedback_fn)
9097 logging.info("Starting instance %s on node %s", instance, pnode_name)
9098 feedback_fn("* starting instance...")
9099 result = self.rpc.call_instance_start(pnode_name, iobj,
9101 result.Raise("Could not start instance")
# Return the list of all nodes of the new instance.
9103 return list(iobj.all_nodes)
# NoHooksLU: console access is read-only, so no hooks are run.
9106 class LUInstanceConsole(NoHooksLU):
9107 """Connect to an instance's console.
9109 This is somewhat special in that it returns the command line that
9110 you need to run on the master node in order to connect to the
9116 def ExpandNames(self):
# Only the instance lock is needed for console access.
9117 self._ExpandAndLockInstance()
9119 def CheckPrereq(self):
9120 """Check prerequisites.
9122 This checks that the instance is in the cluster.
9125 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9126 assert self.instance is not None, \
9127 "Cannot retrieve locked instance %s" % self.op.instance_name
9128 _CheckNodeOnline(self, self.instance.primary_node)
9130 def Exec(self, feedback_fn):
9131 """Connect to the console of an instance
9134 instance = self.instance
9135 node = instance.primary_node
# Ask the primary node whether the instance is actually running there.
9137 node_insts = self.rpc.call_instance_list([node],
9138 [instance.hypervisor])[node]
9139 node_insts.Raise("Can't get node information from %s" % node)
9141 if instance.name not in node_insts.payload:
# Distinguish "admin wanted it up but it's down" from "admin-down".
9142 if instance.admin_up:
9143 state = constants.INSTST_ERRORDOWN
9145 state = constants.INSTST_ADMINDOWN
9146 raise errors.OpExecError("Instance %s is not running (state %s)" %
9147 (instance.name, state))
9149 logging.debug("Connecting to console of %s on %s", instance.name, node)
9151 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
# Module-level helper shared by console-related LUs/queries.
9154 def _GetInstanceConsole(cluster, instance):
9155 """Returns console information for an instance.
9157 @type cluster: L{objects.Cluster}
9158 @type instance: L{objects.Instance}
9162 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9163 # beparams and hvparams are passed separately, to avoid editing the
9164 # instance and then saving the defaults in the instance itself.
9165 hvparams = cluster.FillHV(instance)
9166 beparams = cluster.FillBE(instance)
9167 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
# Sanity-check the hypervisor's answer before serializing it.
9169 assert console.instance == instance.name
9170 assert console.Validate()
# Returned as a plain dict so it can cross the LUXI/RPC boundary.
9172 return console.ToDict()
# Thin LU wrapper: argument checking, locking and hooks live here, while the
# actual disk replacement work is delegated to the TLReplaceDisks tasklet.
9175 class LUInstanceReplaceDisks(LogicalUnit):
9176 """Replace the disks of an instance.
9179 HPATH = "mirrors-replace"
9180 HTYPE = constants.HTYPE_INSTANCE
9183 def CheckArguments(self):
# Delegate validation to the tasklet's static helper (shared with callers).
9184 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9187 def ExpandNames(self):
9188 self._ExpandAndLockInstance()
9190 assert locking.LEVEL_NODE not in self.needed_locks
9191 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9193 assert self.op.iallocator is None or self.op.remote_node is None, \
9194 "Conflicting options"
9196 if self.op.remote_node is not None:
9197 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9199 # Warning: do not remove the locking of the new secondary here
9200 # unless DRBD8.AddChildren is changed to work in parallel;
9201 # currently it doesn't since parallel invocations of
9202 # FindUnusedMinor will conflict
9203 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9204 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9206 self.needed_locks[locking.LEVEL_NODE] = []
9207 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9209 if self.op.iallocator is not None:
9210 # iallocator will select a new node in the same group
9211 self.needed_locks[locking.LEVEL_NODEGROUP] = []
# delay_iallocator is hard-coded to False here (7th positional argument).
9213 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9214 self.op.iallocator, self.op.remote_node,
9215 self.op.disks, False, self.op.early_release)
9217 self.tasklets = [self.replacer]
9219 def DeclareLocks(self, level):
9220 if level == locking.LEVEL_NODEGROUP:
9221 assert self.op.remote_node is None
9222 assert self.op.iallocator is not None
9223 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9225 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9226 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9227 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9229 elif level == locking.LEVEL_NODE:
9230 if self.op.iallocator is not None:
9231 assert self.op.remote_node is None
9232 assert not self.needed_locks[locking.LEVEL_NODE]
9234 # Lock member nodes of all locked groups
9235 self.needed_locks[locking.LEVEL_NODE] = [node_name
9236 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9237 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9239 self._LockInstancesNodes()
9241 def BuildHooksEnv(self):
9244 This runs on the master, the primary and all the secondaries.
9247 instance = self.replacer.instance
9249 "MODE": self.op.mode,
9250 "NEW_SECONDARY": self.op.remote_node,
# assumes exactly one secondary (DRBD8 layout, enforced by the tasklet)
9251 "OLD_SECONDARY": instance.secondary_nodes[0],
9253 env.update(_BuildInstanceHookEnvByObject(self, instance))
9256 def BuildHooksNodes(self):
9257 """Build hooks nodes.
9260 instance = self.replacer.instance
9262 self.cfg.GetMasterNode(),
9263 instance.primary_node,
9265 if self.op.remote_node is not None:
9266 nl.append(self.op.remote_node)
9269 def CheckPrereq(self):
9270 """Check prerequisites.
9273 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9274 self.op.iallocator is None)
9276 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9278 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
# Tasklet prereqs run via the base class (self.tasklets set in ExpandNames).
9280 return LogicalUnit.CheckPrereq(self)
9283 class TLReplaceDisks(Tasklet):
9284 """Replaces disks for an instance.
9286 Note: Locking is not within the scope of this class.
# Store the replacement parameters; all derived state (instance object,
# target/other/new node) is computed later in CheckPrereq/_CheckPrereq2.
9289 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9290 disks, delay_iallocator, early_release):
9291 """Initializes this class.
9294 Tasklet.__init__(self, lu)
# Parameters
9297 self.instance_name = instance_name
9299 self.iallocator_name = iallocator_name
9300 self.remote_node = remote_node
# when True, the iallocator run is postponed from CheckPrereq to Exec
9302 self.delay_iallocator = delay_iallocator
9303 self.early_release = early_release
# Runtime data, filled in during the prereq phase
9306 self.instance = None
9307 self.new_node = None
9308 self.target_node = None
9309 self.other_node = None
9310 self.remote_node_info = None
9311 self.node_secondary_ip = None
# Static validation of the mode/remote_node/iallocator combination; also
# called directly by LUInstanceReplaceDisks.CheckArguments.
9314 def CheckArguments(mode, remote_node, iallocator):
9315 """Helper function for users of this class.
9318 # check for valid parameter combination
9319 if mode == constants.REPLACE_DISK_CHG:
# Changing the secondary requires exactly one of: new node, iallocator.
9320 if remote_node is None and iallocator is None:
9321 raise errors.OpPrereqError("When changing the secondary either an"
9322 " iallocator script must be used or the"
9323 " new node given", errors.ECODE_INVAL)
9325 if remote_node is not None and iallocator is not None:
9326 raise errors.OpPrereqError("Give either the iallocator or the new"
9327 " secondary, not both", errors.ECODE_INVAL)
9329 elif remote_node is not None or iallocator is not None:
9330 # Not replacing the secondary
9331 raise errors.OpPrereqError("The iallocator and new node options can"
9332 " only be used when changing the"
9333 " secondary node", errors.ECODE_INVAL)
# Run the IAllocator in relocation mode and return the chosen node name;
# raises OpPrereqError if the allocator fails or returns a bad result.
9336 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9337 """Compute a new secondary node using an IAllocator.
9340 ial = IAllocator(lu.cfg, lu.rpc,
9341 mode=constants.IALLOCATOR_MODE_RELOC,
9343 relocate_from=list(relocate_from))
9345 ial.Run(iallocator_name)
# Failure/success flag check is on an elided line in this listing.
9348 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9349 " %s" % (iallocator_name, ial.info),
9352 if len(ial.result) != ial.required_nodes:
9353 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9354 " of nodes (%s), required %s" %
9356 len(ial.result), ial.required_nodes),
# For relocation exactly one node is expected; take the first result.
9359 remote_node_name = ial.result[0]
9361 lu.LogInfo("Selected new secondary for instance '%s': %s",
9362 instance_name, remote_node_name)
9364 return remote_node_name
# Convenience wrapper binding the tasklet's cfg/rpc/instance to the
# module-level faulty-disk scan for a single node.
9366 def _FindFaultyDisks(self, node_name):
9367 """Wrapper for L{_FindFaultyInstanceDisks}.
9370 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
# Probe every disk on every node of the instance via blockdev_find; used by
# the automatic replace mode, which needs active disks to detect faults.
9373 def _CheckDisksActivated(self, instance):
9374 """Checks if the instance disks are activated.
9376 @param instance: The instance to check disks
9377 @return: True if they are activated, False otherwise
9380 nodes = instance.all_nodes
9382 for idx, dev in enumerate(instance.disks):
9384 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9385 self.cfg.SetDiskID(dev, node)
9387 result = self.rpc.call_blockdev_find(node, dev)
# A failed RPC or an empty payload means the disk is not active there.
9391 elif result.fail_msg or not result.payload:
# First-stage prereq: checks the instance exists, is DRBD8-based and has the
# expected single-secondary layout; the node-level checks live in
# _CheckPrereq2, which may be deferred when delay_iallocator is set.
9396 def CheckPrereq(self):
9397 """Check prerequisites.
9399 This checks that the instance is in the cluster.
9402 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9403 assert instance is not None, \
9404 "Cannot retrieve locked instance %s" % self.instance_name
9406 if instance.disk_template != constants.DT_DRBD8:
9407 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9408 " instances", errors.ECODE_INVAL)
9410 if len(instance.secondary_nodes) != 1:
9411 raise errors.OpPrereqError("The instance has a strange layout,"
9412 " expected one secondary but found %d" %
9413 len(instance.secondary_nodes),
# Second stage runs now unless iallocator evaluation is delayed to Exec.
9416 if not self.delay_iallocator:
9417 self._CheckPrereq2()
# _CheckPrereq2: second phase of prerequisite checking for DRBD disk
# replacement. Called from CheckPrereq normally, or deferred to Exec when
# delay_iallocator is set, so the allocator sees planned cluster changes.
# Decides target_node/other_node/new_node per replace mode, validates the
# remote node (locked, not primary, not already secondary, not drained,
# vm-capable), releases unneeded node/nodegroup locks and collects
# secondary IPs of the touched nodes.
# NOTE(review): this chunk is line-sampled (original line numbers are
# embedded and intermediate lines such as else-branches are missing) —
# confirm against upstream Ganeti cmdlib.py before editing.
9419 def _CheckPrereq2(self):
9420 """Check prerequisites, second part.
9422 This function should always be part of CheckPrereq. It was separated and is
9423 now called from Exec because during node evacuation iallocator was only
9424 called with an unmodified cluster model, not taking planned changes into
9428 instance = self.instance
9429 secondary_node = instance.secondary_nodes[0]
# Either use the explicitly given remote node or ask the iallocator.
# NOTE(review): the else-branch line between 9432 and 9434 is missing here.
9431 if self.iallocator_name is None:
9432 remote_node = self.remote_node
9434 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9435 instance.name, instance.secondary_nodes)
9437 if remote_node is None:
9438 self.remote_node_info = None
9440 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9441 "Remote node '%s' is not locked" % remote_node
9443 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9444 assert self.remote_node_info is not None, \
9445 "Cannot retrieve locked node %s" % remote_node
9447 if remote_node == self.instance.primary_node:
9448 raise errors.OpPrereqError("The specified node is the primary node of"
9449 " the instance", errors.ECODE_INVAL)
9451 if remote_node == secondary_node:
9452 raise errors.OpPrereqError("The specified node is already the"
9453 " secondary node of the instance",
# Explicit disk lists only make sense for PRI/SEC replaces.
9456 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9457 constants.REPLACE_DISK_CHG):
9458 raise errors.OpPrereqError("Cannot specify disks to be replaced",
# Automatic mode: derive the faulty disk set per node; refuse when both
# sides are faulty (cannot repair automatically).
9461 if self.mode == constants.REPLACE_DISK_AUTO:
9462 if not self._CheckDisksActivated(instance):
9463 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9464 " first" % self.instance_name,
9466 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9467 faulty_secondary = self._FindFaultyDisks(secondary_node)
9469 if faulty_primary and faulty_secondary:
9470 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9471 " one node and can not be repaired"
9472 " automatically" % self.instance_name,
9476 self.disks = faulty_primary
9477 self.target_node = instance.primary_node
9478 self.other_node = secondary_node
9479 check_nodes = [self.target_node, self.other_node]
9480 elif faulty_secondary:
9481 self.disks = faulty_secondary
9482 self.target_node = secondary_node
9483 self.other_node = instance.primary_node
9484 check_nodes = [self.target_node, self.other_node]
9490 # Non-automatic modes
9491 if self.mode == constants.REPLACE_DISK_PRI:
9492 self.target_node = instance.primary_node
9493 self.other_node = secondary_node
9494 check_nodes = [self.target_node, self.other_node]
9496 elif self.mode == constants.REPLACE_DISK_SEC:
9497 self.target_node = secondary_node
9498 self.other_node = instance.primary_node
9499 check_nodes = [self.target_node, self.other_node]
9501 elif self.mode == constants.REPLACE_DISK_CHG:
9502 self.new_node = remote_node
9503 self.other_node = instance.primary_node
9504 self.target_node = secondary_node
9505 check_nodes = [self.new_node, self.other_node]
9507 _CheckNodeNotDrained(self.lu, remote_node)
9508 _CheckNodeVmCapable(self.lu, remote_node)
9510 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9511 assert old_node_info is not None
9512 if old_node_info.offline and not self.early_release:
9513 # doesn't make sense to delay the release
9514 self.early_release = True
9515 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9516 " early-release mode", secondary_node)
9519 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9522 # If not specified all disks should be replaced
9524 self.disks = range(len(self.instance.disks))
9526 for node in check_nodes:
9527 _CheckNodeOnline(self.lu, node)
# Nodes still needed after this point; all other node locks are released.
# NOTE(review): the full touched-node list between 9529 and 9532 is
# missing — presumably includes other_node and target_node; verify.
9529 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9532 if node_name is not None)
9534 # Release unneeded node locks
9535 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9537 # Release any owned node group
9538 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9539 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9541 # Check whether disks are valid
9542 for disk_idx in self.disks:
9543 instance.FindDisk(disk_idx)
9545 # Get secondary node IP addresses
9546 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9547 in self.cfg.GetMultiNodeInfo(touched_nodes))
# Exec: entry point of the replace-disks task. Verifies the lock state,
# optionally activates disks of a down instance, dispatches to the
# secondary-change or same-node handler, then re-verifies locks.
# NOTE(review): chunk is line-sampled (embedded line numbers, missing
# lines such as the `if not self.disks:` guard around 9572) — confirm
# against upstream Ganeti cmdlib.py.
9549 def Exec(self, feedback_fn):
9550 """Execute disk replacement.
9552 This dispatches the disk replacement to the appropriate handler.
# Deferred second-phase prereq check (see _CheckPrereq2).
9555 if self.delay_iallocator:
9556 self._CheckPrereq2()
9559 # Verify owned locks before starting operation
9560 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9561 assert set(owned_nodes) == set(self.node_secondary_ip), \
9562 ("Incorrect node locks, owning %s, expected %s" %
9563 (owned_nodes, self.node_secondary_ip.keys()))
9565 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9566 assert list(owned_instances) == [self.instance_name], \
9567 "Instance '%s' not locked" % self.instance_name
9569 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9570 "Should not own any node group lock at this point"
9573 feedback_fn("No disks need replacement")
9576 feedback_fn("Replacing disk(s) %s for %s" %
9577 (utils.CommaJoin(self.disks), self.instance.name))
# Remember whether we had to bring the disks up ourselves, so they can
# be shut down again afterwards.
9579 activate_disks = (not self.instance.admin_up)
9581 # Activate the instance disks if we're replacing them on a down instance
9583 _StartInstanceDisks(self.lu, self.instance, True)
9586 # Should we replace the secondary node?
9587 if self.new_node is not None:
9588 fn = self._ExecDrbd8Secondary
9590 fn = self._ExecDrbd8DiskOnly
9592 result = fn(feedback_fn)
9594 # Deactivate the instance disks if we're replacing them on a
9597 _SafeShutdownInstanceDisks(self.lu, self.instance)
9600 # Verify owned locks
9601 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9602 nodes = frozenset(self.node_secondary_ip)
# With early_release all node locks must be gone; otherwise we may only
# hold locks on the touched nodes.
9603 assert ((self.early_release and not owned_nodes) or
9604 (not self.early_release and not (set(owned_nodes) - nodes))), \
9605 ("Not owning the correct locks, early_release=%s, owned=%r,"
9606 " nodes=%r" % (self.early_release, owned_nodes, nodes))
# _CheckVolumeGroup: verify the cluster volume group exists on every
# given node (via the vg_list RPC), raising OpExecError otherwise.
# NOTE(review): chunk is line-sampled — the loop header over `results`
# (between 9618 and 9622) is missing; confirm against upstream.
9610 def _CheckVolumeGroup(self, nodes):
9611 self.lu.LogInfo("Checking volume groups")
9613 vgname = self.cfg.GetVGName()
9615 # Make sure volume group exists on all involved nodes
9616 results = self.rpc.call_vg_list(nodes)
9618 raise errors.OpExecError("Can't list volume groups on the nodes")
9622 res.Raise("Error checking node %s" % node)
9623 if vgname not in res.payload:
9624 raise errors.OpExecError("Volume group '%s' not found on node %s" %
# _CheckDisksExistence: for each disk selected for replacement, verify it
# can be found (blockdev_find RPC) on every given node; raise OpExecError
# when a disk is missing or the RPC failed.
# NOTE(review): chunk is line-sampled — the `continue` after the index
# filter and the per-node loop header are missing; confirm upstream.
9627 def _CheckDisksExistence(self, nodes):
9628 # Check disk existence
9629 for idx, dev in enumerate(self.instance.disks):
9630 if idx not in self.disks:
9634 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9635 self.cfg.SetDiskID(dev, node)
9637 result = self.rpc.call_blockdev_find(node, dev)
9639 msg = result.fail_msg
9640 if msg or not result.payload:
9642 msg = "disk not found"
9643 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
# _CheckDisksConsistency: abort the replace when any selected disk is
# degraded on node_name (delegates to _CheckDiskConsistency); on_primary
# and ldisk are forwarded to that helper.
# NOTE(review): chunk is line-sampled — the `continue` and the closing
# arguments of the LogInfo/_CheckDiskConsistency calls are missing.
9646 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9647 for idx, dev in enumerate(self.instance.disks):
9648 if idx not in self.disks:
9651 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9654 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9656 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9657 " replace disks for instance %s" %
9658 (node_name, self.instance.name))
# _CreateNewStorage: allocate fresh data+meta LVs on node_name for every
# disk selected for replacement, mirroring the VGs of the existing LV
# children; returns (via iv_names) a map of iv_name -> (dev, old_lvs,
# new_lvs) used by the caller for the detach/rename/attach dance.
# NOTE(review): chunk is line-sampled — the `iv_names = {}` initializer,
# the `continue`, and the `return iv_names` are missing; confirm upstream.
9660 def _CreateNewStorage(self, node_name):
9661 """Create new storage on the primary or secondary node.
9663 This is only used for same-node replaces, not for changing the
9664 secondary node, hence we don't want to modify the existing disk.
9669 for idx, dev in enumerate(self.instance.disks):
9670 if idx not in self.disks:
9673 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9675 self.cfg.SetDiskID(dev, node_name)
9677 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9678 names = _GenerateUniqueNames(self.lu, lv_names)
# New LVs reuse the volume groups of the old data/meta children;
# meta LV size is the fixed DRBD metadata size (128).
9680 vg_data = dev.children[0].logical_id[0]
9681 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9682 logical_id=(vg_data, names[0]))
9683 vg_meta = dev.children[1].logical_id[0]
9684 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9685 logical_id=(vg_meta, names[1]))
9687 new_lvs = [lv_data, lv_meta]
# Copy old children so later renames don't mutate the config objects.
9688 old_lvs = [child.Copy() for child in dev.children]
9689 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9691 # we pass force_create=True to force the LVM creation
9692 for new_lv in new_lvs:
9693 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9694 _GetInstanceInfoText(self.instance), False)
# _CheckDevices: after a replace, verify each DRBD device in iv_names is
# findable on node_name and not degraded; raises OpExecError otherwise.
# NOTE(review): chunk is line-sampled — the closing arguments of the
# OpExecError call after 9708 are missing; confirm upstream.
9698 def _CheckDevices(self, node_name, iv_names):
9699 for name, (dev, _, _) in iv_names.iteritems():
9700 self.cfg.SetDiskID(dev, node_name)
9702 result = self.rpc.call_blockdev_find(node_name, dev)
9704 msg = result.fail_msg
9705 if msg or not result.payload:
9707 msg = "disk not found"
9708 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9711 if result.payload.is_degraded:
9712 raise errors.OpExecError("DRBD device %s is degraded!" % name)
# _RemoveOldStorage: best-effort removal of the replaced (renamed) old
# LVs on node_name; failures only warn with a manual-cleanup hint.
# NOTE(review): chunk is line-sampled — the per-LV loop header between
# 9716 and 9719 and the `if msg:` guard before 9723 are missing.
9714 def _RemoveOldStorage(self, node_name, iv_names):
9715 for name, (_, old_lvs, _) in iv_names.iteritems():
9716 self.lu.LogInfo("Remove logical volumes for %s" % name)
9719 self.cfg.SetDiskID(lv, node_name)
9721 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9723 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9724 hint="remove unused LVs manually")
# _ExecDrbd8DiskOnly: replace the backing LVs of DRBD8 disks in place on
# target_node (primary or secondary stays the same). Steps: existence and
# VG checks, peer consistency, new LV creation, then per-disk
# detach -> rename old -> rename new -> attach, sync wait, old-LV removal
# (possibly early, releasing node locks before the sync).
# NOTE(review): chunk is line-sampled (embedded line numbers; e.g.
# steps_total/cstep initializers and several closing parens are missing)
# — confirm against upstream Ganeti cmdlib.py before editing.
9726 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9727 """Replace a disk on the primary or secondary for DRBD 8.
9729 The algorithm for replace is quite complicated:
9731 1. for each disk to be replaced:
9733 1. create new LVs on the target node with unique names
9734 1. detach old LVs from the drbd device
9735 1. rename old LVs to name_replaced.<time_t>
9736 1. rename new LVs to old LVs
9737 1. attach the new LVs (with the old names now) to the drbd device
9739 1. wait for sync across all devices
9741 1. for each modified disk:
9743 1. remove old LVs (which have the name name_replaces.<time_t>)
9745 Failures are not very well handled.
9750 # Step: check device activation
9751 self.lu.LogStep(1, steps_total, "Check device existence")
9752 self._CheckDisksExistence([self.other_node, self.target_node])
9753 self._CheckVolumeGroup([self.target_node, self.other_node])
9755 # Step: check other node consistency
9756 self.lu.LogStep(2, steps_total, "Check peer consistency")
9757 self._CheckDisksConsistency(self.other_node,
9758 self.other_node == self.instance.primary_node,
9761 # Step: create new storage
9762 self.lu.LogStep(3, steps_total, "Allocate new storage")
9763 iv_names = self._CreateNewStorage(self.target_node)
9765 # Step: for each lv, detach+rename*2+attach
9766 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9767 for dev, old_lvs, new_lvs in iv_names.itervalues():
9768 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9770 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9772 result.Raise("Can't detach drbd from local storage on node"
9773 " %s for device %s" % (self.target_node, dev.iv_name))
9775 #cfg.Update(instance)
9777 # ok, we created the new LVs, so now we know we have the needed
9778 # storage; as such, we proceed on the target node to rename
9779 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9780 # using the assumption that logical_id == physical_id (which in
9781 # turn is the unique_id on that node)
9783 # FIXME(iustin): use a better name for the replaced LVs
9784 temp_suffix = int(time.time())
9785 ren_fn = lambda d, suff: (d.physical_id[0],
9786 d.physical_id[1] + "_replaced-%s" % suff)
9788 # Build the rename list based on what LVs exist on the node
9789 rename_old_to_new = []
9790 for to_ren in old_lvs:
9791 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
# Only rename LVs the node actually reports as present.
9792 if not result.fail_msg and result.payload:
9794 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9796 self.lu.LogInfo("Renaming the old LVs on the target node")
9797 result = self.rpc.call_blockdev_rename(self.target_node,
9799 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9801 # Now we rename the new LVs to the old LVs
9802 self.lu.LogInfo("Renaming the new LVs on the target node")
9803 rename_new_to_old = [(new, old.physical_id)
9804 for old, new in zip(old_lvs, new_lvs)]
9805 result = self.rpc.call_blockdev_rename(self.target_node,
9807 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9809 # Intermediate steps of in memory modifications
9810 for old, new in zip(old_lvs, new_lvs):
9811 new.logical_id = old.logical_id
9812 self.cfg.SetDiskID(new, self.target_node)
9814 # We need to modify old_lvs so that removal later removes the
9815 # right LVs, not the newly added ones; note that old_lvs is a
9817 for disk in old_lvs:
9818 disk.logical_id = ren_fn(disk, temp_suffix)
9819 self.cfg.SetDiskID(disk, self.target_node)
9821 # Now that the new lvs have the old name, we can add them to the device
9822 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9823 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9825 msg = result.fail_msg
# On attach failure, roll back by removing the just-created LVs
# (best-effort; failures only warn) and abort.
9827 for new_lv in new_lvs:
9828 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9831 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9832 hint=("cleanup manually the unused logical"
9834 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9837 if self.early_release:
9838 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9840 self._RemoveOldStorage(self.target_node, iv_names)
9841 # WARNING: we release both node locks here, do not do other RPCs
9842 # than WaitForSync to the primary node
9843 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9844 names=[self.target_node, self.other_node])
9847 # This can fail as the old devices are degraded and _WaitForSync
9848 # does a combined result over all disks, so we don't check its return value
9849 self.lu.LogStep(cstep, steps_total, "Sync devices")
9851 _WaitForSync(self.lu, self.instance)
9853 # Check all devices manually
9854 self._CheckDevices(self.instance.primary_node, iv_names)
9856 # Step: remove old storage
9857 if not self.early_release:
9858 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9860 self._RemoveOldStorage(self.target_node, iv_names)
# _ExecDrbd8Secondary: move the DRBD8 secondary to new_node. Creates the
# LVs and standalone DRBD devices on the new node, shuts down DRBD on the
# old secondary, disconnects the primary's network, rewrites the disks'
# logical_ids in the config, reattaches the primary to the new secondary
# and waits for sync; old storage removal may happen early.
# NOTE(review): chunk is line-sampled (embedded line numbers; e.g. the
# try: around _CreateSingleBlockDev and the `raise` after
# ReleaseDRBDMinors are missing) — confirm against upstream cmdlib.py.
9862 def _ExecDrbd8Secondary(self, feedback_fn):
9863 """Replace the secondary node for DRBD 8.
9865 The algorithm for replace is quite complicated:
9866 - for all disks of the instance:
9867 - create new LVs on the new node with same names
9868 - shutdown the drbd device on the old secondary
9869 - disconnect the drbd network on the primary
9870 - create the drbd device on the new secondary
9871 - network attach the drbd on the primary, using an artifice:
9872 the drbd code for Attach() will connect to the network if it
9873 finds a device which is connected to the good local disks but
9875 - wait for sync across all devices
9876 - remove all disks from the old secondary
9878 Failures are not very well handled.
9883 pnode = self.instance.primary_node
9885 # Step: check device activation
9886 self.lu.LogStep(1, steps_total, "Check device existence")
9887 self._CheckDisksExistence([self.instance.primary_node])
9888 self._CheckVolumeGroup([self.instance.primary_node])
9890 # Step: check other node consistency
9891 self.lu.LogStep(2, steps_total, "Check peer consistency")
9892 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9894 # Step: create new storage
9895 self.lu.LogStep(3, steps_total, "Allocate new storage")
9896 for idx, dev in enumerate(self.instance.disks):
9897 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9898 (self.new_node, idx))
9899 # we pass force_create=True to force LVM creation
9900 for new_lv in dev.children:
9901 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9902 _GetInstanceInfoText(self.instance), False)
9904 # Step 4: dbrd minors and drbd setups changes
9905 # after this, we must manually remove the drbd minors on both the
9906 # error and the success paths
9907 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9908 minors = self.cfg.AllocateDRBDMinor([self.new_node
9909 for dev in self.instance.disks],
9911 logging.debug("Allocated minors %r", minors)
9914 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9915 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9916 (self.new_node, idx))
9917 # create new devices on new_node; note that we create two IDs:
9918 # one without port, so the drbd will be activated without
9919 # networking information on the new node at this stage, and one
9920 # with network, for the latter activation in step 4
9921 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
# Determine which end of the old logical_id is the primary so its
# minor (p_minor) can be reused in the new IDs.
9922 if self.instance.primary_node == o_node1:
9925 assert self.instance.primary_node == o_node2, "Three-node instance?"
9928 new_alone_id = (self.instance.primary_node, self.new_node, None,
9929 p_minor, new_minor, o_secret)
9930 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9931 p_minor, new_minor, o_secret)
9933 iv_names[idx] = (dev, dev.children, new_net_id)
9934 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9936 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9937 logical_id=new_alone_id,
9938 children=dev.children,
9941 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9942 _GetInstanceInfoText(self.instance), False)
# On failure, release the reserved DRBD minors before propagating.
9943 except errors.GenericError:
9944 self.cfg.ReleaseDRBDMinors(self.instance.name)
9947 # We have new devices, shutdown the drbd on the old secondary
9948 for idx, dev in enumerate(self.instance.disks):
9949 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9950 self.cfg.SetDiskID(dev, self.target_node)
9951 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9953 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9954 "node: %s" % (idx, msg),
9955 hint=("Please cleanup this device manually as"
9956 " soon as possible"))
9958 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9959 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9960 self.instance.disks)[pnode]
9962 msg = result.fail_msg
9964 # detaches didn't succeed (unlikely)
9965 self.cfg.ReleaseDRBDMinors(self.instance.name)
9966 raise errors.OpExecError("Can't detach the disks from the network on"
9967 " old node: %s" % (msg,))
9969 # if we managed to detach at least one, we update all the disks of
9970 # the instance to point to the new secondary
9971 self.lu.LogInfo("Updating instance configuration")
9972 for dev, _, new_logical_id in iv_names.itervalues():
9973 dev.logical_id = new_logical_id
9974 self.cfg.SetDiskID(dev, self.instance.primary_node)
9976 self.cfg.Update(self.instance, feedback_fn)
9978 # and now perform the drbd attach
9979 self.lu.LogInfo("Attaching primary drbds to new secondary"
9980 " (standalone => connected)")
9981 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9983 self.node_secondary_ip,
9984 self.instance.disks,
# Attach failures are non-fatal here; the admin is pointed at
# `gnt-instance info` instead.
9987 for to_node, to_result in result.items():
9988 msg = to_result.fail_msg
9990 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9992 hint=("please do a gnt-instance info to see the"
9993 " status of disks"))
9995 if self.early_release:
9996 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9998 self._RemoveOldStorage(self.target_node, iv_names)
9999 # WARNING: we release all node locks here, do not do other RPCs
10000 # than WaitForSync to the primary node
10001 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10002 names=[self.instance.primary_node,
10007 # This can fail as the old devices are degraded and _WaitForSync
10008 # does a combined result over all disks, so we don't check its return value
10009 self.lu.LogStep(cstep, steps_total, "Sync devices")
10011 _WaitForSync(self.lu, self.instance)
10013 # Check all devices manually
10014 self._CheckDevices(self.instance.primary_node, iv_names)
10016 # Step: remove old storage
10017 if not self.early_release:
10018 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10019 self._RemoveOldStorage(self.target_node, iv_names)
# LURepairNodeStorage: logical unit that runs a consistency-fix storage
# operation (SO_FIX_CONSISTENCY) on one node, refusing (or warning with
# ignore_consistency) when instances on that node have faulty disks.
# NOTE(review): chunk is line-sampled (embedded line numbers; e.g. the
# try: before _FindFaultyInstanceDisks is missing) — confirm upstream.
10022 class LURepairNodeStorage(NoHooksLU):
10023 """Repairs the volume group on a node.
10028 def CheckArguments(self):
10029 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10031 storage_type = self.op.storage_type
# Only storage types supporting the fix-consistency operation qualify.
10033 if (constants.SO_FIX_CONSISTENCY not in
10034 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10035 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10036 " repaired" % storage_type,
10037 errors.ECODE_INVAL)
10039 def ExpandNames(self):
10040 self.needed_locks = {
10041 locking.LEVEL_NODE: [self.op.node_name],
10044 def _CheckFaultyDisks(self, instance, node_name):
10045 """Ensure faulty disks abort the opcode or at least warn."""
10047 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10049 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10050 " node '%s'" % (instance.name, node_name),
10051 errors.ECODE_STATE)
# With ignore_consistency the error is downgraded to a warning.
10052 except errors.OpPrereqError, err:
10053 if self.op.ignore_consistency:
10054 self.proc.LogWarning(str(err.args[0]))
10058 def CheckPrereq(self):
10059 """Check prerequisites.
10062 # Check whether any instance on this node has faulty disks
10063 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10064 if not inst.admin_up:
10066 check_nodes = set(inst.all_nodes)
# The repaired node itself is excluded from the faulty-disk scan.
10067 check_nodes.discard(self.op.node_name)
10068 for inst_node_name in check_nodes:
10069 self._CheckFaultyDisks(inst, inst_node_name)
10071 def Exec(self, feedback_fn):
10072 feedback_fn("Repairing storage unit '%s' on %s ..." %
10073 (self.op.name, self.op.node_name))
10075 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10076 result = self.rpc.call_storage_execute(self.op.node_name,
10077 self.op.storage_type, st_args,
10079 constants.SO_FIX_CONSISTENCY)
10080 result.Raise("Failed to repair storage unit '%s' on %s" %
10081 (self.op.name, self.op.node_name))
# LUNodeEvacuate: evacuate primary and/or secondary instances off a node,
# either via an iallocator plan or to an explicit remote node (secondary
# replacement only). Returns a ResultWithJobs of follow-up jobs.
# NOTE(review): chunk is line-sampled (embedded line numbers; several
# else-branches and assignments are missing) — confirm against upstream
# Ganeti cmdlib.py before editing.
10084 class LUNodeEvacuate(NoHooksLU):
10085 """Evacuates instances off a list of nodes.
10090 def CheckArguments(self):
10091 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10093 def ExpandNames(self):
10094 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10096 if self.op.remote_node is not None:
10097 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10098 assert self.op.remote_node
10100 if self.op.remote_node == self.op.node_name:
10101 raise errors.OpPrereqError("Can not use evacuated node as a new"
10102 " secondary node", errors.ECODE_INVAL)
# Explicit remote node only supports secondary evacuation.
10104 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10105 raise errors.OpPrereqError("Without the use of an iallocator only"
10106 " secondary instances can be evacuated",
10107 errors.ECODE_INVAL)
10110 self.share_locks = _ShareAll()
10111 self.needed_locks = {
10112 locking.LEVEL_INSTANCE: [],
10113 locking.LEVEL_NODEGROUP: [],
10114 locking.LEVEL_NODE: [],
10117 if self.op.remote_node is None:
10118 # Iallocator will choose any node(s) in the same group
10119 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10121 group_nodes = frozenset([self.op.remote_node])
10123 # Determine nodes to be locked
10124 self.lock_nodes = set([self.op.node_name]) | group_nodes
10126 def _DetermineInstances(self):
10127 """Builds list of instances to operate on.
10130 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10132 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10133 # Primary instances only
10134 inst_fn = _GetNodePrimaryInstances
10135 assert self.op.remote_node is None, \
10136 "Evacuating primary instances requires iallocator"
10137 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10138 # Secondary instances only
10139 inst_fn = _GetNodeSecondaryInstances
10142 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10143 inst_fn = _GetNodeInstances
10145 return inst_fn(self.cfg, self.op.node_name)
10147 def DeclareLocks(self, level):
10148 if level == locking.LEVEL_INSTANCE:
10149 # Lock instances optimistically, needs verification once node and group
10150 # locks have been acquired
10151 self.needed_locks[locking.LEVEL_INSTANCE] = \
10152 set(i.name for i in self._DetermineInstances())
10154 elif level == locking.LEVEL_NODEGROUP:
10155 # Lock node groups optimistically, needs verification once nodes have
10157 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10158 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10160 elif level == locking.LEVEL_NODE:
10161 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10163 def CheckPrereq(self):
10165 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10166 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10167 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10169 assert owned_nodes == self.lock_nodes
# The optimistic locks above must still match the current config.
10171 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10172 if owned_groups != wanted_groups:
10173 raise errors.OpExecError("Node groups changed since locks were acquired,"
10174 " current groups are '%s', used to be '%s'" %
10175 (utils.CommaJoin(wanted_groups),
10176 utils.CommaJoin(owned_groups)))
10178 # Determine affected instances
10179 self.instances = self._DetermineInstances()
10180 self.instance_names = [i.name for i in self.instances]
10182 if set(self.instance_names) != owned_instances:
10183 raise errors.OpExecError("Instances on node '%s' changed since locks"
10184 " were acquired, current instances are '%s',"
10185 " used to be '%s'" %
10186 (self.op.node_name,
10187 utils.CommaJoin(self.instance_names),
10188 utils.CommaJoin(owned_instances)))
10190 if self.instance_names:
10191 self.LogInfo("Evacuating instances from node '%s': %s",
10193 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10195 self.LogInfo("No instances to evacuate from node '%s'",
10198 if self.op.remote_node is not None:
10199 for i in self.instances:
10200 if i.primary_node == self.op.remote_node:
10201 raise errors.OpPrereqError("Node %s is the primary node of"
10202 " instance %s, cannot use it as"
10204 (self.op.remote_node, i.name),
10205 errors.ECODE_INVAL)
10207 def Exec(self, feedback_fn):
10208 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10210 if not self.instance_names:
10211 # No instances to evacuate
10214 elif self.op.iallocator is not None:
10215 # TODO: Implement relocation to other group
10216 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10217 evac_mode=self.op.mode,
10218 instances=list(self.instance_names))
10220 ial.Run(self.op.iallocator)
10222 if not ial.success:
10223 raise errors.OpPrereqError("Can't compute node evacuation using"
10224 " iallocator '%s': %s" %
10225 (self.op.iallocator, ial.info),
10226 errors.ECODE_NORES)
10228 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10230 elif self.op.remote_node is not None:
10231 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
# One replace-disks (change-secondary) job per instance.
10233 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10234 remote_node=self.op.remote_node,
10236 mode=constants.REPLACE_DISK_CHG,
10237 early_release=self.op.early_release)]
10238 for instance_name in self.instance_names
10242 raise errors.ProgrammerError("No iallocator or remote node")
10244 return ResultWithJobs(jobs)
# _SetOpEarlyRelease: set op.early_release when the opcode has that
# attribute; the assert documents that OpInstanceReplaceDisks always does.
# NOTE(review): chunk is line-sampled — the try: before the assignment
# and the return are missing; confirm against upstream.
10247 def _SetOpEarlyRelease(early_release, op):
10248 """Sets C{early_release} flag on opcodes if available.
10252 op.early_release = early_release
10253 except AttributeError:
10254 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
# _NodeEvacDest: render an evacuation destination for display — the node
# list when use_nodes is set, otherwise (in a missing branch) presumably
# the group name.
# NOTE(review): chunk is line-sampled — the use_nodes branch structure is
# partially missing; confirm against upstream.
10259 def _NodeEvacDest(use_nodes, group, nodes):
10260 """Returns group or nodes depending on caller's choice.
10264 return utils.CommaJoin(nodes)
# _LoadNodeEvacResult: convert an iallocator node-evac/chg-group result
# tuple into a list of job opcode lists, warning about failed instances,
# logging the planned moves and stamping early_release on each opcode.
# NOTE(review): chunk is line-sampled — the `if failed:` guard and the
# final list-comprehension tail after 10299 are missing; confirm upstream.
10269 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10270 """Unpacks the result of change-group and node-evacuate iallocator requests.
10272 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10273 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10275 @type lu: L{LogicalUnit}
10276 @param lu: Logical unit instance
10277 @type alloc_result: tuple/list
10278 @param alloc_result: Result from iallocator
10279 @type early_release: bool
10280 @param early_release: Whether to release locks early if possible
10281 @type use_nodes: bool
10282 @param use_nodes: Whether to display node names instead of groups
10285 (moved, failed, jobs) = alloc_result
10288 lu.LogWarning("Unable to evacuate instances %s",
10289 utils.CommaJoin("%s (%s)" % (name, reason)
10290 for (name, reason) in failed))
10293 lu.LogInfo("Instances to be moved: %s",
10294 utils.CommaJoin("%s (to %s)" %
10295 (name, _NodeEvacDest(use_nodes, group, nodes))
10296 for (name, group, nodes) in moved))
10298 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10299 map(opcodes.OpCode.LoadOpCode, ops))
# LUInstanceGrowDisk: logical unit growing one disk of an instance by
# op.amount. Runs a dry-run grow on all nodes first, then the real grow,
# records the new size in the config and optionally waits for resync.
# NOTE(review): chunk is line-sampled (embedded line numbers; e.g. the
# `env = {` opener, `return` statements and a time.sleep around 10397 are
# missing) — confirm against upstream Ganeti cmdlib.py.
10303 class LUInstanceGrowDisk(LogicalUnit):
10304 """Grow a disk of an instance.
10307 HPATH = "disk-grow"
10308 HTYPE = constants.HTYPE_INSTANCE
10311 def ExpandNames(self):
10312 self._ExpandAndLockInstance()
10313 self.needed_locks[locking.LEVEL_NODE] = []
10314 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10316 def DeclareLocks(self, level):
10317 if level == locking.LEVEL_NODE:
10318 self._LockInstancesNodes()
10320 def BuildHooksEnv(self):
10321 """Build hooks env.
10323 This runs on the master, the primary and all the secondaries.
10327 "DISK": self.op.disk,
10328 "AMOUNT": self.op.amount,
10330 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10333 def BuildHooksNodes(self):
10334 """Build hooks nodes.
10337 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10340 def CheckPrereq(self):
10341 """Check prerequisites.
10343 This checks that the instance is in the cluster.
10346 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10347 assert instance is not None, \
10348 "Cannot retrieve locked instance %s" % self.op.instance_name
10349 nodenames = list(instance.all_nodes)
10350 for node in nodenames:
10351 _CheckNodeOnline(self, node)
10353 self.instance = instance
10355 if instance.disk_template not in constants.DTS_GROWABLE:
10356 raise errors.OpPrereqError("Instance's disk layout does not support"
10357 " growing", errors.ECODE_INVAL)
10359 self.disk = instance.FindDisk(self.op.disk)
# File-based templates skip the VG free-space check (not implemented).
10361 if instance.disk_template not in (constants.DT_FILE,
10362 constants.DT_SHARED_FILE):
10363 # TODO: check the free disk space for file, when that feature will be
10365 _CheckNodesFreeDiskPerVG(self, nodenames,
10366 self.disk.ComputeGrowth(self.op.amount))
10368 def Exec(self, feedback_fn):
10369 """Execute disk grow.
10372 instance = self.instance
10375 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10377 raise errors.OpExecError("Cannot activate block device to grow")
10379 # First run all grow ops in dry-run mode
10380 for node in instance.all_nodes:
10381 self.cfg.SetDiskID(disk, node)
10382 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10383 result.Raise("Grow request failed to node %s" % node)
10385 # We know that (as far as we can test) operations across different
10386 # nodes will succeed, time to run it for real
10387 for node in instance.all_nodes:
10388 self.cfg.SetDiskID(disk, node)
10389 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10390 result.Raise("Grow request failed to node %s" % node)
10392 # TODO: Rewrite code to work properly
10393 # DRBD goes into sync mode for a short amount of time after executing the
10394 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10395 # calling "resize" in sync mode fails. Sleeping for a short amount of
10396 # time is a work-around.
10399 disk.RecordGrow(self.op.amount)
10400 self.cfg.Update(instance, feedback_fn)
10401 if self.op.wait_for_sync:
10402 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10404 self.proc.LogWarning("Disk sync-ing has not returned a good"
10405 " status; please check the instance")
# Disks of a down instance are shut down again after the grow.
10406 if not instance.admin_up:
10407 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10408 elif not instance.admin_up:
10409 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10410 " not supposed to be running because no wait for"
10411 " sync mode was requested")
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  For each wanted instance this returns a dict combining configuration
  data from the cluster config with, unless C{self.op.static} is set,
  live data obtained via RPC from the primary node (run state and
  per-disk status as computed by L{_ComputeDiskStatus}).

  """
  def ExpandNames(self):
    # Start with no locks; they are added below depending on the opcode
    # flags (static / use_locking / explicit instance list)
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      # Read-only query, shared locks are sufficient
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      # Node locks are computed from the instance locks in DeclareLocks
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # Only derive node locks from the instance locks when locking was
    # actually requested
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      # Fall back to whatever instance locks were acquired in ExpandNames
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    # GetMultiInstanceInfo returns (name, object) pairs; keep the objects
    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device.

    @param node: node on which the device is queried via RPC
    @param instance_name: instance name, used only in error messages
    @param dev: the disk object to query

    """
    # Static query (or no node to ask): no live status available
    if self.op.static or not node:
    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Queries the primary node and (for DRBD) the secondary node, then
    recurses into the device's children.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    # Recurse into child devices with the same primary/secondary pair
    dev_children = map(compat.partial(self._ComputeDiskStatus,
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        # Static query or offline primary: no live run state available
        remote_state = None
        self.LogWarning("Primary node %s is marked offline, returning static"
                        " information only for instance %s" %
                        (pnode.name, instance.name))
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instances's parameters.

  Handles NIC and disk add/remove/modify operations, hypervisor, backend
  and OS parameter changes, OS renames and disk template conversions
  (see C{_DISK_CONVERSIONS} for the supported conversions).

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    # Reject completely empty modification requests
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      # Globally-defined hypervisor parameters may not be overridden
      # per-instance
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation: each entry is (operation-or-index, parameters)
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
        # not add/remove: the op must be a valid index into the disk list
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        # New disks default to read-write mode and require a size
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        # Store the normalized (integer) size back into the parameters
        disk_dict[constants.IDISK_SIZE] = size
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    # Mirrored templates need a secondary node to mirror to
    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation: same (operation-or-index, parameters) structure
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
      elif nic_op == constants.DDM_ADD:
        # not add/remove: must be a valid index into the NIC list
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        # The string "none" clears the IP; anything else must be valid
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      # "bridge" is a legacy alias for the link parameter
      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        # New NICs without an explicit MAC get an auto-generated one
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        # 'auto' only makes sense when adding a NIC, not when modifying one
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    # Lock the instance; node locks are derived from it in DeclareLocks
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      # A disk template conversion also needs the new secondary locked
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    if constants.BE_MEMORY in self.be_new:
      args["memory"] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    nic_override = dict(self.op.nics)
    # Rebuild the NIC list as it will look after the modifications
    for idx, nic in enumerate(self.instance.nics):
      if idx in nic_override:
        this_nic_override = nic_override[idx]
        this_nic_override = {}
      if constants.INIC_IP in this_nic_override:
        ip = this_nic_override[constants.INIC_IP]
      if constants.INIC_MAC in this_nic_override:
        mac = this_nic_override[constants.INIC_MAC]
      if idx in self.nic_pnew:
        nicparams = self.nic_pnew[idx]
        nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
      mode = nicparams[constants.NIC_MODE]
      link = nicparams[constants.NIC_LINK]
      args["nics"].append((ip, mac, mode, link))
    if constants.DDM_ADD in nic_override:
      ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
      mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
      nicparams = self.nic_pnew[constants.DDM_ADD]
      mode = nicparams[constants.NIC_MODE]
      link = nicparams[constants.NIC_LINK]
      args["nics"].append((ip, mac, mode, link))
    elif constants.DDM_REMOVE in nic_override:
      # Only the last NIC can be removed, so drop the last entry
      del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change: verify the target OS exists on the primary node (skipped
    # when --force is given)
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # Syntax check on the full (defaults-filled) parameter set, then
      # verify them on all relevant nodes
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
      self.be_new = self.be_inst = {}
    be_old = cluster.FillBE(instance)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)

    # Memory increase (without --force): verify enough free memory exists
    # on the primary (and, with auto_balance, the secondaries)
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
        # Memory that would be missing after subtracting what the instance
        # already uses from the node's reported free memory
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload["memory_free"])
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        # Secondaries must also have room, or failover would become
        # impossible
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing; pinst holds the new params without defaults, pnew the
    # fully-filled versions
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
      if nic_op != constants.DDM_ADD:
        # modifying an existing NIC: the index must be valid
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
        old_nic_params = {}

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      # "bridge" is translated to the link parameter
      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        # Verify the target bridge exists on the primary node; a failure
        # is only a warning when --force is given
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
            self.warn.append(msg)
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        # Routed NICs must always have an IP
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
          nic_ip = old_nic_ip
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
          # or validate/reserve the current one
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # Only the last disk may be removed, and never the last remaining
        # one; removal also requires the instance to be down
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # modifying an existing disk: the index must be valid
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    Creates the DRBD devices on top of the existing LVs (which become
    the data children), renames the old LVs into place, then waits for
    the initial sync.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating aditional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    Keeps the data LVs (first DRBD children) as the new plain disks and
    removes the DRBD layer plus the volumes on the secondary node.

    """
    instance = self.instance
    # This conversion only makes sense with exactly one secondary
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      # children[1] is the DRBD metadata volume, no longer needed
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    instance = self.instance

    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # File-based disks share the directory of the first disk
        if instance.disk_template in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
        # On failure, release any DRBD minors reserved for the conversion
        self.cfg.ReleaseDRBDMinors(instance.name)
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
        # change a given nic
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

  # Supported (from, to) disk template conversions and their handlers
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
class LUInstanceChangeGroup(LogicalUnit):
  """Moves an instance to another node group.

  The actual moves are computed by the configured iallocator and
  submitted as separate jobs (see L{ResultWithJobs}).

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE

  def ExpandNames(self):
    # Read-only computation, shared locks are sufficient
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      # Resolve group names/UUIDs given by the user to UUIDs
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids
      "TARGET_GROUPS": " ".join(self.target_uuids),

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    # Ask the iallocator to compute the moves to the target groups
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    # The jobs are submitted by the opcode processor, not executed here
    return ResultWithJobs(jobs)
class LUBackupQuery(NoHooksLU):
  """Query the exports list.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      # No nodes given: query all of them
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; a node that failed to answer is mapped to C{False}

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    # NOTE(review): the accumulator initialisation, else branch and return
    # were lost in extraction; restored from the visible loop structure.
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the instance exists, its primary node is online, and caches
    the cluster domain secret for remote-export signing.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    For remote exports, creates an X509 certificate on the primary node and
    returns the handshake/key/CA information the destination cluster needs;
    local exports need no preparation.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      # NOTE(review): the "return {" opener, the salt element and the final
      # "return None" were lost in extraction; restored from the visible keys.
      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
11482 class LUBackupExport(LogicalUnit):
11483 """Export an instance to an image in the cluster.
11486 HPATH = "instance-export"
11487 HTYPE = constants.HTYPE_INSTANCE
11490 def CheckArguments(self):
11491 """Check the arguments.
11494 self.x509_key_name = self.op.x509_key_name
11495 self.dest_x509_ca_pem = self.op.destination_x509_ca
11497 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11498 if not self.x509_key_name:
11499 raise errors.OpPrereqError("Missing X509 key name for encryption",
11500 errors.ECODE_INVAL)
11502 if not self.dest_x509_ca_pem:
11503 raise errors.OpPrereqError("Missing destination X509 CA",
11504 errors.ECODE_INVAL)
11506 def ExpandNames(self):
11507 self._ExpandAndLockInstance()
11509 # Lock all nodes for local exports
11510 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11511 # FIXME: lock only instance primary and destination node
11513 # Sad but true, for now we have do lock all nodes, as we don't know where
11514 # the previous export might be, and in this LU we search for it and
11515 # remove it from its current node. In the future we could fix this by:
11516 # - making a tasklet to search (share-lock all), then create the
11517 # new one, then one to remove, after
11518 # - removing the removal operation altogether
11519 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11521 def DeclareLocks(self, level):
11522 """Last minute lock declaration."""
11523 # All nodes are locked anyway, so nothing to do here.
11525 def BuildHooksEnv(self):
11526 """Build hooks env.
11528 This will run on the master, primary node and target node.
11532 "EXPORT_MODE": self.op.mode,
11533 "EXPORT_NODE": self.op.target_node,
11534 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11535 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11536 # TODO: Generic function for boolean env variables
11537 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11540 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11544 def BuildHooksNodes(self):
11545 """Build hooks nodes.
11548 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11550 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11551 nl.append(self.op.target_node)
11555 def CheckPrereq(self):
11556 """Check prerequisites.
11558 This checks that the instance and node names are valid.
11561 instance_name = self.op.instance_name
11563 self.instance = self.cfg.GetInstanceInfo(instance_name)
11564 assert self.instance is not None, \
11565 "Cannot retrieve locked instance %s" % self.op.instance_name
11566 _CheckNodeOnline(self, self.instance.primary_node)
11568 if (self.op.remove_instance and self.instance.admin_up and
11569 not self.op.shutdown):
11570 raise errors.OpPrereqError("Can not remove instance without shutting it"
11573 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11574 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11575 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11576 assert self.dst_node is not None
11578 _CheckNodeOnline(self, self.dst_node.name)
11579 _CheckNodeNotDrained(self, self.dst_node.name)
11582 self.dest_disk_info = None
11583 self.dest_x509_ca = None
11585 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11586 self.dst_node = None
11588 if len(self.op.target_node) != len(self.instance.disks):
11589 raise errors.OpPrereqError(("Received destination information for %s"
11590 " disks, but instance %s has %s disks") %
11591 (len(self.op.target_node), instance_name,
11592 len(self.instance.disks)),
11593 errors.ECODE_INVAL)
11595 cds = _GetClusterDomainSecret()
11597 # Check X509 key name
11599 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11600 except (TypeError, ValueError), err:
11601 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11603 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11604 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11605 errors.ECODE_INVAL)
11607 # Load and verify CA
11609 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11610 except OpenSSL.crypto.Error, err:
11611 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11612 (err, ), errors.ECODE_INVAL)
11614 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11615 if errcode is not None:
11616 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11617 (msg, ), errors.ECODE_INVAL)
11619 self.dest_x509_ca = cert
11621 # Verify target information
11623 for idx, disk_data in enumerate(self.op.target_node):
11625 (host, port, magic) = \
11626 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11627 except errors.GenericError, err:
11628 raise errors.OpPrereqError("Target info for disk %s: %s" %
11629 (idx, err), errors.ECODE_INVAL)
11631 disk_info.append((host, port, magic))
11633 assert len(disk_info) == len(self.op.target_node)
11634 self.dest_disk_info = disk_info
11637 raise errors.ProgrammerError("Unhandled export mode %r" %
11640 # instance disk type verification
11641 # TODO: Implement export support for file-based disks
11642 for disk in self.instance.disks:
11643 if disk.dev_type == constants.LD_FILE:
11644 raise errors.OpPrereqError("Export not supported for instances with"
11645 " file-based disks", errors.ECODE_INVAL)
11647 def _CleanupExports(self, feedback_fn):
11648 """Removes exports of current instance from all other nodes.
11650 If an instance in a cluster with nodes A..D was exported to node C, its
11651 exports will be removed from the nodes A, B and D.
11654 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11656 nodelist = self.cfg.GetNodeList()
11657 nodelist.remove(self.dst_node.name)
11659 # on one-node clusters nodelist will be empty after the removal
11660 # if we proceed the backup would be removed because OpBackupQuery
11661 # substitutes an empty list with the full cluster node list.
11662 iname = self.instance.name
11664 feedback_fn("Removing old exports for instance %s" % iname)
11665 exportlist = self.rpc.call_export_list(nodelist)
11666 for node in exportlist:
11667 if exportlist[node].fail_msg:
11669 if iname in exportlist[node].payload:
11670 msg = self.rpc.call_export_remove(node, iname).fail_msg
11672 self.LogWarning("Could not remove older export for instance %s"
11673 " on node %s: %s", iname, node, msg)
11675 def Exec(self, feedback_fn):
11676 """Export an instance to an image in the cluster.
11679 assert self.op.mode in constants.EXPORT_MODES
11681 instance = self.instance
11682 src_node = instance.primary_node
11684 if self.op.shutdown:
11685 # shutdown the instance, but not the disks
11686 feedback_fn("Shutting down instance %s" % instance.name)
11687 result = self.rpc.call_instance_shutdown(src_node, instance,
11688 self.op.shutdown_timeout)
11689 # TODO: Maybe ignore failures if ignore_remove_failures is set
11690 result.Raise("Could not shutdown instance %s on"
11691 " node %s" % (instance.name, src_node))
11693 # set the disks ID correctly since call_instance_start needs the
11694 # correct drbd minor to create the symlinks
11695 for disk in instance.disks:
11696 self.cfg.SetDiskID(disk, src_node)
11698 activate_disks = (not instance.admin_up)
11701 # Activate the instance disks if we'exporting a stopped instance
11702 feedback_fn("Activating disks for %s" % instance.name)
11703 _StartInstanceDisks(self, instance, None)
11706 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11709 helper.CreateSnapshots()
11711 if (self.op.shutdown and instance.admin_up and
11712 not self.op.remove_instance):
11713 assert not activate_disks
11714 feedback_fn("Starting instance %s" % instance.name)
11715 result = self.rpc.call_instance_start(src_node, instance,
11717 msg = result.fail_msg
11719 feedback_fn("Failed to start instance: %s" % msg)
11720 _ShutdownInstanceDisks(self, instance)
11721 raise errors.OpExecError("Could not start instance: %s" % msg)
11723 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11724 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11725 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11726 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11727 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11729 (key_name, _, _) = self.x509_key_name
11732 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11735 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11736 key_name, dest_ca_pem,
11741 # Check for backwards compatibility
11742 assert len(dresults) == len(instance.disks)
11743 assert compat.all(isinstance(i, bool) for i in dresults), \
11744 "Not all results are boolean: %r" % dresults
11748 feedback_fn("Deactivating disks for %s" % instance.name)
11749 _ShutdownInstanceDisks(self, instance)
11751 if not (compat.all(dresults) and fin_resu):
11754 failures.append("export finalization")
11755 if not compat.all(dresults):
11756 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11758 failures.append("disk export: disk(s) %s" % fdsk)
11760 raise errors.OpExecError("Export failed, errors in %s" %
11761 utils.CommaJoin(failures))
11763 # At this point, the export was successful, we can cleanup/finish
11765 # Remove instance if requested
11766 if self.op.remove_instance:
11767 feedback_fn("Removing instance %s" % instance.name)
11768 _RemoveInstance(self, feedback_fn, instance,
11769 self.op.ignore_remove_failures)
11771 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11772 self._CleanupExports(feedback_fn)
11774 return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    # NOTE(review): the fqdn_warn/found flag initialisations and updates were
    # lost in extraction; restored from the visible final warning logic.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    # EAFP: a successful lookup means the name is already taken; the
    # expected (good) path is the OpPrereqError from LookupNodeGroup.
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    # NOTE(review): the "if new_splits:" guard and else branch were lost in
    # extraction; restored from the visible force/warning logic.
    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only assignments that actually move a node to a different group matter
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      # An instance is "split" when its nodes span more than one group
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    # NOTE(review): the "if not self.names:" / "else:" skeleton and the
    # missing/wanted initialisers were lost in extraction; restored from the
    # visible name-or-UUID resolution loop.
    if not self.names:
      # No names requested: return all groups, sorted by name
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All work is delegated to a L{_GroupQuery} helper instance.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    # NOTE(review): the "all_changes = [" opener was lost in extraction;
    # restored from the visible count(None) check below.
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @return: list of (parameter, new value) pairs that were changed

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing an (empty) node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    # EAFP: a successful lookup means the new name is already taken
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master and on all members of the renamed group.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  """Logical unit for evacuating all instances out of a node group.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Verify locked objects are unchanged and compute target groups.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master and on all members of the evacuated group.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Ask the iallocator for evacuation jobs and return them for submission.

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.
  It expands the opcode's target name and acquires the matching lock in
  L{ExpandNames}, and resolves the target object in L{CheckPrereq}.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves C{self.target} to the configuration object named by the
    opcode; raises for unknown tag kinds.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
12545 class LUTagsSearch(NoHooksLU):
12546 """Searches the tags for a given pattern.
12551 def ExpandNames(self):
12552 self.needed_locks = {}
12554 def CheckPrereq(self):
12555 """Check prerequisites.
12557 This checks the pattern passed for validity by compiling it.
12561 self.re = re.compile(self.op.pattern)
12562 except re.error, err:
12563 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12564 (self.op.pattern, err), errors.ECODE_INVAL)
12566 def Exec(self, feedback_fn):
12567 """Returns the tag list.
12571 tgts = [("/cluster", cfg.GetClusterInfo())]
12572 ilist = cfg.GetAllInstancesInfo().values()
12573 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12574 nlist = cfg.GetAllNodesInfo().values()
12575 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12576 tgts.extend(("/nodegroup/%s" % n.name, n)
12577 for n in cfg.GetAllNodeGroupsInfo().values())
12579 for path, target in tgts:
12580 for tag in target.GetTags():
12581 if self.re.search(tag):
12582 results.append((path, tag))
12586 class LUTagsSet(TagsLU):
12587 """Sets a tag on a given object.
12592 def CheckPrereq(self):
12593 """Check prerequisites.
12595 This checks the type and length of the tag name and value.
12598 TagsLU.CheckPrereq(self)
12599 for tag in self.op.tags:
12600 objects.TaggableObject.ValidateTag(tag)
12602 def Exec(self, feedback_fn):
12607 for tag in self.op.tags:
12608 self.target.AddTag(tag)
12609 except errors.TagError, err:
12610 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12611 self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    # Fail early if any requested tag is not present on the target
    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    Sleeps on the master (locally) and/or on the requested nodes (via RPC),
    depending on the opcode parameters.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
12696 class LUTestJqueue(NoHooksLU):
12697 """Utility LU to test some aspects of the job queue.
12702 # Must be lower than default timeout for WaitForJobChange to see whether it
12703 # notices changed jobs
12704 _CLIENT_CONNECT_TIMEOUT = 20.0
12705 _CLIENT_CONFIRM_TIMEOUT = 60.0
12708 def _NotifyUsingSocket(cls, cb, errcls):
12709 """Opens a Unix socket and waits for another program to connect.
12712 @param cb: Callback to send socket name to client
12713 @type errcls: class
12714 @param errcls: Exception class to use for errors
12717 # Using a temporary directory as there's no easy way to create temporary
12718 # sockets without writing a custom loop around tempfile.mktemp and
12720 tmpdir = tempfile.mkdtemp()
12722 tmpsock = utils.PathJoin(tmpdir, "sock")
12724 logging.debug("Creating temporary socket at %s", tmpsock)
12725 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12730 # Send details to client
12733 # Wait for client to connect before continuing
12734 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12736 (conn, _) = sock.accept()
12737 except socket.error, err:
12738 raise errcls("Client didn't connect in time (%s)" % err)
12742 # Remove as soon as client is connected
12743 shutil.rmtree(tmpdir)
12745 # Wait for client to close
12748 # pylint: disable=E1101
12749 # Instance of '_socketobject' has no ... member
12750 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12752 except socket.error, err:
12753 raise errcls("Client failed to confirm notification (%s)" % err)
12757 def _SendNotification(self, test, arg, sockname):
12758 """Sends a notification to the client.
12761 @param test: Test name
12762 @param arg: Test argument (depends on test)
12763 @type sockname: string
12764 @param sockname: Socket path
12767 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12769 def _Notify(self, prereq, test, arg):
12770 """Notifies the client of a test.
12773 @param prereq: Whether this is a prereq-phase test
12775 @param test: Test name
12776 @param arg: Test argument (depends on test)
12780 errcls = errors.OpPrereqError
12782 errcls = errors.OpExecError
12784 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12788 def CheckArguments(self):
12789 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12790 self.expandnames_calls = 0
12792 def ExpandNames(self):
12793 checkargs_calls = getattr(self, "checkargs_calls", 0)
12794 if checkargs_calls < 1:
12795 raise errors.ProgrammerError("CheckArguments was not called")
12797 self.expandnames_calls += 1
12799 if self.op.notify_waitlock:
12800 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12802 self.LogInfo("Expanding names")
12804 # Get lock on master node (just to get a lock, not for a particular reason)
12805 self.needed_locks = {
12806 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12809 def Exec(self, feedback_fn):
12810 if self.expandnames_calls < 1:
12811 raise errors.ProgrammerError("ExpandNames was not called")
12813 if self.op.notify_exec:
12814 self._Notify(False, constants.JQT_EXEC, None)
12816 self.LogInfo("Executing")
12818 if self.op.log_messages:
12819 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12820 for idx, msg in enumerate(self.op.log_messages):
12821 self.LogInfo("Sending log message %s", idx + 1)
12822 feedback_fn(constants.JQT_MSGPREFIX + msg)
12823 # Report how many test messages have been sent
12824 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12827 raise errors.OpExecError("Opcode failure was requested")
12832 class IAllocator(object):
12833 """IAllocator framework.
12835 An IAllocator instance has three sets of attributes:
12836 - cfg that is needed to query the cluster
12837 - input data (all members of the _KEYS class attribute are required)
12838 - four buffer attributes (in|out_data|text), that represent the
12839 input (to the external script) in text and data structure format,
12840 and the output from it, again in two formats
12841 - the result variables from the script (success, info, nodes) for
12845 # pylint: disable=R0902
12846 # lots of instance attributes
12848 def __init__(self, cfg, rpc, mode, **kwargs):
12851 # init buffer variables
12852 self.in_text = self.out_text = self.in_data = self.out_data = None
12853 # init all input fields so that pylint is happy
12855 self.memory = self.disks = self.disk_template = None
12856 self.os = self.tags = self.nics = self.vcpus = None
12857 self.hypervisor = None
12858 self.relocate_from = None
12860 self.instances = None
12861 self.evac_mode = None
12862 self.target_groups = []
12864 self.required_nodes = None
12865 # init result fields
12866 self.success = self.info = self.result = None
12869 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12871 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12872 " IAllocator" % self.mode)
12874 keyset = [n for (n, _) in keydata]
12877 if key not in keyset:
12878 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12879 " IAllocator" % key)
12880 setattr(self, key, kwargs[key])
12883 if key not in kwargs:
12884 raise errors.ProgrammerError("Missing input parameter '%s' to"
12885 " IAllocator" % key)
12886 self._BuildInputData(compat.partial(fn, self), keydata)
12888 def _ComputeClusterData(self):
12889 """Compute the generic allocator input data.
12891 This is the data that is independent of the actual operation.
12895 cluster_info = cfg.GetClusterInfo()
12898 "version": constants.IALLOCATOR_VERSION,
12899 "cluster_name": cfg.GetClusterName(),
12900 "cluster_tags": list(cluster_info.GetTags()),
12901 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12902 # we don't have job IDs
12904 ninfo = cfg.GetAllNodesInfo()
12905 iinfo = cfg.GetAllInstancesInfo().values()
12906 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12909 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12911 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12912 hypervisor_name = self.hypervisor
12913 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12914 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12916 hypervisor_name = cluster_info.enabled_hypervisors[0]
12918 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12921 self.rpc.call_all_instances_info(node_list,
12922 cluster_info.enabled_hypervisors)
12924 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12926 config_ndata = self._ComputeBasicNodeData(ninfo)
12927 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12928 i_list, config_ndata)
12929 assert len(data["nodes"]) == len(ninfo), \
12930 "Incomplete node data computed"
12932 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12934 self.in_data = data
12937 def _ComputeNodeGroupData(cfg):
12938 """Compute node groups data.
12941 ng = dict((guuid, {
12942 "name": gdata.name,
12943 "alloc_policy": gdata.alloc_policy,
12945 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12950 def _ComputeBasicNodeData(node_cfg):
12951 """Compute global node data.
12954 @returns: a dict of name: (node dict, node config)
12957 # fill in static (config-based) values
12958 node_results = dict((ninfo.name, {
12959 "tags": list(ninfo.GetTags()),
12960 "primary_ip": ninfo.primary_ip,
12961 "secondary_ip": ninfo.secondary_ip,
12962 "offline": ninfo.offline,
12963 "drained": ninfo.drained,
12964 "master_candidate": ninfo.master_candidate,
12965 "group": ninfo.group,
12966 "master_capable": ninfo.master_capable,
12967 "vm_capable": ninfo.vm_capable,
12969 for ninfo in node_cfg.values())
12971 return node_results
12974 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12976 """Compute global node data.
12978 @param node_results: the basic node structures as filled from the config
12981 # make a copy of the current dict
12982 node_results = dict(node_results)
12983 for nname, nresult in node_data.items():
12984 assert nname in node_results, "Missing basic data for node %s" % nname
12985 ninfo = node_cfg[nname]
12987 if not (ninfo.offline or ninfo.drained):
12988 nresult.Raise("Can't get data for node %s" % nname)
12989 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12991 remote_info = nresult.payload
12993 for attr in ["memory_total", "memory_free", "memory_dom0",
12994 "vg_size", "vg_free", "cpu_total"]:
12995 if attr not in remote_info:
12996 raise errors.OpExecError("Node '%s' didn't return attribute"
12997 " '%s'" % (nname, attr))
12998 if not isinstance(remote_info[attr], int):
12999 raise errors.OpExecError("Node '%s' returned invalid value"
13001 (nname, attr, remote_info[attr]))
13002 # compute memory used by primary instances
13003 i_p_mem = i_p_up_mem = 0
13004 for iinfo, beinfo in i_list:
13005 if iinfo.primary_node == nname:
13006 i_p_mem += beinfo[constants.BE_MEMORY]
13007 if iinfo.name not in node_iinfo[nname].payload:
13010 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13011 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13012 remote_info["memory_free"] -= max(0, i_mem_diff)
13015 i_p_up_mem += beinfo[constants.BE_MEMORY]
13017 # compute memory used by instances
13019 "total_memory": remote_info["memory_total"],
13020 "reserved_memory": remote_info["memory_dom0"],
13021 "free_memory": remote_info["memory_free"],
13022 "total_disk": remote_info["vg_size"],
13023 "free_disk": remote_info["vg_free"],
13024 "total_cpus": remote_info["cpu_total"],
13025 "i_pri_memory": i_p_mem,
13026 "i_pri_up_memory": i_p_up_mem,
13028 pnr_dyn.update(node_results[nname])
13029 node_results[nname] = pnr_dyn
13031 return node_results
13034 def _ComputeInstanceData(cluster_info, i_list):
13035 """Compute global instance data.
13039 for iinfo, beinfo in i_list:
13041 for nic in iinfo.nics:
13042 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13046 "mode": filled_params[constants.NIC_MODE],
13047 "link": filled_params[constants.NIC_LINK],
13049 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13050 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13051 nic_data.append(nic_dict)
13053 "tags": list(iinfo.GetTags()),
13054 "admin_up": iinfo.admin_up,
13055 "vcpus": beinfo[constants.BE_VCPUS],
13056 "memory": beinfo[constants.BE_MEMORY],
13058 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13060 "disks": [{constants.IDISK_SIZE: dsk.size,
13061 constants.IDISK_MODE: dsk.mode}
13062 for dsk in iinfo.disks],
13063 "disk_template": iinfo.disk_template,
13064 "hypervisor": iinfo.hypervisor,
13066 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13068 instance_data[iinfo.name] = pir
13070 return instance_data
13072 def _AddNewInstance(self):
13073 """Add new instance data to allocator structure.
13075 This in combination with _AllocatorGetClusterData will create the
13076 correct structure needed as input for the allocator.
13078 The checks for the completeness of the opcode must have already been
13082 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13084 if self.disk_template in constants.DTS_INT_MIRROR:
13085 self.required_nodes = 2
13087 self.required_nodes = 1
13091 "disk_template": self.disk_template,
13094 "vcpus": self.vcpus,
13095 "memory": self.memory,
13096 "disks": self.disks,
13097 "disk_space_total": disk_space,
13099 "required_nodes": self.required_nodes,
13100 "hypervisor": self.hypervisor,
13105 def _AddRelocateInstance(self):
13106 """Add relocate instance data to allocator structure.
13108 This in combination with _IAllocatorGetClusterData will create the
13109 correct structure needed as input for the allocator.
13111 The checks for the completeness of the opcode must have already been
13115 instance = self.cfg.GetInstanceInfo(self.name)
13116 if instance is None:
13117 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13118 " IAllocator" % self.name)
13120 if instance.disk_template not in constants.DTS_MIRRORED:
13121 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13122 errors.ECODE_INVAL)
13124 if instance.disk_template in constants.DTS_INT_MIRROR and \
13125 len(instance.secondary_nodes) != 1:
13126 raise errors.OpPrereqError("Instance has not exactly one secondary node",
13127 errors.ECODE_STATE)
13129 self.required_nodes = 1
13130 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13131 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13135 "disk_space_total": disk_space,
13136 "required_nodes": self.required_nodes,
13137 "relocate_from": self.relocate_from,
13141 def _AddNodeEvacuate(self):
13142 """Get data for node-evacuate requests.
13146 "instances": self.instances,
13147 "evac_mode": self.evac_mode,
13150 def _AddChangeGroup(self):
13151 """Get data for node-evacuate requests.
13155 "instances": self.instances,
13156 "target_groups": self.target_groups,
13159 def _BuildInputData(self, fn, keydata):
13160 """Build input data structures.
13163 self._ComputeClusterData()
13166 request["type"] = self.mode
13167 for keyname, keytype in keydata:
13168 if keyname not in request:
13169 raise errors.ProgrammerError("Request parameter %s is missing" %
13171 val = request[keyname]
13172 if not keytype(val):
13173 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13174 " validation, value %s, expected"
13175 " type %s" % (keyname, val, keytype))
13176 self.in_data["request"] = request
13178 self.in_text = serializer.Dump(self.in_data)
13180 _STRING_LIST = ht.TListOf(ht.TString)
13181 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13182 # pylint: disable=E1101
13183 # Class '...' has no 'OP_ID' member
13184 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13185 opcodes.OpInstanceMigrate.OP_ID,
13186 opcodes.OpInstanceReplaceDisks.OP_ID])
13190 ht.TListOf(ht.TAnd(ht.TIsLength(3),
13191 ht.TItems([ht.TNonEmptyString,
13192 ht.TNonEmptyString,
13193 ht.TListOf(ht.TNonEmptyString),
13196 ht.TListOf(ht.TAnd(ht.TIsLength(2),
13197 ht.TItems([ht.TNonEmptyString,
13200 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13201 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13204 constants.IALLOCATOR_MODE_ALLOC:
13207 ("name", ht.TString),
13208 ("memory", ht.TInt),
13209 ("disks", ht.TListOf(ht.TDict)),
13210 ("disk_template", ht.TString),
13211 ("os", ht.TString),
13212 ("tags", _STRING_LIST),
13213 ("nics", ht.TListOf(ht.TDict)),
13214 ("vcpus", ht.TInt),
13215 ("hypervisor", ht.TString),
13217 constants.IALLOCATOR_MODE_RELOC:
13218 (_AddRelocateInstance,
13219 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13221 constants.IALLOCATOR_MODE_NODE_EVAC:
13222 (_AddNodeEvacuate, [
13223 ("instances", _STRING_LIST),
13224 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13226 constants.IALLOCATOR_MODE_CHG_GROUP:
13227 (_AddChangeGroup, [
13228 ("instances", _STRING_LIST),
13229 ("target_groups", _STRING_LIST),
13233 def Run(self, name, validate=True, call_fn=None):
13234 """Run an instance allocator and return the results.
13237 if call_fn is None:
13238 call_fn = self.rpc.call_iallocator_runner
13240 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13241 result.Raise("Failure while running the iallocator script")
13243 self.out_text = result.payload
13245 self._ValidateResult()
13247 def _ValidateResult(self):
13248 """Process the allocator results.
13250 This will process and if successful save the result in
13251 self.out_data and the other parameters.
13255 rdict = serializer.Load(self.out_text)
13256 except Exception, err:
13257 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13259 if not isinstance(rdict, dict):
13260 raise errors.OpExecError("Can't parse iallocator results: not a dict")
13262 # TODO: remove backwards compatiblity in later versions
13263 if "nodes" in rdict and "result" not in rdict:
13264 rdict["result"] = rdict["nodes"]
13267 for key in "success", "info", "result":
13268 if key not in rdict:
13269 raise errors.OpExecError("Can't parse iallocator results:"
13270 " missing key '%s'" % key)
13271 setattr(self, key, rdict[key])
13273 if not self._result_check(self.result):
13274 raise errors.OpExecError("Iallocator returned invalid result,"
13275 " expected %s, got %s" %
13276 (self._result_check, self.result),
13277 errors.ECODE_INVAL)
13279 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13280 assert self.relocate_from is not None
13281 assert self.required_nodes == 1
13283 node2group = dict((name, ndata["group"])
13284 for (name, ndata) in self.in_data["nodes"].items())
13286 fn = compat.partial(self._NodesToGroups, node2group,
13287 self.in_data["nodegroups"])
13289 instance = self.cfg.GetInstanceInfo(self.name)
13290 request_groups = fn(self.relocate_from + [instance.primary_node])
13291 result_groups = fn(rdict["result"] + [instance.primary_node])
13293 if self.success and not set(result_groups).issubset(request_groups):
13294 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13295 " differ from original groups (%s)" %
13296 (utils.CommaJoin(result_groups),
13297 utils.CommaJoin(request_groups)))
13299 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13300 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13302 self.out_data = rdict
13305 def _NodesToGroups(node2group, groups, nodes):
13306 """Returns a list of unique group names for a list of nodes.
13308 @type node2group: dict
13309 @param node2group: Map from node name to group UUID
13311 @param groups: Group information
13313 @param nodes: Node names
13320 group_uuid = node2group[node]
13322 # Ignore unknown node
13326 group = groups[group_uuid]
13328 # Can't find group, let's use UUID
13329 group_name = group_uuid
13331 group_name = group["name"]
13333 result.add(group_name)
13335 return sorted(result)
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the director and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      # "in" direction only builds and returns the input text
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# Every query type reachable via an opcode must have an implementation here
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)