4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module
import re
import logging
import copy
import itertools
import OpenSSL

from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
    @type jobs: list of lists of L{opcodes.OpCode}
81 @param jobs: A list of lists of opcode objects
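
    Illustrative sketch (not part of the original interface; C{OpTestDelay} is
    only a stand-in opcode here) of an LU handing follow-up work back to the
    job queue from its Exec::

      return ResultWithJobs([[opcodes.OpTestDelay(duration=0)]],
                            other_result="done")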
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
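
  Illustrative skeleton only (the names below are made up; real LUs appear
  later in this module)::

    class LUFooBar(LogicalUnit):
      HPATH = "foo-bar"
      HTYPE = constants.HTYPE_CLUSTER

      def ExpandNames(self):
        self.needed_locks = {}

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}

      def BuildHooksNodes(self):
        return ([], [self.cfg.GetMasterNode()])

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("LUFooBar has nothing to do")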
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
    This needs to be overridden in derived classes in order to check op
    validity.
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable=C0103
137 self.LogStep = processor.LogStep # pylint: disable=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
211 # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
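
    A typical (illustrative) implementation defers to the helper documented
    further below::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()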
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
      hook should run after the execution. If there are no nodes, an
      empty list should be returned (not None).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
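
    Illustrative override (a sketch only)::

      def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
        if phase == constants.HOOKS_PHASE_POST:
          feedback_fn("Post hooks ran for %d node(s)" % len(hook_results))
        return lu_result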
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
325 # pylint: disable=W0613,R0201
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to just lock some instance's nodes, or
359 to just lock primaries or secondary nodes, if needed.
    It should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
    # TODO: check if we've really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
403 def BuildHooksEnv(self):
404 """Empty BuildHooksEnv for NoHooksLu.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
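
  Illustrative wiring inside an LU's ExpandNames (a sketch; TLFoo is a made-up
  tasklet class)::

    self.tasklets = [TLFoo(self, name) for name in wanted_names]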
425 Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklets.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.
461 raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
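
# Typical (illustrative) use inside an LU's ExpandNames:
#   self.share_locks = _ShareAll()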
560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561 """Checks if the owned node groups are still correct for an instance.
563 @type cfg: L{config.ConfigWriter}
564 @param cfg: The cluster configuration
565 @type instance_name: string
566 @param instance_name: Instance name
567 @type owned_groups: set or frozenset
568 @param owned_groups: List of currently owned node groups
571 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
573 if not owned_groups.issuperset(inst_groups):
574 raise errors.OpPrereqError("Instance %s's node groups changed since"
575 " locks were acquired, current groups are"
576 " are '%s', owning groups '%s'; retry the"
579 utils.CommaJoin(inst_groups),
580 utils.CommaJoin(owned_groups)),
586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587 """Checks if the instances in a node group are still correct.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type group_uuid: string
592 @param group_uuid: Node group UUID
593 @type owned_instances: set or frozenset
594 @param owned_instances: List of currently owned instances
597 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598 if owned_instances != wanted_instances:
599 raise errors.OpPrereqError("Instances in node group '%s' changed since"
600 " locks were acquired, wanted '%s', have '%s';"
601 " retry the operation" %
603 utils.CommaJoin(wanted_instances),
604 utils.CommaJoin(owned_instances)),
607 return wanted_instances
610 def _SupportsOob(cfg, node):
611 """Tells if node supports OOB.
613 @type cfg: L{config.ConfigWriter}
614 @param cfg: The cluster configuration
615 @type node: L{objects.Node}
616 @param node: The node
617 @return: The OOB script if supported or an empty string otherwise
620 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
623 def _GetWantedNodes(lu, nodes):
624 """Returns list of checked and expanded node names.
626 @type lu: L{LogicalUnit}
627 @param lu: the logical unit on whose behalf we execute
629 @param nodes: list of node names or None for all nodes
631 @return: the list of nodes, sorted
632 @raise errors.ProgrammerError: if the nodes parameter is wrong type
636 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
638 return utils.NiceSort(lu.cfg.GetNodeList())
641 def _GetWantedInstances(lu, instances):
642 """Returns list of checked and expanded instance names.
644 @type lu: L{LogicalUnit}
645 @param lu: the logical unit on whose behalf we execute
646 @type instances: list
647 @param instances: list of instance names or None for all instances
649 @return: the list of instances, sorted
650 @raise errors.OpPrereqError: if the instances parameter is wrong type
651 @raise errors.OpPrereqError: if any of the passed instances is not found
655 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
657 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
661 def _GetUpdatedParams(old_params, update_dict,
662 use_default=True, use_none=False):
663 """Return the new version of a parameter dictionary.
665 @type old_params: dict
666 @param old_params: old parameters
667 @type update_dict: dict
668 @param update_dict: dict containing new parameter values, or
669 constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
678 @return: the new parameter dictionary
681 params_copy = copy.deepcopy(old_params)
682 for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
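
# Illustrative behaviour of _GetUpdatedParams (the values are made up):
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#   returns {"b": 2, "c": 3}; "a" is dropped so it reverts to its default.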
694 def _ReleaseLocks(lu, level, names=None, keep=None):
695 """Releases locks owned by an LU.
697 @type lu: L{LogicalUnit}
698 @param level: Lock level
699 @type names: list or None
700 @param names: Names of locks to release
701 @type keep: list or None
702 @param keep: Names of locks to retain
705 assert not (keep is not None and names is not None), \
706 "Only one of the 'names' and the 'keep' parameters can be given"
  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None
  if should_release is not None:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
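
# Illustrative call of _ReleaseLocks: keep only the named node lock and
# release every other node-level lock the LU owns (the lock name used in
# "keep" is purely an example):
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])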
739 def _MapInstanceDisksToNodes(instances):
740 """Creates a map from (node, volume) to instance name.
742 @type instances: list of L{objects.Instance}
743 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
746 return dict(((node, vol), inst.name)
747 for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
752 def _RunPostHook(lu, node_name):
753 """Runs the post-hook for an opcode on a single node.
756 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
758 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
760 # pylint: disable=W0702
761 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
764 def _CheckOutputFields(static, dynamic, selected):
765 """Checks whether all selected fields are valid.
767 @type static: L{utils.FieldSet}
768 @param static: static fields set
769 @type dynamic: L{utils.FieldSet}
770 @param dynamic: dynamic fields set
777 delta = f.NonMatching(selected)
779 raise errors.OpPrereqError("Unknown output fields selected: %s"
780 % ",".join(delta), errors.ECODE_INVAL)
783 def _CheckGlobalHvParams(params):
784 """Validates that given hypervisor params are not global ones.
  This will ensure that instances don't get customised versions of
  global parameters.
790 used_globals = constants.HVC_GLOBALS.intersection(params)
792 msg = ("The following hypervisor parameters are global and cannot"
793 " be customized at instance level, please modify them at"
794 " cluster level: %s" % utils.CommaJoin(used_globals))
795 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
798 def _CheckNodeOnline(lu, node, msg=None):
799 """Ensure that a given node is online.
801 @param lu: the LU on behalf of which we make the check
802 @param node: the node to check
803 @param msg: if passed, should be a message to replace the default one
804 @raise errors.OpPrereqError: if the node is offline
808 msg = "Can't use offline node"
809 if lu.cfg.GetNodeInfo(node).offline:
810 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
813 def _CheckNodeNotDrained(lu, node):
814 """Ensure that a given node is not drained.
816 @param lu: the LU on behalf of which we make the check
817 @param node: the node to check
818 @raise errors.OpPrereqError: if the node is drained
821 if lu.cfg.GetNodeInfo(node).drained:
822 raise errors.OpPrereqError("Can't use drained node %s" % node,
826 def _CheckNodeVmCapable(lu, node):
827 """Ensure that a given node is vm capable.
829 @param lu: the LU on behalf of which we make the check
830 @param node: the node to check
831 @raise errors.OpPrereqError: if the node is not vm capable
834 if not lu.cfg.GetNodeInfo(node).vm_capable:
835 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
840 """Ensure that a node supports a given OS.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @param os_name: the OS to query about
845 @param force_variant: whether to ignore variant errors
846 @raise errors.OpPrereqError: if the node is not supporting the OS
849 result = lu.rpc.call_os_get(node, os_name)
850 result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
853 if not force_variant:
854 _CheckOSVariant(result.payload, os_name)
857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858 """Ensure that a node has the given secondary ip.
860 @type lu: L{LogicalUnit}
861 @param lu: the LU on behalf of which we make the check
863 @param node: the node to check
864 @type secondary_ip: string
865 @param secondary_ip: the ip to check
866 @type prereq: boolean
867 @param prereq: whether to throw a prerequisite or an execute error
868 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
872 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873 result.Raise("Failure checking secondary ip on node %s" % node,
874 prereq=prereq, ecode=errors.ECODE_ENVIRON)
875 if not result.payload:
876 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877 " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
884 def _GetClusterDomainSecret():
885 """Reads the cluster domain secret.
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
892 def _CheckInstanceDown(lu, instance, reason):
893 """Ensure that an instance is not running."""
894 if instance.admin_up:
895 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896 (instance.name, reason), errors.ECODE_STATE)
898 pnode = instance.primary_node
899 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900 ins_l.Raise("Can't contact node %s for instance information" % pnode,
901 prereq=True, ecode=errors.ECODE_ENVIRON)
903 if instance.name in ins_l.payload:
904 raise errors.OpPrereqError("Instance %s is running, %s" %
905 (instance.name, reason), errors.ECODE_STATE)
908 def _ExpandItemName(fn, name, kind):
909 """Expand an item name.
911 @param fn: the function to use for expansion
912 @param name: requested item name
913 @param kind: text description ('Node' or 'Instance')
914 @return: the resolved (full) name
915 @raise errors.OpPrereqError: if the item is not found
919 if full_name is None:
920 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925 def _ExpandNodeName(cfg, name):
926 """Wrapper over L{_ExpandItemName} for nodes."""
927 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
930 def _ExpandInstanceName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for instance."""
932 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936 memory, vcpus, nics, disk_template, disks,
937 bep, hvp, hypervisor_name, tags):
938 """Builds instance related env variables for hooks
940 This builds the hook environment from individual variables.
943 @param name: the name of the instance
944 @type primary_node: string
945 @param primary_node: the name of the instance's primary node
946 @type secondary_nodes: list
947 @param secondary_nodes: list of secondary nodes as strings
948 @type os_type: string
949 @param os_type: the name of the instance's OS
950 @type status: boolean
951 @param status: the should_run status of the instance
953 @param memory: the memory size of the instance
955 @param vcpus: the count of VCPUs the instance has
957 @param nics: list of tuples (ip, mac, mode, link) representing
958 the NICs the instance has
959 @type disk_template: string
960 @param disk_template: the disk template of the instance
962 @param disks: the list of (size, mode) pairs
964 @param bep: the backend parameters for the instance
966 @param hvp: the hypervisor parameters for the instance
967 @type hypervisor_name: string
968 @param hypervisor_name: the hypervisor for the instance
970 @param tags: list of instance tags as strings
972 @return: the hook environment for this instance
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
1030 def _NICListToTuple(lu, nics):
1031 """Build a list of nic information tuples.
1033 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034 value in LUInstanceQueryData.
1036 @type lu: L{LogicalUnit}
1037 @param lu: the logical unit on whose behalf we execute
1038 @type nics: list of L{objects.NIC}
1039 @param nics: list of nics to convert to hooks tuples
1043 cluster = lu.cfg.GetClusterInfo()
1047 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048 mode = filled_params[constants.NIC_MODE]
1049 link = filled_params[constants.NIC_LINK]
1050 hooks_nics.append((ip, mac, mode, link))
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055 """Builds instance related env variables for hooks from an object.
1057 @type lu: L{LogicalUnit}
1058 @param lu: the logical unit on whose behalf we execute
1059 @type instance: L{objects.Instance}
1060 @param instance: the instance for which we should build the
1062 @type override: dict
1063 @param override: dictionary with key/values that will override
1066 @return: the hook environment dictionary
1069 cluster = lu.cfg.GetClusterInfo()
1070 bep = cluster.FillBE(instance)
1071 hvp = cluster.FillHV(instance)
1073 "name": instance.name,
1074 "primary_node": instance.primary_node,
1075 "secondary_nodes": instance.secondary_nodes,
1076 "os_type": instance.os,
1077 "status": instance.admin_up,
1078 "memory": bep[constants.BE_MEMORY],
1079 "vcpus": bep[constants.BE_VCPUS],
1080 "nics": _NICListToTuple(lu, instance.nics),
1081 "disk_template": instance.disk_template,
1082 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1085 "hypervisor_name": instance.hypervisor,
1086 "tags": instance.tags,
1089 args.update(override)
1090 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1093 def _AdjustCandidatePool(lu, exceptions):
1094 """Adjust the candidate pool after node operations.
1097 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1099 lu.LogInfo("Promoted nodes to master candidate role: %s",
1100 utils.CommaJoin(node.name for node in mod_list))
1101 for name in mod_list:
1102 lu.context.ReaddNode(name)
1103 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1105 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110 """Decide whether I should promote myself as a master candidate.
1113 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
1116 mc_should = min(mc_should + 1, cp_size)
1117 return mc_now < mc_should
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121 """Check that the brigdes needed by a list of nics exist.
1124 cluster = lu.cfg.GetClusterInfo()
1125 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126 brlist = [params[constants.NIC_LINK] for params in paramslist
1127 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1129 result = lu.rpc.call_bridges_exist(target_node, brlist)
1130 result.Raise("Error checking bridges on destination node '%s'" %
1131 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135 """Check that the brigdes needed by an instance exist.
1139 node = instance.primary_node
1140 _CheckNicsBridgesExist(lu, instance.nics, node)
1143 def _CheckOSVariant(os_obj, name):
1144 """Check whether an OS name conforms to the os variants specification.
1146 @type os_obj: L{objects.OS}
1147 @param os_obj: OS object to check
1149 @param name: OS name passed by the user, to check for validity
1152 variant = objects.OS.GetVariant(name)
1153 if not os_obj.supported_variants:
1155 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156 " passed)" % (os_obj.name, variant),
1160 raise errors.OpPrereqError("OS name must include a variant",
1163 if variant not in os_obj.supported_variants:
1164 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1167 def _GetNodeInstancesInner(cfg, fn):
1168 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1171 def _GetNodeInstances(cfg, node_name):
1172 """Returns a list of all primary and secondary instances on a node.
1176 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180 """Returns primary instances on a node.
1183 return _GetNodeInstancesInner(cfg,
1184 lambda inst: node_name == inst.primary_node)
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188 """Returns secondary instances on a node.
1191 return _GetNodeInstancesInner(cfg,
1192 lambda inst: node_name in inst.secondary_nodes)
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196 """Returns the arguments for a storage type.
1199 # Special case for file storage
1200 if storage_type == constants.ST_FILE:
1201 # storage.FileStorage wants a list of storage directories
1202 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1210 for dev in instance.disks:
1211 cfg.SetDiskID(dev, node_name)
1213 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214 result.Raise("Failed to get disk status from node %s" % node_name,
1215 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1217 for idx, bdev_status in enumerate(result.payload):
1218 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225 """Check the sanity of iallocator and node arguments and use the
1226 cluster-wide iallocator if appropriate.
1228 Check that at most one of (iallocator, node) is specified. If none is
1229 specified, then the LU's opcode's iallocator slot is filled with the
1230 cluster-wide default iallocator.
1232 @type iallocator_slot: string
1233 @param iallocator_slot: the name of the opcode iallocator slot
1234 @type node_slot: string
1235 @param node_slot: the name of the opcode target node slot
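
  Illustrative call from an LU's CheckArguments (the slot names are just
  examples)::

    _CheckIAllocatorOrNode(self, "iallocator", "node")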
1238 node = getattr(lu.op, node_slot, None)
1239 iallocator = getattr(lu.op, iallocator_slot, None)
1241 if node is not None and iallocator is not None:
1242 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1244 elif node is None and iallocator is None:
1245 default_iallocator = lu.cfg.GetDefaultIAllocator()
1246 if default_iallocator:
1247 setattr(lu.op, iallocator_slot, default_iallocator)
1249 raise errors.OpPrereqError("No iallocator or node given and no"
1250 " cluster-wide default iallocator found;"
1251 " please specify either an iallocator or a"
1252 " node, or set a cluster-wide default"
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257 """Decides on which iallocator to use.
1259 @type cfg: L{config.ConfigWriter}
1260 @param cfg: Cluster configuration object
1261 @type iallocator: string or None
1262 @param iallocator: Iallocator specified in opcode
1264 @return: Iallocator name
1268 # Use default iallocator
1269 iallocator = cfg.GetDefaultIAllocator()
1272 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273 " opcode nor as a cluster-wide default",
1279 class LUClusterPostInit(LogicalUnit):
1280 """Logical unit for running hooks after cluster initialization.
1283 HPATH = "cluster-init"
1284 HTYPE = constants.HTYPE_CLUSTER
1286 def BuildHooksEnv(self):
1291 "OP_TARGET": self.cfg.GetClusterName(),
1294 def BuildHooksNodes(self):
1295 """Build hooks nodes.
1298 return ([], [self.cfg.GetMasterNode()])
1300 def Exec(self, feedback_fn):
1307 class LUClusterDestroy(LogicalUnit):
1308 """Logical unit for destroying the cluster.
1311 HPATH = "cluster-destroy"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
1319 "OP_TARGET": self.cfg.GetClusterName(),
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1328 def CheckPrereq(self):
1329 """Check prerequisites.
1331 This checks whether the cluster is empty.
1333 Any errors are signaled by raising errors.OpPrereqError.
1336 master = self.cfg.GetMasterNode()
1338 nodelist = self.cfg.GetNodeList()
1339 if len(nodelist) != 1 or nodelist[0] != master:
1340 raise errors.OpPrereqError("There are still %d node(s) in"
1341 " this cluster." % (len(nodelist) - 1),
1343 instancelist = self.cfg.GetInstanceList()
1345 raise errors.OpPrereqError("There are still %d instance(s) in"
1346 " this cluster." % len(instancelist),
1349 def Exec(self, feedback_fn):
1350 """Destroys the cluster.
1353 master = self.cfg.GetMasterNode()
1355 # Run post hooks on master node before it's removed
1356 _RunPostHook(self, master)
1358 result = self.rpc.call_node_stop_master(master, False)
1359 result.Raise("Could not disable the master role")
1364 def _VerifyCertificate(filename):
1365 """Verifies a certificate for L{LUClusterVerifyConfig}.
1367 @type filename: string
1368 @param filename: Path to PEM file
1372 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373 utils.ReadFile(filename))
1374 except Exception, err: # pylint: disable=W0703
1375 return (LUClusterVerifyConfig.ETYPE_ERROR,
1376 "Failed to load X509 certificate %s: %s" % (filename, err))
1379 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380 constants.SSL_CERT_EXPIRATION_ERROR)
1383 fnamemsg = "While verifying %s: %s" % (filename, msg)
1388 return (None, fnamemsg)
1389 elif errcode == utils.CERT_WARNING:
1390 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391 elif errcode == utils.CERT_ERROR:
1392 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1394 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1397 def _GetAllHypervisorParameters(cluster, instances):
1398 """Compute the set of all hypervisor parameters.
1400 @type cluster: L{objects.Cluster}
1401 @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
1403 @param instances: additional instances from which to obtain parameters
1404 @rtype: list of (origin, hypervisor, parameters)
1405 @return: a list with all parameters found, indicating the hypervisor they
1406 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1411 for hv_name in cluster.enabled_hypervisors:
1412 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1414 for os_name, os_hvp in cluster.os_hvp.items():
1415 for hv_name, hv_params in os_hvp.items():
1417 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1420 # TODO: collapse identical parameter values in a single one
1421 for instance in instances:
1422 if instance.hvparams:
1423 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424 cluster.FillHV(instance)))
1429 class _VerifyErrors(object):
1430 """Mix-in for cluster/group verify LUs.
1432 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433 self.op and self._feedback_fn to be available.)
1436 TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"
1440 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452 ENODEDRBD = (TNODE, "ENODEDRBD")
1453 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456 ENODEHV = (TNODE, "ENODEHV")
1457 ENODELVM = (TNODE, "ENODELVM")
1458 ENODEN1 = (TNODE, "ENODEN1")
1459 ENODENET = (TNODE, "ENODENET")
1460 ENODEOS = (TNODE, "ENODEOS")
1461 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463 ENODERPC = (TNODE, "ENODERPC")
1464 ENODESSH = (TNODE, "ENODESSH")
1465 ENODEVERSION = (TNODE, "ENODEVERSION")
1466 ENODESETUP = (TNODE, "ENODESETUP")
1467 ENODETIME = (TNODE, "ENODETIME")
1468 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1470 ETYPE_FIELD = "code"
1471 ETYPE_ERROR = "ERROR"
1472 ETYPE_WARNING = "WARNING"
1474 def _Error(self, ecode, item, msg, *args, **kwargs):
1475 """Format an error message.
1477 Based on the opcode's error_codes parameter, either format a
1478 parseable error code, or a simpler error string.
1480 This must be called only from Exec and functions called from Exec.
1483 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1485 # first complete the msg
1488 # then format the whole message
1489 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1496 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497 # and finally report it via the feedback_fn
1498 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
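
    # Illustrative output: with error_codes set this emits parseable lines
    # such as "ERROR:ENODELVM:node:node1.example.com:<msg>" (itype and etxt
    # come from the (TNODE, "ENODELVM")-style tuples above); otherwise the
    # plainer "ERROR: node node1.example.com: <msg>" form is used.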
1500 def _ErrorIf(self, cond, *args, **kwargs):
1501 """Log an error message if the passed condition is True.
1505 or self.op.debug_simulate_errors) # pylint: disable=E1101
1507 self._Error(*args, **kwargs)
1508 # do not mark the operation as failed for WARN cases only
1509 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510 self.bad = self.bad or cond
1513 class LUClusterVerify(NoHooksLU):
1514 """Submits all jobs necessary to verify the cluster.
1519 def ExpandNames(self):
1520 self.needed_locks = {}
1522 def Exec(self, feedback_fn):
1525 if self.op.group_name:
1526 groups = [self.op.group_name]
1527 depends_fn = lambda: None
1529 groups = self.cfg.GetNodeGroupList()
1531 # Verify global configuration
1532 jobs.append([opcodes.OpClusterVerifyConfig()])
1534 # Always depend on global verification
1535 depends_fn = lambda: [(-len(jobs), [])]
1537 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538 depends=depends_fn())]
1539 for group in groups)
1541 # Fix up all parameters
1542 for op in itertools.chain(*jobs): # pylint: disable=W0142
1543 op.debug_simulate_errors = self.op.debug_simulate_errors
1544 op.verbose = self.op.verbose
1545 op.error_codes = self.op.error_codes
1547 op.skip_checks = self.op.skip_checks
1548 except AttributeError:
1549 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1551 return ResultWithJobs(jobs)
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555 """Verifies the cluster config.
1560 def _VerifyHVP(self, hvp_data):
1561 """Verifies locally the syntax of the hypervisor parameters.
1564 for item, hv_name, hv_params in hvp_data:
1565 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1568 hv_class = hypervisor.GetHypervisor(hv_name)
1569 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570 hv_class.CheckParameterSyntax(hv_params)
1571 except errors.GenericError, err:
1572 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1574 def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
1577 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579 self.all_node_info = self.cfg.GetAllNodesInfo()
1580 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581 self.needed_locks = {}
1583 def Exec(self, feedback_fn):
1584 """Verify integrity of cluster, performing various test on nodes.
1588 self._feedback_fn = feedback_fn
1590 feedback_fn("* Verifying cluster config")
1592 for msg in self.cfg.VerifyConfig():
1593 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1595 feedback_fn("* Verifying cluster certificate files")
1597 for cert_filename in constants.ALL_CERT_FILES:
1598 (errcode, msg) = _VerifyCertificate(cert_filename)
1599 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1601 feedback_fn("* Verifying hypervisor parameters")
1603 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604 self.all_inst_info.values()))
1606 feedback_fn("* Verifying all nodes belong to an existing group")
1608 # We do this verification here because, should this bogus circumstance
1609 # occur, it would never be caught by VerifyGroup, which only acts on
1610 # nodes/instances reachable from existing node groups.
1612 dangling_nodes = set(node.name for node in self.all_node_info.values()
1613 if node.group not in self.all_group_info)
1615 dangling_instances = {}
1616 no_node_instances = []
1618 for inst in self.all_inst_info.values():
1619 if inst.primary_node in dangling_nodes:
1620 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621 elif inst.primary_node not in self.all_node_info:
1622 no_node_instances.append(inst.name)
1627 utils.CommaJoin(dangling_instances.get(node.name,
1629 for node in dangling_nodes]
1631 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632 "the following nodes (and their instances) belong to a non"
1633 " existing group: %s", utils.CommaJoin(pretty_dangling))
1635 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636 "the following instances have a non-existing primary-node:"
1637 " %s", utils.CommaJoin(no_node_instances))
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643 """Verifies the status of a node group.
1646 HPATH = "cluster-verify"
1647 HTYPE = constants.HTYPE_CLUSTER
1650 _HOOKS_INDENT_RE = re.compile("^", re.M)
1652 class NodeImage(object):
1653 """A class representing the logical and physical status of a node.
1656 @ivar name: the node name to which this object refers
1657 @ivar volumes: a structure as returned from
1658 L{ganeti.backend.GetVolumeList} (runtime)
1659 @ivar instances: a list of running instances (runtime)
1660 @ivar pinst: list of configured primary instances (config)
1661 @ivar sinst: list of configured secondary instances (config)
1662 @ivar sbp: dictionary of {primary-node: list of instances} for all
1663 instances for which this node is secondary (config)
1664 @ivar mfree: free memory, as reported by hypervisor (runtime)
1665 @ivar dfree: free disk, as reported by the node (runtime)
1666 @ivar offline: the offline status (config)
1667 @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1669 not whether the individual keys were correct) (runtime)
1670 @type lvm_fail: boolean
1671 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672 @type hyp_fail: boolean
1673 @ivar hyp_fail: whether the RPC call didn't return the instance list
1674 @type ghost: boolean
1675 @ivar ghost: whether this is a known node or not (config)
1676 @type os_fail: boolean
1677 @ivar os_fail: whether the RPC call didn't return valid OS data
1679 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680 @type vm_capable: boolean
1681 @ivar vm_capable: whether the node can host instances
1684 def __init__(self, offline=False, name=None, vm_capable=True):
1693 self.offline = offline
1694 self.vm_capable = vm_capable
1695 self.rpc_fail = False
1696 self.lvm_fail = False
1697 self.hyp_fail = False
1699 self.os_fail = False
1702 def ExpandNames(self):
1703 # This raises errors.OpPrereqError on its own:
1704 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1706 # Get instances in node group; this is unsafe and needs verification later
1707 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1709 self.needed_locks = {
1710 locking.LEVEL_INSTANCE: inst_names,
1711 locking.LEVEL_NODEGROUP: [self.group_uuid],
1712 locking.LEVEL_NODE: [],
1715 self.share_locks = _ShareAll()
1717 def DeclareLocks(self, level):
1718 if level == locking.LEVEL_NODE:
1719 # Get members of node group; this is unsafe and needs verification later
1720 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1722 all_inst_info = self.cfg.GetAllInstancesInfo()
1724 # In Exec(), we warn about mirrored instances that have primary and
1725 # secondary living in separate node groups. To fully verify that
1726 # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
1729 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730 # Important: access only the instances whose lock is owned
1731 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732 nodes.update(all_inst_info[inst].secondary_nodes)
1734 self.needed_locks[locking.LEVEL_NODE] = nodes
1736 def CheckPrereq(self):
1737 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1740 group_nodes = set(self.group_info.members)
1741 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1744 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1746 unlocked_instances = \
1747 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1750 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751 utils.CommaJoin(unlocked_nodes))
1753 if unlocked_instances:
1754 raise errors.OpPrereqError("Missing lock for instances: %s" %
1755 utils.CommaJoin(unlocked_instances))
1757 self.all_node_info = self.cfg.GetAllNodesInfo()
1758 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1760 self.my_node_names = utils.NiceSort(group_nodes)
1761 self.my_inst_names = utils.NiceSort(group_instances)
1763 self.my_node_info = dict((name, self.all_node_info[name])
1764 for name in self.my_node_names)
1766 self.my_inst_info = dict((name, self.all_inst_info[name])
1767 for name in self.my_inst_names)
1769 # We detect here the nodes that will need the extra RPC calls for verifying
1770 # split LV volumes; they should be locked.
1771 extra_lv_nodes = set()
1773 for inst in self.my_inst_info.values():
1774 if inst.disk_template in constants.DTS_INT_MIRROR:
1775 group = self.my_node_info[inst.primary_node].group
1776 for nname in inst.secondary_nodes:
1777 if self.all_node_info[nname].group != group:
1778 extra_lv_nodes.add(nname)
1780 unlocked_lv_nodes = \
1781 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1783 if unlocked_lv_nodes:
1784 raise errors.OpPrereqError("these nodes could be locked: %s" %
1785 utils.CommaJoin(unlocked_lv_nodes))
1786 self.extra_lv_nodes = list(extra_lv_nodes)
1788 def _VerifyNode(self, ninfo, nresult):
1789 """Perform some basic validation on data returned from a node.
1791 - check the result data structure is well formed and has all the
1793 - check ganeti version
1795 @type ninfo: L{objects.Node}
1796 @param ninfo: the node to check
1797 @param nresult: the results from the node
1799 @return: whether overall this call was successful (and we can expect
      reasonable values in the response)
1804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1806 # main result, nresult should be a non-empty dict
1807 test = not nresult or not isinstance(nresult, dict)
1808 _ErrorIf(test, self.ENODERPC, node,
1809 "unable to verify node: no data returned")
1813 # compares ganeti version
1814 local_version = constants.PROTOCOL_VERSION
1815 remote_version = nresult.get("version", None)
1816 test = not (remote_version and
1817 isinstance(remote_version, (list, tuple)) and
1818 len(remote_version) == 2)
1819 _ErrorIf(test, self.ENODERPC, node,
1820 "connection to node returned invalid data")
1824 test = local_version != remote_version[0]
1825 _ErrorIf(test, self.ENODEVERSION, node,
1826 "incompatible protocol versions: master %s,"
1827 " node %s", local_version, remote_version[0])
1831 # node seems compatible, we can actually try to look into its results
1833 # full package version
1834 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835 self.ENODEVERSION, node,
1836 "software version mismatch: master %s, node %s",
1837 constants.RELEASE_VERSION, remote_version[1],
1838 code=self.ETYPE_WARNING)
1840 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841 if ninfo.vm_capable and isinstance(hyp_result, dict):
1842 for hv_name, hv_result in hyp_result.iteritems():
1843 test = hv_result is not None
1844 _ErrorIf(test, self.ENODEHV, node,
1845 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1847 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848 if ninfo.vm_capable and isinstance(hvp_result, list):
1849 for item, hv_name, hv_result in hvp_result:
1850 _ErrorIf(True, self.ENODEHV, node,
1851 "hypervisor %s parameter verify failure (source %s): %s",
1852 hv_name, item, hv_result)
1854 test = nresult.get(constants.NV_NODESETUP,
1855 ["Missing NODESETUP results"])
1856 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1861 def _VerifyNodeTime(self, ninfo, nresult,
1862 nvinfo_starttime, nvinfo_endtime):
1863 """Check the node time.
1865 @type ninfo: L{objects.Node}
1866 @param ninfo: the node to check
1867 @param nresult: the remote results for the node
1868 @param nvinfo_starttime: the start time of the RPC call
1869 @param nvinfo_endtime: the end time of the RPC call
1873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1875 ntime = nresult.get(constants.NV_TIME, None)
1877 ntime_merged = utils.MergeTime(ntime)
1878 except (ValueError, TypeError):
1879 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1882 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1889 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890 "Node time diverges by at least %s from master node time",
1893 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894 """Check the node LVM results.
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param vg_name: the configured VG name
1906 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1908 # checks vg existence and size > 20G
1909 vglist = nresult.get(constants.NV_VGLIST, None)
1911 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1913 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914 constants.MIN_VG_SIZE)
1915 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1918 pvlist = nresult.get(constants.NV_PVLIST, None)
1919 test = pvlist is None
1920 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1922 # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # allocate on)
1925 for _, pvname, owner_vg in pvlist:
1926 test = ":" in pvname
1927 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928 " '%s' of VG '%s'", pvname, owner_vg)
1930 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931 """Check the node bridges.
1933 @type ninfo: L{objects.Node}
1934 @param ninfo: the node to check
1935 @param nresult: the remote results for the node
1936 @param bridges: the expected list of bridges
1943 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1945 missing = nresult.get(constants.NV_BRIDGES, None)
1946 test = not isinstance(missing, list)
1947 _ErrorIf(test, self.ENODENET, node,
1948 "did not return valid bridge information")
1950 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951 utils.CommaJoin(sorted(missing)))
1953 def _VerifyNodeNetwork(self, ninfo, nresult):
1954 """Check the node network connectivity results.
1956 @type ninfo: L{objects.Node}
1957 @param ninfo: the node to check
1958 @param nresult: the remote results for the node
1962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1964 test = constants.NV_NODELIST not in nresult
1965 _ErrorIf(test, self.ENODESSH, node,
1966 "node hasn't returned node ssh connectivity data")
1968 if nresult[constants.NV_NODELIST]:
1969 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970 _ErrorIf(True, self.ENODESSH, node,
1971 "ssh communication with node '%s': %s", a_node, a_msg)
1973 test = constants.NV_NODENETTEST not in nresult
1974 _ErrorIf(test, self.ENODENET, node,
1975 "node hasn't returned node tcp connectivity data")
1977 if nresult[constants.NV_NODENETTEST]:
1978 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1980 _ErrorIf(True, self.ENODENET, node,
1981 "tcp communication with node '%s': %s",
1982 anode, nresult[constants.NV_NODENETTEST][anode])
1984 test = constants.NV_MASTERIP not in nresult
1985 _ErrorIf(test, self.ENODENET, node,
1986 "node hasn't returned node master IP reachability data")
1988 if not nresult[constants.NV_MASTERIP]:
1989 if node == self.master_node:
1990 msg = "the master node cannot reach the master IP (not configured?)"
1992 msg = "cannot reach the master IP"
1993 _ErrorIf(True, self.ENODENET, node, msg)
1995 def _VerifyInstance(self, instance, instanceconfig, node_image,
1997 """Verify an instance.
1999 This function checks to see if the required block devices are
2000 available on the instance's node.
2003 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004 node_current = instanceconfig.primary_node
2006 node_vol_should = {}
2007 instanceconfig.MapLVsByNode(node_vol_should)
2009 for node in node_vol_should:
2010 n_img = node_image[node]
2011 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012 # ignore missing volumes on offline or broken nodes
2014 for volume in node_vol_should[node]:
2015 test = volume not in n_img.volumes
2016 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017 "volume %s missing on node %s", volume, node)
2019 if instanceconfig.admin_up:
2020 pri_img = node_image[node_current]
2021 test = instance not in pri_img.instances and not pri_img.offline
2022 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023 "instance not running on its primary node %s",
2026 diskdata = [(nname, success, status, idx)
2027 for (nname, disks) in diskstatus.items()
2028 for idx, (success, status) in enumerate(disks)]
2030 for nname, success, bdev_status, idx in diskdata:
2031 # the 'ghost node' construction in Exec() ensures that we have a
2033 snode = node_image[nname]
2034 bad_snode = snode.ghost or snode.offline
2035 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036 self.EINSTANCEFAULTYDISK, instance,
2037 "couldn't retrieve status for disk/%s on %s: %s",
2038 idx, nname, bdev_status)
2039 _ErrorIf((instanceconfig.admin_up and success and
2040 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041 self.EINSTANCEFAULTYDISK, instance,
2042 "disk/%s on %s is faulty", idx, nname)
2044 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045 """Verify if there are any unknown volumes in the cluster.
2047 The .os, .swap and backup volumes are ignored. All other volumes are
2048 reported as unknown.
2050 @type reserved: L{ganeti.utils.FieldSet}
2051 @param reserved: a FieldSet of reserved volume names
2054 for node, n_img in node_image.items():
2055 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056 # skip non-healthy nodes
2058 for volume in n_img.volumes:
2059 test = ((node not in node_vol_should or
2060 volume not in node_vol_should[node]) and
2061 not reserved.Matches(volume))
2062 self._ErrorIf(test, self.ENODEORPHANLV, node,
2063 "volume %s is unknown", volume)
2065 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066 """Verify N+1 Memory Resilience.
2068 Check that if one single node dies we can still start all the
2069 instances it was primary for.
2072 cluster_info = self.cfg.GetClusterInfo()
2073 for node, n_img in node_image.items():
2074 # This code checks that every node which is now listed as
2075 # secondary has enough memory to host all instances it is
2076 # supposed to, should a single other node in the cluster fail.
2077 # FIXME: not ready for failover to an arbitrary node
2078 # FIXME: does not support file-backed instances
2079 # WARNING: we currently take into account down instances as well
2080 # as up ones, considering that even if they're down someone
2081 # might want to start them even in the event of a node failure.
2083 # we're skipping offline nodes from the N+1 warning, since
2084 # most likely we don't have good memory information from them;
2085 # we already list instances living on such nodes, and that's
2088 for prinode, instances in n_img.sbp.items():
2090 for instance in instances:
2091 bep = cluster_info.FillBE(instance_cfg[instance])
2092 if bep[constants.BE_AUTO_BALANCE]:
2093 needed_mem += bep[constants.BE_MEMORY]
2094 test = n_img.mfree < needed_mem
2095 self._ErrorIf(test, self.ENODEN1, node,
2096 "not enough memory to accomodate instance failovers"
2097 " should node %s fail (%dMiB needed, %dMiB available)",
2098 prinode, needed_mem, n_img.mfree)
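# Illustrative sketch (not part of the LU): for every node, and for each
# other node (prinode) whose instances would fail over onto it, the loop
# above sums the BE_MEMORY of the auto-balanced instances and compares the
# total against the free memory the hypervisor reported.  Reduced to
# made-up numbers:
#
#   needed_mem = sum([512, 1024, 256])    # -> 1792 MiB needed after failover
#   mfree = 1536                          # MiB reported free on the node
#   n_plus_one_ok = mfree >= needed_mem   # -> False, node is not N+1 safe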
2101 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102 (files_all, files_all_opt, files_mc, files_vm)):
2103 """Verifies file checksums collected from all nodes.
2105 @param errorif: Callback for reporting errors
2106 @param nodeinfo: List of L{objects.Node} objects
2107 @param master_node: Name of master node
2108 @param all_nvinfo: RPC results
2111 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2112 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2113 "Found file listed in more than one file list"
2115 # Define functions determining which nodes to consider for a file
2118 (files_all_opt, None),
2119 (files_mc, lambda node: (node.master_candidate or
2120 node.name == master_node)),
2121 (files_vm, lambda node: node.vm_capable),
2124 # Build mapping from filename to list of nodes which should have the file
2126 for (files, fn) in files2nodefn:
2128 filenodes = nodeinfo
2130 filenodes = filter(fn, nodeinfo)
2131 nodefiles.update((filename,
2132 frozenset(map(operator.attrgetter("name"), filenodes)))
2133 for filename in files)
2135 assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2137 fileinfo = dict((filename, {}) for filename in nodefiles)
2138 ignore_nodes = set()
2140 for node in nodeinfo:
2142 ignore_nodes.add(node.name)
2145 nresult = all_nvinfo[node.name]
2147 if nresult.fail_msg or not nresult.payload:
2150 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2152 test = not (node_files and isinstance(node_files, dict))
2153 errorif(test, cls.ENODEFILECHECK, node.name,
2154 "Node did not return file checksum data")
2156 ignore_nodes.add(node.name)
2159 # Build per-checksum mapping from filename to nodes having it
2160 for (filename, checksum) in node_files.items():
2161 assert filename in nodefiles
2162 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2164 for (filename, checksums) in fileinfo.items():
2165 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2167 # Nodes having the file
2168 with_file = frozenset(node_name
2169 for nodes in fileinfo[filename].values()
2170 for node_name in nodes) - ignore_nodes
2172 expected_nodes = nodefiles[filename] - ignore_nodes
2174 # Nodes missing file
2175 missing_file = expected_nodes - with_file
2177 if filename in files_all_opt:
2179 errorif(missing_file and missing_file != expected_nodes,
2180 cls.ECLUSTERFILECHECK, None,
2181 "File %s is optional, but it must exist on all or no"
2182 " nodes (not found on %s)",
2183 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2185 # Non-optional files
2186 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2187 "File %s is missing from node(s) %s", filename,
2188 utils.CommaJoin(utils.NiceSort(missing_file)))
2190 # Warn if a node has a file it shouldn't
2191 unexpected = with_file - expected_nodes
2193 cls.ECLUSTERFILECHECK, None,
2194 "File %s should not exist on node(s) %s",
2195 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2197 # See if there are multiple versions of the file
2198 test = len(checksums) > 1
2200 variants = ["variant %s on %s" %
2201 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2202 for (idx, (checksum, nodes)) in
2203 enumerate(sorted(checksums.items()))]
2207 errorif(test, cls.ECLUSTERFILECHECK, None,
2208 "File %s found with %s different checksums (%s)",
2209 filename, len(checksums), "; ".join(variants))
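# Illustrative sketch (not part of the LU): per file, the data gathered
# above is a mapping checksum -> set of node names, so "all good" means a
# single checksum held by exactly the expected nodes.  A condensed check
# over such a mapping could be:
#
#   def _FileConsensus(checksums, expected_nodes):
#     """Return (missing_nodes, unexpected_nodes, n_variants).
#
#     checksums: dict checksum -> set of node names
#     expected_nodes: set of node names that should have the file
#     """
#     with_file = set()
#     for nodes in checksums.values():
#       with_file.update(nodes)
#     return (expected_nodes - with_file,
#             with_file - expected_nodes,
#             len(checksums))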
2211 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2213 """Verifies and the node DRBD status.
2215 @type ninfo: L{objects.Node}
2216 @param ninfo: the node to check
2217 @param nresult: the remote results for the node
2218 @param instanceinfo: the dict of instances
2219 @param drbd_helper: the configured DRBD usermode helper
2220 @param drbd_map: the DRBD map as returned by
2221 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2225 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2228 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2229 test = (helper_result is None)
2230 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231 "no drbd usermode helper returned")
2233 status, payload = helper_result
2235 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236 "drbd usermode helper check unsuccessful: %s", payload)
2237 test = status and (payload != drbd_helper)
2238 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2239 "wrong drbd usermode helper: %s", payload)
2241 # compute the DRBD minors
2243 for minor, instance in drbd_map[node].items():
2244 test = instance not in instanceinfo
2245 _ErrorIf(test, self.ECLUSTERCFG, None,
2246 "ghost instance '%s' in temporary DRBD map", instance)
2247 # ghost instance should not be running, but otherwise we
2248 # don't give double warnings (both ghost instance and
2249 # unallocated minor in use)
2251 node_drbd[minor] = (instance, False)
2253 instance = instanceinfo[instance]
2254 node_drbd[minor] = (instance.name, instance.admin_up)
2256 # and now check them
2257 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2258 test = not isinstance(used_minors, (tuple, list))
2259 _ErrorIf(test, self.ENODEDRBD, node,
2260 "cannot parse drbd status file: %s", str(used_minors))
2262 # we cannot check drbd status
2265 for minor, (iname, must_exist) in node_drbd.items():
2266 test = minor not in used_minors and must_exist
2267 _ErrorIf(test, self.ENODEDRBD, node,
2268 "drbd minor %d of instance %s is not active", minor, iname)
2269 for minor in used_minors:
2270 test = minor not in node_drbd
2271 _ErrorIf(test, self.ENODEDRBD, node,
2272 "unallocated drbd minor %d is in use", minor)
2274 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2275 """Builds the node OS structures.
2277 @type ninfo: L{objects.Node}
2278 @param ninfo: the node to check
2279 @param nresult: the remote results for the node
2280 @param nimg: the node image object
2284 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2286 remote_os = nresult.get(constants.NV_OSLIST, None)
2287 test = (not isinstance(remote_os, list) or
2288 not compat.all(isinstance(v, list) and len(v) == 7
2289 for v in remote_os))
2291 _ErrorIf(test, self.ENODEOS, node,
2292 "node hasn't returned valid OS data")
2301 for (name, os_path, status, diagnose,
2302 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2304 if name not in os_dict:
2307 # parameters is a list of lists instead of list of tuples due to
2308 # JSON lacking a real tuple type, fix it:
2309 parameters = [tuple(v) for v in parameters]
2310 os_dict[name].append((os_path, status, diagnose,
2311 set(variants), set(parameters), set(api_ver)))
2313 nimg.oslist = os_dict
2315 def _VerifyNodeOS(self, ninfo, nimg, base):
2316 """Verifies the node OS list.
2318 @type ninfo: L{objects.Node}
2319 @param ninfo: the node to check
2320 @param nimg: the node image object
2321 @param base: the 'template' node we match against (e.g. from the master)
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2329 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2330 for os_name, os_data in nimg.oslist.items():
2331 assert os_data, "Empty OS status for OS %s?!" % os_name
2332 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2333 _ErrorIf(not f_status, self.ENODEOS, node,
2334 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2335 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2336 "OS '%s' has multiple entries (first one shadows the rest): %s",
2337 os_name, utils.CommaJoin([v[0] for v in os_data]))
2338 # comparisons with the 'base' image
2339 test = os_name not in base.oslist
2340 _ErrorIf(test, self.ENODEOS, node,
2341 "Extra OS %s not present on reference node (%s)",
2345 assert base.oslist[os_name], "Base node has empty OS status?"
2346 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2348 # base OS is invalid, skipping
2350 for kind, a, b in [("API version", f_api, b_api),
2351 ("variants list", f_var, b_var),
2352 ("parameters", beautify_params(f_param),
2353 beautify_params(b_param))]:
2354 _ErrorIf(a != b, self.ENODEOS, node,
2355 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2356 kind, os_name, base.name,
2357 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2359 # check any missing OSes
2360 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2361 _ErrorIf(missing, self.ENODEOS, node,
2362 "OSes present on reference node %s but missing on this node: %s",
2363 base.name, utils.CommaJoin(missing))
2365 def _VerifyOob(self, ninfo, nresult):
2366 """Verifies out of band functionality of a node.
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the remote results for the node
2374 # We just have to verify the paths on master and/or master candidates
2375 # as the oob helper is invoked on the master
2376 if ((ninfo.master_candidate or ninfo.master_capable) and
2377 constants.NV_OOB_PATHS in nresult):
2378 for path_result in nresult[constants.NV_OOB_PATHS]:
2379 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2381 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2382 """Verifies and updates the node volume data.
2384 This function will update a L{NodeImage}'s internal structures
2385 with data from the remote call.
2387 @type ninfo: L{objects.Node}
2388 @param ninfo: the node to check
2389 @param nresult: the remote results for the node
2390 @param nimg: the node image object
2391 @param vg_name: the configured VG name
2395 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2397 nimg.lvm_fail = True
2398 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2401 elif isinstance(lvdata, basestring):
2402 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2403 utils.SafeEncode(lvdata))
2404 elif not isinstance(lvdata, dict):
2405 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2407 nimg.volumes = lvdata
2408 nimg.lvm_fail = False
2410 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2411 """Verifies and updates the node instance list.
2413 If the listing was successful, then updates this node's instance
2414 list. Otherwise, it marks the RPC call as failed for the instance
2417 @type ninfo: L{objects.Node}
2418 @param ninfo: the node to check
2419 @param nresult: the remote results for the node
2420 @param nimg: the node image object
2423 idata = nresult.get(constants.NV_INSTANCELIST, None)
2424 test = not isinstance(idata, list)
2425 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2426 " (instancelist): %s", utils.SafeEncode(str(idata)))
2428 nimg.hyp_fail = True
2430 nimg.instances = idata
2432 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2433 """Verifies and computes a node information map
2435 @type ninfo: L{objects.Node}
2436 @param ninfo: the node to check
2437 @param nresult: the remote results for the node
2438 @param nimg: the node image object
2439 @param vg_name: the configured VG name
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2445 # try to read free memory (from the hypervisor)
2446 hv_info = nresult.get(constants.NV_HVINFO, None)
2447 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2448 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2451 nimg.mfree = int(hv_info["memory_free"])
2452 except (ValueError, TypeError):
2453 _ErrorIf(True, self.ENODERPC, node,
2454 "node returned invalid nodeinfo, check hypervisor")
2456 # FIXME: devise a free space model for file based instances as well
2457 if vg_name is not None:
2458 test = (constants.NV_VGLIST not in nresult or
2459 vg_name not in nresult[constants.NV_VGLIST])
2460 _ErrorIf(test, self.ENODELVM, node,
2461 "node didn't return data for the volume group '%s'"
2462 " - it is either missing or broken", vg_name)
2465 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2466 except (ValueError, TypeError):
2467 _ErrorIf(True, self.ENODERPC, node,
2468 "node returned invalid LVM info, check LVM status")
2470 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2471 """Gets per-disk status information for all instances.
2473 @type nodelist: list of strings
2474 @param nodelist: Node names
2475 @type node_image: dict of (name, L{objects.Node})
2476 @param node_image: Node objects
2477 @type instanceinfo: dict of (name, L{objects.Instance})
2478 @param instanceinfo: Instance objects
2479 @rtype: {instance: {node: [(success, payload)]}}
2480 @return: a dictionary of per-instance dictionaries with nodes as
2481 keys and disk information as values; the disk information is a
2482 list of tuples (success, payload)
2485 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2488 node_disks_devonly = {}
2489 diskless_instances = set()
2490 diskless = constants.DT_DISKLESS
2492 for nname in nodelist:
2493 node_instances = list(itertools.chain(node_image[nname].pinst,
2494 node_image[nname].sinst))
2495 diskless_instances.update(inst for inst in node_instances
2496 if instanceinfo[inst].disk_template == diskless)
2497 disks = [(inst, disk)
2498 for inst in node_instances
2499 for disk in instanceinfo[inst].disks]
2502 # No need to collect data
2505 node_disks[nname] = disks
2507 # Creating copies as SetDiskID below will modify the objects and that can
2508 # lead to incorrect data returned from nodes
2509 devonly = [dev.Copy() for (_, dev) in disks]
2512 self.cfg.SetDiskID(dev, nname)
2514 node_disks_devonly[nname] = devonly
2516 assert len(node_disks) == len(node_disks_devonly)
2518 # Collect data from all nodes with disks
2519 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2522 assert len(result) == len(node_disks)
2526 for (nname, nres) in result.items():
2527 disks = node_disks[nname]
2530 # No data from this node
2531 data = len(disks) * [(False, "node offline")]
2534 _ErrorIf(msg, self.ENODERPC, nname,
2535 "while getting disk information: %s", msg)
2537 # No data from this node
2538 data = len(disks) * [(False, msg)]
2541 for idx, i in enumerate(nres.payload):
2542 if isinstance(i, (tuple, list)) and len(i) == 2:
2545 logging.warning("Invalid result from node %s, entry %d: %s",
2547 data.append((False, "Invalid result from the remote node"))
2549 for ((inst, _), status) in zip(disks, data):
2550 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2552 # Add empty entries for diskless instances.
2553 for inst in diskless_instances:
2554 assert inst not in instdisk
2557 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2558 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2559 compat.all(isinstance(s, (tuple, list)) and
2560 len(s) == 2 for s in statuses)
2561 for inst, nnames in instdisk.items()
2562 for nname, statuses in nnames.items())
2563 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
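# Illustrative sketch (not part of the LU): the returned structure is a
# dict keyed by instance name, then by node name, with one (success,
# payload) tuple per disk; all names and values below are made up:
#
#   instdisk = {
#     "inst1.example.com": {
#       "node1.example.com": [(True, "bdev status disk/0"),
#                             (True, "bdev status disk/1")],
#       "node2.example.com": [(False, "node offline"),
#                             (False, "node offline")],
#     },
#     "diskless1.example.com": {},   # diskless instances get empty entries
#   }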
2568 def _SshNodeSelector(group_uuid, all_nodes):
2569 """Create endless iterators for all potential SSH check hosts.
2572 nodes = [node for node in all_nodes
2573 if (node.group != group_uuid and
2575 keyfunc = operator.attrgetter("group")
2577 return map(itertools.cycle,
2578 [sorted(map(operator.attrgetter("name"), names))
2579 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2583 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2584 """Choose which nodes should talk to which other nodes.
2586 We will make nodes contact all nodes in their group, and one node from
2589 @warning: This algorithm has a known issue if one node group is much
2590 smaller than others (e.g. just one node). In such a case all other
2591 nodes will talk to the single node.
2594 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2595 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2597 return (online_nodes,
2598 dict((name, sorted([i.next() for i in sel]))
2599 for name in online_nodes))
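# Illustrative sketch (not part of the LU): _SshNodeSelector builds one
# endless (cycled) iterator of node names per *other* group, and
# _SelectSshCheckNodes then draws one name from each iterator for every
# online node in this group.  With two other groups (made-up names):
#
#   # cycles: ["a1", "a2"] and ["b1"]
#   # online nodes in this group: ["n1", "n2", "n3"]
#   # -> {"n1": ["a1", "b1"], "n2": ["a2", "b1"], "n3": ["a1", "b1"]}
#
# so every other group is contacted by each node, while the load inside
# those groups rotates.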
2601 def BuildHooksEnv(self):
2604 Cluster-Verify hooks are run only in the post phase; if they fail, their
2605 output is logged in the verify output and the verification fails.
2609 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2612 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2613 for node in self.my_node_info.values())
2617 def BuildHooksNodes(self):
2618 """Build hooks nodes.
2621 return ([], self.my_node_names)
2623 def Exec(self, feedback_fn):
2624 """Verify integrity of the node group, performing various test on nodes.
2627 # This method has too many local variables. pylint: disable=R0914
2628 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2630 if not self.my_node_names:
2632 feedback_fn("* Empty node group, skipping verification")
2636 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2637 verbose = self.op.verbose
2638 self._feedback_fn = feedback_fn
2640 vg_name = self.cfg.GetVGName()
2641 drbd_helper = self.cfg.GetDRBDHelper()
2642 cluster = self.cfg.GetClusterInfo()
2643 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2644 hypervisors = cluster.enabled_hypervisors
2645 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2647 i_non_redundant = [] # Non redundant instances
2648 i_non_a_balanced = [] # Non auto-balanced instances
2649 n_offline = 0 # Count of offline nodes
2650 n_drained = 0 # Count of nodes being drained
2651 node_vol_should = {}
2653 # FIXME: verify OS list
2656 filemap = _ComputeAncillaryFiles(cluster, False)
2658 # do local checksums
2659 master_node = self.master_node = self.cfg.GetMasterNode()
2660 master_ip = self.cfg.GetMasterIP()
2662 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2664 node_verify_param = {
2665 constants.NV_FILELIST:
2666 utils.UniqueSequence(filename
2667 for files in filemap
2668 for filename in files),
2669 constants.NV_NODELIST:
2670 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2671 self.all_node_info.values()),
2672 constants.NV_HYPERVISOR: hypervisors,
2673 constants.NV_HVPARAMS:
2674 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2675 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2676 for node in node_data_list
2677 if not node.offline],
2678 constants.NV_INSTANCELIST: hypervisors,
2679 constants.NV_VERSION: None,
2680 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2681 constants.NV_NODESETUP: None,
2682 constants.NV_TIME: None,
2683 constants.NV_MASTERIP: (master_node, master_ip),
2684 constants.NV_OSLIST: None,
2685 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2688 if vg_name is not None:
2689 node_verify_param[constants.NV_VGLIST] = None
2690 node_verify_param[constants.NV_LVLIST] = vg_name
2691 node_verify_param[constants.NV_PVLIST] = [vg_name]
2692 node_verify_param[constants.NV_DRBDLIST] = None
2695 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2698 # FIXME: this needs to be changed per node-group, not cluster-wide
2700 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2701 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702 bridges.add(default_nicpp[constants.NIC_LINK])
2703 for instance in self.my_inst_info.values():
2704 for nic in instance.nics:
2705 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2706 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707 bridges.add(full_nic[constants.NIC_LINK])
2710 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2712 # Build our expected cluster state
2713 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2715 vm_capable=node.vm_capable))
2716 for node in node_data_list)
2720 for node in self.all_node_info.values():
2721 path = _SupportsOob(self.cfg, node)
2722 if path and path not in oob_paths:
2723 oob_paths.append(path)
2726 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2728 for instance in self.my_inst_names:
2729 inst_config = self.my_inst_info[instance]
2731 for nname in inst_config.all_nodes:
2732 if nname not in node_image:
2733 gnode = self.NodeImage(name=nname)
2734 gnode.ghost = (nname not in self.all_node_info)
2735 node_image[nname] = gnode
2737 inst_config.MapLVsByNode(node_vol_should)
2739 pnode = inst_config.primary_node
2740 node_image[pnode].pinst.append(instance)
2742 for snode in inst_config.secondary_nodes:
2743 nimg = node_image[snode]
2744 nimg.sinst.append(instance)
2745 if pnode not in nimg.sbp:
2746 nimg.sbp[pnode] = []
2747 nimg.sbp[pnode].append(instance)
2749 # At this point, we have the in-memory data structures complete,
2750 # except for the runtime information, which we'll gather next
2752 # Due to the way our RPC system works, exact response times cannot be
2753 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2754 # time before and after executing the request, we can at least have a time
2756 nvinfo_starttime = time.time()
2757 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2759 self.cfg.GetClusterName())
2760 nvinfo_endtime = time.time()
2762 if self.extra_lv_nodes and vg_name is not None:
2764 self.rpc.call_node_verify(self.extra_lv_nodes,
2765 {constants.NV_LVLIST: vg_name},
2766 self.cfg.GetClusterName())
2768 extra_lv_nvinfo = {}
2770 all_drbd_map = self.cfg.ComputeDRBDMap()
2772 feedback_fn("* Gathering disk information (%s nodes)" %
2773 len(self.my_node_names))
2774 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2777 feedback_fn("* Verifying configuration file consistency")
2779 # If not all nodes are being checked, we need to make sure the master node
2780 # and a non-checked vm_capable node are in the list.
2781 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2783 vf_nvinfo = all_nvinfo.copy()
2784 vf_node_info = list(self.my_node_info.values())
2785 additional_nodes = []
2786 if master_node not in self.my_node_info:
2787 additional_nodes.append(master_node)
2788 vf_node_info.append(self.all_node_info[master_node])
2789 # Add the first vm_capable node we find which is not included
2790 for node in absent_nodes:
2791 nodeinfo = self.all_node_info[node]
2792 if nodeinfo.vm_capable and not nodeinfo.offline:
2793 additional_nodes.append(node)
2794 vf_node_info.append(self.all_node_info[node])
2796 key = constants.NV_FILELIST
2797 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2798 {key: node_verify_param[key]},
2799 self.cfg.GetClusterName()))
2801 vf_nvinfo = all_nvinfo
2802 vf_node_info = self.my_node_info.values()
2804 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2806 feedback_fn("* Verifying node status")
2810 for node_i in node_data_list:
2812 nimg = node_image[node]
2816 feedback_fn("* Skipping offline node %s" % (node,))
2820 if node == master_node:
2822 elif node_i.master_candidate:
2823 ntype = "master candidate"
2824 elif node_i.drained:
2830 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2832 msg = all_nvinfo[node].fail_msg
2833 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2835 nimg.rpc_fail = True
2838 nresult = all_nvinfo[node].payload
2840 nimg.call_ok = self._VerifyNode(node_i, nresult)
2841 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2842 self._VerifyNodeNetwork(node_i, nresult)
2843 self._VerifyOob(node_i, nresult)
2846 self._VerifyNodeLVM(node_i, nresult, vg_name)
2847 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2850 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2851 self._UpdateNodeInstances(node_i, nresult, nimg)
2852 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2853 self._UpdateNodeOS(node_i, nresult, nimg)
2855 if not nimg.os_fail:
2856 if refos_img is None:
2858 self._VerifyNodeOS(node_i, nimg, refos_img)
2859 self._VerifyNodeBridges(node_i, nresult, bridges)
2861 # Check whether all running instances are primary for the node. (This
2862 # can no longer be done from _VerifyInstance below, since some of the
2863 # wrong instances could be from other node groups.)
2864 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2866 for inst in non_primary_inst:
2867 test = inst in self.all_inst_info
2868 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2869 "instance should not run on node %s", node_i.name)
2870 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2871 "node is running unknown instance %s", inst)
2873 for node, result in extra_lv_nvinfo.items():
2874 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2875 node_image[node], vg_name)
2877 feedback_fn("* Verifying instance status")
2878 for instance in self.my_inst_names:
2880 feedback_fn("* Verifying instance %s" % instance)
2881 inst_config = self.my_inst_info[instance]
2882 self._VerifyInstance(instance, inst_config, node_image,
2884 inst_nodes_offline = []
2886 pnode = inst_config.primary_node
2887 pnode_img = node_image[pnode]
2888 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2889 self.ENODERPC, pnode, "instance %s, connection to"
2890 " primary node failed", instance)
2892 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2893 self.EINSTANCEBADNODE, instance,
2894 "instance is marked as running and lives on offline node %s",
2895 inst_config.primary_node)
2897 # If the instance is non-redundant we cannot survive losing its primary
2898 # node, so we are not N+1 compliant. On the other hand we have no disk
2899 # templates with more than one secondary so that situation is not well
2901 # FIXME: does not support file-backed instances
2902 if not inst_config.secondary_nodes:
2903 i_non_redundant.append(instance)
2905 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2906 instance, "instance has multiple secondary nodes: %s",
2907 utils.CommaJoin(inst_config.secondary_nodes),
2908 code=self.ETYPE_WARNING)
2910 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2911 pnode = inst_config.primary_node
2912 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2913 instance_groups = {}
2915 for node in instance_nodes:
2916 instance_groups.setdefault(self.all_node_info[node].group,
2920 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2921 # Sort so that we always list the primary node first.
2922 for group, nodes in sorted(instance_groups.items(),
2923 key=lambda (_, nodes): pnode in nodes,
2926 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2927 instance, "instance has primary and secondary nodes in"
2928 " different groups: %s", utils.CommaJoin(pretty_list),
2929 code=self.ETYPE_WARNING)
2931 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2932 i_non_a_balanced.append(instance)
2934 for snode in inst_config.secondary_nodes:
2935 s_img = node_image[snode]
2936 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2937 "instance %s, connection to secondary node failed", instance)
2940 inst_nodes_offline.append(snode)
2942 # warn that the instance lives on offline nodes
2943 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2944 "instance has offline secondary node(s) %s",
2945 utils.CommaJoin(inst_nodes_offline))
2946 # ... or ghost/non-vm_capable nodes
2947 for node in inst_config.all_nodes:
2948 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2949 "instance lives on ghost node %s", node)
2950 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2951 instance, "instance lives on non-vm_capable node %s", node)
2953 feedback_fn("* Verifying orphan volumes")
2954 reserved = utils.FieldSet(*cluster.reserved_lvs)
2956 # We will get spurious "unknown volume" warnings if any node of this group
2957 # is secondary for an instance whose primary is in another group. To avoid
2958 # them, we find these instances and add their volumes to node_vol_should.
2959 for inst in self.all_inst_info.values():
2960 for secondary in inst.secondary_nodes:
2961 if (secondary in self.my_node_info
2962 and inst.name not in self.my_inst_info):
2963 inst.MapLVsByNode(node_vol_should)
2966 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2968 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2969 feedback_fn("* Verifying N+1 Memory redundancy")
2970 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2972 feedback_fn("* Other Notes")
2974 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2975 % len(i_non_redundant))
2977 if i_non_a_balanced:
2978 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2979 % len(i_non_a_balanced))
2982 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2985 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2989 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2990 """Analyze the post-hooks' result
2992 This method analyses the hook result, handles it, and sends some
2993 nicely-formatted feedback back to the user.
2995 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2996 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2997 @param hooks_results: the results of the multi-node hooks rpc call
2998 @param feedback_fn: function used to send feedback back to the caller
2999 @param lu_result: previous Exec result
3000 @return: the new Exec result, based on the previous result
3004 # We only really run POST phase hooks, only for non-empty groups,
3005 # and are only interested in their results
3006 if not self.my_node_names:
3009 elif phase == constants.HOOKS_PHASE_POST:
3010 # Used to change hooks' output to proper indentation
3011 feedback_fn("* Hooks Results")
3012 assert hooks_results, "invalid result from hooks"
3014 for node_name in hooks_results:
3015 res = hooks_results[node_name]
3017 test = msg and not res.offline
3018 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3019 "Communication failure in hooks execution: %s", msg)
3020 if res.offline or msg:
3021 # No need to investigate payload if node is offline or gave
3024 for script, hkr, output in res.payload:
3025 test = hkr == constants.HKR_FAIL
3026 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3027 "Script %s failed, output:", script)
3029 output = self._HOOKS_INDENT_RE.sub(" ", output)
3030 feedback_fn("%s" % output)
3036 class LUClusterVerifyDisks(NoHooksLU):
3037 """Verifies the cluster disks status.
3042 def ExpandNames(self):
3043 self.share_locks = _ShareAll()
3044 self.needed_locks = {
3045 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3048 def Exec(self, feedback_fn):
3049 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3051 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3052 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3053 for group in group_names])
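# Illustrative sketch (not part of the LU): disk verification is fanned out
# as one independent job per node group rather than being done inline.
# Conceptually:
#
#   groups = ["default", "rack1"]                     # names are made up
#   jobs = [[opcodes.OpGroupVerifyDisks(group_name=g)] for g in groups]
#   return ResultWithJobs(jobs)      # one single-opcode job per group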
3056 class LUGroupVerifyDisks(NoHooksLU):
3057 """Verifies the status of all disks in a node group.
3062 def ExpandNames(self):
3063 # Raises errors.OpPrereqError on its own if group can't be found
3064 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3066 self.share_locks = _ShareAll()
3067 self.needed_locks = {
3068 locking.LEVEL_INSTANCE: [],
3069 locking.LEVEL_NODEGROUP: [],
3070 locking.LEVEL_NODE: [],
3073 def DeclareLocks(self, level):
3074 if level == locking.LEVEL_INSTANCE:
3075 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3077 # Lock instances optimistically, needs verification once node and group
3078 # locks have been acquired
3079 self.needed_locks[locking.LEVEL_INSTANCE] = \
3080 self.cfg.GetNodeGroupInstances(self.group_uuid)
3082 elif level == locking.LEVEL_NODEGROUP:
3083 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3085 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3086 set([self.group_uuid] +
3087 # Lock all groups used by instances optimistically; this requires
3088 # going via the node before it's locked, requiring verification
3091 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3092 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3094 elif level == locking.LEVEL_NODE:
3095 # This will only lock the nodes in the group to be verified which contain
3097 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3098 self._LockInstancesNodes()
3100 # Lock all nodes in group to be verified
3101 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3102 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3103 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3105 def CheckPrereq(self):
3106 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3107 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3108 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3110 assert self.group_uuid in owned_groups
3112 # Check if locked instances are still correct
3113 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3115 # Get instance information
3116 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3118 # Check if node groups for locked instances are still correct
3119 for (instance_name, inst) in self.instances.items():
3120 assert owned_nodes.issuperset(inst.all_nodes), \
3121 "Instance %s's nodes changed while we kept the lock" % instance_name
3123 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3126 assert self.group_uuid in inst_groups, \
3127 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3129 def Exec(self, feedback_fn):
3130 """Verify integrity of cluster disks.
3132 @rtype: tuple of three items
3133 @return: a tuple of (dict of node-to-node_error, list of instances
3134 which need activate-disks, dict of instance: (node, volume) for
3139 res_instances = set()
3142 nv_dict = _MapInstanceDisksToNodes([inst
3143 for inst in self.instances.values()
3147 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3148 set(self.cfg.GetVmCapableNodeList()))
3150 node_lvs = self.rpc.call_lv_list(nodes, [])
3152 for (node, node_res) in node_lvs.items():
3153 if node_res.offline:
3156 msg = node_res.fail_msg
3158 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3159 res_nodes[node] = msg
3162 for lv_name, (_, _, lv_online) in node_res.payload.items():
3163 inst = nv_dict.pop((node, lv_name), None)
3164 if not (lv_online or inst is None):
3165 res_instances.add(inst)
3167 # any leftover items in nv_dict are missing LVs, let's arrange the data
3169 for key, inst in nv_dict.iteritems():
3170 res_missing.setdefault(inst, []).append(key)
3172 return (res_nodes, list(res_instances), res_missing)
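# Illustrative sketch (not part of the LU): nv_dict maps (node, lv_name) to
# the owning instance; every LV reported online is popped from it, so
# whatever is left afterwards is missing and gets grouped back per
# instance.  With made-up names:
#
#   leftover = {("node1", "xenvg/disk0"): "inst1"}
#   res_missing = {}
#   for key, inst in leftover.items():
#     res_missing.setdefault(inst, []).append(key)
#   # -> {"inst1": [("node1", "xenvg/disk0")]}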
3175 class LUClusterRepairDiskSizes(NoHooksLU):
3176 """Verifies the cluster disks sizes.
3181 def ExpandNames(self):
3182 if self.op.instances:
3183 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3184 self.needed_locks = {
3185 locking.LEVEL_NODE: [],
3186 locking.LEVEL_INSTANCE: self.wanted_names,
3188 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3190 self.wanted_names = None
3191 self.needed_locks = {
3192 locking.LEVEL_NODE: locking.ALL_SET,
3193 locking.LEVEL_INSTANCE: locking.ALL_SET,
3195 self.share_locks = _ShareAll()
3197 def DeclareLocks(self, level):
3198 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3199 self._LockInstancesNodes(primary_only=True)
3201 def CheckPrereq(self):
3202 """Check prerequisites.
3204 This only checks the optional instance list against the existing names.
3207 if self.wanted_names is None:
3208 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3210 self.wanted_instances = \
3211 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3213 def _EnsureChildSizes(self, disk):
3214 """Ensure children of the disk have the needed disk size.
3216 This is valid mainly for DRBD8 and fixes an issue where the
3217 children have smaller disk size.
3219 @param disk: an L{ganeti.objects.Disk} object
3222 if disk.dev_type == constants.LD_DRBD8:
3223 assert disk.children, "Empty children for DRBD8?"
3224 fchild = disk.children[0]
3225 mismatch = fchild.size < disk.size
3227 self.LogInfo("Child disk has size %d, parent %d, fixing",
3228 fchild.size, disk.size)
3229 fchild.size = disk.size
3231 # and we recurse on this child only, not on the metadev
3232 return self._EnsureChildSizes(fchild) or mismatch
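# Illustrative sketch (not part of the LU): for a DRBD8 disk the method
# grows the first child (the data LV) to the parent's size and recurses on
# that child only, so nested mismatches are also repaired.  With made-up
# numbers:
#
#   # parent DRBD8 disk: size 10240 MiB
#   # child data LV:     size 10112 MiB -> set to 10240, method returns True
#   # child metadata LV: untouched (not part of the recursion)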
3236 def Exec(self, feedback_fn):
3237 """Verify the size of cluster disks.
3240 # TODO: check child disks too
3241 # TODO: check differences in size between primary/secondary nodes
3243 for instance in self.wanted_instances:
3244 pnode = instance.primary_node
3245 if pnode not in per_node_disks:
3246 per_node_disks[pnode] = []
3247 for idx, disk in enumerate(instance.disks):
3248 per_node_disks[pnode].append((instance, idx, disk))
3251 for node, dskl in per_node_disks.items():
3252 newl = [v[2].Copy() for v in dskl]
3254 self.cfg.SetDiskID(dsk, node)
3255 result = self.rpc.call_blockdev_getsize(node, newl)
3257 self.LogWarning("Failure in blockdev_getsize call to node"
3258 " %s, ignoring", node)
3260 if len(result.payload) != len(dskl):
3261 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3262 " result.payload=%s", node, len(dskl), result.payload)
3263 self.LogWarning("Invalid result from node %s, ignoring node results",
3266 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3268 self.LogWarning("Disk %d of instance %s did not return size"
3269 " information, ignoring", idx, instance.name)
3271 if not isinstance(size, (int, long)):
3272 self.LogWarning("Disk %d of instance %s did not return valid"
3273 " size information, ignoring", idx, instance.name)
3276 if size != disk.size:
3277 self.LogInfo("Disk %d of instance %s has mismatched size,"
3278 " correcting: recorded %d, actual %d", idx,
3279 instance.name, disk.size, size)
3281 self.cfg.Update(instance, feedback_fn)
3282 changed.append((instance.name, idx, size))
3283 if self._EnsureChildSizes(disk):
3284 self.cfg.Update(instance, feedback_fn)
3285 changed.append((instance.name, idx, disk.size))
3289 class LUClusterRename(LogicalUnit):
3290 """Rename the cluster.
3293 HPATH = "cluster-rename"
3294 HTYPE = constants.HTYPE_CLUSTER
3296 def BuildHooksEnv(self):
3301 "OP_TARGET": self.cfg.GetClusterName(),
3302 "NEW_NAME": self.op.name,
3305 def BuildHooksNodes(self):
3306 """Build hooks nodes.
3309 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3311 def CheckPrereq(self):
3312 """Verify that the passed name is a valid one.
3315 hostname = netutils.GetHostname(name=self.op.name,
3316 family=self.cfg.GetPrimaryIPFamily())
3318 new_name = hostname.name
3319 self.ip = new_ip = hostname.ip
3320 old_name = self.cfg.GetClusterName()
3321 old_ip = self.cfg.GetMasterIP()
3322 if new_name == old_name and new_ip == old_ip:
3323 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3324 " cluster has changed",
3326 if new_ip != old_ip:
3327 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3328 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3329 " reachable on the network" %
3330 new_ip, errors.ECODE_NOTUNIQUE)
3332 self.op.name = new_name
3334 def Exec(self, feedback_fn):
3335 """Rename the cluster.
3338 clustername = self.op.name
3341 # shutdown the master IP
3342 master = self.cfg.GetMasterNode()
3343 result = self.rpc.call_node_stop_master(master, False)
3344 result.Raise("Could not disable the master role")
3347 cluster = self.cfg.GetClusterInfo()
3348 cluster.cluster_name = clustername
3349 cluster.master_ip = ip
3350 self.cfg.Update(cluster, feedback_fn)
3352 # update the known hosts file
3353 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3354 node_list = self.cfg.GetOnlineNodeList()
3356 node_list.remove(master)
3359 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3361 result = self.rpc.call_node_start_master(master, False, False)
3362 msg = result.fail_msg
3364 self.LogWarning("Could not re-enable the master role on"
3365 " the master, please restart manually: %s", msg)
3370 class LUClusterSetParams(LogicalUnit):
3371 """Change the parameters of the cluster.
3374 HPATH = "cluster-modify"
3375 HTYPE = constants.HTYPE_CLUSTER
3378 def CheckArguments(self):
3382 if self.op.uid_pool:
3383 uidpool.CheckUidPool(self.op.uid_pool)
3385 if self.op.add_uids:
3386 uidpool.CheckUidPool(self.op.add_uids)
3388 if self.op.remove_uids:
3389 uidpool.CheckUidPool(self.op.remove_uids)
3391 def ExpandNames(self):
3392 # FIXME: in the future maybe other cluster params won't require checking on
3393 # all nodes to be modified.
3394 self.needed_locks = {
3395 locking.LEVEL_NODE: locking.ALL_SET,
3397 self.share_locks[locking.LEVEL_NODE] = 1
3399 def BuildHooksEnv(self):
3404 "OP_TARGET": self.cfg.GetClusterName(),
3405 "NEW_VG_NAME": self.op.vg_name,
3408 def BuildHooksNodes(self):
3409 """Build hooks nodes.
3412 mn = self.cfg.GetMasterNode()
3415 def CheckPrereq(self):
3416 """Check prerequisites.
3418 This checks whether the given params don't conflict and
3419 if the given volume group is valid.
3422 if self.op.vg_name is not None and not self.op.vg_name:
3423 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3424 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3425 " instances exist", errors.ECODE_INVAL)
3427 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3428 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3429 raise errors.OpPrereqError("Cannot disable drbd helper while"
3430 " drbd-based instances exist",
3433 node_list = self.owned_locks(locking.LEVEL_NODE)
3435 # if vg_name not None, checks given volume group on all nodes
3437 vglist = self.rpc.call_vg_list(node_list)
3438 for node in node_list:
3439 msg = vglist[node].fail_msg
3441 # ignoring down node
3442 self.LogWarning("Error while gathering data on node %s"
3443 " (ignoring node): %s", node, msg)
3445 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3447 constants.MIN_VG_SIZE)
3449 raise errors.OpPrereqError("Error on node '%s': %s" %
3450 (node, vgstatus), errors.ECODE_ENVIRON)
3452 if self.op.drbd_helper:
3453 # checks given drbd helper on all nodes
3454 helpers = self.rpc.call_drbd_helper(node_list)
3455 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3457 self.LogInfo("Not checking drbd helper on offline node %s", node)
3459 msg = helpers[node].fail_msg
3461 raise errors.OpPrereqError("Error checking drbd helper on node"
3462 " '%s': %s" % (node, msg),
3463 errors.ECODE_ENVIRON)
3464 node_helper = helpers[node].payload
3465 if node_helper != self.op.drbd_helper:
3466 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3467 (node, node_helper), errors.ECODE_ENVIRON)
3469 self.cluster = cluster = self.cfg.GetClusterInfo()
3470 # validate params changes
3471 if self.op.beparams:
3472 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3473 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3475 if self.op.ndparams:
3476 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3477 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3479 # TODO: we need a more general way to handle resetting
3480 # cluster-level parameters to default values
3481 if self.new_ndparams["oob_program"] == "":
3482 self.new_ndparams["oob_program"] = \
3483 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3485 if self.op.nicparams:
3486 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3487 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3488 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3491 # check all instances for consistency
3492 for instance in self.cfg.GetAllInstancesInfo().values():
3493 for nic_idx, nic in enumerate(instance.nics):
3494 params_copy = copy.deepcopy(nic.nicparams)
3495 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3497 # check parameter syntax
3499 objects.NIC.CheckParameterSyntax(params_filled)
3500 except errors.ConfigurationError, err:
3501 nic_errors.append("Instance %s, nic/%d: %s" %
3502 (instance.name, nic_idx, err))
3504 # if we're moving instances to routed, check that they have an ip
3505 target_mode = params_filled[constants.NIC_MODE]
3506 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3507 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3508 " address" % (instance.name, nic_idx))
3510 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3511 "\n".join(nic_errors))
3513 # hypervisor list/parameters
3514 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3515 if self.op.hvparams:
3516 for hv_name, hv_dict in self.op.hvparams.items():
3517 if hv_name not in self.new_hvparams:
3518 self.new_hvparams[hv_name] = hv_dict
3520 self.new_hvparams[hv_name].update(hv_dict)
3522 # os hypervisor parameters
3523 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3525 for os_name, hvs in self.op.os_hvp.items():
3526 if os_name not in self.new_os_hvp:
3527 self.new_os_hvp[os_name] = hvs
3529 for hv_name, hv_dict in hvs.items():
3530 if hv_name not in self.new_os_hvp[os_name]:
3531 self.new_os_hvp[os_name][hv_name] = hv_dict
3533 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3536 self.new_osp = objects.FillDict(cluster.osparams, {})
3537 if self.op.osparams:
3538 for os_name, osp in self.op.osparams.items():
3539 if os_name not in self.new_osp:
3540 self.new_osp[os_name] = {}
3542 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3545 if not self.new_osp[os_name]:
3546 # we removed all parameters
3547 del self.new_osp[os_name]
3549 # check the parameter validity (remote check)
3550 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3551 os_name, self.new_osp[os_name])
3553 # changes to the hypervisor list
3554 if self.op.enabled_hypervisors is not None:
3555 self.hv_list = self.op.enabled_hypervisors
3556 for hv in self.hv_list:
3557 # if the hypervisor doesn't already exist in the cluster
3558 # hvparams, we initialize it to empty, and then (in both
3559 # cases) we make sure to fill the defaults, as we might not
3560 # have a complete defaults list if the hypervisor wasn't
3562 if hv not in new_hvp:
3564 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3565 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3567 self.hv_list = cluster.enabled_hypervisors
3569 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3570 # either the enabled list has changed, or the parameters have, validate
3571 for hv_name, hv_params in self.new_hvparams.items():
3572 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3573 (self.op.enabled_hypervisors and
3574 hv_name in self.op.enabled_hypervisors)):
3575 # either this is a new hypervisor, or its parameters have changed
3576 hv_class = hypervisor.GetHypervisor(hv_name)
3577 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3578 hv_class.CheckParameterSyntax(hv_params)
3579 _CheckHVParams(self, node_list, hv_name, hv_params)
3582 # no need to check any newly-enabled hypervisors, since the
3583 # defaults have already been checked in the above code-block
3584 for os_name, os_hvp in self.new_os_hvp.items():
3585 for hv_name, hv_params in os_hvp.items():
3586 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3587 # we need to fill in the new os_hvp on top of the actual hv_p
3588 cluster_defaults = self.new_hvparams.get(hv_name, {})
3589 new_osp = objects.FillDict(cluster_defaults, hv_params)
3590 hv_class = hypervisor.GetHypervisor(hv_name)
3591 hv_class.CheckParameterSyntax(new_osp)
3592 _CheckHVParams(self, node_list, hv_name, new_osp)
3594 if self.op.default_iallocator:
3595 alloc_script = utils.FindFile(self.op.default_iallocator,
3596 constants.IALLOCATOR_SEARCH_PATH,
3598 if alloc_script is None:
3599 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3600 " specified" % self.op.default_iallocator,
3603 def Exec(self, feedback_fn):
3604 """Change the parameters of the cluster.
3607 if self.op.vg_name is not None:
3608 new_volume = self.op.vg_name
3611 if new_volume != self.cfg.GetVGName():
3612 self.cfg.SetVGName(new_volume)
3614 feedback_fn("Cluster LVM configuration already in desired"
3615 " state, not changing")
3616 if self.op.drbd_helper is not None:
3617 new_helper = self.op.drbd_helper
3620 if new_helper != self.cfg.GetDRBDHelper():
3621 self.cfg.SetDRBDHelper(new_helper)
3623 feedback_fn("Cluster DRBD helper already in desired state,"
3625 if self.op.hvparams:
3626 self.cluster.hvparams = self.new_hvparams
3628 self.cluster.os_hvp = self.new_os_hvp
3629 if self.op.enabled_hypervisors is not None:
3630 self.cluster.hvparams = self.new_hvparams
3631 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3632 if self.op.beparams:
3633 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3634 if self.op.nicparams:
3635 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3636 if self.op.osparams:
3637 self.cluster.osparams = self.new_osp
3638 if self.op.ndparams:
3639 self.cluster.ndparams = self.new_ndparams
3641 if self.op.candidate_pool_size is not None:
3642 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3643 # we need to update the pool size here, otherwise the save will fail
3644 _AdjustCandidatePool(self, [])
3646 if self.op.maintain_node_health is not None:
3647 self.cluster.maintain_node_health = self.op.maintain_node_health
3649 if self.op.prealloc_wipe_disks is not None:
3650 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3652 if self.op.add_uids is not None:
3653 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3655 if self.op.remove_uids is not None:
3656 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3658 if self.op.uid_pool is not None:
3659 self.cluster.uid_pool = self.op.uid_pool
3661 if self.op.default_iallocator is not None:
3662 self.cluster.default_iallocator = self.op.default_iallocator
3664 if self.op.reserved_lvs is not None:
3665 self.cluster.reserved_lvs = self.op.reserved_lvs
3667 def helper_os(aname, mods, desc):
3669 lst = getattr(self.cluster, aname)
3670 for key, val in mods:
3671 if key == constants.DDM_ADD:
3673 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3676 elif key == constants.DDM_REMOVE:
3680 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3682 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3684 if self.op.hidden_os:
3685 helper_os("hidden_os", self.op.hidden_os, "hidden")
3687 if self.op.blacklisted_os:
3688 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3690 if self.op.master_netdev:
3691 master = self.cfg.GetMasterNode()
3692 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3693 self.cluster.master_netdev)
3694 result = self.rpc.call_node_stop_master(master, False)
3695 result.Raise("Could not disable the master ip")
3696 feedback_fn("Changing master_netdev from %s to %s" %
3697 (self.cluster.master_netdev, self.op.master_netdev))
3698 self.cluster.master_netdev = self.op.master_netdev
3700 self.cfg.Update(self.cluster, feedback_fn)
3702 if self.op.master_netdev:
3703 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3704 self.op.master_netdev)
3705 result = self.rpc.call_node_start_master(master, False, False)
3707 self.LogWarning("Could not re-enable the master ip on"
3708 " the master, please restart manually: %s",
3712 def _UploadHelper(lu, nodes, fname):
3713 """Helper for uploading a file and showing warnings.
3716 if os.path.exists(fname):
3717 result = lu.rpc.call_upload_file(nodes, fname)
3718 for to_node, to_result in result.items():
3719 msg = to_result.fail_msg
3721 msg = ("Copy of file %s to node %s failed: %s" %
3722 (fname, to_node, msg))
3723 lu.proc.LogWarning(msg)
3726 def _ComputeAncillaryFiles(cluster, redist):
3727 """Compute files external to Ganeti which need to be consistent.
3729 @type redist: boolean
3730 @param redist: Whether to include files which need to be redistributed
3733 # Compute files for all nodes
3735 constants.SSH_KNOWN_HOSTS_FILE,
3736 constants.CONFD_HMAC_KEY,
3737 constants.CLUSTER_DOMAIN_SECRET_FILE,
3741 files_all.update(constants.ALL_CERT_FILES)
3742 files_all.update(ssconf.SimpleStore().GetFileList())
3744 # we need to ship at least the RAPI certificate
3745 files_all.add(constants.RAPI_CERT_FILE)
3747 if cluster.modify_etc_hosts:
3748 files_all.add(constants.ETC_HOSTS)
3750 # Files which must either exist on all nodes or on none
3751 files_all_opt = set([
3752 constants.RAPI_USERS_FILE,
3755 # Files which should only be on master candidates
3758 files_mc.add(constants.CLUSTER_CONF_FILE)
3760 # Files which should only be on VM-capable nodes
3761 files_vm = set(filename
3762 for hv_name in cluster.enabled_hypervisors
3763 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3765 # Filenames must be unique
3766 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3767 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3768 "Found file listed in more than one file list"
3770 return (files_all, files_all_opt, files_mc, files_vm)
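# Illustrative sketch (not part of the original source): on a cluster with
# modify_etc_hosts enabled, the tuple returned above would roughly contain
#   files_all     -> known_hosts file, HMAC key, certificates, ssconf files,
#                    /etc/hosts
#   files_all_opt -> the RAPI users file (must exist on all nodes or on none)
#   files_mc      -> the cluster configuration file (master candidates only)
#   files_vm      -> hypervisor-specific ancillary files
# The exact contents depend on the enabled hypervisors and the redist flag.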
3773 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3774 """Distribute additional files which are part of the cluster configuration.
3776 ConfigWriter takes care of distributing the config and ssconf files, but
3777 there are more files which should be distributed to all nodes. This function
3778 makes sure those are copied.
3780 @param lu: calling logical unit
3781 @param additional_nodes: list of nodes not in the config to distribute to
3782 @type additional_vm: boolean
3783 @param additional_vm: whether the additional nodes are vm-capable or not
3786 # Gather target nodes
3787 cluster = lu.cfg.GetClusterInfo()
3788 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3790 online_nodes = lu.cfg.GetOnlineNodeList()
3791 vm_nodes = lu.cfg.GetVmCapableNodeList()
3793 if additional_nodes is not None:
3794 online_nodes.extend(additional_nodes)
3796 vm_nodes.extend(additional_nodes)
3798 # Never distribute to master node
3799 for nodelist in [online_nodes, vm_nodes]:
3800 if master_info.name in nodelist:
3801 nodelist.remove(master_info.name)
3804 (files_all, files_all_opt, files_mc, files_vm) = \
3805 _ComputeAncillaryFiles(cluster, True)
3807 # Never re-distribute configuration file from here
3808 assert not (constants.CLUSTER_CONF_FILE in files_all or
3809 constants.CLUSTER_CONF_FILE in files_vm)
3810 assert not files_mc, "Master candidates not handled in this function"
3813 (online_nodes, files_all),
3814 (online_nodes, files_all_opt),
3815 (vm_nodes, files_vm),
3819 for (node_list, files) in filemap:
3821 _UploadHelper(lu, node_list, fname)
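# Typical call sites: an LU that changed cluster-wide files simply runs
# "_RedistributeAncillaryFiles(self)" (see LUClusterRedistConf below), while
# node addition passes the new node explicitly, e.g.
# "_RedistributeAncillaryFiles(self, additional_nodes=[node], additional_vm=...)"
# (see LUNodeAdd.Exec), because the node is not yet part of the configuration.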
3824 class LUClusterRedistConf(NoHooksLU):
3825 """Force the redistribution of cluster configuration.
3827 This is a very simple LU.
3832 def ExpandNames(self):
3833 self.needed_locks = {
3834 locking.LEVEL_NODE: locking.ALL_SET,
3836 self.share_locks[locking.LEVEL_NODE] = 1
3838 def Exec(self, feedback_fn):
3839 """Redistribute the configuration.
3842 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3843 _RedistributeAncillaryFiles(self)
3846 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3847 """Sleep and poll for an instance's disk to sync.
3850 if not instance.disks or disks is not None and not disks:
3853 disks = _ExpandCheckDisks(instance, disks)
3856 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3858 node = instance.primary_node
3861 lu.cfg.SetDiskID(dev, node)
3863 # TODO: Convert to utils.Retry
3866 degr_retries = 10 # in seconds, as we sleep 1 second each time
3870 cumul_degraded = False
3871 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3872 msg = rstats.fail_msg
3874 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3877 raise errors.RemoteError("Can't contact node %s for mirror data,"
3878 " aborting." % node)
3881 rstats = rstats.payload
3883 for i, mstat in enumerate(rstats):
3885 lu.LogWarning("Can't compute data for node %s/%s",
3886 node, disks[i].iv_name)
3889 cumul_degraded = (cumul_degraded or
3890 (mstat.is_degraded and mstat.sync_percent is None))
3891 if mstat.sync_percent is not None:
3893 if mstat.estimated_time is not None:
3894 rem_time = ("%s remaining (estimated)" %
3895 utils.FormatSeconds(mstat.estimated_time))
3896 max_time = mstat.estimated_time
3898 rem_time = "no time estimate"
3899 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3900 (disks[i].iv_name, mstat.sync_percent, rem_time))
3902 # if we're done but degraded, let's do a few small retries, to
3903 # make sure we see a stable and not transient situation; therefore
3904 # we force restart of the loop
3905 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3906 logging.info("Degraded disks found, %d retries left", degr_retries)
3914 time.sleep(min(60, max_time))
3917 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3918 return not cumul_degraded
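# Usage sketch (an assumption mirroring callers elsewhere in this module):
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("Instance disks are degraded")
# i.e. a False return value means at least one disk was still degraded after
# the retries above.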
3921 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3922 """Check that mirrors are not degraded.
3924 The ldisk parameter, if True, will change the test from the
3925 is_degraded attribute (which represents overall non-ok status for
3926 the device(s)) to the ldisk (representing the local storage status).
3929 lu.cfg.SetDiskID(dev, node)
3933 if on_primary or dev.AssembleOnSecondary():
3934 rstats = lu.rpc.call_blockdev_find(node, dev)
3935 msg = rstats.fail_msg
3937 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3939 elif not rstats.payload:
3940 lu.LogWarning("Can't find disk on node %s", node)
3944 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3946 result = result and not rstats.payload.is_degraded
3949 for child in dev.children:
3950 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3955 class LUOobCommand(NoHooksLU):
3956 """Logical unit for OOB handling.
3960 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3962 def ExpandNames(self):
3963 """Gather locks we need.
3966 if self.op.node_names:
3967 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3968 lock_names = self.op.node_names
3970 lock_names = locking.ALL_SET
3972 self.needed_locks = {
3973 locking.LEVEL_NODE: lock_names,
3976 def CheckPrereq(self):
3977 """Check prerequisites.
3980 - the node exists in the configuration
3983 Any errors are signaled by raising errors.OpPrereqError.
3987 self.master_node = self.cfg.GetMasterNode()
3989 assert self.op.power_delay >= 0.0
3991 if self.op.node_names:
3992 if (self.op.command in self._SKIP_MASTER and
3993 self.master_node in self.op.node_names):
3994 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3995 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3997 if master_oob_handler:
3998 additional_text = ("run '%s %s %s' if you want to operate on the"
3999 " master regardless") % (master_oob_handler,
4003 additional_text = "it does not support out-of-band operations"
4005 raise errors.OpPrereqError(("Operating on the master node %s is not"
4006 " allowed for %s; %s") %
4007 (self.master_node, self.op.command,
4008 additional_text), errors.ECODE_INVAL)
4010 self.op.node_names = self.cfg.GetNodeList()
4011 if self.op.command in self._SKIP_MASTER:
4012 self.op.node_names.remove(self.master_node)
4014 if self.op.command in self._SKIP_MASTER:
4015 assert self.master_node not in self.op.node_names
4017 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4019 raise errors.OpPrereqError("Node %s not found" % node_name,
4022 self.nodes.append(node)
4024 if (not self.op.ignore_status and
4025 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4026 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4027 " not marked offline") % node_name,
4030 def Exec(self, feedback_fn):
4031 """Execute OOB and return result if we expect any.
4034 master_node = self.master_node
4037 for idx, node in enumerate(utils.NiceSort(self.nodes,
4038 key=lambda node: node.name)):
4039 node_entry = [(constants.RS_NORMAL, node.name)]
4040 ret.append(node_entry)
4042 oob_program = _SupportsOob(self.cfg, node)
4045 node_entry.append((constants.RS_UNAVAIL, None))
4048 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4049 self.op.command, oob_program, node.name)
4050 result = self.rpc.call_run_oob(master_node, oob_program,
4051 self.op.command, node.name,
4055 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4056 node.name, result.fail_msg)
4057 node_entry.append((constants.RS_NODATA, None))
4060 self._CheckPayload(result)
4061 except errors.OpExecError, err:
4062 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4064 node_entry.append((constants.RS_NODATA, None))
4066 if self.op.command == constants.OOB_HEALTH:
4067 # For health we should log important events
4068 for item, status in result.payload:
4069 if status in [constants.OOB_STATUS_WARNING,
4070 constants.OOB_STATUS_CRITICAL]:
4071 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4072 item, node.name, status)
4074 if self.op.command == constants.OOB_POWER_ON:
4076 elif self.op.command == constants.OOB_POWER_OFF:
4077 node.powered = False
4078 elif self.op.command == constants.OOB_POWER_STATUS:
4079 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4080 if powered != node.powered:
4081 logging.warning(("Recorded power state (%s) of node '%s' does not"
4082 " match actual power state (%s)"), node.powered,
4085 # For configuration changing commands we should update the node
4086 if self.op.command in (constants.OOB_POWER_ON,
4087 constants.OOB_POWER_OFF):
4088 self.cfg.Update(node, feedback_fn)
4090 node_entry.append((constants.RS_NORMAL, result.payload))
4092 if (self.op.command == constants.OOB_POWER_ON and
4093 idx < len(self.nodes) - 1):
4094 time.sleep(self.op.power_delay)
4098 def _CheckPayload(self, result):
4099 """Checks if the payload is valid.
4101 @param result: RPC result
4102 @raises errors.OpExecError: If payload is not valid
4106 if self.op.command == constants.OOB_HEALTH:
4107 if not isinstance(result.payload, list):
4108 errs.append("command 'health' is expected to return a list but got %s" %
4109 type(result.payload))
4111 for item, status in result.payload:
4112 if status not in constants.OOB_STATUSES:
4113 errs.append("health item '%s' has invalid status '%s'" %
4116 if self.op.command == constants.OOB_POWER_STATUS:
4117 if not isinstance(result.payload, dict):
4118 errs.append("power-status is expected to return a dict but got %s" %
4119 type(result.payload))
4121 if self.op.command in [
4122 constants.OOB_POWER_ON,
4123 constants.OOB_POWER_OFF,
4124 constants.OOB_POWER_CYCLE,
4126 if result.payload is not None:
4127 errs.append("%s is expected to not return payload but got '%s'" %
4128 (self.op.command, result.payload))
4131 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4132 utils.CommaJoin(errs))
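# Summary of the payload contract enforced above: "health" must return a list
# of (item, status) pairs with statuses from constants.OOB_STATUSES,
# "power-status" must return a dict containing OOB_POWER_STATUS_POWERED, and
# the power-on/off/cycle commands must not return any payload at all.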
4135 class _OsQuery(_QueryBase):
4136 FIELDS = query.OS_FIELDS
4138 def ExpandNames(self, lu):
4139 # Lock all nodes in shared mode
4140 # Temporary removal of locks, should be reverted later
4141 # TODO: reintroduce locks when they are lighter-weight
4142 lu.needed_locks = {}
4143 #self.share_locks[locking.LEVEL_NODE] = 1
4144 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4146 # The following variables interact with _QueryBase._GetNames
4148 self.wanted = self.names
4150 self.wanted = locking.ALL_SET
4152 self.do_locking = self.use_locking
4154 def DeclareLocks(self, lu, level):
4158 def _DiagnoseByOS(rlist):
4159 """Remaps a per-node return list into a per-OS, per-node dictionary
4161 @param rlist: a map with node names as keys and OS objects as values
4164 @return: a dictionary with osnames as keys and as value another
4165 map, with nodes as keys and tuples of (path, status, diagnose,
4166 variants, parameters, api_versions) as values, e.g.::
4168 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4169 (/srv/..., False, "invalid api")],
4170 "node2": [(/srv/..., True, "", [], [])]}
4175 # we build here the list of nodes that didn't fail the RPC (at RPC
4176 # level), so that nodes with a non-responding node daemon don't
4177 # make all OSes invalid
4178 good_nodes = [node_name for node_name in rlist
4179 if not rlist[node_name].fail_msg]
4180 for node_name, nr in rlist.items():
4181 if nr.fail_msg or not nr.payload:
4183 for (name, path, status, diagnose, variants,
4184 params, api_versions) in nr.payload:
4185 if name not in all_os:
4186 # build a list of nodes for this os containing empty lists
4187 # for each node in node_list
4189 for nname in good_nodes:
4190 all_os[name][nname] = []
4191 # convert params from [name, help] to (name, help)
4192 params = [tuple(v) for v in params]
4193 all_os[name][node_name].append((path, status, diagnose,
4194 variants, params, api_versions))
4197 def _GetQueryData(self, lu):
4198 """Computes the list of nodes and their attributes.
4201 # Locking is not used
4202 assert not (compat.any(lu.glm.is_owned(level)
4203 for level in locking.LEVELS
4204 if level != locking.LEVEL_CLUSTER) or
4205 self.do_locking or self.use_locking)
4207 valid_nodes = [node.name
4208 for node in lu.cfg.GetAllNodesInfo().values()
4209 if not node.offline and node.vm_capable]
4210 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4211 cluster = lu.cfg.GetClusterInfo()
4215 for (os_name, os_data) in pol.items():
4216 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4217 hidden=(os_name in cluster.hidden_os),
4218 blacklisted=(os_name in cluster.blacklisted_os))
4222 api_versions = set()
4224 for idx, osl in enumerate(os_data.values()):
4225 info.valid = bool(info.valid and osl and osl[0][1])
4229 (node_variants, node_params, node_api) = osl[0][3:6]
4232 variants.update(node_variants)
4233 parameters.update(node_params)
4234 api_versions.update(node_api)
4236 # Filter out inconsistent values
4237 variants.intersection_update(node_variants)
4238 parameters.intersection_update(node_params)
4239 api_versions.intersection_update(node_api)
4241 info.variants = list(variants)
4242 info.parameters = list(parameters)
4243 info.api_versions = list(api_versions)
4245 data[os_name] = info
4247 # Prepare data in requested order
4248 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4252 class LUOsDiagnose(NoHooksLU):
4253 """Logical unit for OS diagnose/query.
4259 def _BuildFilter(fields, names):
4260 """Builds a filter for querying OSes.
4263 name_filter = qlang.MakeSimpleFilter("name", names)
4265 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4266 # respective field is not requested
4267 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4268 for fname in ["hidden", "blacklisted"]
4269 if fname not in fields]
4270 if "valid" not in fields:
4271 status_filter.append([qlang.OP_TRUE, "valid"])
4274 status_filter.insert(0, qlang.OP_AND)
4276 status_filter = None
4278 if name_filter and status_filter:
4279 return [qlang.OP_AND, name_filter, status_filter]
4283 return status_filter
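# Illustrative example (not from the original source): with
# fields=["name", "variants"] and names=["debian-etch"], the filter built
# above would look roughly like
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-etch"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]
# so hidden, blacklisted and invalid OSes are excluded unless those fields
# were explicitly requested.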
4285 def CheckArguments(self):
4286 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4287 self.op.output_fields, False)
4289 def ExpandNames(self):
4290 self.oq.ExpandNames(self)
4292 def Exec(self, feedback_fn):
4293 return self.oq.OldStyleQuery(self)
4296 class LUNodeRemove(LogicalUnit):
4297 """Logical unit for removing a node.
4300 HPATH = "node-remove"
4301 HTYPE = constants.HTYPE_NODE
4303 def BuildHooksEnv(self):
4306 This doesn't run on the target node in the pre phase as a failed
4307 node would then be impossible to remove.
4311 "OP_TARGET": self.op.node_name,
4312 "NODE_NAME": self.op.node_name,
4315 def BuildHooksNodes(self):
4316 """Build hooks nodes.
4319 all_nodes = self.cfg.GetNodeList()
4321 all_nodes.remove(self.op.node_name)
4323 logging.warning("Node '%s', which is about to be removed, was not found"
4324 " in the list of all nodes", self.op.node_name)
4325 return (all_nodes, all_nodes)
4327 def CheckPrereq(self):
4328 """Check prerequisites.
4331 - the node exists in the configuration
4332 - it does not have primary or secondary instances
4333 - it's not the master
4335 Any errors are signaled by raising errors.OpPrereqError.
4338 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4339 node = self.cfg.GetNodeInfo(self.op.node_name)
4340 assert node is not None
4342 masternode = self.cfg.GetMasterNode()
4343 if node.name == masternode:
4344 raise errors.OpPrereqError("Node is the master node, failover to another"
4345 " node is required", errors.ECODE_INVAL)
4347 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4348 if node.name in instance.all_nodes:
4349 raise errors.OpPrereqError("Instance %s is still running on the node,"
4350 " please remove first" % instance_name,
4352 self.op.node_name = node.name
4355 def Exec(self, feedback_fn):
4356 """Removes the node from the cluster.
4360 logging.info("Stopping the node daemon and removing configs from node %s",
4363 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4365 # Promote nodes to master candidate as needed
4366 _AdjustCandidatePool(self, exceptions=[node.name])
4367 self.context.RemoveNode(node.name)
4369 # Run post hooks on the node before it's removed
4370 _RunPostHook(self, node.name)
4372 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4373 msg = result.fail_msg
4375 self.LogWarning("Errors encountered on the remote node while leaving"
4376 " the cluster: %s", msg)
4378 # Remove node from our /etc/hosts
4379 if self.cfg.GetClusterInfo().modify_etc_hosts:
4380 master_node = self.cfg.GetMasterNode()
4381 result = self.rpc.call_etc_hosts_modify(master_node,
4382 constants.ETC_HOSTS_REMOVE,
4384 result.Raise("Can't update hosts file with new host data")
4385 _RedistributeAncillaryFiles(self)
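# Note on ordering (summary of the steps above): the candidate pool is
# adjusted and the node removed from the configuration *before* the
# node_leave_cluster RPC, so a node that fails to clean itself up is still
# gone from the cluster's point of view; /etc/hosts and the ancillary files
# are refreshed last.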
4388 class _NodeQuery(_QueryBase):
4389 FIELDS = query.NODE_FIELDS
4391 def ExpandNames(self, lu):
4392 lu.needed_locks = {}
4393 lu.share_locks = _ShareAll()
4396 self.wanted = _GetWantedNodes(lu, self.names)
4398 self.wanted = locking.ALL_SET
4400 self.do_locking = (self.use_locking and
4401 query.NQ_LIVE in self.requested_data)
4404 # If any non-static field is requested we need to lock the nodes
4405 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4407 def DeclareLocks(self, lu, level):
4410 def _GetQueryData(self, lu):
4411 """Computes the list of nodes and their attributes.
4414 all_info = lu.cfg.GetAllNodesInfo()
4416 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4418 # Gather data as requested
4419 if query.NQ_LIVE in self.requested_data:
4420 # filter out non-vm_capable nodes
4421 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4423 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4424 lu.cfg.GetHypervisorType())
4425 live_data = dict((name, nresult.payload)
4426 for (name, nresult) in node_data.items()
4427 if not nresult.fail_msg and nresult.payload)
4431 if query.NQ_INST in self.requested_data:
4432 node_to_primary = dict([(name, set()) for name in nodenames])
4433 node_to_secondary = dict([(name, set()) for name in nodenames])
4435 inst_data = lu.cfg.GetAllInstancesInfo()
4437 for inst in inst_data.values():
4438 if inst.primary_node in node_to_primary:
4439 node_to_primary[inst.primary_node].add(inst.name)
4440 for secnode in inst.secondary_nodes:
4441 if secnode in node_to_secondary:
4442 node_to_secondary[secnode].add(inst.name)
4444 node_to_primary = None
4445 node_to_secondary = None
4447 if query.NQ_OOB in self.requested_data:
4448 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4449 for name, node in all_info.iteritems())
4453 if query.NQ_GROUP in self.requested_data:
4454 groups = lu.cfg.GetAllNodeGroupsInfo()
4458 return query.NodeQueryData([all_info[name] for name in nodenames],
4459 live_data, lu.cfg.GetMasterNode(),
4460 node_to_primary, node_to_secondary, groups,
4461 oob_support, lu.cfg.GetClusterInfo())
4464 class LUNodeQuery(NoHooksLU):
4465 """Logical unit for querying nodes.
4468 # pylint: disable=W0142
4471 def CheckArguments(self):
4472 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4473 self.op.output_fields, self.op.use_locking)
4475 def ExpandNames(self):
4476 self.nq.ExpandNames(self)
4478 def Exec(self, feedback_fn):
4479 return self.nq.OldStyleQuery(self)
4482 class LUNodeQueryvols(NoHooksLU):
4483 """Logical unit for getting volumes on node(s).
4487 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4488 _FIELDS_STATIC = utils.FieldSet("node")
4490 def CheckArguments(self):
4491 _CheckOutputFields(static=self._FIELDS_STATIC,
4492 dynamic=self._FIELDS_DYNAMIC,
4493 selected=self.op.output_fields)
4495 def ExpandNames(self):
4496 self.needed_locks = {}
4497 self.share_locks[locking.LEVEL_NODE] = 1
4498 if not self.op.nodes:
4499 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4501 self.needed_locks[locking.LEVEL_NODE] = \
4502 _GetWantedNodes(self, self.op.nodes)
4504 def Exec(self, feedback_fn):
4505 """Computes the list of nodes and their attributes.
4508 nodenames = self.owned_locks(locking.LEVEL_NODE)
4509 volumes = self.rpc.call_node_volumes(nodenames)
4511 ilist = self.cfg.GetAllInstancesInfo()
4512 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4515 for node in nodenames:
4516 nresult = volumes[node]
4519 msg = nresult.fail_msg
4521 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4524 node_vols = sorted(nresult.payload,
4525 key=operator.itemgetter("dev"))
4527 for vol in node_vols:
4529 for field in self.op.output_fields:
4532 elif field == "phys":
4536 elif field == "name":
4538 elif field == "size":
4539 val = int(float(vol["size"]))
4540 elif field == "instance":
4541 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4543 raise errors.ParameterError(field)
4544 node_output.append(str(val))
4546 output.append(node_output)
4551 class LUNodeQueryStorage(NoHooksLU):
4552 """Logical unit for getting information on storage units on node(s).
4555 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4558 def CheckArguments(self):
4559 _CheckOutputFields(static=self._FIELDS_STATIC,
4560 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4561 selected=self.op.output_fields)
4563 def ExpandNames(self):
4564 self.needed_locks = {}
4565 self.share_locks[locking.LEVEL_NODE] = 1
4568 self.needed_locks[locking.LEVEL_NODE] = \
4569 _GetWantedNodes(self, self.op.nodes)
4571 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4573 def Exec(self, feedback_fn):
4574 """Computes the list of nodes and their attributes.
4577 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4579 # Always get name to sort by
4580 if constants.SF_NAME in self.op.output_fields:
4581 fields = self.op.output_fields[:]
4583 fields = [constants.SF_NAME] + self.op.output_fields
4585 # Never ask for node or type as it's only known to the LU
4586 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4587 while extra in fields:
4588 fields.remove(extra)
4590 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4591 name_idx = field_idx[constants.SF_NAME]
4593 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4594 data = self.rpc.call_storage_list(self.nodes,
4595 self.op.storage_type, st_args,
4596 self.op.name, fields)
4600 for node in utils.NiceSort(self.nodes):
4601 nresult = data[node]
4605 msg = nresult.fail_msg
4607 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4610 rows = dict([(row[name_idx], row) for row in nresult.payload])
4612 for name in utils.NiceSort(rows.keys()):
4617 for field in self.op.output_fields:
4618 if field == constants.SF_NODE:
4620 elif field == constants.SF_TYPE:
4621 val = self.op.storage_type
4622 elif field in field_idx:
4623 val = row[field_idx[field]]
4625 raise errors.ParameterError(field)
4634 class _InstanceQuery(_QueryBase):
4635 FIELDS = query.INSTANCE_FIELDS
4637 def ExpandNames(self, lu):
4638 lu.needed_locks = {}
4639 lu.share_locks = _ShareAll()
4642 self.wanted = _GetWantedInstances(lu, self.names)
4644 self.wanted = locking.ALL_SET
4646 self.do_locking = (self.use_locking and
4647 query.IQ_LIVE in self.requested_data)
4649 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4650 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4651 lu.needed_locks[locking.LEVEL_NODE] = []
4652 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4654 self.do_grouplocks = (self.do_locking and
4655 query.IQ_NODES in self.requested_data)
4657 def DeclareLocks(self, lu, level):
4659 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4660 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4662 # Lock all groups used by instances optimistically; this requires going
4663 # via the node before it's locked, requiring verification later on
4664 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4666 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4667 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4668 elif level == locking.LEVEL_NODE:
4669 lu._LockInstancesNodes() # pylint: disable=W0212
4672 def _CheckGroupLocks(lu):
4673 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4674 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4676 # Check if node groups for locked instances are still correct
4677 for instance_name in owned_instances:
4678 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4680 def _GetQueryData(self, lu):
4681 """Computes the list of instances and their attributes.
4684 if self.do_grouplocks:
4685 self._CheckGroupLocks(lu)
4687 cluster = lu.cfg.GetClusterInfo()
4688 all_info = lu.cfg.GetAllInstancesInfo()
4690 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4692 instance_list = [all_info[name] for name in instance_names]
4693 nodes = frozenset(itertools.chain(*(inst.all_nodes
4694 for inst in instance_list)))
4695 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4698 wrongnode_inst = set()
4700 # Gather data as requested
4701 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4703 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4705 result = node_data[name]
4707 # offline nodes will be in both lists
4708 assert result.fail_msg
4709 offline_nodes.append(name)
4711 bad_nodes.append(name)
4712 elif result.payload:
4713 for inst in result.payload:
4714 if inst in all_info:
4715 if all_info[inst].primary_node == name:
4716 live_data.update(result.payload)
4718 wrongnode_inst.add(inst)
4720 # orphan instance; we don't list it here as we don't
4721 # handle this case yet in the output of instance listing
4722 logging.warning("Orphan instance '%s' found on node %s",
4724 # else no instance is alive
4728 if query.IQ_DISKUSAGE in self.requested_data:
4729 disk_usage = dict((inst.name,
4730 _ComputeDiskSize(inst.disk_template,
4731 [{constants.IDISK_SIZE: disk.size}
4732 for disk in inst.disks]))
4733 for inst in instance_list)
4737 if query.IQ_CONSOLE in self.requested_data:
4739 for inst in instance_list:
4740 if inst.name in live_data:
4741 # Instance is running
4742 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4744 consinfo[inst.name] = None
4745 assert set(consinfo.keys()) == set(instance_names)
4749 if query.IQ_NODES in self.requested_data:
4750 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4752 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4753 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4754 for uuid in set(map(operator.attrgetter("group"),
4760 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4761 disk_usage, offline_nodes, bad_nodes,
4762 live_data, wrongnode_inst, consinfo,
4766 class LUQuery(NoHooksLU):
4767 """Query for resources/items of a certain kind.
4770 # pylint: disable=W0142
4773 def CheckArguments(self):
4774 qcls = _GetQueryImplementation(self.op.what)
4776 self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4778 def ExpandNames(self):
4779 self.impl.ExpandNames(self)
4781 def DeclareLocks(self, level):
4782 self.impl.DeclareLocks(self, level)
4784 def Exec(self, feedback_fn):
4785 return self.impl.NewStyleQuery(self)
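# Usage sketch (an assumption, not from the original source): a generic query
# for node names and roles could be submitted as an opcode along the lines of
#   opcodes.OpQuery(what=constants.QR_NODE, fields=["name", "role"],
#                   filter=None)
# which is dispatched here via _GetQueryImplementation to the matching
# _QueryBase subclass (e.g. _NodeQuery above).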
4788 class LUQueryFields(NoHooksLU):
4789 """Query for resources/items of a certain kind.
4792 # pylint: disable=W0142
4795 def CheckArguments(self):
4796 self.qcls = _GetQueryImplementation(self.op.what)
4798 def ExpandNames(self):
4799 self.needed_locks = {}
4801 def Exec(self, feedback_fn):
4802 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4805 class LUNodeModifyStorage(NoHooksLU):
4806 """Logical unit for modifying a storage volume on a node.
4811 def CheckArguments(self):
4812 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4814 storage_type = self.op.storage_type
4817 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4819 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4820 " modified" % storage_type,
4823 diff = set(self.op.changes.keys()) - modifiable
4825 raise errors.OpPrereqError("The following fields can not be modified for"
4826 " storage units of type '%s': %r" %
4827 (storage_type, list(diff)),
4830 def ExpandNames(self):
4831 self.needed_locks = {
4832 locking.LEVEL_NODE: self.op.node_name,
4835 def Exec(self, feedback_fn):
4836 """Modifies the storage unit on the given node.
4839 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4840 result = self.rpc.call_storage_modify(self.op.node_name,
4841 self.op.storage_type, st_args,
4842 self.op.name, self.op.changes)
4843 result.Raise("Failed to modify storage unit '%s' on %s" %
4844 (self.op.name, self.op.node_name))
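# Illustrative request (an assumption, not from the original source): marking
# an LVM physical volume as unallocatable might be expressed as
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sdb1",
#                               changes={constants.SF_ALLOCATABLE: False})
# CheckArguments above only accepts fields listed in
# constants.MODIFIABLE_STORAGE_FIELDS for the given storage type.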
4847 class LUNodeAdd(LogicalUnit):
4848 """Logical unit for adding node to the cluster.
4852 HTYPE = constants.HTYPE_NODE
4853 _NFLAGS = ["master_capable", "vm_capable"]
4855 def CheckArguments(self):
4856 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4857 # validate/normalize the node name
4858 self.hostname = netutils.GetHostname(name=self.op.node_name,
4859 family=self.primary_ip_family)
4860 self.op.node_name = self.hostname.name
4862 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4863 raise errors.OpPrereqError("Cannot readd the master node",
4866 if self.op.readd and self.op.group:
4867 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4868 " being readded", errors.ECODE_INVAL)
4870 def BuildHooksEnv(self):
4873 This will run on all nodes before, and on all nodes + the new node after.
4877 "OP_TARGET": self.op.node_name,
4878 "NODE_NAME": self.op.node_name,
4879 "NODE_PIP": self.op.primary_ip,
4880 "NODE_SIP": self.op.secondary_ip,
4881 "MASTER_CAPABLE": str(self.op.master_capable),
4882 "VM_CAPABLE": str(self.op.vm_capable),
4885 def BuildHooksNodes(self):
4886 """Build hooks nodes.
4889 # Exclude added node
4890 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4891 post_nodes = pre_nodes + [self.op.node_name, ]
4893 return (pre_nodes, post_nodes)
4895 def CheckPrereq(self):
4896 """Check prerequisites.
4899 - the new node is not already in the config
4901 - its parameters (single/dual homed) matches the cluster
4903 Any errors are signaled by raising errors.OpPrereqError.
4907 hostname = self.hostname
4908 node = hostname.name
4909 primary_ip = self.op.primary_ip = hostname.ip
4910 if self.op.secondary_ip is None:
4911 if self.primary_ip_family == netutils.IP6Address.family:
4912 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4913 " IPv4 address must be given as secondary",
4915 self.op.secondary_ip = primary_ip
4917 secondary_ip = self.op.secondary_ip
4918 if not netutils.IP4Address.IsValid(secondary_ip):
4919 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4920 " address" % secondary_ip, errors.ECODE_INVAL)
4922 node_list = cfg.GetNodeList()
4923 if not self.op.readd and node in node_list:
4924 raise errors.OpPrereqError("Node %s is already in the configuration" %
4925 node, errors.ECODE_EXISTS)
4926 elif self.op.readd and node not in node_list:
4927 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4930 self.changed_primary_ip = False
4932 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4933 if self.op.readd and node == existing_node_name:
4934 if existing_node.secondary_ip != secondary_ip:
4935 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4936 " address configuration as before",
4938 if existing_node.primary_ip != primary_ip:
4939 self.changed_primary_ip = True
4943 if (existing_node.primary_ip == primary_ip or
4944 existing_node.secondary_ip == primary_ip or
4945 existing_node.primary_ip == secondary_ip or
4946 existing_node.secondary_ip == secondary_ip):
4947 raise errors.OpPrereqError("New node ip address(es) conflict with"
4948 " existing node %s" % existing_node.name,
4949 errors.ECODE_NOTUNIQUE)
4951 # After this 'if' block, None is no longer a valid value for the
4952 # _capable op attributes
4954 old_node = self.cfg.GetNodeInfo(node)
4955 assert old_node is not None, "Can't retrieve locked node %s" % node
4956 for attr in self._NFLAGS:
4957 if getattr(self.op, attr) is None:
4958 setattr(self.op, attr, getattr(old_node, attr))
4960 for attr in self._NFLAGS:
4961 if getattr(self.op, attr) is None:
4962 setattr(self.op, attr, True)
4964 if self.op.readd and not self.op.vm_capable:
4965 pri, sec = cfg.GetNodeInstances(node)
4967 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4968 " flag set to false, but it already holds"
4969 " instances" % node,
4972 # check that the type of the node (single versus dual homed) is the
4973 # same as for the master
4974 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4975 master_singlehomed = myself.secondary_ip == myself.primary_ip
4976 newbie_singlehomed = secondary_ip == primary_ip
4977 if master_singlehomed != newbie_singlehomed:
4978 if master_singlehomed:
4979 raise errors.OpPrereqError("The master has no secondary ip but the"
4980 " new node has one",
4983 raise errors.OpPrereqError("The master has a secondary ip but the"
4984 " new node doesn't have one",
4987 # checks reachability
4988 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4989 raise errors.OpPrereqError("Node not reachable by ping",
4990 errors.ECODE_ENVIRON)
4992 if not newbie_singlehomed:
4993 # check reachability from my secondary ip to newbie's secondary ip
4994 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4995 source=myself.secondary_ip):
4996 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4997 " based ping to node daemon port",
4998 errors.ECODE_ENVIRON)
5005 if self.op.master_capable:
5006 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5008 self.master_candidate = False
5011 self.new_node = old_node
5013 node_group = cfg.LookupNodeGroup(self.op.group)
5014 self.new_node = objects.Node(name=node,
5015 primary_ip=primary_ip,
5016 secondary_ip=secondary_ip,
5017 master_candidate=self.master_candidate,
5018 offline=False, drained=False,
5021 if self.op.ndparams:
5022 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5024 def Exec(self, feedback_fn):
5025 """Adds the new node to the cluster.
5028 new_node = self.new_node
5029 node = new_node.name
5031 # We are adding a new node, so we assume it's powered
5032 new_node.powered = True
5034 # for re-adds, reset the offline/drained/master-candidate flags;
5035 # we need to reset here, otherwise offline would prevent RPC calls
5036 # later in the procedure; this also means that if the re-add
5037 # fails, we are left with a non-offlined, broken node
5039 new_node.drained = new_node.offline = False # pylint: disable=W0201
5040 self.LogInfo("Readding a node, the offline/drained flags were reset")
5041 # if we demote the node, we do cleanup later in the procedure
5042 new_node.master_candidate = self.master_candidate
5043 if self.changed_primary_ip:
5044 new_node.primary_ip = self.op.primary_ip
5046 # copy the master/vm_capable flags
5047 for attr in self._NFLAGS:
5048 setattr(new_node, attr, getattr(self.op, attr))
5050 # notify the user about any possible mc promotion
5051 if new_node.master_candidate:
5052 self.LogInfo("Node will be a master candidate")
5054 if self.op.ndparams:
5055 new_node.ndparams = self.op.ndparams
5057 new_node.ndparams = {}
5059 # check connectivity
5060 result = self.rpc.call_version([node])[node]
5061 result.Raise("Can't get version information from node %s" % node)
5062 if constants.PROTOCOL_VERSION == result.payload:
5063 logging.info("Communication to node %s fine, sw version %s match",
5064 node, result.payload)
5066 raise errors.OpExecError("Version mismatch master version %s,"
5067 " node version %s" %
5068 (constants.PROTOCOL_VERSION, result.payload))
5070 # Add node to our /etc/hosts, and add key to known_hosts
5071 if self.cfg.GetClusterInfo().modify_etc_hosts:
5072 master_node = self.cfg.GetMasterNode()
5073 result = self.rpc.call_etc_hosts_modify(master_node,
5074 constants.ETC_HOSTS_ADD,
5077 result.Raise("Can't update hosts file with new host data")
5079 if new_node.secondary_ip != new_node.primary_ip:
5080 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5083 node_verify_list = [self.cfg.GetMasterNode()]
5084 node_verify_param = {
5085 constants.NV_NODELIST: ([node], {}),
5086 # TODO: do a node-net-test as well?
5089 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5090 self.cfg.GetClusterName())
5091 for verifier in node_verify_list:
5092 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5093 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5095 for failed in nl_payload:
5096 feedback_fn("ssh/hostname verification failed"
5097 " (checking from %s): %s" %
5098 (verifier, nl_payload[failed]))
5099 raise errors.OpExecError("ssh/hostname verification failed")
5102 _RedistributeAncillaryFiles(self)
5103 self.context.ReaddNode(new_node)
5104 # make sure we redistribute the config
5105 self.cfg.Update(new_node, feedback_fn)
5106 # and make sure the new node will not have old files around
5107 if not new_node.master_candidate:
5108 result = self.rpc.call_node_demote_from_mc(new_node.name)
5109 msg = result.fail_msg
5111 self.LogWarning("Node failed to demote itself from master"
5112 " candidate status: %s" % msg)
5114 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5115 additional_vm=self.op.vm_capable)
5116 self.context.AddNode(new_node, self.proc.GetECId())
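# Summary of the above: a re-added node reuses its existing Node object and is
# re-announced via ReaddNode, while a brand-new node is registered through
# AddNode under the current execution context id; in both cases the ancillary
# files are pushed to the node beforehand.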
5119 class LUNodeSetParams(LogicalUnit):
5120 """Modifies the parameters of a node.
5122 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5123 to the node role (as _ROLE_*)
5124 @cvar _R2F: a dictionary from node role to tuples of flags
5125 @cvar _FLAGS: a list of attribute names corresponding to the flags
5128 HPATH = "node-modify"
5129 HTYPE = constants.HTYPE_NODE
5131 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5133 (True, False, False): _ROLE_CANDIDATE,
5134 (False, True, False): _ROLE_DRAINED,
5135 (False, False, True): _ROLE_OFFLINE,
5136 (False, False, False): _ROLE_REGULAR,
5138 _R2F = dict((v, k) for k, v in _F2R.items())
5139 _FLAGS = ["master_candidate", "drained", "offline"]
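# For illustration: a node whose (master_candidate, drained, offline) flags
# are (True, False, False) maps to _ROLE_CANDIDATE via _F2R, and
# _R2F[_ROLE_CANDIDATE] yields the flag tuple back; CheckPrereq and Exec below
# use these tables to turn individual flag changes into a single role
# transition.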
5141 def CheckArguments(self):
5142 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5143 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5144 self.op.master_capable, self.op.vm_capable,
5145 self.op.secondary_ip, self.op.ndparams]
5146 if all_mods.count(None) == len(all_mods):
5147 raise errors.OpPrereqError("Please pass at least one modification",
5149 if all_mods.count(True) > 1:
5150 raise errors.OpPrereqError("Can't set the node into more than one"
5151 " state at the same time",
5154 # Boolean value that tells us whether we might be demoting from MC
5155 self.might_demote = (self.op.master_candidate == False or
5156 self.op.offline == True or
5157 self.op.drained == True or
5158 self.op.master_capable == False)
5160 if self.op.secondary_ip:
5161 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5162 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5163 " address" % self.op.secondary_ip,
5166 self.lock_all = self.op.auto_promote and self.might_demote
5167 self.lock_instances = self.op.secondary_ip is not None
5169 def ExpandNames(self):
5171 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5173 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5175 if self.lock_instances:
5176 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5178 def DeclareLocks(self, level):
5179 # If we have locked all instances, before waiting to lock nodes, release
5180 # all the ones living on nodes unrelated to the current operation.
5181 if level == locking.LEVEL_NODE and self.lock_instances:
5182 self.affected_instances = []
5183 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5186 # Build list of instances to release
5187 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5188 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5189 if (instance.disk_template in constants.DTS_INT_MIRROR and
5190 self.op.node_name in instance.all_nodes):
5191 instances_keep.append(instance_name)
5192 self.affected_instances.append(instance)
5194 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5196 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5197 set(instances_keep))
5199 def BuildHooksEnv(self):
5202 This runs on the master node.
5206 "OP_TARGET": self.op.node_name,
5207 "MASTER_CANDIDATE": str(self.op.master_candidate),
5208 "OFFLINE": str(self.op.offline),
5209 "DRAINED": str(self.op.drained),
5210 "MASTER_CAPABLE": str(self.op.master_capable),
5211 "VM_CAPABLE": str(self.op.vm_capable),
5214 def BuildHooksNodes(self):
5215 """Build hooks nodes.
5218 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5221 def CheckPrereq(self):
5222 """Check prerequisites.
5224 This only checks the instance list against the existing names.
5227 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5229 if (self.op.master_candidate is not None or
5230 self.op.drained is not None or
5231 self.op.offline is not None):
5232 # we can't change the master's node flags
5233 if self.op.node_name == self.cfg.GetMasterNode():
5234 raise errors.OpPrereqError("The master role can be changed"
5235 " only via master-failover",
5238 if self.op.master_candidate and not node.master_capable:
5239 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5240 " it a master candidate" % node.name,
5243 if self.op.vm_capable == False:
5244 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5246 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5247 " the vm_capable flag" % node.name,
5250 if node.master_candidate and self.might_demote and not self.lock_all:
5251 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5252 # check if after removing the current node, we're missing master candidates
5254 (mc_remaining, mc_should, _) = \
5255 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5256 if mc_remaining < mc_should:
5257 raise errors.OpPrereqError("Not enough master candidates, please"
5258 " pass auto promote option to allow"
5259 " promotion", errors.ECODE_STATE)
5261 self.old_flags = old_flags = (node.master_candidate,
5262 node.drained, node.offline)
5263 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5264 self.old_role = old_role = self._F2R[old_flags]
5266 # Check for ineffective changes
5267 for attr in self._FLAGS:
5268 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5269 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5270 setattr(self.op, attr, None)
5272 # Past this point, any flag change to False means a transition
5273 # away from the respective state, as only real changes are kept
5275 # TODO: We might query the real power state if it supports OOB
5276 if _SupportsOob(self.cfg, node):
5277 if self.op.offline is False and not (node.powered or
5278 self.op.powered == True):
5279 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5280 " offline status can be reset") %
5282 elif self.op.powered is not None:
5283 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5284 " as it does not support out-of-band"
5285 " handling") % self.op.node_name)
5287 # If we're being deofflined/drained, we'll MC ourself if needed
5288 if (self.op.drained == False or self.op.offline == False or
5289 (self.op.master_capable and not node.master_capable)):
5290 if _DecideSelfPromotion(self):
5291 self.op.master_candidate = True
5292 self.LogInfo("Auto-promoting node to master candidate")
5294 # If we're no longer master capable, we'll demote ourselves from MC
5295 if self.op.master_capable == False and node.master_candidate:
5296 self.LogInfo("Demoting from master candidate")
5297 self.op.master_candidate = False
5300 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5301 if self.op.master_candidate:
5302 new_role = self._ROLE_CANDIDATE
5303 elif self.op.drained:
5304 new_role = self._ROLE_DRAINED
5305 elif self.op.offline:
5306 new_role = self._ROLE_OFFLINE
5307 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5308 # False is still in new flags, which means we're un-setting (the
5310 new_role = self._ROLE_REGULAR
5311 else: # no new flags, nothing, keep old role
5314 self.new_role = new_role
5316 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5317 # Trying to transition out of offline status
5318 result = self.rpc.call_version([node.name])[node.name]
5320 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5321 " to report its version: %s" %
5322 (node.name, result.fail_msg),
5325 self.LogWarning("Transitioning node from offline to online state"
5326 " without using re-add. Please make sure the node"
5329 if self.op.secondary_ip:
5330 # Ok even without locking, because this can't be changed by any LU
5331 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5332 master_singlehomed = master.secondary_ip == master.primary_ip
5333 if master_singlehomed and self.op.secondary_ip:
5334 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5335 " homed cluster", errors.ECODE_INVAL)
5338 if self.affected_instances:
5339 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5340 " node has instances (%s) configured"
5341 " to use it" % self.affected_instances)
5343 # On online nodes, check that no instances are running, and that
5344 # the node has the new ip and we can reach it.
5345 for instance in self.affected_instances:
5346 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5348 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5349 if master.name != node.name:
5350 # check reachability from master secondary ip to new secondary ip
5351 if not netutils.TcpPing(self.op.secondary_ip,
5352 constants.DEFAULT_NODED_PORT,
5353 source=master.secondary_ip):
5354 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5355 " based ping to node daemon port",
5356 errors.ECODE_ENVIRON)
5358 if self.op.ndparams:
5359 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5360 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5361 self.new_ndparams = new_ndparams
5363 def Exec(self, feedback_fn):
5368 old_role = self.old_role
5369 new_role = self.new_role
5373 if self.op.ndparams:
5374 node.ndparams = self.new_ndparams
5376 if self.op.powered is not None:
5377 node.powered = self.op.powered
5379 for attr in ["master_capable", "vm_capable"]:
5380 val = getattr(self.op, attr)
5382 setattr(node, attr, val)
5383 result.append((attr, str(val)))
5385 if new_role != old_role:
5386 # Tell the node to demote itself, if no longer MC and not offline
5387 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5388 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5390 self.LogWarning("Node failed to demote itself: %s", msg)
5392 new_flags = self._R2F[new_role]
5393 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5395 result.append((desc, str(nf)))
5396 (node.master_candidate, node.drained, node.offline) = new_flags
5398 # we locked all nodes, we adjust the CP before updating this node
5400 _AdjustCandidatePool(self, [node.name])
5402 if self.op.secondary_ip:
5403 node.secondary_ip = self.op.secondary_ip
5404 result.append(("secondary_ip", self.op.secondary_ip))
5406 # this will trigger configuration file update, if needed
5407 self.cfg.Update(node, feedback_fn)
5409 # this will trigger job queue propagation or cleanup if the mc flag changed
5411 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5412 self.context.ReaddNode(node)
5417 class LUNodePowercycle(NoHooksLU):
5418 """Powercycles a node.
5423 def CheckArguments(self):
5424 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5425 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5426 raise errors.OpPrereqError("The node is the master and the force"
5427 " parameter was not set",
5430 def ExpandNames(self):
5431 """Locking for PowercycleNode.
5433 This is a last-resort option and shouldn't block on other
5434 jobs. Therefore, we grab no locks.
5437 self.needed_locks = {}
5439 def Exec(self, feedback_fn):
5443 result = self.rpc.call_node_powercycle(self.op.node_name,
5444 self.cfg.GetHypervisorType())
5445 result.Raise("Failed to schedule the reboot")
5446 return result.payload
5449 class LUClusterQuery(NoHooksLU):
5450 """Query cluster configuration.
5455 def ExpandNames(self):
5456 self.needed_locks = {}
5458 def Exec(self, feedback_fn):
5459 """Return cluster config.
5462 cluster = self.cfg.GetClusterInfo()
5465 # Filter just for enabled hypervisors
5466 for os_name, hv_dict in cluster.os_hvp.items():
5467 os_hvp[os_name] = {}
5468 for hv_name, hv_params in hv_dict.items():
5469 if hv_name in cluster.enabled_hypervisors:
5470 os_hvp[os_name][hv_name] = hv_params
5472 # Convert ip_family to ip_version
5473 primary_ip_version = constants.IP4_VERSION
5474 if cluster.primary_ip_family == netutils.IP6Address.family:
5475 primary_ip_version = constants.IP6_VERSION
5478 "software_version": constants.RELEASE_VERSION,
5479 "protocol_version": constants.PROTOCOL_VERSION,
5480 "config_version": constants.CONFIG_VERSION,
5481 "os_api_version": max(constants.OS_API_VERSIONS),
5482 "export_version": constants.EXPORT_VERSION,
5483 "architecture": (platform.architecture()[0], platform.machine()),
5484 "name": cluster.cluster_name,
5485 "master": cluster.master_node,
5486 "default_hypervisor": cluster.enabled_hypervisors[0],
5487 "enabled_hypervisors": cluster.enabled_hypervisors,
5488 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5489 for hypervisor_name in cluster.enabled_hypervisors]),
5491 "beparams": cluster.beparams,
5492 "osparams": cluster.osparams,
5493 "nicparams": cluster.nicparams,
5494 "ndparams": cluster.ndparams,
5495 "candidate_pool_size": cluster.candidate_pool_size,
5496 "master_netdev": cluster.master_netdev,
5497 "volume_group_name": cluster.volume_group_name,
5498 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5499 "file_storage_dir": cluster.file_storage_dir,
5500 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5501 "maintain_node_health": cluster.maintain_node_health,
5502 "ctime": cluster.ctime,
5503 "mtime": cluster.mtime,
5504 "uuid": cluster.uuid,
5505 "tags": list(cluster.GetTags()),
5506 "uid_pool": cluster.uid_pool,
5507 "default_iallocator": cluster.default_iallocator,
5508 "reserved_lvs": cluster.reserved_lvs,
5509 "primary_ip_version": primary_ip_version,
5510 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5511 "hidden_os": cluster.hidden_os,
5512 "blacklisted_os": cluster.blacklisted_os,
5518 class LUClusterConfigQuery(NoHooksLU):
5519 """Return configuration values.
5523 _FIELDS_DYNAMIC = utils.FieldSet()
5524 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5525 "watcher_pause", "volume_group_name")
5527 def CheckArguments(self):
5528 _CheckOutputFields(static=self._FIELDS_STATIC,
5529 dynamic=self._FIELDS_DYNAMIC,
5530 selected=self.op.output_fields)
5532 def ExpandNames(self):
5533 self.needed_locks = {}
5535 def Exec(self, feedback_fn):
5536 """Return the requested cluster configuration values.
5540 for field in self.op.output_fields:
5541 if field == "cluster_name":
5542 entry = self.cfg.GetClusterName()
5543 elif field == "master_node":
5544 entry = self.cfg.GetMasterNode()
5545 elif field == "drain_flag":
5546 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5547 elif field == "watcher_pause":
5548 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5549 elif field == "volume_group_name":
5550 entry = self.cfg.GetVGName()
5552 raise errors.ParameterError(field)
5553 values.append(entry)
5557 class LUInstanceActivateDisks(NoHooksLU):
5558 """Bring up an instance's disks.
5563 def ExpandNames(self):
5564 self._ExpandAndLockInstance()
5565 self.needed_locks[locking.LEVEL_NODE] = []
5566 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5568 def DeclareLocks(self, level):
5569 if level == locking.LEVEL_NODE:
5570 self._LockInstancesNodes()
5572 def CheckPrereq(self):
5573 """Check prerequisites.
5575 This checks that the instance is in the cluster.
5578 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5579 assert self.instance is not None, \
5580 "Cannot retrieve locked instance %s" % self.op.instance_name
5581 _CheckNodeOnline(self, self.instance.primary_node)
5583 def Exec(self, feedback_fn):
5584 """Activate the disks.
5587 disks_ok, disks_info = \
5588 _AssembleInstanceDisks(self, self.instance,
5589 ignore_size=self.op.ignore_size)
5591 raise errors.OpExecError("Cannot activate block devices")
5596 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5598 """Prepare the block devices for an instance.
5600 This sets up the block devices on all nodes.
5602 @type lu: L{LogicalUnit}
5603 @param lu: the logical unit on whose behalf we execute
5604 @type instance: L{objects.Instance}
5605 @param instance: the instance for whose disks we assemble
5606 @type disks: list of L{objects.Disk} or None
5607 @param disks: which disks to assemble (or all, if None)
5608 @type ignore_secondaries: boolean
5609 @param ignore_secondaries: if true, errors on secondary nodes
5610 won't result in an error return from the function
5611 @type ignore_size: boolean
5612 @param ignore_size: if true, the current known size of the disk
5613 will not be used during the disk activation, useful for cases
5614 when the size is wrong
5615 @return: False if the operation failed, otherwise a list of
5616 (host, instance_visible_name, node_visible_name)
5617 with the mapping from node devices to instance devices
5622 iname = instance.name
5623 disks = _ExpandCheckDisks(instance, disks)
5625 # With the two-pass mechanism we try to reduce the window of
5626 # opportunity for the race condition of switching DRBD to primary
5627 # before the handshake has occurred, but we do not eliminate it
5629 # The proper fix would be to wait (with some limits) until the
5630 # connection has been made and drbd transitions from WFConnection
5631 # into any other network-connected state (Connected, SyncTarget,
5634 # 1st pass, assemble on all nodes in secondary mode
5635 for idx, inst_disk in enumerate(disks):
5636 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5638 node_disk = node_disk.Copy()
5639 node_disk.UnsetSize()
5640 lu.cfg.SetDiskID(node_disk, node)
5641 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5642 msg = result.fail_msg
5644 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5645 " (is_primary=False, pass=1): %s",
5646 inst_disk.iv_name, node, msg)
5647 if not ignore_secondaries:
5650 # FIXME: race condition on drbd migration to primary
5652 # 2nd pass, do only the primary node
5653 for idx, inst_disk in enumerate(disks):
5656 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5657 if node != instance.primary_node:
5660 node_disk = node_disk.Copy()
5661 node_disk.UnsetSize()
5662 lu.cfg.SetDiskID(node_disk, node)
5663 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5664 msg = result.fail_msg
5666 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5667 " (is_primary=True, pass=2): %s",
5668 inst_disk.iv_name, node, msg)
5671 dev_path = result.payload
5673 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5675 # leave the disks configured for the primary node
5676 # this is a workaround that would be fixed better by
5677 # improving the logical/physical id handling
5679 lu.cfg.SetDiskID(disk, instance.primary_node)
5681 return disks_ok, device_info
5684 def _StartInstanceDisks(lu, instance, force):
5685 """Start the disks of an instance.
5688 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5689 ignore_secondaries=force)
if not disks_ok:
5691 _ShutdownInstanceDisks(lu, instance)
5692 if force is not None and not force:
5693 lu.proc.LogWarning("", hint="If the message above refers to a"
" secondary node,"
5695 " you can retry the operation using '--force'.")
5696 raise errors.OpExecError("Disk consistency error")
5699 class LUInstanceDeactivateDisks(NoHooksLU):
5700 """Shutdown an instance's disks.
5705 def ExpandNames(self):
5706 self._ExpandAndLockInstance()
5707 self.needed_locks[locking.LEVEL_NODE] = []
5708 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5710 def DeclareLocks(self, level):
5711 if level == locking.LEVEL_NODE:
5712 self._LockInstancesNodes()
5714 def CheckPrereq(self):
5715 """Check prerequisites.
5717 This checks that the instance is in the cluster.
5720 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5721 assert self.instance is not None, \
5722 "Cannot retrieve locked instance %s" % self.op.instance_name
5724 def Exec(self, feedback_fn):
5725 """Deactivate the disks
5728 instance = self.instance
if self.op.force:
5730 _ShutdownInstanceDisks(self, instance)
else:
5732 _SafeShutdownInstanceDisks(self, instance)
5735 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5736 """Shutdown block devices of an instance.
5738 This function checks if an instance is running, before calling
5739 _ShutdownInstanceDisks.
5742 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5743 _ShutdownInstanceDisks(lu, instance, disks=disks)
5746 def _ExpandCheckDisks(instance, disks):
5747 """Return the instance disks selected by the disks list
5749 @type disks: list of L{objects.Disk} or None
5750 @param disks: selected disks
5751 @rtype: list of L{objects.Disk}
5752 @return: selected instance disks to act on
if disks is None:
5756 return instance.disks
else:
5758 if not set(disks).issubset(instance.disks):
5759 raise errors.ProgrammerError("Can only act on disks belonging to the"
" target instance")
return disks
5764 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5765 """Shutdown block devices of an instance.
5767 This does the shutdown on all nodes of the instance.
5769 If ignore_primary is false, errors on the primary node are not ignored and cause the shutdown to be reported as failed.
5774 disks = _ExpandCheckDisks(instance, disks)
for disk in disks:
5777 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5778 lu.cfg.SetDiskID(top_disk, node)
5779 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5780 msg = result.fail_msg
5782 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5783 disk.iv_name, node, msg)
5784 if ((node == instance.primary_node and not ignore_primary) or
5785 (node != instance.primary_node and not result.offline)):
5790 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5791 """Checks if a node has enough free memory.
5793 This function checks if a given node has the needed amount of free
5794 memory. If the node has less memory, or if we cannot get the
5795 information from the node, this function raises an OpPrereqError.
5798 @type lu: C{LogicalUnit}
5799 @param lu: a logical unit from which we get configuration data
5801 @param node: the node to check
5802 @type reason: C{str}
5803 @param reason: string to use in the error message
5804 @type requested: C{int}
5805 @param requested: the amount of memory in MiB to check for
5806 @type hypervisor_name: C{str}
5807 @param hypervisor_name: the hypervisor to ask for memory stats
5808 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5809 we cannot check the node
5812 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5813 nodeinfo[node].Raise("Can't get data from node %s" % node,
5814 prereq=True, ecode=errors.ECODE_ENVIRON)
5815 free_mem = nodeinfo[node].payload.get("memory_free", None)
5816 if not isinstance(free_mem, int):
5817 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5818 " was '%s'" % (node, free_mem),
5819 errors.ECODE_ENVIRON)
5820 if requested > free_mem:
5821 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5822 " needed %s MiB, available %s MiB" %
5823 (node, reason, requested, free_mem),
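# Usage sketch (hypothetical numbers): before starting an instance one would
# typically call
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        2048, instance.hypervisor)
# to make sure 2048 MiB are reported as free by the node_info RPC.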
5827 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5828 """Checks if nodes have enough free disk space in the all VGs.
5830 This function check if all given nodes have the needed amount of
5831 free disk. In case any node has less disk or we cannot get the
5832 information from the node, this function raise an OpPrereqError
5835 @type lu: C{LogicalUnit}
5836 @param lu: a logical unit from which we get configuration data
5837 @type nodenames: C{list}
5838 @param nodenames: the list of node names to check
5839 @type req_sizes: C{dict}
5840 @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for
5842 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5843 or we cannot check the node
5846 for vg, req_size in req_sizes.items():
5847 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
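# Example req_sizes value (hypothetical VG names): {"xenvg": 10240, "data": 512}
# would require 10240 MiB free in VG "xenvg" and 512 MiB in VG "data" on
# every node in nodenames.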
5850 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5851 """Checks if nodes have enough free disk space in the specified VG.
5853 This function checks if all given nodes have the needed amount of
5854 free disk. If any node has less disk, or if we cannot get the
5855 information from the node, this function raises an OpPrereqError.
5858 @type lu: C{LogicalUnit}
5859 @param lu: a logical unit from which we get configuration data
5860 @type nodenames: C{list}
5861 @param nodenames: the list of node names to check
5863 @param vg: the volume group to check
5864 @type requested: C{int}
5865 @param requested: the amount of disk in MiB to check for
5866 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5867 or we cannot check the node
5870 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5871 for node in nodenames:
5872 info = nodeinfo[node]
5873 info.Raise("Cannot get current information from node %s" % node,
5874 prereq=True, ecode=errors.ECODE_ENVIRON)
5875 vg_free = info.payload.get("vg_free", None)
5876 if not isinstance(vg_free, int):
5877 raise errors.OpPrereqError("Can't compute free disk space on node"
5878 " %s for vg %s, result was '%s'" %
5879 (node, vg, vg_free), errors.ECODE_ENVIRON)
5880 if requested > vg_free:
5881 raise errors.OpPrereqError("Not enough disk space on target node %s"
5882 " vg %s: required %d MiB, available %d MiB" %
5883 (node, vg, requested, vg_free),
5887 class LUInstanceStartup(LogicalUnit):
5888 """Starts an instance.
5891 HPATH = "instance-start"
5892 HTYPE = constants.HTYPE_INSTANCE
5895 def CheckArguments(self):
5897 if self.op.beparams:
5898 # fill the beparams dict
5899 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5901 def ExpandNames(self):
5902 self._ExpandAndLockInstance()
5904 def BuildHooksEnv(self):
5907 This runs on master, primary and secondary nodes of the instance.
5911 "FORCE": self.op.force,
5914 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5918 def BuildHooksNodes(self):
5919 """Build hooks nodes.
5922 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5925 def CheckPrereq(self):
5926 """Check prerequisites.
5928 This checks that the instance is in the cluster.
5931 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5932 assert self.instance is not None, \
5933 "Cannot retrieve locked instance %s" % self.op.instance_name
5936 if self.op.hvparams:
5937 # check hypervisor parameter syntax (locally)
5938 cluster = self.cfg.GetClusterInfo()
5939 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5940 filled_hvp = cluster.FillHV(instance)
5941 filled_hvp.update(self.op.hvparams)
5942 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5943 hv_type.CheckParameterSyntax(filled_hvp)
5944 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
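# Sketch of the merge above (hypothetical parameter): the cluster defaults
# from FillHV() are overlaid with self.op.hvparams, e.g. {"kernel_args": "ro"},
# and the combined dict is then syntax-checked and verified on the nodes.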
5946 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5948 if self.primary_offline and self.op.ignore_offline_nodes:
5949 self.proc.LogWarning("Ignoring offline primary node")
5951 if self.op.hvparams or self.op.beparams:
5952 self.proc.LogWarning("Overridden parameters are ignored")
5954 _CheckNodeOnline(self, instance.primary_node)
5956 bep = self.cfg.GetClusterInfo().FillBE(instance)
5958 # check bridges existence
5959 _CheckInstanceBridgesExist(self, instance)
5961 remote_info = self.rpc.call_instance_info(instance.primary_node,
5963 instance.hypervisor)
5964 remote_info.Raise("Error checking node %s" % instance.primary_node,
5965 prereq=True, ecode=errors.ECODE_ENVIRON)
5966 if not remote_info.payload: # not running already
5967 _CheckNodeFreeMemory(self, instance.primary_node,
5968 "starting instance %s" % instance.name,
5969 bep[constants.BE_MEMORY], instance.hypervisor)
5971 def Exec(self, feedback_fn):
5972 """Start the instance.
5975 instance = self.instance
5976 force = self.op.force
5978 if not self.op.no_remember:
5979 self.cfg.MarkInstanceUp(instance.name)
5981 if self.primary_offline:
5982 assert self.op.ignore_offline_nodes
5983 self.proc.LogInfo("Primary node offline, marked instance as started")
5985 node_current = instance.primary_node
5987 _StartInstanceDisks(self, instance, force)
5989 result = self.rpc.call_instance_start(node_current, instance,
5990 self.op.hvparams, self.op.beparams,
5991 self.op.startup_paused)
5992 msg = result.fail_msg
5994 _ShutdownInstanceDisks(self, instance)
5995 raise errors.OpExecError("Could not start instance: %s" % msg)
5998 class LUInstanceReboot(LogicalUnit):
5999 """Reboot an instance.
6002 HPATH = "instance-reboot"
6003 HTYPE = constants.HTYPE_INSTANCE
6006 def ExpandNames(self):
6007 self._ExpandAndLockInstance()
6009 def BuildHooksEnv(self):
6012 This runs on master, primary and secondary nodes of the instance.
6016 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6017 "REBOOT_TYPE": self.op.reboot_type,
6018 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6021 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6025 def BuildHooksNodes(self):
6026 """Build hooks nodes.
6029 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6032 def CheckPrereq(self):
6033 """Check prerequisites.
6035 This checks that the instance is in the cluster.
6038 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6039 assert self.instance is not None, \
6040 "Cannot retrieve locked instance %s" % self.op.instance_name
6042 _CheckNodeOnline(self, instance.primary_node)
6044 # check bridges existence
6045 _CheckInstanceBridgesExist(self, instance)
6047 def Exec(self, feedback_fn):
6048 """Reboot the instance.
6051 instance = self.instance
6052 ignore_secondaries = self.op.ignore_secondaries
6053 reboot_type = self.op.reboot_type
6055 remote_info = self.rpc.call_instance_info(instance.primary_node,
6057 instance.hypervisor)
6058 remote_info.Raise("Error checking node %s" % instance.primary_node)
6059 instance_running = bool(remote_info.payload)
6061 node_current = instance.primary_node
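# Soft and hard reboots are handed to the hypervisor on the primary node;
# a full reboot (or a reboot of a stopped instance) is emulated below by a
# shutdown followed by a fresh start.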
6063 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6064 constants.INSTANCE_REBOOT_HARD]:
6065 for disk in instance.disks:
6066 self.cfg.SetDiskID(disk, node_current)
6067 result = self.rpc.call_instance_reboot(node_current, instance,
6069 self.op.shutdown_timeout)
6070 result.Raise("Could not reboot instance")
6072 if instance_running:
6073 result = self.rpc.call_instance_shutdown(node_current, instance,
6074 self.op.shutdown_timeout)
6075 result.Raise("Could not shutdown instance for full reboot")
6076 _ShutdownInstanceDisks(self, instance)
else:
6078 self.LogInfo("Instance %s was already stopped, starting now",
instance.name)
6080 _StartInstanceDisks(self, instance, ignore_secondaries)
6081 result = self.rpc.call_instance_start(node_current, instance,
6083 msg = result.fail_msg
6085 _ShutdownInstanceDisks(self, instance)
6086 raise errors.OpExecError("Could not start instance for"
6087 " full reboot: %s" % msg)
6089 self.cfg.MarkInstanceUp(instance.name)
6092 class LUInstanceShutdown(LogicalUnit):
6093 """Shutdown an instance.
6096 HPATH = "instance-stop"
6097 HTYPE = constants.HTYPE_INSTANCE
6100 def ExpandNames(self):
6101 self._ExpandAndLockInstance()
6103 def BuildHooksEnv(self):
6106 This runs on master, primary and secondary nodes of the instance.
6109 env = _BuildInstanceHookEnvByObject(self, self.instance)
6110 env["TIMEOUT"] = self.op.timeout
6113 def BuildHooksNodes(self):
6114 """Build hooks nodes.
6117 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6120 def CheckPrereq(self):
6121 """Check prerequisites.
6123 This checks that the instance is in the cluster.
6126 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6127 assert self.instance is not None, \
6128 "Cannot retrieve locked instance %s" % self.op.instance_name
6130 self.primary_offline = \
6131 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6133 if self.primary_offline and self.op.ignore_offline_nodes:
6134 self.proc.LogWarning("Ignoring offline primary node")
6136 _CheckNodeOnline(self, self.instance.primary_node)
6138 def Exec(self, feedback_fn):
6139 """Shutdown the instance.
6142 instance = self.instance
6143 node_current = instance.primary_node
6144 timeout = self.op.timeout
6146 if not self.op.no_remember:
6147 self.cfg.MarkInstanceDown(instance.name)
6149 if self.primary_offline:
6150 assert self.op.ignore_offline_nodes
6151 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6153 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6154 msg = result.fail_msg
6156 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6158 _ShutdownInstanceDisks(self, instance)
6161 class LUInstanceReinstall(LogicalUnit):
6162 """Reinstall an instance.
6165 HPATH = "instance-reinstall"
6166 HTYPE = constants.HTYPE_INSTANCE
6169 def ExpandNames(self):
6170 self._ExpandAndLockInstance()
6172 def BuildHooksEnv(self):
6175 This runs on master, primary and secondary nodes of the instance.
6178 return _BuildInstanceHookEnvByObject(self, self.instance)
6180 def BuildHooksNodes(self):
6181 """Build hooks nodes.
6184 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6187 def CheckPrereq(self):
6188 """Check prerequisites.
6190 This checks that the instance is in the cluster and is not running.
6193 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6194 assert instance is not None, \
6195 "Cannot retrieve locked instance %s" % self.op.instance_name
6196 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6197 " offline, cannot reinstall")
6198 for node in instance.secondary_nodes:
6199 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6200 " cannot reinstall")
6202 if instance.disk_template == constants.DT_DISKLESS:
6203 raise errors.OpPrereqError("Instance '%s' has no disks" %
6204 self.op.instance_name,
6206 _CheckInstanceDown(self, instance, "cannot reinstall")
6208 if self.op.os_type is not None:
6210 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6211 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6212 instance_os = self.op.os_type
6214 instance_os = instance.os
6216 nodelist = list(instance.all_nodes)
6218 if self.op.osparams:
6219 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6220 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6221 self.os_inst = i_osdict # the new dict (without defaults)
6225 self.instance = instance
6227 def Exec(self, feedback_fn):
6228 """Reinstall the instance.
6231 inst = self.instance
6233 if self.op.os_type is not None:
6234 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6235 inst.os = self.op.os_type
6236 # Write to configuration
6237 self.cfg.Update(inst, feedback_fn)
6239 _StartInstanceDisks(self, inst, None)
6241 feedback_fn("Running the instance OS create scripts...")
6242 # FIXME: pass debug option from opcode to backend
6243 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6244 self.op.debug_level,
6245 osparams=self.os_inst)
6246 result.Raise("Could not install OS for instance %s on node %s" %
6247 (inst.name, inst.primary_node))
6249 _ShutdownInstanceDisks(self, inst)
6252 class LUInstanceRecreateDisks(LogicalUnit):
6253 """Recreate an instance's missing disks.
6256 HPATH = "instance-recreate-disks"
6257 HTYPE = constants.HTYPE_INSTANCE
6260 def CheckArguments(self):
6261 # normalise the disk list
6262 self.op.disks = sorted(frozenset(self.op.disks))
6264 def ExpandNames(self):
6265 self._ExpandAndLockInstance()
6266 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6268 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6269 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6271 self.needed_locks[locking.LEVEL_NODE] = []
6273 def DeclareLocks(self, level):
6274 if level == locking.LEVEL_NODE:
6275 # if we replace the nodes, we only need to lock the old primary,
6276 # otherwise we need to lock all nodes for disk re-creation
6277 primary_only = bool(self.op.nodes)
6278 self._LockInstancesNodes(primary_only=primary_only)
6280 def BuildHooksEnv(self):
6283 This runs on master, primary and secondary nodes of the instance.
6286 return _BuildInstanceHookEnvByObject(self, self.instance)
6288 def BuildHooksNodes(self):
6289 """Build hooks nodes.
6292 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6295 def CheckPrereq(self):
6296 """Check prerequisites.
6298 This checks that the instance is in the cluster and is not running.
6301 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6302 assert instance is not None, \
6303 "Cannot retrieve locked instance %s" % self.op.instance_name
6305 if len(self.op.nodes) != len(instance.all_nodes):
6306 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6307 " %d replacement nodes were specified" %
6308 (instance.name, len(instance.all_nodes),
6309 len(self.op.nodes)),
6311 assert instance.disk_template != constants.DT_DRBD8 or \
6312 len(self.op.nodes) == 2
6313 assert instance.disk_template != constants.DT_PLAIN or \
6314 len(self.op.nodes) == 1
6315 primary_node = self.op.nodes[0]
6317 primary_node = instance.primary_node
6318 _CheckNodeOnline(self, primary_node)
6320 if instance.disk_template == constants.DT_DISKLESS:
6321 raise errors.OpPrereqError("Instance '%s' has no disks" %
6322 self.op.instance_name, errors.ECODE_INVAL)
6323 # if we replace nodes *and* the old primary is offline, we don't
6325 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6326 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6327 if not (self.op.nodes and old_pnode.offline):
6328 _CheckInstanceDown(self, instance, "cannot recreate disks")
6330 if not self.op.disks:
6331 self.op.disks = range(len(instance.disks))
6333 for idx in self.op.disks:
6334 if idx >= len(instance.disks):
6335 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6337 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6338 raise errors.OpPrereqError("Can't recreate disks partially and"
6339 " change the nodes at the same time",
6341 self.instance = instance
6343 def Exec(self, feedback_fn):
6344 """Recreate the disks.
6347 instance = self.instance
to_skip = []
6350 mods = [] # keeps track of needed logical_id changes
6352 for idx, disk in enumerate(instance.disks):
6353 if idx not in self.op.disks: # disk idx has not been passed in
to_skip.append(idx)
continue
6356 # update secondaries for disks, if needed
6358 if disk.dev_type == constants.LD_DRBD8:
6359 # need to update the nodes and minors
6360 assert len(self.op.nodes) == 2
6361 assert len(disk.logical_id) == 6 # otherwise disk internals
6363 (_, _, old_port, _, _, old_secret) = disk.logical_id
6364 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6365 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6366 new_minors[0], new_minors[1], old_secret)
6367 assert len(disk.logical_id) == len(new_id)
6368 mods.append((idx, new_id))
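# Shape of a DRBD8 logical_id, with purely illustrative values:
#   ("node1.example.com", "node2.example.com", 11000, 0, 1, "secret")
# i.e. (primary, secondary, port, minor_on_primary, minor_on_secondary,
# shared_secret); only the nodes and the minors are replaced here.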
6370 # now that we have passed all asserts above, we can apply the mods
6371 # in a single run (to avoid partial changes)
6372 for idx, new_id in mods:
6373 instance.disks[idx].logical_id = new_id
6375 # change primary node, if needed
if self.op.nodes:
6377 instance.primary_node = self.op.nodes[0]
6378 self.LogWarning("Changing the instance's nodes, you will have to"
6379 " remove any disks left on the older nodes manually")
6382 self.cfg.Update(instance, feedback_fn)
6384 _CreateDisks(self, instance, to_skip=to_skip)
6387 class LUInstanceRename(LogicalUnit):
6388 """Rename an instance.
6391 HPATH = "instance-rename"
6392 HTYPE = constants.HTYPE_INSTANCE
6394 def CheckArguments(self):
6398 if self.op.ip_check and not self.op.name_check:
6399 # TODO: make the ip check more flexible and not depend on the name check
6400 raise errors.OpPrereqError("IP address check requires a name check",
6403 def BuildHooksEnv(self):
6406 This runs on master, primary and secondary nodes of the instance.
6409 env = _BuildInstanceHookEnvByObject(self, self.instance)
6410 env["INSTANCE_NEW_NAME"] = self.op.new_name
6413 def BuildHooksNodes(self):
6414 """Build hooks nodes.
6417 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6420 def CheckPrereq(self):
6421 """Check prerequisites.
6423 This checks that the instance is in the cluster and is not running.
6426 self.op.instance_name = _ExpandInstanceName(self.cfg,
6427 self.op.instance_name)
6428 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6429 assert instance is not None
6430 _CheckNodeOnline(self, instance.primary_node)
6431 _CheckInstanceDown(self, instance, "cannot rename")
6432 self.instance = instance
6434 new_name = self.op.new_name
6435 if self.op.name_check:
6436 hostname = netutils.GetHostname(name=new_name)
6437 if hostname != new_name:
6438 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6440 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6441 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6442 " same as given hostname '%s'") %
6443 (hostname.name, self.op.new_name),
6445 new_name = self.op.new_name = hostname.name
6446 if (self.op.ip_check and
6447 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6448 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6449 (hostname.ip, new_name),
6450 errors.ECODE_NOTUNIQUE)
6452 instance_list = self.cfg.GetInstanceList()
6453 if new_name in instance_list and new_name != instance.name:
6454 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6455 new_name, errors.ECODE_EXISTS)
6457 def Exec(self, feedback_fn):
6458 """Rename the instance.
6461 inst = self.instance
6462 old_name = inst.name
6464 rename_file_storage = False
6465 if (inst.disk_template in constants.DTS_FILEBASED and
6466 self.op.new_name != inst.name):
6467 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6468 rename_file_storage = True
6470 self.cfg.RenameInstance(inst.name, self.op.new_name)
6471 # Change the instance lock. This is definitely safe while we hold the BGL.
6472 # Otherwise the new lock would have to be added in acquired mode.
6474 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6475 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6477 # re-read the instance from the configuration after rename
6478 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6480 if rename_file_storage:
6481 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6482 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6483 old_file_storage_dir,
6484 new_file_storage_dir)
6485 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6486 " (but the instance has been renamed in Ganeti)" %
6487 (inst.primary_node, old_file_storage_dir,
6488 new_file_storage_dir))
6490 _StartInstanceDisks(self, inst, None)
6492 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6493 old_name, self.op.debug_level)
6494 msg = result.fail_msg
6496 msg = ("Could not run OS rename script for instance %s on node %s"
6497 " (but the instance has been renamed in Ganeti): %s" %
6498 (inst.name, inst.primary_node, msg))
6499 self.proc.LogWarning(msg)
6501 _ShutdownInstanceDisks(self, inst)
6506 class LUInstanceRemove(LogicalUnit):
6507 """Remove an instance.
6510 HPATH = "instance-remove"
6511 HTYPE = constants.HTYPE_INSTANCE
6514 def ExpandNames(self):
6515 self._ExpandAndLockInstance()
6516 self.needed_locks[locking.LEVEL_NODE] = []
6517 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6519 def DeclareLocks(self, level):
6520 if level == locking.LEVEL_NODE:
6521 self._LockInstancesNodes()
6523 def BuildHooksEnv(self):
6526 This runs on master, primary and secondary nodes of the instance.
6529 env = _BuildInstanceHookEnvByObject(self, self.instance)
6530 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6533 def BuildHooksNodes(self):
6534 """Build hooks nodes.
6537 nl = [self.cfg.GetMasterNode()]
6538 nl_post = list(self.instance.all_nodes) + nl
6539 return (nl, nl_post)
6541 def CheckPrereq(self):
6542 """Check prerequisites.
6544 This checks that the instance is in the cluster.
6547 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6548 assert self.instance is not None, \
6549 "Cannot retrieve locked instance %s" % self.op.instance_name
6551 def Exec(self, feedback_fn):
6552 """Remove the instance.
6555 instance = self.instance
6556 logging.info("Shutting down instance %s on node %s",
6557 instance.name, instance.primary_node)
6559 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6560 self.op.shutdown_timeout)
6561 msg = result.fail_msg
6563 if self.op.ignore_failures:
6564 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6566 raise errors.OpExecError("Could not shutdown instance %s on"
6568 (instance.name, instance.primary_node, msg))
6570 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6573 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6574 """Utility function to remove an instance.
6577 logging.info("Removing block devices for instance %s", instance.name)
6579 if not _RemoveDisks(lu, instance):
6580 if not ignore_failures:
6581 raise errors.OpExecError("Can't remove instance's disks")
6582 feedback_fn("Warning: can't remove instance's disks")
6584 logging.info("Removing instance %s out of cluster config", instance.name)
6586 lu.cfg.RemoveInstance(instance.name)
6588 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6589 "Instance lock removal conflict"
6591 # Remove lock for the instance
6592 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6595 class LUInstanceQuery(NoHooksLU):
6596 """Logical unit for querying instances.
6599 # pylint: disable=W0142
6602 def CheckArguments(self):
6603 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6604 self.op.output_fields, self.op.use_locking)
6606 def ExpandNames(self):
6607 self.iq.ExpandNames(self)
6609 def DeclareLocks(self, level):
6610 self.iq.DeclareLocks(self, level)
6612 def Exec(self, feedback_fn):
6613 return self.iq.OldStyleQuery(self)
6616 class LUInstanceFailover(LogicalUnit):
6617 """Failover an instance.
6620 HPATH = "instance-failover"
6621 HTYPE = constants.HTYPE_INSTANCE
6624 def CheckArguments(self):
6625 """Check the arguments.
6628 self.iallocator = getattr(self.op, "iallocator", None)
6629 self.target_node = getattr(self.op, "target_node", None)
6631 def ExpandNames(self):
6632 self._ExpandAndLockInstance()
6634 if self.op.target_node is not None:
6635 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6637 self.needed_locks[locking.LEVEL_NODE] = []
6638 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6640 ignore_consistency = self.op.ignore_consistency
6641 shutdown_timeout = self.op.shutdown_timeout
6642 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6645 ignore_consistency=ignore_consistency,
6646 shutdown_timeout=shutdown_timeout)
6647 self.tasklets = [self._migrater]
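# The heavy lifting is delegated to the TLMigrateInstance tasklet defined
# further down; this LU only sets up locking, hooks and the
# failover-specific parameters.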
6649 def DeclareLocks(self, level):
6650 if level == locking.LEVEL_NODE:
6651 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6652 if instance.disk_template in constants.DTS_EXT_MIRROR:
6653 if self.op.target_node is None:
6654 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6656 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6657 self.op.target_node]
6658 del self.recalculate_locks[locking.LEVEL_NODE]
6660 self._LockInstancesNodes()
6662 def BuildHooksEnv(self):
6665 This runs on master, primary and secondary nodes of the instance.
6668 instance = self._migrater.instance
6669 source_node = instance.primary_node
6670 target_node = self.op.target_node
6672 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6673 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6674 "OLD_PRIMARY": source_node,
6675 "NEW_PRIMARY": target_node,
6678 if instance.disk_template in constants.DTS_INT_MIRROR:
6679 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6680 env["NEW_SECONDARY"] = source_node
6682 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6684 env.update(_BuildInstanceHookEnvByObject(self, instance))
6688 def BuildHooksNodes(self):
6689 """Build hooks nodes.
6692 instance = self._migrater.instance
6693 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6694 return (nl, nl + [instance.primary_node])
6697 class LUInstanceMigrate(LogicalUnit):
6698 """Migrate an instance.
6700 This is migration without shutting the instance down, as opposed to
6701 failover, which first shuts the instance down.
6704 HPATH = "instance-migrate"
6705 HTYPE = constants.HTYPE_INSTANCE
6708 def ExpandNames(self):
6709 self._ExpandAndLockInstance()
6711 if self.op.target_node is not None:
6712 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6714 self.needed_locks[locking.LEVEL_NODE] = []
6715 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6717 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6718 cleanup=self.op.cleanup,
6720 fallback=self.op.allow_failover)
6721 self.tasklets = [self._migrater]
6723 def DeclareLocks(self, level):
6724 if level == locking.LEVEL_NODE:
6725 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6726 if instance.disk_template in constants.DTS_EXT_MIRROR:
6727 if self.op.target_node is None:
6728 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6730 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6731 self.op.target_node]
6732 del self.recalculate_locks[locking.LEVEL_NODE]
6734 self._LockInstancesNodes()
6736 def BuildHooksEnv(self):
6739 This runs on master, primary and secondary nodes of the instance.
6742 instance = self._migrater.instance
6743 source_node = instance.primary_node
6744 target_node = self.op.target_node
6745 env = _BuildInstanceHookEnvByObject(self, instance)
6747 "MIGRATE_LIVE": self._migrater.live,
6748 "MIGRATE_CLEANUP": self.op.cleanup,
6749 "OLD_PRIMARY": source_node,
6750 "NEW_PRIMARY": target_node,
6753 if instance.disk_template in constants.DTS_INT_MIRROR:
6754 env["OLD_SECONDARY"] = target_node
6755 env["NEW_SECONDARY"] = source_node
6757 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6761 def BuildHooksNodes(self):
6762 """Build hooks nodes.
6765 instance = self._migrater.instance
6766 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6767 return (nl, nl + [instance.primary_node])
6770 class LUInstanceMove(LogicalUnit):
6771 """Move an instance by data-copying.
6774 HPATH = "instance-move"
6775 HTYPE = constants.HTYPE_INSTANCE
6778 def ExpandNames(self):
6779 self._ExpandAndLockInstance()
6780 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6781 self.op.target_node = target_node
6782 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6783 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6785 def DeclareLocks(self, level):
6786 if level == locking.LEVEL_NODE:
6787 self._LockInstancesNodes(primary_only=True)
6789 def BuildHooksEnv(self):
6792 This runs on master, primary and secondary nodes of the instance.
6796 "TARGET_NODE": self.op.target_node,
6797 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6799 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6802 def BuildHooksNodes(self):
6803 """Build hooks nodes.
6807 self.cfg.GetMasterNode(),
6808 self.instance.primary_node,
6809 self.op.target_node,
6813 def CheckPrereq(self):
6814 """Check prerequisites.
6816 This checks that the instance is in the cluster.
6819 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6820 assert self.instance is not None, \
6821 "Cannot retrieve locked instance %s" % self.op.instance_name
6823 node = self.cfg.GetNodeInfo(self.op.target_node)
6824 assert node is not None, \
6825 "Cannot retrieve locked node %s" % self.op.target_node
6827 self.target_node = target_node = node.name
6829 if target_node == instance.primary_node:
6830 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6831 (instance.name, target_node),
6834 bep = self.cfg.GetClusterInfo().FillBE(instance)
6836 for idx, dsk in enumerate(instance.disks):
6837 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6838 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6839 " cannot copy" % idx, errors.ECODE_STATE)
6841 _CheckNodeOnline(self, target_node)
6842 _CheckNodeNotDrained(self, target_node)
6843 _CheckNodeVmCapable(self, target_node)
6845 if instance.admin_up:
6846 # check memory requirements on the secondary node
6847 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6848 instance.name, bep[constants.BE_MEMORY],
6849 instance.hypervisor)
6851 self.LogInfo("Not checking memory on the secondary node as"
6852 " instance will not be started")
6854 # check bridge existence
6855 _CheckInstanceBridgesExist(self, instance, node=target_node)
6857 def Exec(self, feedback_fn):
6858 """Move an instance.
6860 The move is done by shutting it down on its present node, copying
6861 the data over (slow) and starting it on the new node.
6864 instance = self.instance
6866 source_node = instance.primary_node
6867 target_node = self.target_node
6869 self.LogInfo("Shutting down instance %s on source node %s",
6870 instance.name, source_node)
6872 result = self.rpc.call_instance_shutdown(source_node, instance,
6873 self.op.shutdown_timeout)
6874 msg = result.fail_msg
6876 if self.op.ignore_consistency:
6877 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6878 " Proceeding anyway. Please make sure node"
6879 " %s is down. Error details: %s",
6880 instance.name, source_node, source_node, msg)
6882 raise errors.OpExecError("Could not shutdown instance %s on"
6884 (instance.name, source_node, msg))
6886 # create the target disks
6888 _CreateDisks(self, instance, target_node=target_node)
6889 except errors.OpExecError:
6890 self.LogWarning("Device creation failed, reverting...")
6892 _RemoveDisks(self, instance, target_node=target_node)
6894 self.cfg.ReleaseDRBDMinors(instance.name)
6897 cluster_name = self.cfg.GetClusterInfo().cluster_name
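# What follows is the actual data move: each disk is assembled on the
# target node, its data is streamed from the source node via
# call_blockdev_export, and any failure is recorded in errs so that the
# freshly created disks can be rolled back.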
6900 # activate, get path, copy the data over
6901 for idx, disk in enumerate(instance.disks):
6902 self.LogInfo("Copying data for disk %d", idx)
6903 result = self.rpc.call_blockdev_assemble(target_node, disk,
6904 instance.name, True, idx)
6906 self.LogWarning("Can't assemble newly created disk %d: %s",
6907 idx, result.fail_msg)
6908 errs.append(result.fail_msg)
6910 dev_path = result.payload
6911 result = self.rpc.call_blockdev_export(source_node, disk,
6912 target_node, dev_path,
6915 self.LogWarning("Can't copy data over for disk %d: %s",
6916 idx, result.fail_msg)
6917 errs.append(result.fail_msg)
6921 self.LogWarning("Some disks failed to copy, aborting")
6923 _RemoveDisks(self, instance, target_node=target_node)
6925 self.cfg.ReleaseDRBDMinors(instance.name)
6926 raise errors.OpExecError("Errors during disk copy: %s" %
6929 instance.primary_node = target_node
6930 self.cfg.Update(instance, feedback_fn)
6932 self.LogInfo("Removing the disks on the original node")
6933 _RemoveDisks(self, instance, target_node=source_node)
6935 # Only start the instance if it's marked as up
6936 if instance.admin_up:
6937 self.LogInfo("Starting instance %s on node %s",
6938 instance.name, target_node)
6940 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6941 ignore_secondaries=True)
6943 _ShutdownInstanceDisks(self, instance)
6944 raise errors.OpExecError("Can't activate the instance's disks")
6946 result = self.rpc.call_instance_start(target_node, instance,
6948 msg = result.fail_msg
6950 _ShutdownInstanceDisks(self, instance)
6951 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6952 (instance.name, target_node, msg))
6955 class LUNodeMigrate(LogicalUnit):
6956 """Migrate all instances from a node.
6959 HPATH = "node-migrate"
6960 HTYPE = constants.HTYPE_NODE
6963 def CheckArguments(self):
6966 def ExpandNames(self):
6967 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6969 self.share_locks = _ShareAll()
6970 self.needed_locks = {
6971 locking.LEVEL_NODE: [self.op.node_name],
6974 def BuildHooksEnv(self):
6977 This runs on the master, the primary and all the secondaries.
6981 "NODE_NAME": self.op.node_name,
6984 def BuildHooksNodes(self):
6985 """Build hooks nodes.
6988 nl = [self.cfg.GetMasterNode()]
6991 def CheckPrereq(self):
6994 def Exec(self, feedback_fn):
6995 # Prepare jobs for migration instances
6997 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7000 iallocator=self.op.iallocator,
7001 target_node=self.op.target_node)]
7002 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7005 # TODO: Run iallocator in this opcode and pass correct placement options to
7006 # OpInstanceMigrate. Since other jobs can modify the cluster between
7007 # running the iallocator and the actual migration, a good consistency model
7008 # will have to be found.
7010 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7011 frozenset([self.op.node_name]))
7013 return ResultWithJobs(jobs)
7016 class TLMigrateInstance(Tasklet):
7017 """Tasklet class for instance migration.
7020 @ivar live: whether the migration will be done live or non-live;
7021 this variable is initialized only after CheckPrereq has run
7022 @type cleanup: boolean
7023 @ivar cleanup: Whether we are cleaning up from a failed migration
7024 @type iallocator: string
7025 @ivar iallocator: The iallocator used to determine target_node
7026 @type target_node: string
7027 @ivar target_node: If given, the target_node to reallocate the instance to
7028 @type failover: boolean
7029 @ivar failover: Whether operation results in failover or migration
7030 @type fallback: boolean
7031 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7033 @type ignore_consistency: boolean
7034 @ivar ignore_consistency: Whether we should ignore consistency between source and target nodes
7036 @type shutdown_timeout: int
7037 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
7040 def __init__(self, lu, instance_name, cleanup=False,
7041 failover=False, fallback=False,
7042 ignore_consistency=False,
7043 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7044 """Initializes this class.
7047 Tasklet.__init__(self, lu)
7050 self.instance_name = instance_name
7051 self.cleanup = cleanup
7052 self.live = False # will be overridden later
7053 self.failover = failover
7054 self.fallback = fallback
7055 self.ignore_consistency = ignore_consistency
7056 self.shutdown_timeout = shutdown_timeout
7058 def CheckPrereq(self):
7059 """Check prerequisites.
7061 This checks that the instance is in the cluster.
7064 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7065 instance = self.cfg.GetInstanceInfo(instance_name)
7066 assert instance is not None
7067 self.instance = instance
7069 if (not self.cleanup and not instance.admin_up and not self.failover and
self.fallback):
7071 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
" to failover")
7073 self.failover = True
7075 if instance.disk_template not in constants.DTS_MIRRORED:
if self.failover:
text = "failover"
else:
text = "migration"
7080 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7081 " %s" % (instance.disk_template, text),
7084 if instance.disk_template in constants.DTS_EXT_MIRROR:
7085 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7087 if self.lu.op.iallocator:
7088 self._RunAllocator()
7090 # We set self.target_node, as it is required by
7092 self.target_node = self.lu.op.target_node
7094 # self.target_node is already populated, either directly or by the
7096 target_node = self.target_node
7097 if self.target_node == instance.primary_node:
7098 raise errors.OpPrereqError("Cannot migrate instance %s"
7099 " to its primary (%s)" %
7100 (instance.name, instance.primary_node))
7102 if len(self.lu.tasklets) == 1:
7103 # It is safe to release locks only when we're the only tasklet
7105 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7106 keep=[instance.primary_node, self.target_node])
7109 secondary_nodes = instance.secondary_nodes
7110 if not secondary_nodes:
7111 raise errors.ConfigurationError("No secondary node but using"
7112 " %s disk template" %
7113 instance.disk_template)
7114 target_node = secondary_nodes[0]
7115 if self.lu.op.iallocator or (self.lu.op.target_node and
7116 self.lu.op.target_node != target_node):
7118 text = "failed over"
7121 raise errors.OpPrereqError("Instances with disk template %s cannot"
7122 " be %s to arbitrary nodes"
7123 " (neither an iallocator nor a target"
7124 " node can be passed)" %
7125 (instance.disk_template, text),
7128 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7130 # check memory requirements on the secondary node
7131 if not self.failover or instance.admin_up:
7132 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7133 instance.name, i_be[constants.BE_MEMORY],
7134 instance.hypervisor)
7136 self.lu.LogInfo("Not checking memory on the secondary node as"
7137 " instance will not be started")
7139 # check bridge existence
7140 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7142 if not self.cleanup:
7143 _CheckNodeNotDrained(self.lu, target_node)
7144 if not self.failover:
7145 result = self.rpc.call_instance_migratable(instance.primary_node,
7147 if result.fail_msg and self.fallback:
7148 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7150 self.failover = True
7152 result.Raise("Can't migrate, please use failover",
7153 prereq=True, ecode=errors.ECODE_STATE)
7155 assert not (self.failover and self.cleanup)
7157 if not self.failover:
7158 if self.lu.op.live is not None and self.lu.op.mode is not None:
7159 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7160 " parameters are accepted",
7162 if self.lu.op.live is not None:
7164 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7166 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7167 # reset the 'live' parameter to None so that repeated
7168 # invocations of CheckPrereq do not raise an exception
7169 self.lu.op.live = None
7170 elif self.lu.op.mode is None:
7171 # read the default value from the hypervisor
7172 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7174 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7176 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7178 # Failover is never live
7181 def _RunAllocator(self):
7182 """Run the allocator based on input opcode.
7185 ial = IAllocator(self.cfg, self.rpc,
7186 mode=constants.IALLOCATOR_MODE_RELOC,
7187 name=self.instance_name,
7188 # TODO See why hail breaks with a single node below
7189 relocate_from=[self.instance.primary_node,
7190 self.instance.primary_node],
7193 ial.Run(self.lu.op.iallocator)
7196 raise errors.OpPrereqError("Can't compute nodes using"
7197 " iallocator '%s': %s" %
7198 (self.lu.op.iallocator, ial.info),
7200 if len(ial.result) != ial.required_nodes:
7201 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7202 " of nodes (%s), required %s" %
7203 (self.lu.op.iallocator, len(ial.result),
7204 ial.required_nodes), errors.ECODE_FAULT)
7205 self.target_node = ial.result[0]
7206 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7207 self.instance_name, self.lu.op.iallocator,
7208 utils.CommaJoin(ial.result))
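# Illustrative outcome (hypothetical node name): a successful relocation
# request yields something like ial.result == ["node3.example.com"], whose
# first entry becomes self.target_node.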
7210 def _WaitUntilSync(self):
7211 """Poll with custom rpc for disk sync.
7213 This uses our own step-based rpc call.
7216 self.feedback_fn("* wait until resync is done")
7220 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7222 self.instance.disks)
7224 for node, nres in result.items():
7225 nres.Raise("Cannot resync disks on node %s" % node)
7226 node_done, node_percent = nres.payload
7227 all_done = all_done and node_done
7228 if node_percent is not None:
7229 min_percent = min(min_percent, node_percent)
7231 if min_percent < 100:
7232 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7235 def _EnsureSecondary(self, node):
7236 """Demote a node to secondary.
7239 self.feedback_fn("* switching node %s to secondary mode" % node)
7241 for dev in self.instance.disks:
7242 self.cfg.SetDiskID(dev, node)
7244 result = self.rpc.call_blockdev_close(node, self.instance.name,
7245 self.instance.disks)
7246 result.Raise("Cannot change disk to secondary on node %s" % node)
7248 def _GoStandalone(self):
7249 """Disconnect from the network.
7252 self.feedback_fn("* changing into standalone mode")
7253 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7254 self.instance.disks)
7255 for node, nres in result.items():
7256 nres.Raise("Cannot disconnect disks node %s" % node)
7258 def _GoReconnect(self, multimaster):
7259 """Reconnect to the network.
7265 msg = "single-master"
7266 self.feedback_fn("* changing disks into %s mode" % msg)
7267 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7268 self.instance.disks,
7269 self.instance.name, multimaster)
7270 for node, nres in result.items():
7271 nres.Raise("Cannot change disks config on node %s" % node)
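# Together, _EnsureSecondary, _GoStandalone and _GoReconnect implement the
# DRBD reconfiguration sequence used below: close the devices on one side,
# drop the network connection, then re-attach in either single-master or
# multimaster mode as required by the migration phase.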
7273 def _ExecCleanup(self):
7274 """Try to cleanup after a failed migration.
7276 The cleanup is done by:
7277 - check that the instance is running only on one node
7278 (and update the config if needed)
7279 - change disks on its secondary node to secondary
7280 - wait until disks are fully synchronized
7281 - disconnect from the network
7282 - change disks into single-master mode
7283 - wait again until disks are fully synchronized
7286 instance = self.instance
7287 target_node = self.target_node
7288 source_node = self.source_node
7290 # check running on only one node
7291 self.feedback_fn("* checking where the instance actually runs"
7292 " (if this hangs, the hypervisor might be in"
7294 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7295 for node, result in ins_l.items():
7296 result.Raise("Can't contact node %s" % node)
7298 runningon_source = instance.name in ins_l[source_node].payload
7299 runningon_target = instance.name in ins_l[target_node].payload
7301 if runningon_source and runningon_target:
7302 raise errors.OpExecError("Instance seems to be running on two nodes,"
7303 " or the hypervisor is confused; you will have"
7304 " to ensure manually that it runs only on one"
7305 " and restart this operation")
7307 if not (runningon_source or runningon_target):
7308 raise errors.OpExecError("Instance does not seem to be running at all;"
7309 " in this case it's safer to repair by"
7310 " running 'gnt-instance stop' to ensure disk"
7311 " shutdown, and then restarting it")
7313 if runningon_target:
7314 # the migration has actually succeeded, we need to update the config
7315 self.feedback_fn("* instance running on secondary node (%s),"
7316 " updating config" % target_node)
7317 instance.primary_node = target_node
7318 self.cfg.Update(instance, self.feedback_fn)
7319 demoted_node = source_node
7321 self.feedback_fn("* instance confirmed to be running on its"
7322 " primary node (%s)" % source_node)
7323 demoted_node = target_node
7325 if instance.disk_template in constants.DTS_INT_MIRROR:
7326 self._EnsureSecondary(demoted_node)
7328 self._WaitUntilSync()
7329 except errors.OpExecError:
7330 # we ignore here errors, since if the device is standalone, it
7331 # won't be able to sync
7333 self._GoStandalone()
7334 self._GoReconnect(False)
7335 self._WaitUntilSync()
7337 self.feedback_fn("* done")
7339 def _RevertDiskStatus(self):
7340 """Try to revert the disk status after a failed migration.
7343 target_node = self.target_node
7344 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7348 self._EnsureSecondary(target_node)
7349 self._GoStandalone()
7350 self._GoReconnect(False)
7351 self._WaitUntilSync()
7352 except errors.OpExecError, err:
7353 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7354 " please try to recover the instance manually;"
7355 " error '%s'" % str(err))
7357 def _AbortMigration(self):
7358 """Call the hypervisor code to abort a started migration.
7361 instance = self.instance
7362 target_node = self.target_node
7363 migration_info = self.migration_info
7365 abort_result = self.rpc.call_finalize_migration(target_node,
7369 abort_msg = abort_result.fail_msg
7371 logging.error("Aborting migration failed on target node %s: %s",
7372 target_node, abort_msg)
7373 # Don't raise an exception here, as we still have to try to revert the
7374 # disk status, even if this step failed.
7376 def _ExecMigration(self):
7377 """Migrate an instance.
7379 The migrate is done by:
7380 - change the disks into dual-master mode
7381 - wait until disks are fully synchronized again
7382 - migrate the instance
7383 - change disks on the new secondary node (the old primary) to secondary
7384 - wait until disks are fully synchronized
7385 - change disks into single-master mode
7388 instance = self.instance
7389 target_node = self.target_node
7390 source_node = self.source_node
7392 self.feedback_fn("* checking disk consistency between source and target")
7393 for dev in instance.disks:
7394 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7395 raise errors.OpExecError("Disk %s is degraded or not fully"
7396 " synchronized on target node,"
7397 " aborting migration" % dev.iv_name)
7399 # First get the migration information from the remote node
7400 result = self.rpc.call_migration_info(source_node, instance)
7401 msg = result.fail_msg
7403 log_err = ("Failed fetching source migration information from %s: %s" %
7405 logging.error(log_err)
7406 raise errors.OpExecError(log_err)
7408 self.migration_info = migration_info = result.payload
7410 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7411 # Then switch the disks to master/master mode
7412 self._EnsureSecondary(target_node)
7413 self._GoStandalone()
7414 self._GoReconnect(True)
7415 self._WaitUntilSync()
7417 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7418 result = self.rpc.call_accept_instance(target_node,
7421 self.nodes_ip[target_node])
7423 msg = result.fail_msg
7425 logging.error("Instance pre-migration failed, trying to revert"
7426 " disk status: %s", msg)
7427 self.feedback_fn("Pre-migration failed, aborting")
7428 self._AbortMigration()
7429 self._RevertDiskStatus()
7430 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7431 (instance.name, msg))
7433 self.feedback_fn("* migrating instance to %s" % target_node)
7434 result = self.rpc.call_instance_migrate(source_node, instance,
7435 self.nodes_ip[target_node],
7437 msg = result.fail_msg
7439 logging.error("Instance migration failed, trying to revert"
7440 " disk status: %s", msg)
7441 self.feedback_fn("Migration failed, aborting")
7442 self._AbortMigration()
7443 self._RevertDiskStatus()
7444 raise errors.OpExecError("Could not migrate instance %s: %s" %
7445 (instance.name, msg))
7447 instance.primary_node = target_node
7448 # distribute new instance config to the other nodes
7449 self.cfg.Update(instance, self.feedback_fn)
7451 result = self.rpc.call_finalize_migration(target_node,
7455 msg = result.fail_msg
7457 logging.error("Instance migration succeeded, but finalization failed:"
7459 raise errors.OpExecError("Could not finalize instance migration: %s" %
7462 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7463 self._EnsureSecondary(source_node)
7464 self._WaitUntilSync()
7465 self._GoStandalone()
7466 self._GoReconnect(False)
7467 self._WaitUntilSync()
7469 self.feedback_fn("* done")
7471 def _ExecFailover(self):
7472 """Failover an instance.
7474 The failover is done by shutting it down on its present node and
7475 starting it on the secondary.
7478 instance = self.instance
7479 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7481 source_node = instance.primary_node
7482 target_node = self.target_node
7484 if instance.admin_up:
7485 self.feedback_fn("* checking disk consistency between source and target")
7486 for dev in instance.disks:
7487 # for drbd, these are drbd over lvm
7488 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7489 if primary_node.offline:
7490 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7492 (primary_node.name, dev.iv_name, target_node))
7493 elif not self.ignore_consistency:
7494 raise errors.OpExecError("Disk %s is degraded on target node,"
7495 " aborting failover" % dev.iv_name)
7497 self.feedback_fn("* not checking disk consistency as instance is not"
7500 self.feedback_fn("* shutting down instance on source node")
7501 logging.info("Shutting down instance %s on node %s",
7502 instance.name, source_node)
7504 result = self.rpc.call_instance_shutdown(source_node, instance,
7505 self.shutdown_timeout)
7506 msg = result.fail_msg
7508 if self.ignore_consistency or primary_node.offline:
7509 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7510 " proceeding anyway; please make sure node"
7511 " %s is down; error details: %s",
7512 instance.name, source_node, source_node, msg)
7514 raise errors.OpExecError("Could not shutdown instance %s on"
7516 (instance.name, source_node, msg))
7518 self.feedback_fn("* deactivating the instance's disks on source node")
7519 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7520 raise errors.OpExecError("Can't shut down the instance's disks")
7522 instance.primary_node = target_node
7523 # distribute new instance config to the other nodes
7524 self.cfg.Update(instance, self.feedback_fn)
7526 # Only start the instance if it's marked as up
7527 if instance.admin_up:
7528 self.feedback_fn("* activating the instance's disks on target node %s" %
7530 logging.info("Starting instance %s on node %s",
7531 instance.name, target_node)
7533 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7534 ignore_secondaries=True)
7536 _ShutdownInstanceDisks(self.lu, instance)
7537 raise errors.OpExecError("Can't activate the instance's disks")
7539 self.feedback_fn("* starting the instance on the target node %s" %
7541 result = self.rpc.call_instance_start(target_node, instance, None, None,
7543 msg = result.fail_msg
7545 _ShutdownInstanceDisks(self.lu, instance)
7546 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7547 (instance.name, target_node, msg))
7549 def Exec(self, feedback_fn):
7550 """Perform the migration.
7553 self.feedback_fn = feedback_fn
7554 self.source_node = self.instance.primary_node
7556 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7557 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7558 self.target_node = self.instance.secondary_nodes[0]
7559 # Otherwise self.target_node has been populated either
7560 # directly, or through an iallocator.
7562 self.all_nodes = [self.source_node, self.target_node]
7563 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7564 in self.cfg.GetMultiNodeInfo(self.all_nodes))
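# nodes_ip maps each involved node to its secondary IP, which is what the
# migration and DRBD RPCs above use to address the peer node.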
7567 feedback_fn("Failover instance %s" % self.instance.name)
7568 self._ExecFailover()
7570 feedback_fn("Migrating instance %s" % self.instance.name)
7573 return self._ExecCleanup()
7575 return self._ExecMigration()
7578 def _CreateBlockDev(lu, node, instance, device, force_create,
7580 """Create a tree of block devices on a given node.
7582 If this device type has to be created on secondaries, create it and
7585 If not, just recurse to children keeping the same 'force' value.
7587 @param lu: the lu on whose behalf we execute
7588 @param node: the node on which to create the device
7589 @type instance: L{objects.Instance}
7590 @param instance: the instance which owns the device
7591 @type device: L{objects.Disk}
7592 @param device: the device to create
7593 @type force_create: boolean
7594 @param force_create: whether to force creation of this device; this
7595 will be changed to True whenever we find a device which has
7596 CreateOnSecondary() attribute
7597 @param info: the extra 'metadata' we should attach to the device
7598 (this will be represented as a LVM tag)
7599 @type force_open: boolean
7600 @param force_open: this parameter will be passed to the
7601 L{backend.BlockdevCreate} function where it specifies
7602 whether we run on primary or not, and it affects both
7603 the child assembly and the device's own Open() execution
7606 if device.CreateOnSecondary():
7610 for child in device.children:
7611 _CreateBlockDev(lu, node, instance, child, force_create,
7614 if not force_create:
7617 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
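# Note (added comment): force_create propagates downwards -- once a device
# reports CreateOnSecondary(), it and its children are created on the given
# node even if the caller passed force_create=False. For example, with DRBD
# over LVM the mirrored devices are typically created on the secondary node as
# well for exactly this reason.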
7620 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7621 """Create a single block device on a given node.
7623 This will not recurse over children of the device, so they must be
7626 @param lu: the lu on whose behalf we execute
7627 @param node: the node on which to create the device
7628 @type instance: L{objects.Instance}
7629 @param instance: the instance which owns the device
7630 @type device: L{objects.Disk}
7631 @param device: the device to create
7632 @param info: the extra 'metadata' we should attach to the device
7633 (this will be represented as a LVM tag)
7634 @type force_open: boolean
7635 @param force_open: this parameter will be passed to the
7636 L{backend.BlockdevCreate} function where it specifies
7637 whether we run on primary or not, and it affects both
7638 the child assembly and the device's own Open() execution
7641 lu.cfg.SetDiskID(device, node)
7642 result = lu.rpc.call_blockdev_create(node, device, device.size,
7643 instance.name, force_open, info)
7644 result.Raise("Can't create block device %s on"
7645 " node %s for instance %s" % (device, node, instance.name))
7646 if device.physical_id is None:
7647 device.physical_id = result.payload
7650 def _GenerateUniqueNames(lu, exts):
7651 """Generate a suitable LV name.
7653 This will generate a logical volume name for the given instance.
7658 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7659 results.append("%s%s" % (new_id, val))
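# Illustrative sketch (added, not part of the original code): for exts such as
# [".disk0", ".disk1"] this returns names like
# ["<uuid1>.disk0", "<uuid2>.disk1"], with a fresh unique ID generated for
# every extension.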
7663 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7664 iv_name, p_minor, s_minor):
7665 """Generate a drbd8 device complete with its children.
7668 assert len(vgnames) == len(names) == 2
7669 port = lu.cfg.AllocatePort()
7670 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7671 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7672 logical_id=(vgnames[0], names[0]))
7673 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7674 logical_id=(vgnames[1], names[1]))
7675 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7676 logical_id=(primary, secondary, port,
7679 children=[dev_data, dev_meta],
7684 def _GenerateDiskTemplate(lu, template_name,
7685 instance_name, primary_node,
7686 secondary_nodes, disk_info,
7687 file_storage_dir, file_driver,
7688 base_index, feedback_fn):
7689 """Generate the entire disk layout for a given template type.
7692 #TODO: compute space requirements
7694 vgname = lu.cfg.GetVGName()
7695 disk_count = len(disk_info)
7697 if template_name == constants.DT_DISKLESS:
7699 elif template_name == constants.DT_PLAIN:
7700 if len(secondary_nodes) != 0:
7701 raise errors.ProgrammerError("Wrong template configuration")
7703 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7704 for i in range(disk_count)])
7705 for idx, disk in enumerate(disk_info):
7706 disk_index = idx + base_index
7707 vg = disk.get(constants.IDISK_VG, vgname)
7708 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7709 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7710 size=disk[constants.IDISK_SIZE],
7711 logical_id=(vg, names[idx]),
7712 iv_name="disk/%d" % disk_index,
7713 mode=disk[constants.IDISK_MODE])
7714 disks.append(disk_dev)
7715 elif template_name == constants.DT_DRBD8:
7716 if len(secondary_nodes) != 1:
7717 raise errors.ProgrammerError("Wrong template configuration")
7718 remote_node = secondary_nodes[0]
7719 minors = lu.cfg.AllocateDRBDMinor(
7720 [primary_node, remote_node] * len(disk_info), instance_name)
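# Added note: the minors are requested as [primary, secondary, primary,
# secondary, ...], one pair per disk, so below minors[idx * 2] is the primary
# node's minor and minors[idx * 2 + 1] the secondary's for disk number idx.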
7723 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7724 for i in range(disk_count)]):
7725 names.append(lv_prefix + "_data")
7726 names.append(lv_prefix + "_meta")
7727 for idx, disk in enumerate(disk_info):
7728 disk_index = idx + base_index
7729 data_vg = disk.get(constants.IDISK_VG, vgname)
7730 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7731 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7732 disk[constants.IDISK_SIZE],
7734 names[idx * 2:idx * 2 + 2],
7735 "disk/%d" % disk_index,
7736 minors[idx * 2], minors[idx * 2 + 1])
7737 disk_dev.mode = disk[constants.IDISK_MODE]
7738 disks.append(disk_dev)
7739 elif template_name == constants.DT_FILE:
7740 if len(secondary_nodes) != 0:
7741 raise errors.ProgrammerError("Wrong template configuration")
7743 opcodes.RequireFileStorage()
7745 for idx, disk in enumerate(disk_info):
7746 disk_index = idx + base_index
7747 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7748 size=disk[constants.IDISK_SIZE],
7749 iv_name="disk/%d" % disk_index,
7750 logical_id=(file_driver,
7751 "%s/disk%d" % (file_storage_dir,
7753 mode=disk[constants.IDISK_MODE])
7754 disks.append(disk_dev)
7755 elif template_name == constants.DT_SHARED_FILE:
7756 if len(secondary_nodes) != 0:
7757 raise errors.ProgrammerError("Wrong template configuration")
7759 opcodes.RequireSharedFileStorage()
7761 for idx, disk in enumerate(disk_info):
7762 disk_index = idx + base_index
7763 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7764 size=disk[constants.IDISK_SIZE],
7765 iv_name="disk/%d" % disk_index,
7766 logical_id=(file_driver,
7767 "%s/disk%d" % (file_storage_dir,
7769 mode=disk[constants.IDISK_MODE])
7770 disks.append(disk_dev)
7771 elif template_name == constants.DT_BLOCK:
7772 if len(secondary_nodes) != 0:
7773 raise errors.ProgrammerError("Wrong template configuration")
7775 for idx, disk in enumerate(disk_info):
7776 disk_index = idx + base_index
7777 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7778 size=disk[constants.IDISK_SIZE],
7779 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7780 disk[constants.IDISK_ADOPT]),
7781 iv_name="disk/%d" % disk_index,
7782 mode=disk[constants.IDISK_MODE])
7783 disks.append(disk_dev)
7786 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7790 def _GetInstanceInfoText(instance):
7791 """Compute that text that should be added to the disk's metadata.
7794 return "originstname+%s" % instance.name
7797 def _CalcEta(time_taken, written, total_size):
7798 """Calculates the ETA based on size written and total size.
7800 @param time_taken: The time taken so far
7801 @param written: amount written so far
7802 @param total_size: The total size of data to be written
7803 @return: The remaining time in seconds
7806 avg_time = time_taken / float(written)
7807 return (total_size - written) * avg_time
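# Worked example (added for clarity): _CalcEta(30.0, 1024, 4096) assumes the
# observed rate stays constant, so the remaining 4096 - 1024 units take
# (4096 - 1024) * (30.0 / 1024) = 90.0 seconds.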
7810 def _WipeDisks(lu, instance):
7811 """Wipes instance disks.
7813 @type lu: L{LogicalUnit}
7814 @param lu: the logical unit on whose behalf we execute
7815 @type instance: L{objects.Instance}
7816 @param instance: the instance whose disks we should wipe
7817 @return: the success of the wipe
7820 node = instance.primary_node
7822 for device in instance.disks:
7823 lu.cfg.SetDiskID(device, node)
7825 logging.info("Pause sync of instance %s disks", instance.name)
7826 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7828 for idx, success in enumerate(result.payload):
7830 logging.warn("pause-sync of instance %s for disks %d failed",
7834 for idx, device in enumerate(instance.disks):
7835 # The wipe size is MIN_WIPE_CHUNK_PERCENT of the instance disk size,
7836 # capped at MAX_WIPE_CHUNK
7837 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7838 constants.MIN_WIPE_CHUNK_PERCENT)
7839 # we _must_ make this an int, otherwise rounding errors will occur
7841 wipe_chunk_size = int(wipe_chunk_size)
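# Example (added, assuming the usual 10% minimum chunk percentage): a
# 20480 MiB disk yields min(constants.MAX_WIPE_CHUNK, 2048) MiB per wipe
# request, so large disks are wiped in several bounded-size chunks with
# progress reported below.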
7843 lu.LogInfo("* Wiping disk %d", idx)
7844 logging.info("Wiping disk %d for instance %s, node %s using"
7845 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7850 start_time = time.time()
7852 while offset < size:
7853 wipe_size = min(wipe_chunk_size, size - offset)
7854 logging.debug("Wiping disk %d, offset %s, chunk %s",
7855 idx, offset, wipe_size)
7856 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7857 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7858 (idx, offset, wipe_size))
7861 if now - last_output >= 60:
7862 eta = _CalcEta(now - start_time, offset, size)
7863 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7864 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7867 logging.info("Resume sync of instance %s disks", instance.name)
7869 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7871 for idx, success in enumerate(result.payload):
7873 lu.LogWarning("Resume sync of disk %d failed, please have a"
7874 " look at the status and troubleshoot the issue", idx)
7875 logging.warn("resume-sync of instance %s for disks %d failed",
7879 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7880 """Create all disks for an instance.
7882 This abstracts away some work from AddInstance.
7884 @type lu: L{LogicalUnit}
7885 @param lu: the logical unit on whose behalf we execute
7886 @type instance: L{objects.Instance}
7887 @param instance: the instance whose disks we should create
7889 @param to_skip: list of indices to skip
7890 @type target_node: string
7891 @param target_node: if passed, overrides the target node for creation
7893 @return: the success of the creation
7896 info = _GetInstanceInfoText(instance)
7897 if target_node is None:
7898 pnode = instance.primary_node
7899 all_nodes = instance.all_nodes
7904 if instance.disk_template in constants.DTS_FILEBASED:
7905 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7906 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7908 result.Raise("Failed to create directory '%s' on"
7909 " node %s" % (file_storage_dir, pnode))
7911 # Note: this needs to be kept in sync with adding of disks in
7912 # LUInstanceSetParams
7913 for idx, device in enumerate(instance.disks):
7914 if to_skip and idx in to_skip:
7916 logging.info("Creating volume %s for instance %s",
7917 device.iv_name, instance.name)
7919 for node in all_nodes:
7920 f_create = node == pnode
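# Added note: only the primary node gets force_create/force_open set to True
# here; secondary nodes rely on _CreateBlockDev turning force_create on for
# devices that report CreateOnSecondary().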
7921 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7924 def _RemoveDisks(lu, instance, target_node=None):
7925 """Remove all disks for an instance.
7927 This abstracts away some work from `AddInstance()` and
7928 `RemoveInstance()`. Note that in case some of the devices couldn't
7929 be removed, the removal will continue with the other ones (compare
7930 with `_CreateDisks()`).
7932 @type lu: L{LogicalUnit}
7933 @param lu: the logical unit on whose behalf we execute
7934 @type instance: L{objects.Instance}
7935 @param instance: the instance whose disks we should remove
7936 @type target_node: string
7937 @param target_node: used to override the node on which to remove the disks
7939 @return: the success of the removal
7942 logging.info("Removing block devices for instance %s", instance.name)
7945 for device in instance.disks:
7947 edata = [(target_node, device)]
7949 edata = device.ComputeNodeTree(instance.primary_node)
7950 for node, disk in edata:
7951 lu.cfg.SetDiskID(disk, node)
7952 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7954 lu.LogWarning("Could not remove block device %s on node %s,"
7955 " continuing anyway: %s", device.iv_name, node, msg)
7958 if instance.disk_template == constants.DT_FILE:
7959 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7963 tgt = instance.primary_node
7964 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7966 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7967 file_storage_dir, instance.primary_node, result.fail_msg)
7973 def _ComputeDiskSizePerVG(disk_template, disks):
7974 """Compute disk size requirements in the volume group
7977 def _compute(disks, payload):
7978 """Universal algorithm.
7983 vgs[disk[constants.IDISK_VG]] = \
7984 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7988 # Required free disk space as a function of disk and swap space
7990 constants.DT_DISKLESS: {},
7991 constants.DT_PLAIN: _compute(disks, 0),
7992 # 128 MB are added for drbd metadata for each disk
7993 constants.DT_DRBD8: _compute(disks, 128),
7994 constants.DT_FILE: {},
7995 constants.DT_SHARED_FILE: {},
7998 if disk_template not in req_size_dict:
7999 raise errors.ProgrammerError("Disk template '%s' size requirement"
8000 " is unknown" % disk_template)
8002 return req_size_dict[disk_template]
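# Illustrative example (added): with the per-VG accumulation above, two DRBD
# disks of 1024 and 2048 MiB in volume group "xenvg" add up to
# {"xenvg": 1024 + 2048 + 2 * 128} = {"xenvg": 3328}, i.e. the disk sizes plus
# 128 MiB of DRBD metadata per disk.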
8005 def _ComputeDiskSize(disk_template, disks):
8006 """Compute disk size requirements in the volume group
8009 # Required free disk space as a function of disk and swap space
8011 constants.DT_DISKLESS: None,
8012 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8013 # 128 MB are added for drbd metadata for each disk
8014 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8015 constants.DT_FILE: None,
8016 constants.DT_SHARED_FILE: 0,
8017 constants.DT_BLOCK: 0,
8020 if disk_template not in req_size_dict:
8021 raise errors.ProgrammerError("Disk template '%s' size requirement"
8022 " is unknown" % disk_template)
8024 return req_size_dict[disk_template]
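# Illustrative example (added): _ComputeDiskSize(constants.DT_DRBD8,
# [{constants.IDISK_SIZE: 1024}, {constants.IDISK_SIZE: 2048}]) evaluates to
# 1024 + 128 + 2048 + 128 = 3328 (MiB), while the file-based templates need no
# space in the volume group.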
8027 def _FilterVmNodes(lu, nodenames):
8028 """Filters out non-vm_capable nodes from a list.
8030 @type lu: L{LogicalUnit}
8031 @param lu: the logical unit for which we check
8032 @type nodenames: list
8033 @param nodenames: the list of nodes on which we should check
8035 @return: the list of vm-capable nodes
8038 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8039 return [name for name in nodenames if name not in non_vm_nodes]
8042 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8043 """Hypervisor parameter validation.
8045 This function abstracts the hypervisor parameter validation to be
8046 used in both instance create and instance modify.
8048 @type lu: L{LogicalUnit}
8049 @param lu: the logical unit for which we check
8050 @type nodenames: list
8051 @param nodenames: the list of nodes on which we should check
8052 @type hvname: string
8053 @param hvname: the name of the hypervisor we should use
8054 @type hvparams: dict
8055 @param hvparams: the parameters which we need to check
8056 @raise errors.OpPrereqError: if the parameters are not valid
8059 nodenames = _FilterVmNodes(lu, nodenames)
8060 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8063 for node in nodenames:
8067 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8070 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8071 """OS parameters validation.
8073 @type lu: L{LogicalUnit}
8074 @param lu: the logical unit for which we check
8075 @type required: boolean
8076 @param required: whether the validation should fail if the OS is not found
8078 @type nodenames: list
8079 @param nodenames: the list of nodes on which we should check
8080 @type osname: string
8081 @param osname: the name of the OS we should check
8082 @type osparams: dict
8083 @param osparams: the parameters which we need to check
8084 @raise errors.OpPrereqError: if the parameters are not valid
8087 nodenames = _FilterVmNodes(lu, nodenames)
8088 result = lu.rpc.call_os_validate(required, nodenames, osname,
8089 [constants.OS_VALIDATE_PARAMETERS],
8091 for node, nres in result.items():
8092 # we don't check for offline cases since this should be run only
8093 # against the master node and/or an instance's nodes
8094 nres.Raise("OS Parameters validation failed on node %s" % node)
8095 if not nres.payload:
8096 lu.LogInfo("OS %s not found on node %s, validation skipped",
8100 class LUInstanceCreate(LogicalUnit):
8101 """Create an instance.
8104 HPATH = "instance-add"
8105 HTYPE = constants.HTYPE_INSTANCE
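# Typical entry point (illustrative, added comment): this LU backs invocations
# such as
#   gnt-instance add -t drbd -o <os-name> \
#     -n node1.example.com:node2.example.com -s 10G inst1.example.com
# where the OS name and node names obviously depend on the cluster.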
8108 def CheckArguments(self):
8112 # do not require name_check to ease forward/backward compatibility
8114 if self.op.no_install and self.op.start:
8115 self.LogInfo("No-installation mode selected, disabling startup")
8116 self.op.start = False
8117 # validate/normalize the instance name
8118 self.op.instance_name = \
8119 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8121 if self.op.ip_check and not self.op.name_check:
8122 # TODO: make the ip check more flexible and not depend on the name check
8123 raise errors.OpPrereqError("Cannot do IP address check without a name"
8124 " check", errors.ECODE_INVAL)
8126 # check nics' parameter names
8127 for nic in self.op.nics:
8128 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8130 # check disks. parameter names and consistent adopt/no-adopt strategy
8131 has_adopt = has_no_adopt = False
8132 for disk in self.op.disks:
8133 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8134 if constants.IDISK_ADOPT in disk:
8138 if has_adopt and has_no_adopt:
8139 raise errors.OpPrereqError("Either all disks are adopted or none is",
8142 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8143 raise errors.OpPrereqError("Disk adoption is not supported for the"
8144 " '%s' disk template" %
8145 self.op.disk_template,
8147 if self.op.iallocator is not None:
8148 raise errors.OpPrereqError("Disk adoption not allowed with an"
8149 " iallocator script", errors.ECODE_INVAL)
8150 if self.op.mode == constants.INSTANCE_IMPORT:
8151 raise errors.OpPrereqError("Disk adoption not allowed for"
8152 " instance import", errors.ECODE_INVAL)
8154 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8155 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8156 " but no 'adopt' parameter given" %
8157 self.op.disk_template,
8160 self.adopt_disks = has_adopt
8162 # instance name verification
8163 if self.op.name_check:
8164 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8165 self.op.instance_name = self.hostname1.name
8166 # used in CheckPrereq for ip ping check
8167 self.check_ip = self.hostname1.ip
8169 self.check_ip = None
8171 # file storage checks
8172 if (self.op.file_driver and
8173 not self.op.file_driver in constants.FILE_DRIVER):
8174 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8175 self.op.file_driver, errors.ECODE_INVAL)
8177 if self.op.disk_template == constants.DT_FILE:
8178 opcodes.RequireFileStorage()
8179 elif self.op.disk_template == constants.DT_SHARED_FILE:
8180 opcodes.RequireSharedFileStorage()
8182 ### Node/iallocator related checks
8183 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8185 if self.op.pnode is not None:
8186 if self.op.disk_template in constants.DTS_INT_MIRROR:
8187 if self.op.snode is None:
8188 raise errors.OpPrereqError("The networked disk templates need"
8189 " a mirror node", errors.ECODE_INVAL)
8191 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8193 self.op.snode = None
8195 self._cds = _GetClusterDomainSecret()
8197 if self.op.mode == constants.INSTANCE_IMPORT:
8198 # On import force_variant must be True, because if we forced it at
8199 # initial install, our only chance when importing it back is that it
8201 self.op.force_variant = True
8203 if self.op.no_install:
8204 self.LogInfo("No-installation mode has no effect during import")
8206 elif self.op.mode == constants.INSTANCE_CREATE:
8207 if self.op.os_type is None:
8208 raise errors.OpPrereqError("No guest OS specified",
8210 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8211 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8212 " installation" % self.op.os_type,
8214 if self.op.disk_template is None:
8215 raise errors.OpPrereqError("No disk template specified",
8218 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8219 # Check handshake to ensure both clusters have the same domain secret
8220 src_handshake = self.op.source_handshake
8221 if not src_handshake:
8222 raise errors.OpPrereqError("Missing source handshake",
8225 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8228 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8231 # Load and check source CA
8232 self.source_x509_ca_pem = self.op.source_x509_ca
8233 if not self.source_x509_ca_pem:
8234 raise errors.OpPrereqError("Missing source X509 CA",
8238 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8240 except OpenSSL.crypto.Error, err:
8241 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8242 (err, ), errors.ECODE_INVAL)
8244 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8245 if errcode is not None:
8246 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8249 self.source_x509_ca = cert
8251 src_instance_name = self.op.source_instance_name
8252 if not src_instance_name:
8253 raise errors.OpPrereqError("Missing source instance name",
8256 self.source_instance_name = \
8257 netutils.GetHostname(name=src_instance_name).name
8260 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8261 self.op.mode, errors.ECODE_INVAL)
8263 def ExpandNames(self):
8264 """ExpandNames for CreateInstance.
8266 Figure out the right locks for instance creation.
8269 self.needed_locks = {}
8271 instance_name = self.op.instance_name
8272 # this is just a preventive check, but someone might still add this
8273 # instance in the meantime, and creation will fail at lock-add time
8274 if instance_name in self.cfg.GetInstanceList():
8275 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8276 instance_name, errors.ECODE_EXISTS)
8278 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8280 if self.op.iallocator:
8281 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8283 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8284 nodelist = [self.op.pnode]
8285 if self.op.snode is not None:
8286 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8287 nodelist.append(self.op.snode)
8288 self.needed_locks[locking.LEVEL_NODE] = nodelist
8290 # in case of import lock the source node too
8291 if self.op.mode == constants.INSTANCE_IMPORT:
8292 src_node = self.op.src_node
8293 src_path = self.op.src_path
8295 if src_path is None:
8296 self.op.src_path = src_path = self.op.instance_name
8298 if src_node is None:
8299 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8300 self.op.src_node = None
8301 if os.path.isabs(src_path):
8302 raise errors.OpPrereqError("Importing an instance from a path"
8303 " requires a source node option",
8306 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8307 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8308 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8309 if not os.path.isabs(src_path):
8310 self.op.src_path = src_path = \
8311 utils.PathJoin(constants.EXPORT_DIR, src_path)
8313 def _RunAllocator(self):
8314 """Run the allocator based on input opcode.
8317 nics = [n.ToDict() for n in self.nics]
8318 ial = IAllocator(self.cfg, self.rpc,
8319 mode=constants.IALLOCATOR_MODE_ALLOC,
8320 name=self.op.instance_name,
8321 disk_template=self.op.disk_template,
8324 vcpus=self.be_full[constants.BE_VCPUS],
8325 memory=self.be_full[constants.BE_MEMORY],
8328 hypervisor=self.op.hypervisor,
8331 ial.Run(self.op.iallocator)
8334 raise errors.OpPrereqError("Can't compute nodes using"
8335 " iallocator '%s': %s" %
8336 (self.op.iallocator, ial.info),
8338 if len(ial.result) != ial.required_nodes:
8339 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8340 " of nodes (%s), required %s" %
8341 (self.op.iallocator, len(ial.result),
8342 ial.required_nodes), errors.ECODE_FAULT)
8343 self.op.pnode = ial.result[0]
8344 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8345 self.op.instance_name, self.op.iallocator,
8346 utils.CommaJoin(ial.result))
8347 if ial.required_nodes == 2:
8348 self.op.snode = ial.result[1]
8350 def BuildHooksEnv(self):
8353 This runs on master, primary and secondary nodes of the instance.
8357 "ADD_MODE": self.op.mode,
8359 if self.op.mode == constants.INSTANCE_IMPORT:
8360 env["SRC_NODE"] = self.op.src_node
8361 env["SRC_PATH"] = self.op.src_path
8362 env["SRC_IMAGES"] = self.src_images
8364 env.update(_BuildInstanceHookEnv(
8365 name=self.op.instance_name,
8366 primary_node=self.op.pnode,
8367 secondary_nodes=self.secondaries,
8368 status=self.op.start,
8369 os_type=self.op.os_type,
8370 memory=self.be_full[constants.BE_MEMORY],
8371 vcpus=self.be_full[constants.BE_VCPUS],
8372 nics=_NICListToTuple(self, self.nics),
8373 disk_template=self.op.disk_template,
8374 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8375 for d in self.disks],
8378 hypervisor_name=self.op.hypervisor,
8384 def BuildHooksNodes(self):
8385 """Build hooks nodes.
8388 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8391 def _ReadExportInfo(self):
8392 """Reads the export information from disk.
8394 It will override the opcode source node and path with the actual
8395 information, if these two were not specified before.
8397 @return: the export information
8400 assert self.op.mode == constants.INSTANCE_IMPORT
8402 src_node = self.op.src_node
8403 src_path = self.op.src_path
8405 if src_node is None:
8406 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8407 exp_list = self.rpc.call_export_list(locked_nodes)
8409 for node in exp_list:
8410 if exp_list[node].fail_msg:
8412 if src_path in exp_list[node].payload:
8414 self.op.src_node = src_node = node
8415 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8419 raise errors.OpPrereqError("No export found for relative path %s" %
8420 src_path, errors.ECODE_INVAL)
8422 _CheckNodeOnline(self, src_node)
8423 result = self.rpc.call_export_info(src_node, src_path)
8424 result.Raise("No export or invalid export found in dir %s" % src_path)
8426 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8427 if not export_info.has_section(constants.INISECT_EXP):
8428 raise errors.ProgrammerError("Corrupted export config",
8429 errors.ECODE_ENVIRON)
8431 ei_version = export_info.get(constants.INISECT_EXP, "version")
8432 if (int(ei_version) != constants.EXPORT_VERSION):
8433 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8434 (ei_version, constants.EXPORT_VERSION),
8435 errors.ECODE_ENVIRON)
8438 def _ReadExportParams(self, einfo):
8439 """Use export parameters as defaults.
8441 In case the opcode doesn't override some instance parameters, try to use
8442 them from the export information, if available.
8446 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8448 if self.op.disk_template is None:
8449 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8450 self.op.disk_template = einfo.get(constants.INISECT_INS,
8453 raise errors.OpPrereqError("No disk template specified and the export"
8454 " is missing the disk_template information",
8457 if not self.op.disks:
8458 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8460 # TODO: import the disk iv_name too
8461 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8462 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8463 disks.append({constants.IDISK_SIZE: disk_sz})
8464 self.op.disks = disks
8466 raise errors.OpPrereqError("No disk info specified and the export"
8467 " is missing the disk information",
8470 if (not self.op.nics and
8471 einfo.has_option(constants.INISECT_INS, "nic_count")):
8473 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8475 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8476 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8481 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8482 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8484 if (self.op.hypervisor is None and
8485 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8486 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8488 if einfo.has_section(constants.INISECT_HYP):
8489 # use the export parameters but do not override the ones
8490 # specified by the user
8491 for name, value in einfo.items(constants.INISECT_HYP):
8492 if name not in self.op.hvparams:
8493 self.op.hvparams[name] = value
8495 if einfo.has_section(constants.INISECT_BEP):
8496 # use the parameters, without overriding
8497 for name, value in einfo.items(constants.INISECT_BEP):
8498 if name not in self.op.beparams:
8499 self.op.beparams[name] = value
8501 # try to read the parameters old style, from the main section
8502 for name in constants.BES_PARAMETERS:
8503 if (name not in self.op.beparams and
8504 einfo.has_option(constants.INISECT_INS, name)):
8505 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8507 if einfo.has_section(constants.INISECT_OSP):
8508 # use the parameters, without overriding
8509 for name, value in einfo.items(constants.INISECT_OSP):
8510 if name not in self.op.osparams:
8511 self.op.osparams[name] = value
8513 def _RevertToDefaults(self, cluster):
8514 """Revert the instance parameters to the default values.
8518 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8519 for name in self.op.hvparams.keys():
8520 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8521 del self.op.hvparams[name]
8523 be_defs = cluster.SimpleFillBE({})
8524 for name in self.op.beparams.keys():
8525 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8526 del self.op.beparams[name]
8528 nic_defs = cluster.SimpleFillNIC({})
8529 for nic in self.op.nics:
8530 for name in constants.NICS_PARAMETERS:
8531 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8534 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8535 for name in self.op.osparams.keys():
8536 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8537 del self.op.osparams[name]
8539 def _CalculateFileStorageDir(self):
8540 """Calculate final instance file storage dir.
8543 # file storage dir calculation/check
8544 self.instance_file_storage_dir = None
8545 if self.op.disk_template in constants.DTS_FILEBASED:
8546 # build the full file storage dir path
8549 if self.op.disk_template == constants.DT_SHARED_FILE:
8550 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8552 get_fsd_fn = self.cfg.GetFileStorageDir
8554 cfg_storagedir = get_fsd_fn()
8555 if not cfg_storagedir:
8556 raise errors.OpPrereqError("Cluster file storage dir not defined")
8557 joinargs.append(cfg_storagedir)
8559 if self.op.file_storage_dir is not None:
8560 joinargs.append(self.op.file_storage_dir)
8562 joinargs.append(self.op.instance_name)
8564 # pylint: disable=W0142
8565 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8567 def CheckPrereq(self):
8568 """Check prerequisites.
8571 self._CalculateFileStorageDir()
8573 if self.op.mode == constants.INSTANCE_IMPORT:
8574 export_info = self._ReadExportInfo()
8575 self._ReadExportParams(export_info)
8577 if (not self.cfg.GetVGName() and
8578 self.op.disk_template not in constants.DTS_NOT_LVM):
8579 raise errors.OpPrereqError("Cluster does not support lvm-based"
8580 " instances", errors.ECODE_STATE)
8582 if self.op.hypervisor is None:
8583 self.op.hypervisor = self.cfg.GetHypervisorType()
8585 cluster = self.cfg.GetClusterInfo()
8586 enabled_hvs = cluster.enabled_hypervisors
8587 if self.op.hypervisor not in enabled_hvs:
8588 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8589 " cluster (%s)" % (self.op.hypervisor,
8590 ",".join(enabled_hvs)),
8593 # Check tag validity
8594 for tag in self.op.tags:
8595 objects.TaggableObject.ValidateTag(tag)
8597 # check hypervisor parameter syntax (locally)
8598 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8599 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8601 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8602 hv_type.CheckParameterSyntax(filled_hvp)
8603 self.hv_full = filled_hvp
8604 # check that we don't specify global parameters on an instance
8605 _CheckGlobalHvParams(self.op.hvparams)
8607 # fill and remember the beparams dict
8608 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8609 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8611 # build os parameters
8612 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8614 # now that hvp/bep are in final format, let's reset to defaults,
8616 if self.op.identify_defaults:
8617 self._RevertToDefaults(cluster)
8621 for idx, nic in enumerate(self.op.nics):
8622 nic_mode_req = nic.get(constants.INIC_MODE, None)
8623 nic_mode = nic_mode_req
8624 if nic_mode is None:
8625 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8627 # in routed mode, for the first nic, the default ip is 'auto'
8628 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8629 default_ip_mode = constants.VALUE_AUTO
8631 default_ip_mode = constants.VALUE_NONE
8633 # ip validity checks
8634 ip = nic.get(constants.INIC_IP, default_ip_mode)
8635 if ip is None or ip.lower() == constants.VALUE_NONE:
8637 elif ip.lower() == constants.VALUE_AUTO:
8638 if not self.op.name_check:
8639 raise errors.OpPrereqError("IP address set to auto but name checks"
8640 " have been skipped",
8642 nic_ip = self.hostname1.ip
8644 if not netutils.IPAddress.IsValid(ip):
8645 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8649 # TODO: check the ip address for uniqueness
8650 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8651 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8654 # MAC address verification
8655 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8656 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8657 mac = utils.NormalizeAndValidateMac(mac)
8660 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8661 except errors.ReservationError:
8662 raise errors.OpPrereqError("MAC address %s already in use"
8663 " in cluster" % mac,
8664 errors.ECODE_NOTUNIQUE)
8666 # Build nic parameters
8667 link = nic.get(constants.INIC_LINK, None)
8670 nicparams[constants.NIC_MODE] = nic_mode_req
8672 nicparams[constants.NIC_LINK] = link
8674 check_params = cluster.SimpleFillNIC(nicparams)
8675 objects.NIC.CheckParameterSyntax(check_params)
8676 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8678 # disk checks/pre-build
8679 default_vg = self.cfg.GetVGName()
8681 for disk in self.op.disks:
8682 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8683 if mode not in constants.DISK_ACCESS_SET:
8684 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8685 mode, errors.ECODE_INVAL)
8686 size = disk.get(constants.IDISK_SIZE, None)
8688 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8691 except (TypeError, ValueError):
8692 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8695 data_vg = disk.get(constants.IDISK_VG, default_vg)
8697 constants.IDISK_SIZE: size,
8698 constants.IDISK_MODE: mode,
8699 constants.IDISK_VG: data_vg,
8700 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8702 if constants.IDISK_ADOPT in disk:
8703 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8704 self.disks.append(new_disk)
8706 if self.op.mode == constants.INSTANCE_IMPORT:
8708 # Check that the new instance doesn't have fewer disks than the export
8709 instance_disks = len(self.disks)
8710 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8711 if instance_disks < export_disks:
8712 raise errors.OpPrereqError("Not enough disks to import."
8713 " (instance: %d, export: %d)" %
8714 (instance_disks, export_disks),
8718 for idx in range(export_disks):
8719 option = "disk%d_dump" % idx
8720 if export_info.has_option(constants.INISECT_INS, option):
8721 # FIXME: are the old os-es, disk sizes, etc. useful?
8722 export_name = export_info.get(constants.INISECT_INS, option)
8723 image = utils.PathJoin(self.op.src_path, export_name)
8724 disk_images.append(image)
8726 disk_images.append(False)
8728 self.src_images = disk_images
8730 old_name = export_info.get(constants.INISECT_INS, "name")
8732 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8733 except (TypeError, ValueError), err:
8734 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8735 " an integer: %s" % str(err),
8737 if self.op.instance_name == old_name:
8738 for idx, nic in enumerate(self.nics):
8739 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8740 nic_mac_ini = "nic%d_mac" % idx
8741 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8743 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8745 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8746 if self.op.ip_check:
8747 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8748 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8749 (self.check_ip, self.op.instance_name),
8750 errors.ECODE_NOTUNIQUE)
8752 #### mac address generation
8753 # By generating here the mac address both the allocator and the hooks get
8754 # the real final mac address rather than the 'auto' or 'generate' value.
8755 # There is a race condition between the generation and the instance object
8756 # creation, which means that we know the mac is valid now, but we're not
8757 # sure it will be when we actually add the instance. If things go bad
8758 # adding the instance will abort because of a duplicate mac, and the
8759 # creation job will fail.
8760 for nic in self.nics:
8761 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8762 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8766 if self.op.iallocator is not None:
8767 self._RunAllocator()
8769 #### node related checks
8771 # check primary node
8772 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8773 assert self.pnode is not None, \
8774 "Cannot retrieve locked node %s" % self.op.pnode
8776 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8777 pnode.name, errors.ECODE_STATE)
8779 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8780 pnode.name, errors.ECODE_STATE)
8781 if not pnode.vm_capable:
8782 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8783 " '%s'" % pnode.name, errors.ECODE_STATE)
8785 self.secondaries = []
8787 # mirror node verification
8788 if self.op.disk_template in constants.DTS_INT_MIRROR:
8789 if self.op.snode == pnode.name:
8790 raise errors.OpPrereqError("The secondary node cannot be the"
8791 " primary node", errors.ECODE_INVAL)
8792 _CheckNodeOnline(self, self.op.snode)
8793 _CheckNodeNotDrained(self, self.op.snode)
8794 _CheckNodeVmCapable(self, self.op.snode)
8795 self.secondaries.append(self.op.snode)
8797 nodenames = [pnode.name] + self.secondaries
8799 if not self.adopt_disks:
8800 # Check lv size requirements, if not adopting
8801 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8802 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8804 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8805 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8806 disk[constants.IDISK_ADOPT])
8807 for disk in self.disks])
8808 if len(all_lvs) != len(self.disks):
8809 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8811 for lv_name in all_lvs:
8813 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8814 # to ReserveLV use the same syntax
8815 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8816 except errors.ReservationError:
8817 raise errors.OpPrereqError("LV named %s used by another instance" %
8818 lv_name, errors.ECODE_NOTUNIQUE)
8820 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8821 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8823 node_lvs = self.rpc.call_lv_list([pnode.name],
8824 vg_names.payload.keys())[pnode.name]
8825 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8826 node_lvs = node_lvs.payload
8828 delta = all_lvs.difference(node_lvs.keys())
8830 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8831 utils.CommaJoin(delta),
8833 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8835 raise errors.OpPrereqError("Online logical volumes found, cannot"
8836 " adopt: %s" % utils.CommaJoin(online_lvs),
8838 # update the size of disk based on what is found
8839 for dsk in self.disks:
8840 dsk[constants.IDISK_SIZE] = \
8841 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8842 dsk[constants.IDISK_ADOPT])][0]))
8844 elif self.op.disk_template == constants.DT_BLOCK:
8845 # Normalize and de-duplicate device paths
8846 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8847 for disk in self.disks])
8848 if len(all_disks) != len(self.disks):
8849 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8851 baddisks = [d for d in all_disks
8852 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8854 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8855 " cannot be adopted" %
8856 (", ".join(baddisks),
8857 constants.ADOPTABLE_BLOCKDEV_ROOT),
8860 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8861 list(all_disks))[pnode.name]
8862 node_disks.Raise("Cannot get block device information from node %s" %
8864 node_disks = node_disks.payload
8865 delta = all_disks.difference(node_disks.keys())
8867 raise errors.OpPrereqError("Missing block device(s): %s" %
8868 utils.CommaJoin(delta),
8870 for dsk in self.disks:
8871 dsk[constants.IDISK_SIZE] = \
8872 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8874 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8876 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8877 # check OS parameters (remotely)
8878 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8880 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8882 # memory check on primary node
8884 _CheckNodeFreeMemory(self, self.pnode.name,
8885 "creating instance %s" % self.op.instance_name,
8886 self.be_full[constants.BE_MEMORY],
8889 self.dry_run_result = list(nodenames)
8891 def Exec(self, feedback_fn):
8892 """Create and add the instance to the cluster.
8895 instance = self.op.instance_name
8896 pnode_name = self.pnode.name
8898 ht_kind = self.op.hypervisor
8899 if ht_kind in constants.HTS_REQ_PORT:
8900 network_port = self.cfg.AllocatePort()
8904 disks = _GenerateDiskTemplate(self,
8905 self.op.disk_template,
8906 instance, pnode_name,
8909 self.instance_file_storage_dir,
8910 self.op.file_driver,
8914 iobj = objects.Instance(name=instance, os=self.op.os_type,
8915 primary_node=pnode_name,
8916 nics=self.nics, disks=disks,
8917 disk_template=self.op.disk_template,
8919 network_port=network_port,
8920 beparams=self.op.beparams,
8921 hvparams=self.op.hvparams,
8922 hypervisor=self.op.hypervisor,
8923 osparams=self.op.osparams,
8927 for tag in self.op.tags:
8930 if self.adopt_disks:
8931 if self.op.disk_template == constants.DT_PLAIN:
8932 # rename LVs to the newly-generated names; we need to construct
8933 # 'fake' LV disks with the old data, plus the new unique_id
8934 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8936 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8937 rename_to.append(t_dsk.logical_id)
8938 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8939 self.cfg.SetDiskID(t_dsk, pnode_name)
8940 result = self.rpc.call_blockdev_rename(pnode_name,
8941 zip(tmp_disks, rename_to))
8942 result.Raise("Failed to rename adopted LVs")
8944 feedback_fn("* creating instance disks...")
8946 _CreateDisks(self, iobj)
8947 except errors.OpExecError:
8948 self.LogWarning("Device creation failed, reverting...")
8950 _RemoveDisks(self, iobj)
8952 self.cfg.ReleaseDRBDMinors(instance)
8955 feedback_fn("adding instance %s to cluster config" % instance)
8957 self.cfg.AddInstance(iobj, self.proc.GetECId())
8959 # Declare that we don't want to remove the instance lock anymore, as we've
8960 # added the instance to the config
8961 del self.remove_locks[locking.LEVEL_INSTANCE]
8963 if self.op.mode == constants.INSTANCE_IMPORT:
8964 # Release unused nodes
8965 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8968 _ReleaseLocks(self, locking.LEVEL_NODE)
8971 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8972 feedback_fn("* wiping instance disks...")
8974 _WipeDisks(self, iobj)
8975 except errors.OpExecError, err:
8976 logging.exception("Wiping disks failed")
8977 self.LogWarning("Wiping instance disks failed (%s)", err)
8981 # Something is already wrong with the disks, don't do anything else
8983 elif self.op.wait_for_sync:
8984 disk_abort = not _WaitForSync(self, iobj)
8985 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8986 # make sure the disks are not degraded (still sync-ing is ok)
8987 feedback_fn("* checking mirrors status")
8988 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8993 _RemoveDisks(self, iobj)
8994 self.cfg.RemoveInstance(iobj.name)
8995 # Make sure the instance lock gets removed
8996 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8997 raise errors.OpExecError("There are some degraded disks for"
9000 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9001 if self.op.mode == constants.INSTANCE_CREATE:
9002 if not self.op.no_install:
9003 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9004 not self.op.wait_for_sync)
9006 feedback_fn("* pausing disk sync to install instance OS")
9007 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9009 for idx, success in enumerate(result.payload):
9011 logging.warn("pause-sync of instance %s for disk %d failed",
9014 feedback_fn("* running the instance OS create scripts...")
9015 # FIXME: pass debug option from opcode to backend
9016 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9017 self.op.debug_level)
9019 feedback_fn("* resuming disk sync")
9020 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9022 for idx, success in enumerate(result.payload):
9024 logging.warn("resume-sync of instance %s for disk %d failed",
9027 result.Raise("Could not add os for instance %s"
9028 " on node %s" % (instance, pnode_name))
9030 elif self.op.mode == constants.INSTANCE_IMPORT:
9031 feedback_fn("* running the instance OS import scripts...")
9035 for idx, image in enumerate(self.src_images):
9039 # FIXME: pass debug option from opcode to backend
9040 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9041 constants.IEIO_FILE, (image, ),
9042 constants.IEIO_SCRIPT,
9043 (iobj.disks[idx], idx),
9045 transfers.append(dt)
9048 masterd.instance.TransferInstanceData(self, feedback_fn,
9049 self.op.src_node, pnode_name,
9050 self.pnode.secondary_ip,
9052 if not compat.all(import_result):
9053 self.LogWarning("Some disks for instance %s on node %s were not"
9054 " imported successfully" % (instance, pnode_name))
9056 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9057 feedback_fn("* preparing remote import...")
9058 # The source cluster will stop the instance before attempting to make a
9059 # connection. In some cases stopping an instance can take a long time,
9060 # hence the shutdown timeout is added to the connection timeout.
9061 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9062 self.op.source_shutdown_timeout)
9063 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9065 assert iobj.primary_node == self.pnode.name
9067 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9068 self.source_x509_ca,
9069 self._cds, timeouts)
9070 if not compat.all(disk_results):
9071 # TODO: Should the instance still be started, even if some disks
9072 # failed to import (valid for local imports, too)?
9073 self.LogWarning("Some disks for instance %s on node %s were not"
9074 " imported successfully" % (instance, pnode_name))
9076 # Run rename script on newly imported instance
9077 assert iobj.name == instance
9078 feedback_fn("Running rename script for %s" % instance)
9079 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9080 self.source_instance_name,
9081 self.op.debug_level)
9083 self.LogWarning("Failed to run rename script for %s on node"
9084 " %s: %s" % (instance, pnode_name, result.fail_msg))
9087 # also checked in the prereq part
9088 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9092 iobj.admin_up = True
9093 self.cfg.Update(iobj, feedback_fn)
9094 logging.info("Starting instance %s on node %s", instance, pnode_name)
9095 feedback_fn("* starting instance...")
9096 result = self.rpc.call_instance_start(pnode_name, iobj,
9098 result.Raise("Could not start instance")
9100 return list(iobj.all_nodes)
9103 class LUInstanceConsole(NoHooksLU):
9104 """Connect to an instance's console.
9106 This is somewhat special in that it returns the command line that
9107 you need to run on the master node in order to connect to the console.
9113 def ExpandNames(self):
9114 self._ExpandAndLockInstance()
9116 def CheckPrereq(self):
9117 """Check prerequisites.
9119 This checks that the instance is in the cluster.
9122 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9123 assert self.instance is not None, \
9124 "Cannot retrieve locked instance %s" % self.op.instance_name
9125 _CheckNodeOnline(self, self.instance.primary_node)
9127 def Exec(self, feedback_fn):
9128 """Connect to the console of an instance
9131 instance = self.instance
9132 node = instance.primary_node
9134 node_insts = self.rpc.call_instance_list([node],
9135 [instance.hypervisor])[node]
9136 node_insts.Raise("Can't get node information from %s" % node)
9138 if instance.name not in node_insts.payload:
9139 if instance.admin_up:
9140 state = constants.INSTST_ERRORDOWN
9142 state = constants.INSTST_ADMINDOWN
9143 raise errors.OpExecError("Instance %s is not running (state %s)" %
9144 (instance.name, state))
9146 logging.debug("Connecting to console of %s on %s", instance.name, node)
9148 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
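# Illustrative usage (added): "gnt-instance console inst1.example.com" ends up
# here; the returned dictionary (a serialized objects.InstanceConsole) tells
# the client how to attach to the console from the master node.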
9151 def _GetInstanceConsole(cluster, instance):
9152 """Returns console information for an instance.
9154 @type cluster: L{objects.Cluster}
9155 @type instance: L{objects.Instance}
9159 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9160 # beparams and hvparams are passed separately, to avoid editing the
9161 # instance and then saving the defaults in the instance itself.
9162 hvparams = cluster.FillHV(instance)
9163 beparams = cluster.FillBE(instance)
9164 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9166 assert console.instance == instance.name
9167 assert console.Validate()
9169 return console.ToDict()
9172 class LUInstanceReplaceDisks(LogicalUnit):
9173 """Replace the disks of an instance.
9176 HPATH = "mirrors-replace"
9177 HTYPE = constants.HTYPE_INSTANCE
9180 def CheckArguments(self):
9181 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9184 def ExpandNames(self):
9185 self._ExpandAndLockInstance()
9187 assert locking.LEVEL_NODE not in self.needed_locks
9188 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9190 assert self.op.iallocator is None or self.op.remote_node is None, \
9191 "Conflicting options"
9193 if self.op.remote_node is not None:
9194 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9196 # Warning: do not remove the locking of the new secondary here
9197 # unless DRBD8.AddChildren is changed to work in parallel;
9198 # currently it doesn't since parallel invocations of
9199 # FindUnusedMinor will conflict
9200 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9201 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9203 self.needed_locks[locking.LEVEL_NODE] = []
9204 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9206 if self.op.iallocator is not None:
9207 # iallocator will select a new node in the same group
9208 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9210 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9211 self.op.iallocator, self.op.remote_node,
9212 self.op.disks, False, self.op.early_release)
9214 self.tasklets = [self.replacer]
9216 def DeclareLocks(self, level):
9217 if level == locking.LEVEL_NODEGROUP:
9218 assert self.op.remote_node is None
9219 assert self.op.iallocator is not None
9220 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9222 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9223 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9224 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9226 elif level == locking.LEVEL_NODE:
9227 if self.op.iallocator is not None:
9228 assert self.op.remote_node is None
9229 assert not self.needed_locks[locking.LEVEL_NODE]
9231 # Lock member nodes of all locked groups
9232 self.needed_locks[locking.LEVEL_NODE] = [node_name
9233 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9234 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9236 self._LockInstancesNodes()
9238 def BuildHooksEnv(self):
9241 This runs on the master, the primary and all the secondaries.
9244 instance = self.replacer.instance
9246 "MODE": self.op.mode,
9247 "NEW_SECONDARY": self.op.remote_node,
9248 "OLD_SECONDARY": instance.secondary_nodes[0],
9250 env.update(_BuildInstanceHookEnvByObject(self, instance))
9253 def BuildHooksNodes(self):
9254 """Build hooks nodes.
9257 instance = self.replacer.instance
9259 self.cfg.GetMasterNode(),
9260 instance.primary_node,
9262 if self.op.remote_node is not None:
9263 nl.append(self.op.remote_node)
9266 def CheckPrereq(self):
9267 """Check prerequisites.
9270 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9271 self.op.iallocator is None)
9273 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9275 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9277 return LogicalUnit.CheckPrereq(self)
9280 class TLReplaceDisks(Tasklet):
9281 """Replaces disks for an instance.
9283 Note: Locking is not within the scope of this class.
9286 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9287 disks, delay_iallocator, early_release):
9288 """Initializes this class.
9291 Tasklet.__init__(self, lu)
9294 self.instance_name = instance_name
9296 self.iallocator_name = iallocator_name
9297 self.remote_node = remote_node
9299 self.delay_iallocator = delay_iallocator
9300 self.early_release = early_release
9303 self.instance = None
9304 self.new_node = None
9305 self.target_node = None
9306 self.other_node = None
9307 self.remote_node_info = None
9308 self.node_secondary_ip = None
9311 def CheckArguments(mode, remote_node, iallocator):
9312 """Helper function for users of this class.
9315 # check for valid parameter combination
9316 if mode == constants.REPLACE_DISK_CHG:
9317 if remote_node is None and iallocator is None:
9318 raise errors.OpPrereqError("When changing the secondary either an"
9319 " iallocator script must be used or the"
9320 " new node given", errors.ECODE_INVAL)
9322 if remote_node is not None and iallocator is not None:
9323 raise errors.OpPrereqError("Give either the iallocator or the new"
9324 " secondary, not both", errors.ECODE_INVAL)
9326 elif remote_node is not None or iallocator is not None:
9327 # Not replacing the secondary
9328 raise errors.OpPrereqError("The iallocator and new node options can"
9329 " only be used when changing the"
9330 " secondary node", errors.ECODE_INVAL)
9333 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9334 """Compute a new secondary node using an IAllocator.
9337 ial = IAllocator(lu.cfg, lu.rpc,
9338 mode=constants.IALLOCATOR_MODE_RELOC,
9340 relocate_from=list(relocate_from))
9342 ial.Run(iallocator_name)
9345 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9346 " %s" % (iallocator_name, ial.info),
9349 if len(ial.result) != ial.required_nodes:
9350 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9351 " of nodes (%s), required %s" %
9353 len(ial.result), ial.required_nodes),
9356 remote_node_name = ial.result[0]
9358 lu.LogInfo("Selected new secondary for instance '%s': %s",
9359 instance_name, remote_node_name)
9361 return remote_node_name
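# Minimal sketch of how this helper is meant to be called (illustrative
# only; the instance name, script name and node list are invented):
#
#   new_secondary = TLReplaceDisks._RunAllocator(
#       lu, "hail", "instance1.example.com",
#       relocate_from=["old-secondary.example.com"])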
9363 def _FindFaultyDisks(self, node_name):
9364 """Wrapper for L{_FindFaultyInstanceDisks}.
9366 """
9367 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9368 node_name, True)
9370 def _CheckDisksActivated(self, instance):
9371 """Checks if the instance disks are activated.
9373 @param instance: The instance whose disks to check
9374 @return: True if they are activated, False otherwise
9376 """
9377 nodes = instance.all_nodes
9379 for idx, dev in enumerate(instance.disks):
9380 for node in nodes:
9381 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9382 self.cfg.SetDiskID(dev, node)
9384 result = self.rpc.call_blockdev_find(node, dev)
9386 if result.offline:
9387 continue
9388 elif result.fail_msg or not result.payload:
9389 return False
9391 return True
9393 def CheckPrereq(self):
9394 """Check prerequisites.
9396 This checks that the instance is in the cluster.
9399 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9400 assert instance is not None, \
9401 "Cannot retrieve locked instance %s" % self.instance_name
9403 if instance.disk_template != constants.DT_DRBD8:
9404 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9405 " instances", errors.ECODE_INVAL)
9407 if len(instance.secondary_nodes) != 1:
9408 raise errors.OpPrereqError("The instance has a strange layout,"
9409 " expected one secondary but found %d" %
9410 len(instance.secondary_nodes),
9413 if not self.delay_iallocator:
9414 self._CheckPrereq2()
9416 def _CheckPrereq2(self):
9417 """Check prerequisites, second part.
9419 This function should always be part of CheckPrereq. It was separated and is
9420 now called from Exec because during node evacuation iallocator was only
9421 called with an unmodified cluster model, not taking planned changes into
9422 account.
9424 """
9425 instance = self.instance
9426 secondary_node = instance.secondary_nodes[0]
9428 if self.iallocator_name is None:
9429 remote_node = self.remote_node
9431 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9432 instance.name, instance.secondary_nodes)
9434 if remote_node is None:
9435 self.remote_node_info = None
9437 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9438 "Remote node '%s' is not locked" % remote_node
9440 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9441 assert self.remote_node_info is not None, \
9442 "Cannot retrieve locked node %s" % remote_node
9444 if remote_node == self.instance.primary_node:
9445 raise errors.OpPrereqError("The specified node is the primary node of"
9446 " the instance", errors.ECODE_INVAL)
9448 if remote_node == secondary_node:
9449 raise errors.OpPrereqError("The specified node is already the"
9450 " secondary node of the instance",
9453 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9454 constants.REPLACE_DISK_CHG):
9455 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9458 if self.mode == constants.REPLACE_DISK_AUTO:
9459 if not self._CheckDisksActivated(instance):
9460 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9461 " first" % self.instance_name,
9463 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9464 faulty_secondary = self._FindFaultyDisks(secondary_node)
9466 if faulty_primary and faulty_secondary:
9467 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9468 " one node and can not be repaired"
9469 " automatically" % self.instance_name,
9473 self.disks = faulty_primary
9474 self.target_node = instance.primary_node
9475 self.other_node = secondary_node
9476 check_nodes = [self.target_node, self.other_node]
9477 elif faulty_secondary:
9478 self.disks = faulty_secondary
9479 self.target_node = secondary_node
9480 self.other_node = instance.primary_node
9481 check_nodes = [self.target_node, self.other_node]
9487 # Non-automatic modes
9488 if self.mode == constants.REPLACE_DISK_PRI:
9489 self.target_node = instance.primary_node
9490 self.other_node = secondary_node
9491 check_nodes = [self.target_node, self.other_node]
9493 elif self.mode == constants.REPLACE_DISK_SEC:
9494 self.target_node = secondary_node
9495 self.other_node = instance.primary_node
9496 check_nodes = [self.target_node, self.other_node]
9498 elif self.mode == constants.REPLACE_DISK_CHG:
9499 self.new_node = remote_node
9500 self.other_node = instance.primary_node
9501 self.target_node = secondary_node
9502 check_nodes = [self.new_node, self.other_node]
9504 _CheckNodeNotDrained(self.lu, remote_node)
9505 _CheckNodeVmCapable(self.lu, remote_node)
9507 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9508 assert old_node_info is not None
9509 if old_node_info.offline and not self.early_release:
9510 # doesn't make sense to delay the release
9511 self.early_release = True
9512 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9513 " early-release mode", secondary_node)
9516 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9519 # If not specified all disks should be replaced
9521 self.disks = range(len(self.instance.disks))
9523 for node in check_nodes:
9524 _CheckNodeOnline(self.lu, node)
9526 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9529 if node_name is not None)
9531 # Release unneeded node locks
9532 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9534 # Release any owned node group
9535 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9536 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9538 # Check whether disks are valid
9539 for disk_idx in self.disks:
9540 instance.FindDisk(disk_idx)
9542 # Get secondary node IP addresses
9543 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9544 in self.cfg.GetMultiNodeInfo(touched_nodes))
9546 def Exec(self, feedback_fn):
9547 """Execute disk replacement.
9549 This dispatches the disk replacement to the appropriate handler.
9552 if self.delay_iallocator:
9553 self._CheckPrereq2()
9556 # Verify owned locks before starting operation
9557 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9558 assert set(owned_nodes) == set(self.node_secondary_ip), \
9559 ("Incorrect node locks, owning %s, expected %s" %
9560 (owned_nodes, self.node_secondary_ip.keys()))
9562 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9563 assert list(owned_instances) == [self.instance_name], \
9564 "Instance '%s' not locked" % self.instance_name
9566 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9567 "Should not own any node group lock at this point"
9570 feedback_fn("No disks need replacement")
9573 feedback_fn("Replacing disk(s) %s for %s" %
9574 (utils.CommaJoin(self.disks), self.instance.name))
9576 activate_disks = (not self.instance.admin_up)
9578 # Activate the instance disks if we're replacing them on a down instance
9579 if activate_disks:
9580 _StartInstanceDisks(self.lu, self.instance, True)
9582 try:
9583 # Should we replace the secondary node?
9584 if self.new_node is not None:
9585 fn = self._ExecDrbd8Secondary
9586 else:
9587 fn = self._ExecDrbd8DiskOnly
9589 result = fn(feedback_fn)
9590 finally:
9591 # Deactivate the instance disks if we're replacing them on a
9592 # down instance
9593 if activate_disks:
9594 _SafeShutdownInstanceDisks(self.lu, self.instance)
9597 # Verify owned locks
9598 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9599 nodes = frozenset(self.node_secondary_ip)
9600 assert ((self.early_release and not owned_nodes) or
9601 (not self.early_release and not (set(owned_nodes) - nodes))), \
9602 ("Not owning the correct locks, early_release=%s, owned=%r,"
9603 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9607 def _CheckVolumeGroup(self, nodes):
9608 self.lu.LogInfo("Checking volume groups")
9610 vgname = self.cfg.GetVGName()
9612 # Make sure volume group exists on all involved nodes
9613 results = self.rpc.call_vg_list(nodes)
9615 raise errors.OpExecError("Can't list volume groups on the nodes")
9619 res.Raise("Error checking node %s" % node)
9620 if vgname not in res.payload:
9621 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9624 def _CheckDisksExistence(self, nodes):
9625 # Check disk existence
9626 for idx, dev in enumerate(self.instance.disks):
9627 if idx not in self.disks:
9631 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9632 self.cfg.SetDiskID(dev, node)
9634 result = self.rpc.call_blockdev_find(node, dev)
9636 msg = result.fail_msg
9637 if msg or not result.payload:
9639 msg = "disk not found"
9640 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9643 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9644 for idx, dev in enumerate(self.instance.disks):
9645 if idx not in self.disks:
9648 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9651 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9653 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9654 " replace disks for instance %s" %
9655 (node_name, self.instance.name))
9657 def _CreateNewStorage(self, node_name):
9658 """Create new storage on the primary or secondary node.
9660 This is only used for same-node replaces, not for changing the
9661 secondary node, hence we don't want to modify the existing disk.
9666 for idx, dev in enumerate(self.instance.disks):
9667 if idx not in self.disks:
9670 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9672 self.cfg.SetDiskID(dev, node_name)
9674 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9675 names = _GenerateUniqueNames(self.lu, lv_names)
9677 vg_data = dev.children[0].logical_id[0]
9678 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9679 logical_id=(vg_data, names[0]))
9680 vg_meta = dev.children[1].logical_id[0]
9681 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9682 logical_id=(vg_meta, names[1]))
9684 new_lvs = [lv_data, lv_meta]
9685 old_lvs = [child.Copy() for child in dev.children]
9686 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9688 # we pass force_create=True to force the LVM creation
9689 for new_lv in new_lvs:
9690 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9691 _GetInstanceInfoText(self.instance), False)
9693 return iv_names
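# The mapping returned above has one entry per replaced disk; a sketch of
# its shape (the variable names inside are illustrative):
#
#   iv_names = {
#     "disk/0": (drbd_disk,                  # the DRBD disk object
#                [old_data_lv, old_meta_lv],  # LVs currently attached
#                [new_data_lv, new_meta_lv]), # freshly created LVs
#   }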
9695 def _CheckDevices(self, node_name, iv_names):
9696 for name, (dev, _, _) in iv_names.iteritems():
9697 self.cfg.SetDiskID(dev, node_name)
9699 result = self.rpc.call_blockdev_find(node_name, dev)
9701 msg = result.fail_msg
9702 if msg or not result.payload:
9704 msg = "disk not found"
9705 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9708 if result.payload.is_degraded:
9709 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9711 def _RemoveOldStorage(self, node_name, iv_names):
9712 for name, (_, old_lvs, _) in iv_names.iteritems():
9713 self.lu.LogInfo("Remove logical volumes for %s" % name)
9715 for lv in old_lvs:
9716 self.cfg.SetDiskID(lv, node_name)
9718 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9719 if msg:
9720 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9721 hint="remove unused LVs manually")
9723 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9724 """Replace a disk on the primary or secondary for DRBD 8.
9726 The algorithm for replace is quite complicated:
9728 1. for each disk to be replaced:
9730 1. create new LVs on the target node with unique names
9731 1. detach old LVs from the drbd device
9732 1. rename old LVs to name_replaced.<time_t>
9733 1. rename new LVs to old LVs
9734 1. attach the new LVs (with the old names now) to the drbd device
9736 1. wait for sync across all devices
9738 1. for each modified disk:
9740 1. remove old LVs (which have the name name_replaced.<time_t>)
9742 Failures are not very well handled.
9744 """
9745 steps_total = 6
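# Condensed sketch of the per-disk LV swap performed below (illustration
# only; "node" and the disk objects stand for self.target_node and the
# DRBD/LV objects handled in the loop):
#
#   rpc.call_blockdev_removechildren(node, drbd_dev, old_lvs)  # detach old LVs
#   rpc.call_blockdev_rename(node, old_to_temp_names)          # park them under temp names
#   rpc.call_blockdev_rename(node, new_to_old_names)           # give new LVs the old names
#   rpc.call_blockdev_addchildren(node, drbd_dev, new_lvs)     # reattach under the old names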
9747 # Step: check device activation
9748 self.lu.LogStep(1, steps_total, "Check device existence")
9749 self._CheckDisksExistence([self.other_node, self.target_node])
9750 self._CheckVolumeGroup([self.target_node, self.other_node])
9752 # Step: check other node consistency
9753 self.lu.LogStep(2, steps_total, "Check peer consistency")
9754 self._CheckDisksConsistency(self.other_node,
9755 self.other_node == self.instance.primary_node,
9758 # Step: create new storage
9759 self.lu.LogStep(3, steps_total, "Allocate new storage")
9760 iv_names = self._CreateNewStorage(self.target_node)
9762 # Step: for each lv, detach+rename*2+attach
9763 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9764 for dev, old_lvs, new_lvs in iv_names.itervalues():
9765 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9767 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9769 result.Raise("Can't detach drbd from local storage on node"
9770 " %s for device %s" % (self.target_node, dev.iv_name))
9772 #cfg.Update(instance)
9774 # ok, we created the new LVs, so now we know we have the needed
9775 # storage; as such, we proceed on the target node to rename
9776 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9777 # using the assumption that logical_id == physical_id (which in
9778 # turn is the unique_id on that node)
9780 # FIXME(iustin): use a better name for the replaced LVs
9781 temp_suffix = int(time.time())
9782 ren_fn = lambda d, suff: (d.physical_id[0],
9783 d.physical_id[1] + "_replaced-%s" % suff)
9785 # Build the rename list based on what LVs exist on the node
9786 rename_old_to_new = []
9787 for to_ren in old_lvs:
9788 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9789 if not result.fail_msg and result.payload:
9791 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9793 self.lu.LogInfo("Renaming the old LVs on the target node")
9794 result = self.rpc.call_blockdev_rename(self.target_node,
9796 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9798 # Now we rename the new LVs to the old LVs
9799 self.lu.LogInfo("Renaming the new LVs on the target node")
9800 rename_new_to_old = [(new, old.physical_id)
9801 for old, new in zip(old_lvs, new_lvs)]
9802 result = self.rpc.call_blockdev_rename(self.target_node,
9804 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9806 # Intermediate steps of in memory modifications
9807 for old, new in zip(old_lvs, new_lvs):
9808 new.logical_id = old.logical_id
9809 self.cfg.SetDiskID(new, self.target_node)
9811 # We need to modify old_lvs so that removal later removes the
9812 # right LVs, not the newly added ones; note that old_lvs is a
9814 for disk in old_lvs:
9815 disk.logical_id = ren_fn(disk, temp_suffix)
9816 self.cfg.SetDiskID(disk, self.target_node)
9818 # Now that the new lvs have the old name, we can add them to the device
9819 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9820 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9821 new_lvs)
9822 msg = result.fail_msg
9823 if msg:
9824 for new_lv in new_lvs:
9825 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9826 new_lv).fail_msg
9827 if msg2:
9828 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9829 hint=("cleanup manually the unused logical"
9830 " volumes"))
9831 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9833 cstep = 5
9834 if self.early_release:
9835 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9836 cstep += 1
9837 self._RemoveOldStorage(self.target_node, iv_names)
9838 # WARNING: we release both node locks here, do not do other RPCs
9839 # than WaitForSync to the primary node
9840 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9841 names=[self.target_node, self.other_node])
9844 # This can fail as the old devices are degraded and _WaitForSync
9845 # does a combined result over all disks, so we don't check its return value
9846 self.lu.LogStep(cstep, steps_total, "Sync devices")
9847 cstep += 1
9848 _WaitForSync(self.lu, self.instance)
9850 # Check all devices manually
9851 self._CheckDevices(self.instance.primary_node, iv_names)
9853 # Step: remove old storage
9854 if not self.early_release:
9855 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9856 cstep += 1
9857 self._RemoveOldStorage(self.target_node, iv_names)
9859 def _ExecDrbd8Secondary(self, feedback_fn):
9860 """Replace the secondary node for DRBD 8.
9862 The algorithm for replace is quite complicated:
9863 - for all disks of the instance:
9864 - create new LVs on the new node with same names
9865 - shutdown the drbd device on the old secondary
9866 - disconnect the drbd network on the primary
9867 - create the drbd device on the new secondary
9868 - network attach the drbd on the primary, using an artifice:
9869 the drbd code for Attach() will connect to the network if it
9870 finds a device which is connected to the good local disks but
9871 not network enabled
9872 - wait for sync across all devices
9873 - remove all disks from the old secondary
9875 Failures are not very well handled.
9877 """
9878 steps_total = 6
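# Condensed sketch of the RPC sequence used below (illustration only; the
# arguments are simplified stand-ins for the real ones built in this method):
#
#   rpc.call_blockdev_shutdown(old_secondary, drbd_dev)             # stop DRBD on old secondary
#   rpc.call_drbd_disconnect_net([primary], secondary_ips, disks)   # primary goes standalone
#   cfg.Update(instance, feedback_fn)                               # disks now point to new node
#   rpc.call_drbd_attach_net([primary, new_node], secondary_ips,
#                            disks, instance_name, False)           # reconnect to new secondary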
9880 pnode = self.instance.primary_node
9882 # Step: check device activation
9883 self.lu.LogStep(1, steps_total, "Check device existence")
9884 self._CheckDisksExistence([self.instance.primary_node])
9885 self._CheckVolumeGroup([self.instance.primary_node])
9887 # Step: check other node consistency
9888 self.lu.LogStep(2, steps_total, "Check peer consistency")
9889 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9891 # Step: create new storage
9892 self.lu.LogStep(3, steps_total, "Allocate new storage")
9893 for idx, dev in enumerate(self.instance.disks):
9894 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9895 (self.new_node, idx))
9896 # we pass force_create=True to force LVM creation
9897 for new_lv in dev.children:
9898 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9899 _GetInstanceInfoText(self.instance), False)
9901 # Step 4: drbd minors and drbd setup changes
9902 # after this, we must manually remove the drbd minors on both the
9903 # error and the success paths
9904 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9905 minors = self.cfg.AllocateDRBDMinor([self.new_node
9906 for dev in self.instance.disks],
9907 self.instance.name)
9908 logging.debug("Allocated minors %r", minors)
9910 iv_names = {}
9911 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9912 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9913 (self.new_node, idx))
9914 # create new devices on new_node; note that we create two IDs:
9915 # one without port, so the drbd will be activated without
9916 # networking information on the new node at this stage, and one
9917 # with network, for the latter activation in step 4
9918 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9919 if self.instance.primary_node == o_node1:
9920 p_minor = o_minor1
9921 else:
9922 assert self.instance.primary_node == o_node2, "Three-node instance?"
9923 p_minor = o_minor2
9925 new_alone_id = (self.instance.primary_node, self.new_node, None,
9926 p_minor, new_minor, o_secret)
9927 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9928 p_minor, new_minor, o_secret)
9930 iv_names[idx] = (dev, dev.children, new_net_id)
9931 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9933 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9934 logical_id=new_alone_id,
9935 children=dev.children,
9938 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9939 _GetInstanceInfoText(self.instance), False)
9940 except errors.GenericError:
9941 self.cfg.ReleaseDRBDMinors(self.instance.name)
9942 raise
9944 # We have new devices, shutdown the drbd on the old secondary
9945 for idx, dev in enumerate(self.instance.disks):
9946 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9947 self.cfg.SetDiskID(dev, self.target_node)
9948 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9950 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9951 "node: %s" % (idx, msg),
9952 hint=("Please cleanup this device manually as"
9953 " soon as possible"))
9955 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9956 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9957 self.instance.disks)[pnode]
9959 msg = result.fail_msg
9961 # detaches didn't succeed (unlikely)
9962 self.cfg.ReleaseDRBDMinors(self.instance.name)
9963 raise errors.OpExecError("Can't detach the disks from the network on"
9964 " old node: %s" % (msg,))
9966 # if we managed to detach at least one, we update all the disks of
9967 # the instance to point to the new secondary
9968 self.lu.LogInfo("Updating instance configuration")
9969 for dev, _, new_logical_id in iv_names.itervalues():
9970 dev.logical_id = new_logical_id
9971 self.cfg.SetDiskID(dev, self.instance.primary_node)
9973 self.cfg.Update(self.instance, feedback_fn)
9975 # and now perform the drbd attach
9976 self.lu.LogInfo("Attaching primary drbds to new secondary"
9977 " (standalone => connected)")
9978 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9979 self.new_node],
9980 self.node_secondary_ip,
9981 self.instance.disks,
9982 self.instance.name,
9983 False)
9984 for to_node, to_result in result.items():
9985 msg = to_result.fail_msg
9987 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9989 hint=("please do a gnt-instance info to see the"
9990 " status of disks"))
9991 cstep = 5
9992 if self.early_release:
9993 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9994 cstep += 1
9995 self._RemoveOldStorage(self.target_node, iv_names)
9996 # WARNING: we release all node locks here, do not do other RPCs
9997 # than WaitForSync to the primary node
9998 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9999 names=[self.instance.primary_node,
10000 self.target_node,
10001 self.new_node])
10004 # This can fail as the old devices are degraded and _WaitForSync
10005 # does a combined result over all disks, so we don't check its return value
10006 self.lu.LogStep(cstep, steps_total, "Sync devices")
10007 cstep += 1
10008 _WaitForSync(self.lu, self.instance)
10010 # Check all devices manually
10011 self._CheckDevices(self.instance.primary_node, iv_names)
10013 # Step: remove old storage
10014 if not self.early_release:
10015 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10016 self._RemoveOldStorage(self.target_node, iv_names)
10019 class LURepairNodeStorage(NoHooksLU):
10020 """Repairs the volume group on a node.
10025 def CheckArguments(self):
10026 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10028 storage_type = self.op.storage_type
10030 if (constants.SO_FIX_CONSISTENCY not in
10031 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10032 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10033 " repaired" % storage_type,
10034 errors.ECODE_INVAL)
10036 def ExpandNames(self):
10037 self.needed_locks = {
10038 locking.LEVEL_NODE: [self.op.node_name],
10041 def _CheckFaultyDisks(self, instance, node_name):
10042 """Ensure faulty disks abort the opcode or at least warn."""
10043 try:
10044 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10045 node_name, True):
10046 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10047 " node '%s'" % (instance.name, node_name),
10048 errors.ECODE_STATE)
10049 except errors.OpPrereqError, err:
10050 if self.op.ignore_consistency:
10051 self.proc.LogWarning(str(err.args[0]))
10052 else:
10053 raise
10055 def CheckPrereq(self):
10056 """Check prerequisites.
10059 # Check whether any instance on this node has faulty disks
10060 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10061 if not inst.admin_up:
10062 continue
10063 check_nodes = set(inst.all_nodes)
10064 check_nodes.discard(self.op.node_name)
10065 for inst_node_name in check_nodes:
10066 self._CheckFaultyDisks(inst, inst_node_name)
10068 def Exec(self, feedback_fn):
10069 feedback_fn("Repairing storage unit '%s' on %s ..." %
10070 (self.op.name, self.op.node_name))
10072 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10073 result = self.rpc.call_storage_execute(self.op.node_name,
10074 self.op.storage_type, st_args,
10075 self.op.name,
10076 constants.SO_FIX_CONSISTENCY)
10077 result.Raise("Failed to repair storage unit '%s' on %s" %
10078 (self.op.name, self.op.node_name))
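# A minimal sketch of the opcode this LU processes (field values are
# made-up examples; the attributes mirror the self.op usage above and the
# opcode name follows the LU name as elsewhere in this module):
#
#   op = opcodes.OpRepairNodeStorage(node_name="node2.example.com",
#                                    storage_type=constants.ST_LVM_VG,
#                                    name="xenvg",
#                                    ignore_consistency=False)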
10081 class LUNodeEvacuate(NoHooksLU):
10082 """Evacuates instances off a list of nodes.
10087 def CheckArguments(self):
10088 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10090 def ExpandNames(self):
10091 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10093 if self.op.remote_node is not None:
10094 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10095 assert self.op.remote_node
10097 if self.op.remote_node == self.op.node_name:
10098 raise errors.OpPrereqError("Can not use evacuated node as a new"
10099 " secondary node", errors.ECODE_INVAL)
10101 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10102 raise errors.OpPrereqError("Without the use of an iallocator only"
10103 " secondary instances can be evacuated",
10104 errors.ECODE_INVAL)
10107 self.share_locks = _ShareAll()
10108 self.needed_locks = {
10109 locking.LEVEL_INSTANCE: [],
10110 locking.LEVEL_NODEGROUP: [],
10111 locking.LEVEL_NODE: [],
10114 if self.op.remote_node is None:
10115 # Iallocator will choose any node(s) in the same group
10116 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10118 group_nodes = frozenset([self.op.remote_node])
10120 # Determine nodes to be locked
10121 self.lock_nodes = set([self.op.node_name]) | group_nodes
10123 def _DetermineInstances(self):
10124 """Builds list of instances to operate on.
10127 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10129 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10130 # Primary instances only
10131 inst_fn = _GetNodePrimaryInstances
10132 assert self.op.remote_node is None, \
10133 "Evacuating primary instances requires iallocator"
10134 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10135 # Secondary instances only
10136 inst_fn = _GetNodeSecondaryInstances
10139 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10140 inst_fn = _GetNodeInstances
10142 return inst_fn(self.cfg, self.op.node_name)
10144 def DeclareLocks(self, level):
10145 if level == locking.LEVEL_INSTANCE:
10146 # Lock instances optimistically, needs verification once node and group
10147 # locks have been acquired
10148 self.needed_locks[locking.LEVEL_INSTANCE] = \
10149 set(i.name for i in self._DetermineInstances())
10151 elif level == locking.LEVEL_NODEGROUP:
10152 # Lock node groups optimistically, needs verification once nodes have
10154 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10155 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10157 elif level == locking.LEVEL_NODE:
10158 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10160 def CheckPrereq(self):
10162 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10163 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10164 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10166 assert owned_nodes == self.lock_nodes
10168 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10169 if owned_groups != wanted_groups:
10170 raise errors.OpExecError("Node groups changed since locks were acquired,"
10171 " current groups are '%s', used to be '%s'" %
10172 (utils.CommaJoin(wanted_groups),
10173 utils.CommaJoin(owned_groups)))
10175 # Determine affected instances
10176 self.instances = self._DetermineInstances()
10177 self.instance_names = [i.name for i in self.instances]
10179 if set(self.instance_names) != owned_instances:
10180 raise errors.OpExecError("Instances on node '%s' changed since locks"
10181 " were acquired, current instances are '%s',"
10182 " used to be '%s'" %
10183 (self.op.node_name,
10184 utils.CommaJoin(self.instance_names),
10185 utils.CommaJoin(owned_instances)))
10187 if self.instance_names:
10188 self.LogInfo("Evacuating instances from node '%s': %s",
10190 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10192 self.LogInfo("No instances to evacuate from node '%s'",
10195 if self.op.remote_node is not None:
10196 for i in self.instances:
10197 if i.primary_node == self.op.remote_node:
10198 raise errors.OpPrereqError("Node %s is the primary node of"
10199 " instance %s, cannot use it as"
10201 (self.op.remote_node, i.name),
10202 errors.ECODE_INVAL)
10204 def Exec(self, feedback_fn):
10205 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10207 if not self.instance_names:
10208 # No instances to evacuate
10209 jobs = []
10211 elif self.op.iallocator is not None:
10212 # TODO: Implement relocation to other group
10213 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10214 evac_mode=self.op.mode,
10215 instances=list(self.instance_names))
10217 ial.Run(self.op.iallocator)
10219 if not ial.success:
10220 raise errors.OpPrereqError("Can't compute node evacuation using"
10221 " iallocator '%s': %s" %
10222 (self.op.iallocator, ial.info),
10223 errors.ECODE_NORES)
10225 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10227 elif self.op.remote_node is not None:
10228 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10229 jobs = [
10230 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10231 remote_node=self.op.remote_node,
10232 disks=[],
10233 mode=constants.REPLACE_DISK_CHG,
10234 early_release=self.op.early_release)]
10235 for instance_name in self.instance_names
10236 ]
10238 else:
10239 raise errors.ProgrammerError("No iallocator or remote node")
10241 return ResultWithJobs(jobs)
10244 def _SetOpEarlyRelease(early_release, op):
10245 """Sets C{early_release} flag on opcodes if available.
10247 """
10248 try:
10249 op.early_release = early_release
10250 except AttributeError:
10251 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10253 return op
10256 def _NodeEvacDest(use_nodes, group, nodes):
10257 """Returns group or nodes depending on caller's choice.
10261 return utils.CommaJoin(nodes)
10266 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10267 """Unpacks the result of change-group and node-evacuate iallocator requests.
10269 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10270 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10272 @type lu: L{LogicalUnit}
10273 @param lu: Logical unit instance
10274 @type alloc_result: tuple/list
10275 @param alloc_result: Result from iallocator
10276 @type early_release: bool
10277 @param early_release: Whether to release locks early if possible
10278 @type use_nodes: bool
10279 @param use_nodes: Whether to display node names instead of groups
10282 (moved, failed, jobs) = alloc_result
10285 lu.LogWarning("Unable to evacuate instances %s",
10286 utils.CommaJoin("%s (%s)" % (name, reason)
10287 for (name, reason) in failed))
10290 lu.LogInfo("Instances to be moved: %s",
10291 utils.CommaJoin("%s (to %s)" %
10292 (name, _NodeEvacDest(use_nodes, group, nodes))
10293 for (name, group, nodes) in moved))
10295 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10296 map(opcodes.OpCode.LoadOpCode, ops))
10297 for ops in jobs]
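# Sketch of the iallocator result unpacked above (shapes only; the values
# are invented for illustration):
#
#   alloc_result = (
#     [("inst1", "group1", ["nodeA", "nodeB"])],   # moved: instance, target group, nodes
#     [("inst2", "instance has faulty disks")],    # failed: instance, reason
#     [[op1_dict, op2_dict], [op3_dict]],          # jobs: serialized opcodes, one list per job
#   )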
10300 class LUInstanceGrowDisk(LogicalUnit):
10301 """Grow a disk of an instance.
10304 HPATH = "disk-grow"
10305 HTYPE = constants.HTYPE_INSTANCE
10308 def ExpandNames(self):
10309 self._ExpandAndLockInstance()
10310 self.needed_locks[locking.LEVEL_NODE] = []
10311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10313 def DeclareLocks(self, level):
10314 if level == locking.LEVEL_NODE:
10315 self._LockInstancesNodes()
10317 def BuildHooksEnv(self):
10318 """Build hooks env.
10320 This runs on the master, the primary and all the secondaries.
10324 "DISK": self.op.disk,
10325 "AMOUNT": self.op.amount,
10327 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10330 def BuildHooksNodes(self):
10331 """Build hooks nodes.
10334 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10337 def CheckPrereq(self):
10338 """Check prerequisites.
10340 This checks that the instance is in the cluster.
10343 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10344 assert instance is not None, \
10345 "Cannot retrieve locked instance %s" % self.op.instance_name
10346 nodenames = list(instance.all_nodes)
10347 for node in nodenames:
10348 _CheckNodeOnline(self, node)
10350 self.instance = instance
10352 if instance.disk_template not in constants.DTS_GROWABLE:
10353 raise errors.OpPrereqError("Instance's disk layout does not support"
10354 " growing", errors.ECODE_INVAL)
10356 self.disk = instance.FindDisk(self.op.disk)
10358 if instance.disk_template not in (constants.DT_FILE,
10359 constants.DT_SHARED_FILE):
10360 # TODO: check the free disk space for file, when that feature will be
10362 _CheckNodesFreeDiskPerVG(self, nodenames,
10363 self.disk.ComputeGrowth(self.op.amount))
10365 def Exec(self, feedback_fn):
10366 """Execute disk grow.
10369 instance = self.instance
10372 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10374 raise errors.OpExecError("Cannot activate block device to grow")
10376 # First run all grow ops in dry-run mode
10377 for node in instance.all_nodes:
10378 self.cfg.SetDiskID(disk, node)
10379 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10380 result.Raise("Grow request failed to node %s" % node)
10382 # We know that (as far as we can test) operations across different
10383 # nodes will succeed, time to run it for real
10384 for node in instance.all_nodes:
10385 self.cfg.SetDiskID(disk, node)
10386 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10387 result.Raise("Grow request failed to node %s" % node)
10389 # TODO: Rewrite code to work properly
10390 # DRBD goes into sync mode for a short amount of time after executing the
10391 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10392 # calling "resize" in sync mode fails. Sleeping for a short amount of
10393 # time is a work-around.
10394 time.sleep(5)
10396 disk.RecordGrow(self.op.amount)
10397 self.cfg.Update(instance, feedback_fn)
10398 if self.op.wait_for_sync:
10399 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10401 self.proc.LogWarning("Disk sync-ing has not returned a good"
10402 " status; please check the instance")
10403 if not instance.admin_up:
10404 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10405 elif not instance.admin_up:
10406 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10407 " not supposed to be running because no wait for"
10408 " sync mode was requested")
10411 class LUInstanceQueryData(NoHooksLU):
10412 """Query runtime instance data.
10417 def ExpandNames(self):
10418 self.needed_locks = {}
10420 # Use locking if requested or when non-static information is wanted
10421 if not (self.op.static or self.op.use_locking):
10422 self.LogWarning("Non-static data requested, locks need to be acquired")
10423 self.op.use_locking = True
10425 if self.op.instances or not self.op.use_locking:
10426 # Expand instance names right here
10427 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10429 # Will use acquired locks
10430 self.wanted_names = None
10432 if self.op.use_locking:
10433 self.share_locks = _ShareAll()
10435 if self.wanted_names is None:
10436 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10438 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10440 self.needed_locks[locking.LEVEL_NODE] = []
10441 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10443 def DeclareLocks(self, level):
10444 if self.op.use_locking and level == locking.LEVEL_NODE:
10445 self._LockInstancesNodes()
10447 def CheckPrereq(self):
10448 """Check prerequisites.
10450 This only checks the optional instance list against the existing names.
10453 if self.wanted_names is None:
10454 assert self.op.use_locking, "Locking was not used"
10455 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10457 self.wanted_instances = \
10458 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10460 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10461 """Returns the status of a block device
10464 if self.op.static or not node:
10467 self.cfg.SetDiskID(dev, node)
10469 result = self.rpc.call_blockdev_find(node, dev)
10473 result.Raise("Can't compute disk status for %s" % instance_name)
10475 status = result.payload
10479 return (status.dev_path, status.major, status.minor,
10480 status.sync_percent, status.estimated_time,
10481 status.is_degraded, status.ldisk_status)
10483 def _ComputeDiskStatus(self, instance, snode, dev):
10484 """Compute block device status.
10487 if dev.dev_type in constants.LDS_DRBD:
10488 # we change the snode then (otherwise we use the one passed in)
10489 if dev.logical_id[0] == instance.primary_node:
10490 snode = dev.logical_id[1]
10492 snode = dev.logical_id[0]
10494 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10495 instance.name, dev)
10496 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10499 dev_children = map(compat.partial(self._ComputeDiskStatus,
10506 "iv_name": dev.iv_name,
10507 "dev_type": dev.dev_type,
10508 "logical_id": dev.logical_id,
10509 "physical_id": dev.physical_id,
10510 "pstatus": dev_pstatus,
10511 "sstatus": dev_sstatus,
10512 "children": dev_children,
10517 def Exec(self, feedback_fn):
10518 """Gather and return data"""
10521 cluster = self.cfg.GetClusterInfo()
10523 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10524 for i in self.wanted_instances)
10525 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10526 if self.op.static or pnode.offline:
10527 remote_state = None
10529 self.LogWarning("Primary node %s is marked offline, returning static"
10530 " information only for instance %s" %
10531 (pnode.name, instance.name))
10533 remote_info = self.rpc.call_instance_info(instance.primary_node,
10535 instance.hypervisor)
10536 remote_info.Raise("Error checking node %s" % instance.primary_node)
10537 remote_info = remote_info.payload
10538 if remote_info and "state" in remote_info:
10539 remote_state = "up"
10541 remote_state = "down"
10543 if instance.admin_up:
10544 config_state = "up"
10546 config_state = "down"
10548 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10551 result[instance.name] = {
10552 "name": instance.name,
10553 "config_state": config_state,
10554 "run_state": remote_state,
10555 "pnode": instance.primary_node,
10556 "snodes": instance.secondary_nodes,
10558 # this happens to be the same format used for hooks
10559 "nics": _NICListToTuple(self, instance.nics),
10560 "disk_template": instance.disk_template,
10562 "hypervisor": instance.hypervisor,
10563 "network_port": instance.network_port,
10564 "hv_instance": instance.hvparams,
10565 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10566 "be_instance": instance.beparams,
10567 "be_actual": cluster.FillBE(instance),
10568 "os_instance": instance.osparams,
10569 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10570 "serial_no": instance.serial_no,
10571 "mtime": instance.mtime,
10572 "ctime": instance.ctime,
10573 "uuid": instance.uuid,
10579 class LUInstanceSetParams(LogicalUnit):
10580 """Modifies an instances's parameters.
10583 HPATH = "instance-modify"
10584 HTYPE = constants.HTYPE_INSTANCE
10587 def CheckArguments(self):
10588 if not (self.op.nics or self.op.disks or self.op.disk_template or
10589 self.op.hvparams or self.op.beparams or self.op.os_name):
10590 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10592 if self.op.hvparams:
10593 _CheckGlobalHvParams(self.op.hvparams)
10596 disk_addremove = 0
10597 for disk_op, disk_dict in self.op.disks:
10598 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10599 if disk_op == constants.DDM_REMOVE:
10600 disk_addremove += 1
10602 elif disk_op == constants.DDM_ADD:
10603 disk_addremove += 1
10605 if not isinstance(disk_op, int):
10606 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10607 if not isinstance(disk_dict, dict):
10608 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10609 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10611 if disk_op == constants.DDM_ADD:
10612 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10613 if mode not in constants.DISK_ACCESS_SET:
10614 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10615 errors.ECODE_INVAL)
10616 size = disk_dict.get(constants.IDISK_SIZE, None)
10618 raise errors.OpPrereqError("Required disk parameter size missing",
10619 errors.ECODE_INVAL)
10622 except (TypeError, ValueError), err:
10623 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10624 str(err), errors.ECODE_INVAL)
10625 disk_dict[constants.IDISK_SIZE] = size
10627 # modification of disk
10628 if constants.IDISK_SIZE in disk_dict:
10629 raise errors.OpPrereqError("Disk size change not possible, use"
10630 " grow-disk", errors.ECODE_INVAL)
10632 if disk_addremove > 1:
10633 raise errors.OpPrereqError("Only one disk add or remove operation"
10634 " supported at a time", errors.ECODE_INVAL)
10636 if self.op.disks and self.op.disk_template is not None:
10637 raise errors.OpPrereqError("Disk template conversion and other disk"
10638 " changes not supported at the same time",
10639 errors.ECODE_INVAL)
10641 if (self.op.disk_template and
10642 self.op.disk_template in constants.DTS_INT_MIRROR and
10643 self.op.remote_node is None):
10644 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10645 " one requires specifying a secondary node",
10646 errors.ECODE_INVAL)
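# Examples of the "disks" argument accepted by the checks above (a sketch;
# sizes and modes are arbitrary):
#
#   disks=[(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
#                               constants.IDISK_MODE: constants.DISK_RDWR})]
#   disks=[(constants.DDM_REMOVE, {})]
#   disks=[(0, {constants.IDISK_MODE: constants.DISK_RDONLY})]  # modify disk 0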
10649 nic_addremove = 0
10650 for nic_op, nic_dict in self.op.nics:
10651 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10652 if nic_op == constants.DDM_REMOVE:
10655 elif nic_op == constants.DDM_ADD:
10658 if not isinstance(nic_op, int):
10659 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10660 if not isinstance(nic_dict, dict):
10661 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10662 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10664 # nic_dict should be a dict
10665 nic_ip = nic_dict.get(constants.INIC_IP, None)
10666 if nic_ip is not None:
10667 if nic_ip.lower() == constants.VALUE_NONE:
10668 nic_dict[constants.INIC_IP] = None
10670 if not netutils.IPAddress.IsValid(nic_ip):
10671 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10672 errors.ECODE_INVAL)
10674 nic_bridge = nic_dict.get("bridge", None)
10675 nic_link = nic_dict.get(constants.INIC_LINK, None)
10676 if nic_bridge and nic_link:
10677 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10678 " at the same time", errors.ECODE_INVAL)
10679 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10680 nic_dict["bridge"] = None
10681 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10682 nic_dict[constants.INIC_LINK] = None
10684 if nic_op == constants.DDM_ADD:
10685 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10686 if nic_mac is None:
10687 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10689 if constants.INIC_MAC in nic_dict:
10690 nic_mac = nic_dict[constants.INIC_MAC]
10691 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10692 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10694 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10695 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10696 " modifying an existing nic",
10697 errors.ECODE_INVAL)
10699 if nic_addremove > 1:
10700 raise errors.OpPrereqError("Only one NIC add or remove operation"
10701 " supported at a time", errors.ECODE_INVAL)
10703 def ExpandNames(self):
10704 self._ExpandAndLockInstance()
10705 self.needed_locks[locking.LEVEL_NODE] = []
10706 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10708 def DeclareLocks(self, level):
10709 if level == locking.LEVEL_NODE:
10710 self._LockInstancesNodes()
10711 if self.op.disk_template and self.op.remote_node:
10712 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10713 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10715 def BuildHooksEnv(self):
10716 """Build hooks env.
10718 This runs on the master, primary and secondaries.
10722 if constants.BE_MEMORY in self.be_new:
10723 args["memory"] = self.be_new[constants.BE_MEMORY]
10724 if constants.BE_VCPUS in self.be_new:
10725 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10726 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10727 # information at all.
10730 nic_override = dict(self.op.nics)
10731 for idx, nic in enumerate(self.instance.nics):
10732 if idx in nic_override:
10733 this_nic_override = nic_override[idx]
10735 this_nic_override = {}
10736 if constants.INIC_IP in this_nic_override:
10737 ip = this_nic_override[constants.INIC_IP]
10740 if constants.INIC_MAC in this_nic_override:
10741 mac = this_nic_override[constants.INIC_MAC]
10744 if idx in self.nic_pnew:
10745 nicparams = self.nic_pnew[idx]
10747 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10748 mode = nicparams[constants.NIC_MODE]
10749 link = nicparams[constants.NIC_LINK]
10750 args["nics"].append((ip, mac, mode, link))
10751 if constants.DDM_ADD in nic_override:
10752 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10753 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10754 nicparams = self.nic_pnew[constants.DDM_ADD]
10755 mode = nicparams[constants.NIC_MODE]
10756 link = nicparams[constants.NIC_LINK]
10757 args["nics"].append((ip, mac, mode, link))
10758 elif constants.DDM_REMOVE in nic_override:
10759 del args["nics"][-1]
10761 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10762 if self.op.disk_template:
10763 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10767 def BuildHooksNodes(self):
10768 """Build hooks nodes.
10771 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10774 def CheckPrereq(self):
10775 """Check prerequisites.
10777 This only checks the instance list against the existing names.
10780 # checking the new params on the primary/secondary nodes
10782 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10783 cluster = self.cluster = self.cfg.GetClusterInfo()
10784 assert self.instance is not None, \
10785 "Cannot retrieve locked instance %s" % self.op.instance_name
10786 pnode = instance.primary_node
10787 nodelist = list(instance.all_nodes)
10790 if self.op.os_name and not self.op.force:
10791 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10792 self.op.force_variant)
10793 instance_os = self.op.os_name
10795 instance_os = instance.os
10797 if self.op.disk_template:
10798 if instance.disk_template == self.op.disk_template:
10799 raise errors.OpPrereqError("Instance already has disk template %s" %
10800 instance.disk_template, errors.ECODE_INVAL)
10802 if (instance.disk_template,
10803 self.op.disk_template) not in self._DISK_CONVERSIONS:
10804 raise errors.OpPrereqError("Unsupported disk template conversion from"
10805 " %s to %s" % (instance.disk_template,
10806 self.op.disk_template),
10807 errors.ECODE_INVAL)
10808 _CheckInstanceDown(self, instance, "cannot change disk template")
10809 if self.op.disk_template in constants.DTS_INT_MIRROR:
10810 if self.op.remote_node == pnode:
10811 raise errors.OpPrereqError("Given new secondary node %s is the same"
10812 " as the primary node of the instance" %
10813 self.op.remote_node, errors.ECODE_STATE)
10814 _CheckNodeOnline(self, self.op.remote_node)
10815 _CheckNodeNotDrained(self, self.op.remote_node)
10816 # FIXME: here we assume that the old disk template is DT_PLAIN
10817 assert instance.disk_template == constants.DT_PLAIN
10818 disks = [{constants.IDISK_SIZE: d.size,
10819 constants.IDISK_VG: d.logical_id[0]}
10820 for d in instance.disks]
10821 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10822 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10824 # hvparams processing
10825 if self.op.hvparams:
10826 hv_type = instance.hypervisor
10827 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10828 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10829 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10832 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10833 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10834 self.hv_new = hv_new # the new actual values
10835 self.hv_inst = i_hvdict # the new dict (without defaults)
10837 self.hv_new = self.hv_inst = {}
10839 # beparams processing
10840 if self.op.beparams:
10841 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10843 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10844 be_new = cluster.SimpleFillBE(i_bedict)
10845 self.be_new = be_new # the new actual values
10846 self.be_inst = i_bedict # the new dict (without defaults)
10848 self.be_new = self.be_inst = {}
10849 be_old = cluster.FillBE(instance)
10851 # osparams processing
10852 if self.op.osparams:
10853 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10854 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10855 self.os_inst = i_osdict # the new dict (without defaults)
10861 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10862 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10863 mem_check_list = [pnode]
10864 if be_new[constants.BE_AUTO_BALANCE]:
10865 # either we changed auto_balance to yes or it was from before
10866 mem_check_list.extend(instance.secondary_nodes)
10867 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10868 instance.hypervisor)
10869 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10870 instance.hypervisor)
10871 pninfo = nodeinfo[pnode]
10872 msg = pninfo.fail_msg
10874 # Assume the primary node is unreachable and go ahead
10875 self.warn.append("Can't get info from primary node %s: %s" %
10877 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10878 self.warn.append("Node data from primary node %s doesn't contain"
10879 " free memory information" % pnode)
10880 elif instance_info.fail_msg:
10881 self.warn.append("Can't get instance runtime information: %s" %
10882 instance_info.fail_msg)
10884 if instance_info.payload:
10885 current_mem = int(instance_info.payload["memory"])
10887 # Assume instance not running
10888 # (there is a slight race condition here, but it's not very probable,
10889 # and we have no other way to check)
10891 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10892 pninfo.payload["memory_free"])
10894 raise errors.OpPrereqError("This change will prevent the instance"
10895 " from starting, due to %d MB of memory"
10896 " missing on its primary node" % miss_mem,
10897 errors.ECODE_NORES)
10899 if be_new[constants.BE_AUTO_BALANCE]:
10900 for node, nres in nodeinfo.items():
10901 if node not in instance.secondary_nodes:
10903 nres.Raise("Can't get info from secondary node %s" % node,
10904 prereq=True, ecode=errors.ECODE_STATE)
10905 if not isinstance(nres.payload.get("memory_free", None), int):
10906 raise errors.OpPrereqError("Secondary node %s didn't return free"
10907 " memory information" % node,
10908 errors.ECODE_STATE)
10909 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10910 raise errors.OpPrereqError("This change will prevent the instance"
10911 " from failover to its secondary node"
10912 " %s, due to not enough memory" % node,
10913 errors.ECODE_STATE)
10917 self.nic_pinst = {}
10918 for nic_op, nic_dict in self.op.nics:
10919 if nic_op == constants.DDM_REMOVE:
10920 if not instance.nics:
10921 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10922 errors.ECODE_INVAL)
10924 if nic_op != constants.DDM_ADD:
10926 if not instance.nics:
10927 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10928 " no NICs" % nic_op,
10929 errors.ECODE_INVAL)
10930 if nic_op < 0 or nic_op >= len(instance.nics):
10931 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10933 (nic_op, len(instance.nics) - 1),
10934 errors.ECODE_INVAL)
10935 old_nic_params = instance.nics[nic_op].nicparams
10936 old_nic_ip = instance.nics[nic_op].ip
10938 old_nic_params = {}
10941 update_params_dict = dict([(key, nic_dict[key])
10942 for key in constants.NICS_PARAMETERS
10943 if key in nic_dict])
10945 if "bridge" in nic_dict:
10946 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10948 new_nic_params = _GetUpdatedParams(old_nic_params,
10949 update_params_dict)
10950 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10951 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10952 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10953 self.nic_pinst[nic_op] = new_nic_params
10954 self.nic_pnew[nic_op] = new_filled_nic_params
10955 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10957 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10958 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10959 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10961 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10963 self.warn.append(msg)
10965 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10966 if new_nic_mode == constants.NIC_MODE_ROUTED:
10967 if constants.INIC_IP in nic_dict:
10968 nic_ip = nic_dict[constants.INIC_IP]
10970 nic_ip = old_nic_ip
10972 raise errors.OpPrereqError("Cannot set the nic ip to None"
10973 " on a routed nic", errors.ECODE_INVAL)
10974 if constants.INIC_MAC in nic_dict:
10975 nic_mac = nic_dict[constants.INIC_MAC]
10976 if nic_mac is None:
10977 raise errors.OpPrereqError("Cannot set the nic mac to None",
10978 errors.ECODE_INVAL)
10979 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10980 # otherwise generate the mac
10981 nic_dict[constants.INIC_MAC] = \
10982 self.cfg.GenerateMAC(self.proc.GetECId())
10984 # or validate/reserve the current one
10986 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10987 except errors.ReservationError:
10988 raise errors.OpPrereqError("MAC address %s already in use"
10989 " in cluster" % nic_mac,
10990 errors.ECODE_NOTUNIQUE)
10993 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10994 raise errors.OpPrereqError("Disk operations not supported for"
10995 " diskless instances",
10996 errors.ECODE_INVAL)
10997 for disk_op, _ in self.op.disks:
10998 if disk_op == constants.DDM_REMOVE:
10999 if len(instance.disks) == 1:
11000 raise errors.OpPrereqError("Cannot remove the last disk of"
11001 " an instance", errors.ECODE_INVAL)
11002 _CheckInstanceDown(self, instance, "cannot remove disks")
11004 if (disk_op == constants.DDM_ADD and
11005 len(instance.disks) >= constants.MAX_DISKS):
11006 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11007 " add more" % constants.MAX_DISKS,
11008 errors.ECODE_STATE)
11009 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11011 if disk_op < 0 or disk_op >= len(instance.disks):
11012 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11014 (disk_op, len(instance.disks)),
11015 errors.ECODE_INVAL)
11019 def _ConvertPlainToDrbd(self, feedback_fn):
11020 """Converts an instance from plain to drbd.
11023 feedback_fn("Converting template to drbd")
11024 instance = self.instance
11025 pnode = instance.primary_node
11026 snode = self.op.remote_node
11028 # create a fake disk info for _GenerateDiskTemplate
11029 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11030 constants.IDISK_VG: d.logical_id[0]}
11031 for d in instance.disks]
11032 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11033 instance.name, pnode, [snode],
11034 disk_info, None, None, 0, feedback_fn)
11035 info = _GetInstanceInfoText(instance)
11036 feedback_fn("Creating aditional volumes...")
11037 # first, create the missing data and meta devices
11038 for disk in new_disks:
11039 # unfortunately this is... not too nice
11040 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11042 for child in disk.children:
11043 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11044 # at this stage, all new LVs have been created, we can rename the
11046 feedback_fn("Renaming original volumes...")
11047 rename_list = [(o, n.children[0].logical_id)
11048 for (o, n) in zip(instance.disks, new_disks)]
11049 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11050 result.Raise("Failed to rename original LVs")
11052 feedback_fn("Initializing DRBD devices...")
11053 # all child devices are in place, we can now create the DRBD devices
11054 for disk in new_disks:
11055 for node in [pnode, snode]:
11056 f_create = node == pnode
11057 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11059 # at this point, the instance has been modified
11060 instance.disk_template = constants.DT_DRBD8
11061 instance.disks = new_disks
11062 self.cfg.Update(instance, feedback_fn)
11064 # disks are created, waiting for sync
11065 disk_abort = not _WaitForSync(self, instance,
11066 oneshot=not self.op.wait_for_sync)
11068 raise errors.OpExecError("There are some degraded disks for"
11069 " this instance, please cleanup manually")
11071 def _ConvertDrbdToPlain(self, feedback_fn):
11072 """Converts an instance from drbd to plain.
11075 instance = self.instance
11076 assert len(instance.secondary_nodes) == 1
11077 pnode = instance.primary_node
11078 snode = instance.secondary_nodes[0]
11079 feedback_fn("Converting template to plain")
11081 old_disks = instance.disks
11082 new_disks = [d.children[0] for d in old_disks]
11084 # copy over size and mode
11085 for parent, child in zip(old_disks, new_disks):
11086 child.size = parent.size
11087 child.mode = parent.mode
11089 # update instance structure
11090 instance.disks = new_disks
11091 instance.disk_template = constants.DT_PLAIN
11092 self.cfg.Update(instance, feedback_fn)
11094 feedback_fn("Removing volumes on the secondary node...")
11095 for disk in old_disks:
11096 self.cfg.SetDiskID(disk, snode)
11097 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11099 self.LogWarning("Could not remove block device %s on node %s,"
11100 " continuing anyway: %s", disk.iv_name, snode, msg)
11102 feedback_fn("Removing unneeded volumes on the primary node...")
11103 for idx, disk in enumerate(old_disks):
11104 meta = disk.children[1]
11105 self.cfg.SetDiskID(meta, pnode)
11106 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11108 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11109 " continuing anyway: %s", idx, pnode, msg)
11111 def Exec(self, feedback_fn):
11112 """Modifies an instance.
11114 All parameters take effect only at the next restart of the instance.
11117 # Process the warnings from CheckPrereq here, as we don't have a
11118 # feedback_fn there.
11119 for warn in self.warn:
11120 feedback_fn("WARNING: %s" % warn)
11123 instance = self.instance
11125 for disk_op, disk_dict in self.op.disks:
11126 if disk_op == constants.DDM_REMOVE:
11127 # remove the last disk
11128 device = instance.disks.pop()
11129 device_idx = len(instance.disks)
11130 for node, disk in device.ComputeNodeTree(instance.primary_node):
11131 self.cfg.SetDiskID(disk, node)
11132 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11134 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11135 " continuing anyway", device_idx, node, msg)
11136 result.append(("disk/%d" % device_idx, "remove"))
11137 elif disk_op == constants.DDM_ADD:
11139 if instance.disk_template in (constants.DT_FILE,
11140 constants.DT_SHARED_FILE):
11141 file_driver, file_path = instance.disks[0].logical_id
11142 file_path = os.path.dirname(file_path)
11144 file_driver = file_path = None
11145 disk_idx_base = len(instance.disks)
11146 new_disk = _GenerateDiskTemplate(self,
11147 instance.disk_template,
11148 instance.name, instance.primary_node,
11149 instance.secondary_nodes,
11153 disk_idx_base, feedback_fn)[0]
11154 instance.disks.append(new_disk)
11155 info = _GetInstanceInfoText(instance)
11157 logging.info("Creating volume %s for instance %s",
11158 new_disk.iv_name, instance.name)
11159 # Note: this needs to be kept in sync with _CreateDisks
11161 for node in instance.all_nodes:
11162 f_create = node == instance.primary_node
11164 _CreateBlockDev(self, node, instance, new_disk,
11165 f_create, info, f_create)
11166 except errors.OpExecError, err:
11167 self.LogWarning("Failed to create volume %s (%s) on"
11169 new_disk.iv_name, new_disk, node, err)
11170 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11171 (new_disk.size, new_disk.mode)))
11173 # change a given disk
11174 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11175 result.append(("disk.mode/%d" % disk_op,
11176 disk_dict[constants.IDISK_MODE]))
11178 if self.op.disk_template:
11179 r_shut = _ShutdownInstanceDisks(self, instance)
11181 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11182 " proceed with disk template conversion")
11183 mode = (instance.disk_template, self.op.disk_template)
11185 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11187 self.cfg.ReleaseDRBDMinors(instance.name)
11189 result.append(("disk_template", self.op.disk_template))
11192 for nic_op, nic_dict in self.op.nics:
11193 if nic_op == constants.DDM_REMOVE:
11194 # remove the last nic
11195 del instance.nics[-1]
11196 result.append(("nic.%d" % len(instance.nics), "remove"))
11197 elif nic_op == constants.DDM_ADD:
11198 # mac and bridge should be set by now
11199 mac = nic_dict[constants.INIC_MAC]
11200 ip = nic_dict.get(constants.INIC_IP, None)
11201 nicparams = self.nic_pinst[constants.DDM_ADD]
11202 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11203 instance.nics.append(new_nic)
11204 result.append(("nic.%d" % (len(instance.nics) - 1),
11205 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11206 (new_nic.mac, new_nic.ip,
11207 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11208 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11211 for key in (constants.INIC_MAC, constants.INIC_IP):
11212 if key in nic_dict:
11213 setattr(instance.nics[nic_op], key, nic_dict[key])
11214 if nic_op in self.nic_pinst:
11215 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11216 for key, val in nic_dict.iteritems():
11217 result.append(("nic.%s/%d" % (key, nic_op), val))
11220 if self.op.hvparams:
11221 instance.hvparams = self.hv_inst
11222 for key, val in self.op.hvparams.iteritems():
11223 result.append(("hv/%s" % key, val))
11226 if self.op.beparams:
11227 instance.beparams = self.be_inst
11228 for key, val in self.op.beparams.iteritems():
11229 result.append(("be/%s" % key, val))
11232 if self.op.os_name:
11233 instance.os = self.op.os_name
11236 if self.op.osparams:
11237 instance.osparams = self.os_inst
11238 for key, val in self.op.osparams.iteritems():
11239 result.append(("os/%s" % key, val))
11241 self.cfg.Update(instance, feedback_fn)
11245 _DISK_CONVERSIONS = {
11246 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11247 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11248 }
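# Added note (not part of the original code): Exec() dispatches disk template
# conversions through the mapping above. The key is the (current, requested)
# template pair and the value is the conversion method, invoked unbound with
# the LU itself and the feedback function:
#
#   mode = (instance.disk_template, self.op.disk_template)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)
#
# Template pairs without an entry here cannot be converted by this LU.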
11251 class LUInstanceChangeGroup(LogicalUnit):
11252 HPATH = "instance-change-group"
11253 HTYPE = constants.HTYPE_INSTANCE
11256 def ExpandNames(self):
11257 self.share_locks = _ShareAll()
11258 self.needed_locks = {
11259 locking.LEVEL_NODEGROUP: [],
11260 locking.LEVEL_NODE: [],
11263 self._ExpandAndLockInstance()
11265 if self.op.target_groups:
11266 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11267 self.op.target_groups)
11269 self.req_target_uuids = None
11271 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11273 def DeclareLocks(self, level):
11274 if level == locking.LEVEL_NODEGROUP:
11275 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11277 if self.req_target_uuids:
11278 lock_groups = set(self.req_target_uuids)
11280 # Lock all groups used by instance optimistically; this requires going
11281 # via the node before it's locked, requiring verification later on
11282 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11283 lock_groups.update(instance_groups)
11285 # No target groups, need to lock all of them
11286 lock_groups = locking.ALL_SET
11288 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11290 elif level == locking.LEVEL_NODE:
11291 if self.req_target_uuids:
11292 # Lock all nodes used by instances
11293 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11294 self._LockInstancesNodes()
11296 # Lock all nodes in all potential target groups
11297 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11298 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11299 member_nodes = [node_name
11300 for group in lock_groups
11301 for node_name in self.cfg.GetNodeGroup(group).members]
11302 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11304 # Lock all nodes as all groups are potential targets
11305 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
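# Added note: the node group locks above are acquired "optimistically", i.e.
# based on configuration data read before the node locks are held. CheckPrereq
# below therefore re-derives the instance's groups and checks that the owned
# locks still cover them, roughly:
#
#   owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
#   assert owned_nodes.issuperset(self.instance.all_nodes)
#
# so that an instance moved between lock declaration and the prerequisite
# check is detected instead of being operated on with stale locks.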
11307 def CheckPrereq(self):
11308 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11309 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11310 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11312 assert (self.req_target_uuids is None or
11313 owned_groups.issuperset(self.req_target_uuids))
11314 assert owned_instances == set([self.op.instance_name])
11316 # Get instance information
11317 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11319 # Check if node groups for locked instance are still correct
11320 assert owned_nodes.issuperset(self.instance.all_nodes), \
11321 ("Instance %s's nodes changed while we kept the lock" %
11322 self.op.instance_name)
11324 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11327 if self.req_target_uuids:
11328 # User requested specific target groups
11329 self.target_uuids = self.req_target_uuids
11331 # All groups except those used by the instance are potential targets
11332 self.target_uuids = owned_groups - inst_groups
11334 conflicting_groups = self.target_uuids & inst_groups
11335 if conflicting_groups:
11336 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11337 " used by the instance '%s'" %
11338 (utils.CommaJoin(conflicting_groups),
11339 self.op.instance_name),
11340 errors.ECODE_INVAL)
11342 if not self.target_uuids:
11343 raise errors.OpPrereqError("There are no possible target groups",
11344 errors.ECODE_INVAL)
11346 def BuildHooksEnv(self):
11347 """Build hooks env.
11350 assert self.target_uuids
11353 "TARGET_GROUPS": " ".join(self.target_uuids),
11356 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11360 def BuildHooksNodes(self):
11361 """Build hooks nodes.
11364 mn = self.cfg.GetMasterNode()
11365 return ([mn], [mn])
11367 def Exec(self, feedback_fn):
11368 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11370 assert instances == [self.op.instance_name], "Instance not locked"
11372 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11373 instances=instances, target_groups=list(self.target_uuids))
11375 ial.Run(self.op.iallocator)
11377 if not ial.success:
11378 raise errors.OpPrereqError("Can't compute solution for changing group of"
11379 " instance '%s' using iallocator '%s': %s" %
11380 (self.op.instance_name, self.op.iallocator,
11381 ial.info),
11382 errors.ECODE_NORES)
11384 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11386 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11387 " instance '%s'", len(jobs), self.op.instance_name)
11389 return ResultWithJobs(jobs)
11392 class LUBackupQuery(NoHooksLU):
11393 """Query the exports list
11398 def ExpandNames(self):
11399 self.needed_locks = {}
11400 self.share_locks[locking.LEVEL_NODE] = 1
11401 if not self.op.nodes:
11402 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11404 self.needed_locks[locking.LEVEL_NODE] = \
11405 _GetWantedNodes(self, self.op.nodes)
11407 def Exec(self, feedback_fn):
11408 """Compute the list of all the exported system images.
11411 @return: a dictionary with the structure node->(export-list)
11412 where export-list is a list of the instances exported on
11413 that node.
11416 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11417 rpcresult = self.rpc.call_export_list(self.nodes)
11419 for node in rpcresult:
11420 if rpcresult[node].fail_msg:
11421 result[node] = False
11423 result[node] = rpcresult[node].payload
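# Added note: the resulting dictionary maps each queried node either to False
# (the export list RPC failed for that node) or to the list of instance names
# that have an export stored there; e.g. (hostnames made up for illustration):
#
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}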
11428 class LUBackupPrepare(NoHooksLU):
11429 """Prepares an instance for an export and returns useful information.
11434 def ExpandNames(self):
11435 self._ExpandAndLockInstance()
11437 def CheckPrereq(self):
11438 """Check prerequisites.
11441 instance_name = self.op.instance_name
11443 self.instance = self.cfg.GetInstanceInfo(instance_name)
11444 assert self.instance is not None, \
11445 "Cannot retrieve locked instance %s" % self.op.instance_name
11446 _CheckNodeOnline(self, self.instance.primary_node)
11448 self._cds = _GetClusterDomainSecret()
11450 def Exec(self, feedback_fn):
11451 """Prepares an instance for an export.
11454 instance = self.instance
11456 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11457 salt = utils.GenerateSecret(8)
11459 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11460 result = self.rpc.call_x509_cert_create(instance.primary_node,
11461 constants.RIE_CERT_VALIDITY)
11462 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11464 (name, cert_pem) = result.payload
11466 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11470 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11471 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11473 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11479 class LUBackupExport(LogicalUnit):
11480 """Export an instance to an image in the cluster.
11483 HPATH = "instance-export"
11484 HTYPE = constants.HTYPE_INSTANCE
11487 def CheckArguments(self):
11488 """Check the arguments.
11491 self.x509_key_name = self.op.x509_key_name
11492 self.dest_x509_ca_pem = self.op.destination_x509_ca
11494 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11495 if not self.x509_key_name:
11496 raise errors.OpPrereqError("Missing X509 key name for encryption",
11497 errors.ECODE_INVAL)
11499 if not self.dest_x509_ca_pem:
11500 raise errors.OpPrereqError("Missing destination X509 CA",
11501 errors.ECODE_INVAL)
11503 def ExpandNames(self):
11504 self._ExpandAndLockInstance()
11506 # Lock all nodes for local exports
11507 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11508 # FIXME: lock only instance primary and destination node
11510 # Sad but true, for now we have to lock all nodes, as we don't know where
11511 # the previous export might be, and in this LU we search for it and
11512 # remove it from its current node. In the future we could fix this by:
11513 # - making a tasklet to search (share-lock all), then create the
11514 # new one, then one to remove, after
11515 # - removing the removal operation altogether
11516 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11518 def DeclareLocks(self, level):
11519 """Last minute lock declaration."""
11520 # All nodes are locked anyway, so nothing to do here.
11522 def BuildHooksEnv(self):
11523 """Build hooks env.
11525 This will run on the master, primary node and target node.
11529 "EXPORT_MODE": self.op.mode,
11530 "EXPORT_NODE": self.op.target_node,
11531 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11532 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11533 # TODO: Generic function for boolean env variables
11534 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11537 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11541 def BuildHooksNodes(self):
11542 """Build hooks nodes.
11545 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11547 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11548 nl.append(self.op.target_node)
11552 def CheckPrereq(self):
11553 """Check prerequisites.
11555 This checks that the instance and node names are valid.
11558 instance_name = self.op.instance_name
11560 self.instance = self.cfg.GetInstanceInfo(instance_name)
11561 assert self.instance is not None, \
11562 "Cannot retrieve locked instance %s" % self.op.instance_name
11563 _CheckNodeOnline(self, self.instance.primary_node)
11565 if (self.op.remove_instance and self.instance.admin_up and
11566 not self.op.shutdown):
11567 raise errors.OpPrereqError("Can not remove instance without shutting it"
11570 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11571 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11572 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11573 assert self.dst_node is not None
11575 _CheckNodeOnline(self, self.dst_node.name)
11576 _CheckNodeNotDrained(self, self.dst_node.name)
11579 self.dest_disk_info = None
11580 self.dest_x509_ca = None
11582 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11583 self.dst_node = None
11585 if len(self.op.target_node) != len(self.instance.disks):
11586 raise errors.OpPrereqError(("Received destination information for %s"
11587 " disks, but instance %s has %s disks") %
11588 (len(self.op.target_node), instance_name,
11589 len(self.instance.disks)),
11590 errors.ECODE_INVAL)
11592 cds = _GetClusterDomainSecret()
11594 # Check X509 key name
11596 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11597 except (TypeError, ValueError), err:
11598 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11600 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11601 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11602 errors.ECODE_INVAL)
11604 # Load and verify CA
11606 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11607 except OpenSSL.crypto.Error, err:
11608 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11609 (err, ), errors.ECODE_INVAL)
11611 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11612 if errcode is not None:
11613 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11614 (msg, ), errors.ECODE_INVAL)
11616 self.dest_x509_ca = cert
11618 # Verify target information
11620 for idx, disk_data in enumerate(self.op.target_node):
11622 (host, port, magic) = \
11623 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11624 except errors.GenericError, err:
11625 raise errors.OpPrereqError("Target info for disk %s: %s" %
11626 (idx, err), errors.ECODE_INVAL)
11628 disk_info.append((host, port, magic))
11630 assert len(disk_info) == len(self.op.target_node)
11631 self.dest_disk_info = disk_info
11634 raise errors.ProgrammerError("Unhandled export mode %r" %
11637 # instance disk type verification
11638 # TODO: Implement export support for file-based disks
11639 for disk in self.instance.disks:
11640 if disk.dev_type == constants.LD_FILE:
11641 raise errors.OpPrereqError("Export not supported for instances with"
11642 " file-based disks", errors.ECODE_INVAL)
11644 def _CleanupExports(self, feedback_fn):
11645 """Removes exports of current instance from all other nodes.
11647 If an instance in a cluster with nodes A..D was exported to node C, its
11648 exports will be removed from the nodes A, B and D.
11651 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11653 nodelist = self.cfg.GetNodeList()
11654 nodelist.remove(self.dst_node.name)
11656 # on one-node clusters nodelist will be empty after the removal
11657 # if we proceed the backup would be removed because OpBackupQuery
11658 # substitutes an empty list with the full cluster node list.
11659 iname = self.instance.name
11661 feedback_fn("Removing old exports for instance %s" % iname)
11662 exportlist = self.rpc.call_export_list(nodelist)
11663 for node in exportlist:
11664 if exportlist[node].fail_msg:
11666 if iname in exportlist[node].payload:
11667 msg = self.rpc.call_export_remove(node, iname).fail_msg
11669 self.LogWarning("Could not remove older export for instance %s"
11670 " on node %s: %s", iname, node, msg)
11672 def Exec(self, feedback_fn):
11673 """Export an instance to an image in the cluster.
11676 assert self.op.mode in constants.EXPORT_MODES
11678 instance = self.instance
11679 src_node = instance.primary_node
11681 if self.op.shutdown:
11682 # shutdown the instance, but not the disks
11683 feedback_fn("Shutting down instance %s" % instance.name)
11684 result = self.rpc.call_instance_shutdown(src_node, instance,
11685 self.op.shutdown_timeout)
11686 # TODO: Maybe ignore failures if ignore_remove_failures is set
11687 result.Raise("Could not shutdown instance %s on"
11688 " node %s" % (instance.name, src_node))
11690 # set the disks ID correctly since call_instance_start needs the
11691 # correct drbd minor to create the symlinks
11692 for disk in instance.disks:
11693 self.cfg.SetDiskID(disk, src_node)
11695 activate_disks = (not instance.admin_up)
11698 # Activate the instance disks if we're exporting a stopped instance
11699 feedback_fn("Activating disks for %s" % instance.name)
11700 _StartInstanceDisks(self, instance, None)
11703 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11706 helper.CreateSnapshots()
11708 if (self.op.shutdown and instance.admin_up and
11709 not self.op.remove_instance):
11710 assert not activate_disks
11711 feedback_fn("Starting instance %s" % instance.name)
11712 result = self.rpc.call_instance_start(src_node, instance,
11714 msg = result.fail_msg
11716 feedback_fn("Failed to start instance: %s" % msg)
11717 _ShutdownInstanceDisks(self, instance)
11718 raise errors.OpExecError("Could not start instance: %s" % msg)
11720 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11721 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11722 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11723 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11724 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11726 (key_name, _, _) = self.x509_key_name
11729 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11732 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11733 key_name, dest_ca_pem,
11738 # Check for backwards compatibility
11739 assert len(dresults) == len(instance.disks)
11740 assert compat.all(isinstance(i, bool) for i in dresults), \
11741 "Not all results are boolean: %r" % dresults
11745 feedback_fn("Deactivating disks for %s" % instance.name)
11746 _ShutdownInstanceDisks(self, instance)
11748 if not (compat.all(dresults) and fin_resu):
11751 failures.append("export finalization")
11752 if not compat.all(dresults):
11753 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11754 if not dsk)
11755 failures.append("disk export: disk(s) %s" % fdsk)
11757 raise errors.OpExecError("Export failed, errors in %s" %
11758 utils.CommaJoin(failures))
11760 # At this point, the export was successful, we can cleanup/finish
11762 # Remove instance if requested
11763 if self.op.remove_instance:
11764 feedback_fn("Removing instance %s" % instance.name)
11765 _RemoveInstance(self, feedback_fn, instance,
11766 self.op.ignore_remove_failures)
11768 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11769 self._CleanupExports(feedback_fn)
11771 return fin_resu, dresults
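# Added note: Exec() returns a (fin_resu, dresults) pair: fin_resu is the
# overall finalization status and dresults contains one boolean per instance
# disk (asserted above to match len(instance.disks)). A hypothetical caller
# could summarize a result such as (True, [True, False]) with:
#
#   failed = [idx for (idx, ok) in enumerate(dresults) if not ok]
#
# where the indices in "failed" are the disks whose export did not complete.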
11774 class LUBackupRemove(NoHooksLU):
11775 """Remove exports related to the named instance.
11780 def ExpandNames(self):
11781 self.needed_locks = {}
11782 # We need all nodes to be locked in order for RemoveExport to work, but we
11783 # don't need to lock the instance itself, as nothing will happen to it (and
11784 # we can remove exports also for a removed instance)
11785 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11787 def Exec(self, feedback_fn):
11788 """Remove any export.
11791 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11792 # If the instance was not found we'll try with the name that was passed in.
11793 # This will only work if it was an FQDN, though.
11794 fqdn_warn = False
11795 if not instance_name:
11796 fqdn_warn = True
11797 instance_name = self.op.instance_name
11799 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11800 exportlist = self.rpc.call_export_list(locked_nodes)
11802 for node in exportlist:
11803 msg = exportlist[node].fail_msg
11805 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11807 if instance_name in exportlist[node].payload:
11809 result = self.rpc.call_export_remove(node, instance_name)
11810 msg = result.fail_msg
11812 logging.error("Could not remove export for instance %s"
11813 " on node %s: %s", instance_name, node, msg)
11815 if fqdn_warn and not found:
11816 feedback_fn("Export not found. If trying to remove an export belonging"
11817 " to a deleted instance please use its Fully Qualified"
11821 class LUGroupAdd(LogicalUnit):
11822 """Logical unit for creating node groups.
11825 HPATH = "group-add"
11826 HTYPE = constants.HTYPE_GROUP
11829 def ExpandNames(self):
11830 # We need the new group's UUID here so that we can create and acquire the
11831 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11832 # that it should not check whether the UUID exists in the configuration.
11833 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11834 self.needed_locks = {}
11835 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11837 def CheckPrereq(self):
11838 """Check prerequisites.
11840 This checks that the given group name is not an existing node group
11844 try:
11845 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11846 except errors.OpPrereqError:
11847 pass
11848 else:
11849 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11850 " node group (UUID: %s)" %
11851 (self.op.group_name, existing_uuid),
11852 errors.ECODE_EXISTS)
11854 if self.op.ndparams:
11855 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11857 def BuildHooksEnv(self):
11858 """Build hooks env.
11862 "GROUP_NAME": self.op.group_name,
11865 def BuildHooksNodes(self):
11866 """Build hooks nodes.
11869 mn = self.cfg.GetMasterNode()
11870 return ([mn], [mn])
11872 def Exec(self, feedback_fn):
11873 """Add the node group to the cluster.
11876 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11877 uuid=self.group_uuid,
11878 alloc_policy=self.op.alloc_policy,
11879 ndparams=self.op.ndparams)
11881 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11882 del self.remove_locks[locking.LEVEL_NODEGROUP]
11885 class LUGroupAssignNodes(NoHooksLU):
11886 """Logical unit for assigning nodes to groups.
11891 def ExpandNames(self):
11892 # These raise errors.OpPrereqError on their own:
11893 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11894 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11896 # We want to lock all the affected nodes and groups. We have readily
11897 # available the list of nodes, and the *destination* group. To gather the
11898 # list of "source" groups, we need to fetch node information later on.
11899 self.needed_locks = {
11900 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11901 locking.LEVEL_NODE: self.op.nodes,
11904 def DeclareLocks(self, level):
11905 if level == locking.LEVEL_NODEGROUP:
11906 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11908 # Try to get all affected nodes' groups without having the group or node
11909 # lock yet. Needs verification later in the code flow.
11910 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11912 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11914 def CheckPrereq(self):
11915 """Check prerequisites.
11918 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11919 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11920 frozenset(self.op.nodes))
11922 expected_locks = (set([self.group_uuid]) |
11923 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11924 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11925 if actual_locks != expected_locks:
11926 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11927 " current groups are '%s', used to be '%s'" %
11928 (utils.CommaJoin(expected_locks),
11929 utils.CommaJoin(actual_locks)))
11931 self.node_data = self.cfg.GetAllNodesInfo()
11932 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11933 instance_data = self.cfg.GetAllInstancesInfo()
11935 if self.group is None:
11936 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11937 (self.op.group_name, self.group_uuid))
11939 (new_splits, previous_splits) = \
11940 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11941 for node in self.op.nodes],
11942 self.node_data, instance_data)
11945 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11947 if not self.op.force:
11948 raise errors.OpExecError("The following instances get split by this"
11949 " change and --force was not given: %s" %
11952 self.LogWarning("This operation will split the following instances: %s",
11955 if previous_splits:
11956 self.LogWarning("In addition, these already-split instances continue"
11957 " to be split across groups: %s",
11958 utils.CommaJoin(utils.NiceSort(previous_splits)))
11960 def Exec(self, feedback_fn):
11961 """Assign nodes to a new group.
11964 for node in self.op.nodes:
11965 self.node_data[node].group = self.group_uuid
11967 # FIXME: Depends on side-effects of modifying the result of
11968 # C{cfg.GetAllNodesInfo}
11970 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11973 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11974 """Check for split instances after a node assignment.
11976 This method considers a series of node assignments as an atomic operation,
11977 and returns information about split instances after applying the set of
11978 changes.
11980 In particular, it returns information about newly split instances, and
11981 instances that were already split, and remain so after the change.
11983 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11984 considered.
11986 @type changes: list of (node_name, new_group_uuid) pairs.
11987 @param changes: list of node assignments to consider.
11988 @param node_data: a dict with data for all nodes
11989 @param instance_data: a dict with all instances to consider
11990 @rtype: a two-tuple
11991 @return: a list of instances that were previously okay and result split as a
11992 consequence of this change, and a list of instances that were previously
11993 split and this change does not fix.
11996 changed_nodes = dict((node, group) for node, group in changes
11997 if node_data[node].group != group)
11999 all_split_instances = set()
12000 previously_split_instances = set()
12002 def InstanceNodes(instance):
12003 return [instance.primary_node] + list(instance.secondary_nodes)
12005 for inst in instance_data.values():
12006 if inst.disk_template not in constants.DTS_INT_MIRROR:
12009 instance_nodes = InstanceNodes(inst)
12011 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12012 previously_split_instances.add(inst.name)
12014 if len(set(changed_nodes.get(node, node_data[node].group)
12015 for node in instance_nodes)) > 1:
12016 all_split_instances.add(inst.name)
12018 return (list(all_split_instances - previously_split_instances),
12019 list(previously_split_instances & all_split_instances))
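# Worked example (added; node, group and instance names are made up): assume
# node_data maps "n1" and "n2" both to group "g1", and a DRBD instance "inst1"
# has primary "n1" and secondary "n2". Then:
#
#   CheckAssignmentForSplitInstances([("n2", "g2")], node_data, instance_data)
#
# returns (["inst1"], []): the instance was not split before, but moving "n2"
# into "g2" leaves its nodes in different groups. Conversely, if "n2" already
# belonged to "g2", moving it back to "g1" would return ([], []), since the
# change repairs the pre-existing split.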
12022 class _GroupQuery(_QueryBase):
12023 FIELDS = query.GROUP_FIELDS
12025 def ExpandNames(self, lu):
12026 lu.needed_locks = {}
12028 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12029 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12032 self.wanted = [name_to_uuid[name]
12033 for name in utils.NiceSort(name_to_uuid.keys())]
12035 # Accept names to be either names or UUIDs.
12038 all_uuid = frozenset(self._all_groups.keys())
12040 for name in self.names:
12041 if name in all_uuid:
12042 self.wanted.append(name)
12043 elif name in name_to_uuid:
12044 self.wanted.append(name_to_uuid[name])
12046 missing.append(name)
12049 raise errors.OpPrereqError("Some groups do not exist: %s" %
12050 utils.CommaJoin(missing),
12051 errors.ECODE_NOENT)
12053 def DeclareLocks(self, lu, level):
12056 def _GetQueryData(self, lu):
12057 """Computes the list of node groups and their attributes.
12060 do_nodes = query.GQ_NODE in self.requested_data
12061 do_instances = query.GQ_INST in self.requested_data
12063 group_to_nodes = None
12064 group_to_instances = None
12066 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12067 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12068 # latter GetAllInstancesInfo() is not enough, for we have to go through
12069 # instance->node. Hence, we will need to process nodes even if we only need
12070 # instance information.
12071 if do_nodes or do_instances:
12072 all_nodes = lu.cfg.GetAllNodesInfo()
12073 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12076 for node in all_nodes.values():
12077 if node.group in group_to_nodes:
12078 group_to_nodes[node.group].append(node.name)
12079 node_to_group[node.name] = node.group
12082 all_instances = lu.cfg.GetAllInstancesInfo()
12083 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12085 for instance in all_instances.values():
12086 node = instance.primary_node
12087 if node in node_to_group:
12088 group_to_instances[node_to_group[node]].append(instance.name)
12091 # Do not pass on node information if it was not requested.
12092 group_to_nodes = None
12094 return query.GroupQueryData([self._all_groups[uuid]
12095 for uuid in self.wanted],
12096 group_to_nodes, group_to_instances)
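# Added note: when node and/or instance data is requested, the mappings built
# above have the following shape (UUIDs and names are illustrative only):
#
#   group_to_nodes     = {"<group-uuid>": ["node1", "node2"]}
#   group_to_instances = {"<group-uuid>": ["inst1"]}
#
# Instances are attributed to the group of their primary node, which is why
# node information must be processed even when only GQ_INST was requested.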
12099 class LUGroupQuery(NoHooksLU):
12100 """Logical unit for querying node groups.
12105 def CheckArguments(self):
12106 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12107 self.op.output_fields, False)
12109 def ExpandNames(self):
12110 self.gq.ExpandNames(self)
12112 def DeclareLocks(self, level):
12113 self.gq.DeclareLocks(self, level)
12115 def Exec(self, feedback_fn):
12116 return self.gq.OldStyleQuery(self)
12119 class LUGroupSetParams(LogicalUnit):
12120 """Modifies the parameters of a node group.
12123 HPATH = "group-modify"
12124 HTYPE = constants.HTYPE_GROUP
12127 def CheckArguments(self):
12128 all_changes = [
12129 self.op.ndparams,
12130 self.op.alloc_policy,
12131 ]
12133 if all_changes.count(None) == len(all_changes):
12134 raise errors.OpPrereqError("Please pass at least one modification",
12135 errors.ECODE_INVAL)
12137 def ExpandNames(self):
12138 # This raises errors.OpPrereqError on its own:
12139 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12141 self.needed_locks = {
12142 locking.LEVEL_NODEGROUP: [self.group_uuid],
12145 def CheckPrereq(self):
12146 """Check prerequisites.
12149 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12151 if self.group is None:
12152 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12153 (self.op.group_name, self.group_uuid))
12155 if self.op.ndparams:
12156 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12157 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12158 self.new_ndparams = new_ndparams
12160 def BuildHooksEnv(self):
12161 """Build hooks env.
12165 "GROUP_NAME": self.op.group_name,
12166 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12169 def BuildHooksNodes(self):
12170 """Build hooks nodes.
12173 mn = self.cfg.GetMasterNode()
12174 return ([mn], [mn])
12176 def Exec(self, feedback_fn):
12177 """Modifies the node group.
12182 if self.op.ndparams:
12183 self.group.ndparams = self.new_ndparams
12184 result.append(("ndparams", str(self.group.ndparams)))
12186 if self.op.alloc_policy:
12187 self.group.alloc_policy = self.op.alloc_policy
12189 self.cfg.Update(self.group, feedback_fn)
12193 class LUGroupRemove(LogicalUnit):
12194 HPATH = "group-remove"
12195 HTYPE = constants.HTYPE_GROUP
12198 def ExpandNames(self):
12199 # This raises errors.OpPrereqError on its own:
12200 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12201 self.needed_locks = {
12202 locking.LEVEL_NODEGROUP: [self.group_uuid],
12205 def CheckPrereq(self):
12206 """Check prerequisites.
12208 This checks that the given group name exists as a node group, that it is
12209 empty (i.e., contains no nodes), and that it is not the last group of the
12210 cluster.
12213 # Verify that the group is empty.
12214 group_nodes = [node.name
12215 for node in self.cfg.GetAllNodesInfo().values()
12216 if node.group == self.group_uuid]
12219 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12221 (self.op.group_name,
12222 utils.CommaJoin(utils.NiceSort(group_nodes))),
12223 errors.ECODE_STATE)
12225 # Verify the cluster would not be left group-less.
12226 if len(self.cfg.GetNodeGroupList()) == 1:
12227 raise errors.OpPrereqError("Group '%s' is the only group,"
12228 " cannot be removed" %
12229 self.op.group_name,
12230 errors.ECODE_STATE)
12232 def BuildHooksEnv(self):
12233 """Build hooks env.
12237 "GROUP_NAME": self.op.group_name,
12240 def BuildHooksNodes(self):
12241 """Build hooks nodes.
12244 mn = self.cfg.GetMasterNode()
12245 return ([mn], [mn])
12247 def Exec(self, feedback_fn):
12248 """Remove the node group.
12252 self.cfg.RemoveNodeGroup(self.group_uuid)
12253 except errors.ConfigurationError:
12254 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12255 (self.op.group_name, self.group_uuid))
12257 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12260 class LUGroupRename(LogicalUnit):
12261 HPATH = "group-rename"
12262 HTYPE = constants.HTYPE_GROUP
12265 def ExpandNames(self):
12266 # This raises errors.OpPrereqError on its own:
12267 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12269 self.needed_locks = {
12270 locking.LEVEL_NODEGROUP: [self.group_uuid],
12273 def CheckPrereq(self):
12274 """Check prerequisites.
12276 Ensures requested new name is not yet used.
12279 try:
12280 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12281 except errors.OpPrereqError:
12282 pass
12283 else:
12284 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12285 " node group (UUID: %s)" %
12286 (self.op.new_name, new_name_uuid),
12287 errors.ECODE_EXISTS)
12289 def BuildHooksEnv(self):
12290 """Build hooks env.
12294 "OLD_NAME": self.op.group_name,
12295 "NEW_NAME": self.op.new_name,
12298 def BuildHooksNodes(self):
12299 """Build hooks nodes.
12302 mn = self.cfg.GetMasterNode()
12304 all_nodes = self.cfg.GetAllNodesInfo()
12305 all_nodes.pop(mn, None)
12308 run_nodes.extend(node.name for node in all_nodes.values()
12309 if node.group == self.group_uuid)
12311 return (run_nodes, run_nodes)
12313 def Exec(self, feedback_fn):
12314 """Rename the node group.
12317 group = self.cfg.GetNodeGroup(self.group_uuid)
12320 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12321 (self.op.group_name, self.group_uuid))
12323 group.name = self.op.new_name
12324 self.cfg.Update(group, feedback_fn)
12326 return self.op.new_name
12329 class LUGroupEvacuate(LogicalUnit):
12330 HPATH = "group-evacuate"
12331 HTYPE = constants.HTYPE_GROUP
12334 def ExpandNames(self):
12335 # This raises errors.OpPrereqError on its own:
12336 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12338 if self.op.target_groups:
12339 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12340 self.op.target_groups)
12342 self.req_target_uuids = []
12344 if self.group_uuid in self.req_target_uuids:
12345 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12346 " as a target group (targets are %s)" %
12348 utils.CommaJoin(self.req_target_uuids)),
12349 errors.ECODE_INVAL)
12351 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12353 self.share_locks = _ShareAll()
12354 self.needed_locks = {
12355 locking.LEVEL_INSTANCE: [],
12356 locking.LEVEL_NODEGROUP: [],
12357 locking.LEVEL_NODE: [],
12360 def DeclareLocks(self, level):
12361 if level == locking.LEVEL_INSTANCE:
12362 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12364 # Lock instances optimistically, needs verification once node and group
12365 # locks have been acquired
12366 self.needed_locks[locking.LEVEL_INSTANCE] = \
12367 self.cfg.GetNodeGroupInstances(self.group_uuid)
12369 elif level == locking.LEVEL_NODEGROUP:
12370 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12372 if self.req_target_uuids:
12373 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12375 # Lock all groups used by instances optimistically; this requires going
12376 # via the node before it's locked, requiring verification later on
12377 lock_groups.update(group_uuid
12378 for instance_name in
12379 self.owned_locks(locking.LEVEL_INSTANCE)
12381 self.cfg.GetInstanceNodeGroups(instance_name))
12383 # No target groups, need to lock all of them
12384 lock_groups = locking.ALL_SET
12386 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12388 elif level == locking.LEVEL_NODE:
12389 # This will only lock the nodes in the group to be evacuated which
12390 # contain actual instances
12391 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12392 self._LockInstancesNodes()
12394 # Lock all nodes in group to be evacuated and target groups
12395 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12396 assert self.group_uuid in owned_groups
12397 member_nodes = [node_name
12398 for group in owned_groups
12399 for node_name in self.cfg.GetNodeGroup(group).members]
12400 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12402 def CheckPrereq(self):
12403 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12404 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12405 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12407 assert owned_groups.issuperset(self.req_target_uuids)
12408 assert self.group_uuid in owned_groups
12410 # Check if locked instances are still correct
12411 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12413 # Get instance information
12414 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12416 # Check if node groups for locked instances are still correct
12417 for instance_name in owned_instances:
12418 inst = self.instances[instance_name]
12419 assert owned_nodes.issuperset(inst.all_nodes), \
12420 "Instance %s's nodes changed while we kept the lock" % instance_name
12422 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12425 assert self.group_uuid in inst_groups, \
12426 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12428 if self.req_target_uuids:
12429 # User requested specific target groups
12430 self.target_uuids = self.req_target_uuids
12432 # All groups except the one to be evacuated are potential targets
12433 self.target_uuids = [group_uuid for group_uuid in owned_groups
12434 if group_uuid != self.group_uuid]
12436 if not self.target_uuids:
12437 raise errors.OpPrereqError("There are no possible target groups",
12438 errors.ECODE_INVAL)
12440 def BuildHooksEnv(self):
12441 """Build hooks env.
12445 "GROUP_NAME": self.op.group_name,
12446 "TARGET_GROUPS": " ".join(self.target_uuids),
12449 def BuildHooksNodes(self):
12450 """Build hooks nodes.
12453 mn = self.cfg.GetMasterNode()
12455 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12457 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12459 return (run_nodes, run_nodes)
12461 def Exec(self, feedback_fn):
12462 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12464 assert self.group_uuid not in self.target_uuids
12466 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12467 instances=instances, target_groups=self.target_uuids)
12469 ial.Run(self.op.iallocator)
12471 if not ial.success:
12472 raise errors.OpPrereqError("Can't compute group evacuation using"
12473 " iallocator '%s': %s" %
12474 (self.op.iallocator, ial.info),
12475 errors.ECODE_NORES)
12477 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12479 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12480 len(jobs), self.op.group_name)
12482 return ResultWithJobs(jobs)
12485 class TagsLU(NoHooksLU): # pylint: disable=W0223
12486 """Generic tags LU.
12488 This is an abstract class which is the parent of all the other tags LUs.
12491 def ExpandNames(self):
12492 self.group_uuid = None
12493 self.needed_locks = {}
12494 if self.op.kind == constants.TAG_NODE:
12495 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12496 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12497 elif self.op.kind == constants.TAG_INSTANCE:
12498 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12499 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12500 elif self.op.kind == constants.TAG_NODEGROUP:
12501 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12503 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12504 # not possible to acquire the BGL based on opcode parameters)
12506 def CheckPrereq(self):
12507 """Check prerequisites.
12510 if self.op.kind == constants.TAG_CLUSTER:
12511 self.target = self.cfg.GetClusterInfo()
12512 elif self.op.kind == constants.TAG_NODE:
12513 self.target = self.cfg.GetNodeInfo(self.op.name)
12514 elif self.op.kind == constants.TAG_INSTANCE:
12515 self.target = self.cfg.GetInstanceInfo(self.op.name)
12516 elif self.op.kind == constants.TAG_NODEGROUP:
12517 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12519 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12520 str(self.op.kind), errors.ECODE_INVAL)
12523 class LUTagsGet(TagsLU):
12524 """Returns the tags of a given object.
12529 def ExpandNames(self):
12530 TagsLU.ExpandNames(self)
12532 # Share locks as this is only a read operation
12533 self.share_locks = _ShareAll()
12535 def Exec(self, feedback_fn):
12536 """Returns the tag list.
12539 return list(self.target.GetTags())
12542 class LUTagsSearch(NoHooksLU):
12543 """Searches the tags for a given pattern.
12548 def ExpandNames(self):
12549 self.needed_locks = {}
12551 def CheckPrereq(self):
12552 """Check prerequisites.
12554 This checks the pattern passed for validity by compiling it.
12558 self.re = re.compile(self.op.pattern)
12559 except re.error, err:
12560 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12561 (self.op.pattern, err), errors.ECODE_INVAL)
12563 def Exec(self, feedback_fn):
12564 """Returns the tag list.
12568 tgts = [("/cluster", cfg.GetClusterInfo())]
12569 ilist = cfg.GetAllInstancesInfo().values()
12570 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12571 nlist = cfg.GetAllNodesInfo().values()
12572 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12573 tgts.extend(("/nodegroup/%s" % n.name, n)
12574 for n in cfg.GetAllNodeGroupsInfo().values())
12576 for path, target in tgts:
12577 for tag in target.GetTags():
12578 if self.re.search(tag):
12579 results.append((path, tag))
12583 class LUTagsSet(TagsLU):
12584 """Sets a tag on a given object.
12589 def CheckPrereq(self):
12590 """Check prerequisites.
12592 This checks the type and length of the tag name and value.
12595 TagsLU.CheckPrereq(self)
12596 for tag in self.op.tags:
12597 objects.TaggableObject.ValidateTag(tag)
12599 def Exec(self, feedback_fn):
12604 for tag in self.op.tags:
12605 self.target.AddTag(tag)
12606 except errors.TagError, err:
12607 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12608 self.cfg.Update(self.target, feedback_fn)
12611 class LUTagsDel(TagsLU):
12612 """Delete a list of tags from a given object.
12617 def CheckPrereq(self):
12618 """Check prerequisites.
12620 This checks that we have the given tag.
12623 TagsLU.CheckPrereq(self)
12624 for tag in self.op.tags:
12625 objects.TaggableObject.ValidateTag(tag)
12626 del_tags = frozenset(self.op.tags)
12627 cur_tags = self.target.GetTags()
12629 diff_tags = del_tags - cur_tags
12631 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12632 raise errors.OpPrereqError("Tag(s) %s not found" %
12633 (utils.CommaJoin(diff_names), ),
12634 errors.ECODE_NOENT)
12636 def Exec(self, feedback_fn):
12637 """Remove the tag from the object.
12640 for tag in self.op.tags:
12641 self.target.RemoveTag(tag)
12642 self.cfg.Update(self.target, feedback_fn)
12645 class LUTestDelay(NoHooksLU):
12646 """Sleep for a specified amount of time.
12648 This LU sleeps on the master and/or nodes for a specified amount of
12649 time.
12654 def ExpandNames(self):
12655 """Expand names and set required locks.
12657 This expands the node list, if any.
12660 self.needed_locks = {}
12661 if self.op.on_nodes:
12662 # _GetWantedNodes can be used here, but is not always appropriate to use
12663 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12664 # more information.
12665 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12666 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12668 def _TestDelay(self):
12669 """Do the actual sleep.
12672 if self.op.on_master:
12673 if not utils.TestDelay(self.op.duration):
12674 raise errors.OpExecError("Error during master delay test")
12675 if self.op.on_nodes:
12676 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12677 for node, node_result in result.items():
12678 node_result.Raise("Failure during rpc call to node %s" % node)
12680 def Exec(self, feedback_fn):
12681 """Execute the test delay opcode, with the wanted repetitions.
12684 if self.op.repeat == 0:
12687 top_value = self.op.repeat - 1
12688 for i in range(self.op.repeat):
12689 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12693 class LUTestJqueue(NoHooksLU):
12694 """Utility LU to test some aspects of the job queue.
12699 # Must be lower than default timeout for WaitForJobChange to see whether it
12700 # notices changed jobs
12701 _CLIENT_CONNECT_TIMEOUT = 20.0
12702 _CLIENT_CONFIRM_TIMEOUT = 60.0
12705 def _NotifyUsingSocket(cls, cb, errcls):
12706 """Opens a Unix socket and waits for another program to connect.
12709 @param cb: Callback to send socket name to client
12710 @type errcls: class
12711 @param errcls: Exception class to use for errors
12714 # Using a temporary directory as there's no easy way to create temporary
12715 # sockets without writing a custom loop around tempfile.mktemp and
12716 # socket.bind
12717 tmpdir = tempfile.mkdtemp()
12719 tmpsock = utils.PathJoin(tmpdir, "sock")
12721 logging.debug("Creating temporary socket at %s", tmpsock)
12722 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12727 # Send details to client
12730 # Wait for client to connect before continuing
12731 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12733 (conn, _) = sock.accept()
12734 except socket.error, err:
12735 raise errcls("Client didn't connect in time (%s)" % err)
12739 # Remove as soon as client is connected
12740 shutil.rmtree(tmpdir)
12742 # Wait for client to close
12745 # pylint: disable=E1101
12746 # Instance of '_socketobject' has no ... member
12747 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12749 except socket.error, err:
12750 raise errcls("Client failed to confirm notification (%s)" % err)
12754 def _SendNotification(self, test, arg, sockname):
12755 """Sends a notification to the client.
12758 @param test: Test name
12759 @param arg: Test argument (depends on test)
12760 @type sockname: string
12761 @param sockname: Socket path
12764 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12766 def _Notify(self, prereq, test, arg):
12767 """Notifies the client of a test.
12770 @param prereq: Whether this is a prereq-phase test
12772 @param test: Test name
12773 @param arg: Test argument (depends on test)
12777 errcls = errors.OpPrereqError
12779 errcls = errors.OpExecError
12781 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12785 def CheckArguments(self):
12786 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12787 self.expandnames_calls = 0
12789 def ExpandNames(self):
12790 checkargs_calls = getattr(self, "checkargs_calls", 0)
12791 if checkargs_calls < 1:
12792 raise errors.ProgrammerError("CheckArguments was not called")
12794 self.expandnames_calls += 1
12796 if self.op.notify_waitlock:
12797 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12799 self.LogInfo("Expanding names")
12801 # Get lock on master node (just to get a lock, not for a particular reason)
12802 self.needed_locks = {
12803 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12806 def Exec(self, feedback_fn):
12807 if self.expandnames_calls < 1:
12808 raise errors.ProgrammerError("ExpandNames was not called")
12810 if self.op.notify_exec:
12811 self._Notify(False, constants.JQT_EXEC, None)
12813 self.LogInfo("Executing")
12815 if self.op.log_messages:
12816 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12817 for idx, msg in enumerate(self.op.log_messages):
12818 self.LogInfo("Sending log message %s", idx + 1)
12819 feedback_fn(constants.JQT_MSGPREFIX + msg)
12820 # Report how many test messages have been sent
12821 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12824 raise errors.OpExecError("Opcode failure was requested")
12829 class IAllocator(object):
12830 """IAllocator framework.
12832 An IAllocator instance has three sets of attributes:
12833 - cfg that is needed to query the cluster
12834 - input data (all members of the _KEYS class attribute are required)
12835 - four buffer attributes (in|out_data|text), that represent the
12836 input (to the external script) in text and data structure format,
12837 and the output from it, again in two formats
12838 - the result variables from the script (success, info, nodes) for
12839 easy usage
12842 # pylint: disable=R0902
12843 # lots of instance attributes
12845 def __init__(self, cfg, rpc, mode, **kwargs):
12848 # init buffer variables
12849 self.in_text = self.out_text = self.in_data = self.out_data = None
12850 # init all input fields so that pylint is happy
12852 self.memory = self.disks = self.disk_template = None
12853 self.os = self.tags = self.nics = self.vcpus = None
12854 self.hypervisor = None
12855 self.relocate_from = None
12857 self.instances = None
12858 self.evac_mode = None
12859 self.target_groups = []
12861 self.required_nodes = None
12862 # init result fields
12863 self.success = self.info = self.result = None
12866 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12868 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12869 " IAllocator" % self.mode)
12871 keyset = [n for (n, _) in keydata]
12874 if key not in keyset:
12875 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12876 " IAllocator" % key)
12877 setattr(self, key, kwargs[key])
12880 if key not in kwargs:
12881 raise errors.ProgrammerError("Missing input parameter '%s' to"
12882 " IAllocator" % key)
12883 self._BuildInputData(compat.partial(fn, self), keydata)
12885 def _ComputeClusterData(self):
12886 """Compute the generic allocator input data.
12888 This is the data that is independent of the actual operation.
12892 cluster_info = cfg.GetClusterInfo()
12895 "version": constants.IALLOCATOR_VERSION,
12896 "cluster_name": cfg.GetClusterName(),
12897 "cluster_tags": list(cluster_info.GetTags()),
12898 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12899 # we don't have job IDs
12901 ninfo = cfg.GetAllNodesInfo()
12902 iinfo = cfg.GetAllInstancesInfo().values()
12903 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12906 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12908 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12909 hypervisor_name = self.hypervisor
12910 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12911 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12913 hypervisor_name = cluster_info.enabled_hypervisors[0]
12915 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12918 self.rpc.call_all_instances_info(node_list,
12919 cluster_info.enabled_hypervisors)
12921 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12923 config_ndata = self._ComputeBasicNodeData(ninfo)
12924 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12925 i_list, config_ndata)
12926 assert len(data["nodes"]) == len(ninfo), \
12927 "Incomplete node data computed"
12929 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12931 self.in_data = data
12934 def _ComputeNodeGroupData(cfg):
12935 """Compute node groups data.
12938 ng = dict((guuid, {
12939 "name": gdata.name,
12940 "alloc_policy": gdata.alloc_policy,
12942 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12947 def _ComputeBasicNodeData(node_cfg):
12948 """Compute global node data.
12951 @returns: a dict of name: (node dict, node config)
12954 # fill in static (config-based) values
12955 node_results = dict((ninfo.name, {
12956 "tags": list(ninfo.GetTags()),
12957 "primary_ip": ninfo.primary_ip,
12958 "secondary_ip": ninfo.secondary_ip,
12959 "offline": ninfo.offline,
12960 "drained": ninfo.drained,
12961 "master_candidate": ninfo.master_candidate,
12962 "group": ninfo.group,
12963 "master_capable": ninfo.master_capable,
12964 "vm_capable": ninfo.vm_capable,
12966 for ninfo in node_cfg.values())
12968 return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data, merging dynamic (RPC-collected) values
    into the static configuration data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
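
  # For every node that is neither offline nor drained, the static entry is
  # merged with the dynamic keys "total_memory", "reserved_memory",
  # "free_memory", "total_disk", "free_disk", "total_cpus", "i_pri_memory"
  # and "i_pri_up_memory"; offline or drained nodes keep only the static data.
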
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
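
  # Example of a single instance entry, with hypothetical values (the
  # "disk_space_total" key is filled in via _ComputeDiskSize above):
  #   "inst1.example.com": {"tags": [], "admin_up": True, "vcpus": 1,
  #                         "memory": 512, "os": "debian-image",
  #                         "nodes": ["node1", "node2"], "nics": [...],
  #                         "disks": [{"size": 1024, "mode": "rw"}],
  #                         "disk_template": "drbd", "hypervisor": "kvm"}
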
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
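
  # After this call self.in_data holds the cluster description plus a
  # "request" entry of the form {"type": <mode>, <mode-specific keys>}, and
  # self.in_text holds the serialized form that is handed to the external
  # iallocator script.
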
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other output fields.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
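
  # The script's reply is expected to be a serialized dict such as the
  # following hypothetical answer to an allocation request:
  #   {"success": True, "info": "allocation successful",
  #    "result": ["node1.example.com"]}
  # where "result" must also satisfy the mode's result validator
  # (ht.TList or _NEVAC_RESULT above).
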
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]
        result.add(group_name)

    return sorted(result)
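
  # Hypothetical example:
  #   _NodesToGroups({"node1": "uuid-a", "node2": "uuid-a"},
  #                  {"uuid-a": {"name": "default"}},
  #                  ["node1", "node2", "unknown-node"])
  # returns ["default"]: unknown nodes are skipped and groups that cannot be
  # resolved fall back to their UUID.
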

class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
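
  # With direction "in" only the generated allocator input text is returned;
  # with direction "out" the named allocator script is actually executed and
  # its raw output is returned without validation.
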

#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
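
# For example, _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class, while an unknown resource name raises OpPrereqError.
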